update
This commit is contained in:
@@ -194,7 +194,7 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
|||||||
if (fieldDump.pageSizeSet) {
|
if (fieldDump.pageSizeSet) {
|
||||||
log(`Set page size: ${fieldDump.pageSizeSet}`);
|
log(`Set page size: ${fieldDump.pageSizeSet}`);
|
||||||
} else {
|
} else {
|
||||||
log('No page size field found — result may be paginated');
|
log('No page size field found — will paginate through all results');
|
||||||
}
|
}
|
||||||
// Use Promise.all to start waiting for navigation BEFORE triggering the submit,
|
// Use Promise.all to start waiting for navigation BEFORE triggering the submit,
|
||||||
// otherwise waitForLoadState resolves against the already-idle current page.
|
// otherwise waitForLoadState resolves against the already-idle current page.
|
||||||
@@ -205,20 +205,110 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
|||||||
log(`After form submit: ${frame.url()}`);
|
log(`After form submit: ${frame.url()}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect rows across all pages
|
||||||
|
type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];
|
||||||
|
const allRows: ParsedRow[] = [];
|
||||||
|
let pageNum = 1;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
// Log tables found for diagnostics
|
// Log tables found for diagnostics
|
||||||
const tableInfo = await frame.$$eval('table', (ts) =>
|
const tableInfo = await frame.$$eval('table', (ts) =>
|
||||||
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
|
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
|
||||||
);
|
);
|
||||||
log(`Tables: ${tableInfo.join(', ') || 'none'}`);
|
log(`Page ${pageNum} tables: ${tableInfo.join(', ') || 'none'}`);
|
||||||
|
|
||||||
// The member table uses class FdcLayList
|
|
||||||
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||||
|
|
||||||
|
const pageRows = await parseRowsFromTable(frame);
|
||||||
|
log(`Page ${pageNum}: parsed ${pageRows.length} rows`);
|
||||||
|
for (const row of pageRows) {
|
||||||
|
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
|
||||||
|
}
|
||||||
|
allRows.push(...pageRows);
|
||||||
|
|
||||||
|
// Check pagination status from "Datensatz X-Y von Z" text
|
||||||
|
const pagination = await frame.evaluate(() => {
|
||||||
|
const nav = document.querySelector('table.FdcLayListNav');
|
||||||
|
return nav?.textContent?.trim() ?? '';
|
||||||
|
});
|
||||||
|
log(`Pagination: "${pagination}"`);
|
||||||
|
|
||||||
|
// Parse "Datensatz X-Y von Z" to check if more pages exist
|
||||||
|
const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
|
||||||
|
if (pagMatch) {
|
||||||
|
const to = parseInt(pagMatch[2], 10);
|
||||||
|
const total = parseInt(pagMatch[3], 10);
|
||||||
|
if (to >= total) {
|
||||||
|
log(`All ${total} records loaded across ${pageNum} page(s)`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
log(`Loaded ${to} of ${total} — navigating to next page`);
|
||||||
|
} else {
|
||||||
|
// No pagination indicator found — assume single page
|
||||||
|
log('No pagination indicator found — assuming single page');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Click the "next page" link in FdcLayListNav
|
||||||
|
// FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page
|
||||||
|
const nextClicked = await frame.evaluate(() => {
|
||||||
|
const nav = document.querySelector('table.FdcLayListNav');
|
||||||
|
if (!nav) return false;
|
||||||
|
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
|
||||||
|
// Look for next-page indicator: ">" alone, ">>" alone, or title/alt "weiter"/"next"
|
||||||
|
for (const el of links) {
|
||||||
|
const text = ((el as HTMLElement).textContent ?? '').trim();
|
||||||
|
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
|
||||||
|
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
|
||||||
|
if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') ||
|
||||||
|
title.includes('next') || alt.includes('next') || alt.includes('weiter')) {
|
||||||
|
(el as HTMLElement).click();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!nextClicked) {
|
||||||
|
log('WARN: could not find next-page link — stopping pagination');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
await frame.waitForLoadState('networkidle', { timeout: 30000 });
|
||||||
|
pageNum++;
|
||||||
|
}
|
||||||
|
|
||||||
|
log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`);
|
||||||
|
|
||||||
|
const members: FdiskMember[] = [];
|
||||||
|
for (const row of allRows) {
|
||||||
|
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
|
||||||
|
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const abmeldedatum = parseDate(row.abmeldedatum);
|
||||||
|
members.push({
|
||||||
|
standesbuchNr: row.standesbuchNr,
|
||||||
|
dienstgrad: row.dienstgrad,
|
||||||
|
vorname: row.vorname,
|
||||||
|
zuname: row.zuname,
|
||||||
|
geburtsdatum: parseDate(row.geburtsdatum),
|
||||||
|
svnr: row.svnr || null,
|
||||||
|
eintrittsdatum: parseDate(row.eintrittsdatum),
|
||||||
|
abmeldedatum,
|
||||||
|
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||||
|
detailUrl: row.href,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return members;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function parseRowsFromTable(frame: Frame) {
|
||||||
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
|
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
|
||||||
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
|
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
|
||||||
// Each <td> contains an <a title="value"> — the title is the clean cell text.
|
// Each <td> contains an <a title="value"> — the title is the clean cell text.
|
||||||
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
|
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
|
||||||
const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
|
return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
|
||||||
trs.map((tr) => {
|
trs.map((tr) => {
|
||||||
const cells = Array.from(tr.querySelectorAll('td'));
|
const cells = Array.from(tr.querySelectorAll('td'));
|
||||||
const val = (i: number) => {
|
const val = (i: number) => {
|
||||||
@@ -242,33 +332,6 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
|||||||
};
|
};
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
log(`Parsed ${rows.length} rows from member table`);
|
|
||||||
for (const row of rows) {
|
|
||||||
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const members: FdiskMember[] = [];
|
|
||||||
for (const row of rows) {
|
|
||||||
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
|
|
||||||
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const abmeldedatum = parseDate(row.abmeldedatum);
|
|
||||||
members.push({
|
|
||||||
standesbuchNr: row.standesbuchNr,
|
|
||||||
dienstgrad: row.dienstgrad,
|
|
||||||
vorname: row.vorname,
|
|
||||||
zuname: row.zuname,
|
|
||||||
geburtsdatum: parseDate(row.geburtsdatum),
|
|
||||||
svnr: row.svnr || null,
|
|
||||||
eintrittsdatum: parseDate(row.eintrittsdatum),
|
|
||||||
abmeldedatum,
|
|
||||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
|
||||||
detailUrl: row.href,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return members;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
||||||
|
|||||||
Reference in New Issue
Block a user