diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index cccf1d7..c71ca74 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -194,7 +194,7 @@ async function scrapeMembers(frame: Frame): Promise { if (fieldDump.pageSizeSet) { log(`Set page size: ${fieldDump.pageSizeSet}`); } else { - log('No page size field found — result may be paginated'); + log('No page size field found — will paginate through all results'); } // Use Promise.all to start waiting for navigation BEFORE triggering the submit, // otherwise waitForLoadState resolves against the already-idle current page. @@ -205,20 +205,110 @@ async function scrapeMembers(frame: Frame): Promise { log(`After form submit: ${frame.url()}`); } - // Log tables found for diagnostics - const tableInfo = await frame.$$eval('table', (ts) => - ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`), - ); - log(`Tables: ${tableInfo.join(', ') || 'none'}`); + // Collect rows across all pages + type ParsedRow = Awaited>[number]; + const allRows: ParsedRow[] = []; + let pageNum = 1; - // The member table uses class FdcLayList - await frame.waitForSelector('table.FdcLayList', { timeout: 20000 }); + while (true) { + // Log tables found for diagnostics + const tableInfo = await frame.$$eval('table', (ts) => + ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`), + ); + log(`Page ${pageNum} tables: ${tableInfo.join(', ') || 'none'}`); + await frame.waitForSelector('table.FdcLayList', { timeout: 20000 }); + + const pageRows = await parseRowsFromTable(frame); + log(`Page ${pageNum}: parsed ${pageRows.length} rows`); + for (const row of pageRows) { + log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`); + } + allRows.push(...pageRows); + + // Check pagination status from "Datensatz X-Y von Z" text + const pagination = await frame.evaluate(() => { + const nav = 
document.querySelector('table.FdcLayListNav'); + return nav?.textContent?.trim() ?? ''; + }); + log(`Pagination: "${pagination}"`); + + // Parse "Datensatz X-Y von Z" to check if more pages exist + const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i); + if (pagMatch) { + const to = parseInt(pagMatch[2], 10); + const total = parseInt(pagMatch[3], 10); + if (to >= total) { + log(`All ${total} records loaded across ${pageNum} page(s)`); + break; + } + log(`Loaded ${to} of ${total} — navigating to next page`); + } else { + // No pagination indicator found — assume single page + log('No pagination indicator found — assuming single page'); + break; + } + + // Click the "next page" link in FdcLayListNav + // FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page + const nextClicked = await frame.evaluate(() => { + const nav = document.querySelector('table.FdcLayListNav'); + if (!nav) return false; + const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]')); + // Look for next-page indicator: ">" alone, ">>" alone, or title/alt "weiter"/"next" + for (const el of links) { + const text = ((el as HTMLElement).textContent ?? '').trim(); + const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase(); + const alt = ((el as HTMLImageElement).alt ?? 
'').toLowerCase(); + if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') || + title.includes('next') || alt.includes('next') || alt.includes('weiter')) { + (el as HTMLElement).click(); + return true; + } + } + return false; + }); + + if (!nextClicked) { + log('WARN: could not find next-page link — stopping pagination'); + break; + } + + await frame.waitForLoadState('networkidle', { timeout: 30000 }); + pageNum++; + } + + log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`); + + const members: FdiskMember[] = []; + for (const row of allRows) { + if (!row.standesbuchNr || !row.vorname || !row.zuname) { + log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`); + continue; + } + const abmeldedatum = parseDate(row.abmeldedatum); + members.push({ + standesbuchNr: row.standesbuchNr, + dienstgrad: row.dienstgrad, + vorname: row.vorname, + zuname: row.zuname, + geburtsdatum: parseDate(row.geburtsdatum), + svnr: row.svnr || null, + eintrittsdatum: parseDate(row.eintrittsdatum), + abmeldedatum, + status: abmeldedatum ? 'ausgetreten' : 'aktiv', + detailUrl: row.href, + }); + } + return members; +} + +async function parseRowsFromTable(frame: Frame) { // Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad, // 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon // Each <td> contains an <a title="..."> — the title is the clean cell text. // The href on each <a> is the member detail URL (same link repeated across all cells in a row). 
- const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) => + return frame.$$eval('table.FdcLayList tbody tr', (trs) => trs.map((tr) => { const cells = Array.from(tr.querySelectorAll('td')); const val = (i: number) => { @@ -242,33 +332,6 @@ async function scrapeMembers(frame: Frame): Promise { }; }), ); - - log(`Parsed ${rows.length} rows from member table`); - for (const row of rows) { - log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`); - } - - const members: FdiskMember[] = []; - for (const row of rows) { - if (!row.standesbuchNr || !row.vorname || !row.zuname) { - log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`); - continue; - } - const abmeldedatum = parseDate(row.abmeldedatum); - members.push({ - standesbuchNr: row.standesbuchNr, - dienstgrad: row.dienstgrad, - vorname: row.vorname, - zuname: row.zuname, - geburtsdatum: parseDate(row.geburtsdatum), - svnr: row.svnr || null, - eintrittsdatum: parseDate(row.eintrittsdatum), - abmeldedatum, - status: abmeldedatum ? 'ausgetreten' : 'aktiv', - detailUrl: row.href, - }); - } - return members; } async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise {