This commit is contained in:
Matthias Hochmeister
2026-03-13 19:42:01 +01:00
parent c174edbb0b
commit 37c719e983

View File

@@ -194,7 +194,7 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
if (fieldDump.pageSizeSet) {
log(`Set page size: ${fieldDump.pageSizeSet}`);
} else {
log('No page size field found — result may be paginated');
log('No page size field found — will paginate through all results');
}
// Use Promise.all to start waiting for navigation BEFORE triggering the submit,
// otherwise waitForLoadState resolves against the already-idle current page.
@@ -205,20 +205,110 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
log(`After form submit: ${frame.url()}`);
}
// Collect rows across all pages
type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];
const allRows: ParsedRow[] = [];
let pageNum = 1;
while (true) {
// Log tables found for diagnostics
const tableInfo = await frame.$$eval('table', (ts) =>
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
);
log(`Tables: ${tableInfo.join(', ') || 'none'}`);
log(`Page ${pageNum} tables: ${tableInfo.join(', ') || 'none'}`);
// The member table uses class FdcLayList
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
const pageRows = await parseRowsFromTable(frame);
log(`Page ${pageNum}: parsed ${pageRows.length} rows`);
for (const row of pageRows) {
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
}
allRows.push(...pageRows);
// Check pagination status from "Datensatz X-Y von Z" text
const pagination = await frame.evaluate(() => {
const nav = document.querySelector('table.FdcLayListNav');
return nav?.textContent?.trim() ?? '';
});
log(`Pagination: "${pagination}"`);
// Parse "Datensatz X-Y von Z" to check if more pages exist
const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
if (pagMatch) {
const to = parseInt(pagMatch[2], 10);
const total = parseInt(pagMatch[3], 10);
if (to >= total) {
log(`All ${total} records loaded across ${pageNum} page(s)`);
break;
}
log(`Loaded ${to} of ${total} — navigating to next page`);
} else {
// No pagination indicator found — assume single page
log('No pagination indicator found — assuming single page');
break;
}
// Click the "next page" link in FdcLayListNav
// FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page
const nextClicked = await frame.evaluate(() => {
const nav = document.querySelector('table.FdcLayListNav');
if (!nav) return false;
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
// Look for a next-page indicator: text exactly ">" or ">>", a title containing "nächst"/"weiter"/"next", or an alt containing "next"/"weiter"
for (const el of links) {
const text = ((el as HTMLElement).textContent ?? '').trim();
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') ||
title.includes('next') || alt.includes('next') || alt.includes('weiter')) {
(el as HTMLElement).click();
return true;
}
}
return false;
});
if (!nextClicked) {
log('WARN: could not find next-page link — stopping pagination');
break;
}
await frame.waitForLoadState('networkidle', { timeout: 30000 });
pageNum++;
}
log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`);
const members: FdiskMember[] = [];
for (const row of allRows) {
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
continue;
}
const abmeldedatum = parseDate(row.abmeldedatum);
members.push({
standesbuchNr: row.standesbuchNr,
dienstgrad: row.dienstgrad,
vorname: row.vorname,
zuname: row.zuname,
geburtsdatum: parseDate(row.geburtsdatum),
svnr: row.svnr || null,
eintrittsdatum: parseDate(row.eintrittsdatum),
abmeldedatum,
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
detailUrl: row.href,
});
}
return members;
}
async function parseRowsFromTable(frame: Frame) {
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
// Each <td> contains an <a title="value"> — the title is the clean cell text.
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
trs.map((tr) => {
const cells = Array.from(tr.querySelectorAll('td'));
const val = (i: number) => {
@@ -242,33 +332,6 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
};
}),
);
log(`Parsed ${rows.length} rows from member table`);
for (const row of rows) {
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
}
const members: FdiskMember[] = [];
for (const row of rows) {
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
continue;
}
const abmeldedatum = parseDate(row.abmeldedatum);
members.push({
standesbuchNr: row.standesbuchNr,
dienstgrad: row.dienstgrad,
vorname: row.vorname,
zuname: row.zuname,
geburtsdatum: parseDate(row.geburtsdatum),
svnr: row.svnr || null,
eintrittsdatum: parseDate(row.eintrittsdatum),
abmeldedatum,
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
detailUrl: row.href,
});
}
return members;
}
async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {