update
This commit is contained in:
@@ -194,7 +194,7 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
if (fieldDump.pageSizeSet) {
|
||||
log(`Set page size: ${fieldDump.pageSizeSet}`);
|
||||
} else {
|
||||
log('No page size field found — result may be paginated');
|
||||
log('No page size field found — will paginate through all results');
|
||||
}
|
||||
// Use Promise.all to start waiting for navigation BEFORE triggering the submit,
|
||||
// otherwise waitForLoadState resolves against the already-idle current page.
|
||||
@@ -205,20 +205,110 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
log(`After form submit: ${frame.url()}`);
|
||||
}
|
||||
|
||||
// Collect rows across all pages
|
||||
type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];
|
||||
const allRows: ParsedRow[] = [];
|
||||
let pageNum = 1;
|
||||
|
||||
while (true) {
|
||||
// Log tables found for diagnostics
|
||||
const tableInfo = await frame.$$eval('table', (ts) =>
|
||||
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
|
||||
);
|
||||
log(`Tables: ${tableInfo.join(', ') || 'none'}`);
|
||||
log(`Page ${pageNum} tables: ${tableInfo.join(', ') || 'none'}`);
|
||||
|
||||
// The member table uses class FdcLayList
|
||||
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||
|
||||
const pageRows = await parseRowsFromTable(frame);
|
||||
log(`Page ${pageNum}: parsed ${pageRows.length} rows`);
|
||||
for (const row of pageRows) {
|
||||
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
|
||||
}
|
||||
allRows.push(...pageRows);
|
||||
|
||||
// Check pagination status from "Datensatz X-Y von Z" text
|
||||
const pagination = await frame.evaluate(() => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
return nav?.textContent?.trim() ?? '';
|
||||
});
|
||||
log(`Pagination: "${pagination}"`);
|
||||
|
||||
// Parse "Datensatz X-Y von Z" to check if more pages exist
|
||||
const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
|
||||
if (pagMatch) {
|
||||
const to = parseInt(pagMatch[2], 10);
|
||||
const total = parseInt(pagMatch[3], 10);
|
||||
if (to >= total) {
|
||||
log(`All ${total} records loaded across ${pageNum} page(s)`);
|
||||
break;
|
||||
}
|
||||
log(`Loaded ${to} of ${total} — navigating to next page`);
|
||||
} else {
|
||||
// No pagination indicator found — assume single page
|
||||
log('No pagination indicator found — assuming single page');
|
||||
break;
|
||||
}
|
||||
|
||||
// Click the "next page" link in FdcLayListNav
|
||||
// FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page
|
||||
const nextClicked = await frame.evaluate(() => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
if (!nav) return false;
|
||||
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
|
||||
// Look for a next-page indicator: text that is exactly ">" or ">>", a title containing "nächst", "weiter" or "next", or alt text containing "next" or "weiter"
|
||||
for (const el of links) {
|
||||
const text = ((el as HTMLElement).textContent ?? '').trim();
|
||||
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
|
||||
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
|
||||
if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') ||
|
||||
title.includes('next') || alt.includes('next') || alt.includes('weiter')) {
|
||||
(el as HTMLElement).click();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
if (!nextClicked) {
|
||||
log('WARN: could not find next-page link — stopping pagination');
|
||||
break;
|
||||
}
|
||||
|
||||
await frame.waitForLoadState('networkidle', { timeout: 30000 });
|
||||
pageNum++;
|
||||
}
|
||||
|
||||
log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`);
|
||||
|
||||
const members: FdiskMember[] = [];
|
||||
for (const row of allRows) {
|
||||
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
|
||||
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
|
||||
continue;
|
||||
}
|
||||
const abmeldedatum = parseDate(row.abmeldedatum);
|
||||
members.push({
|
||||
standesbuchNr: row.standesbuchNr,
|
||||
dienstgrad: row.dienstgrad,
|
||||
vorname: row.vorname,
|
||||
zuname: row.zuname,
|
||||
geburtsdatum: parseDate(row.geburtsdatum),
|
||||
svnr: row.svnr || null,
|
||||
eintrittsdatum: parseDate(row.eintrittsdatum),
|
||||
abmeldedatum,
|
||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||
detailUrl: row.href,
|
||||
});
|
||||
}
|
||||
return members;
|
||||
}
|
||||
|
||||
async function parseRowsFromTable(frame: Frame) {
|
||||
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
|
||||
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
|
||||
// Each <td> contains an <a title="value"> — the title is the clean cell text.
|
||||
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
|
||||
const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
|
||||
return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
|
||||
trs.map((tr) => {
|
||||
const cells = Array.from(tr.querySelectorAll('td'));
|
||||
const val = (i: number) => {
|
||||
@@ -242,33 +332,6 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
};
|
||||
}),
|
||||
);
|
||||
|
||||
log(`Parsed ${rows.length} rows from member table`);
|
||||
for (const row of rows) {
|
||||
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}"`);
|
||||
}
|
||||
|
||||
const members: FdiskMember[] = [];
|
||||
for (const row of rows) {
|
||||
if (!row.standesbuchNr || !row.vorname || !row.zuname) {
|
||||
log(` SKIP: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" — missing required field`);
|
||||
continue;
|
||||
}
|
||||
const abmeldedatum = parseDate(row.abmeldedatum);
|
||||
members.push({
|
||||
standesbuchNr: row.standesbuchNr,
|
||||
dienstgrad: row.dienstgrad,
|
||||
vorname: row.vorname,
|
||||
zuname: row.zuname,
|
||||
geburtsdatum: parseDate(row.geburtsdatum),
|
||||
svnr: row.svnr || null,
|
||||
eintrittsdatum: parseDate(row.eintrittsdatum),
|
||||
abmeldedatum,
|
||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||
detailUrl: row.href,
|
||||
});
|
||||
}
|
||||
return members;
|
||||
}
|
||||
|
||||
async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
||||
|
||||
Reference in New Issue
Block a user