This commit is contained in:
Matthias Hochmeister
2026-03-13 20:02:46 +01:00
parent 1b1a53cd8f
commit f5d1f7b061
3 changed files with 138 additions and 56 deletions

View File

@@ -236,6 +236,7 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
// Parse "Datensatz X-Y von Z" to check if more pages exist
const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
if (pagMatch) {
const from = parseInt(pagMatch[1], 10);
const to = parseInt(pagMatch[2], 10);
const total = parseInt(pagMatch[3], 10);
if (to >= total) {
@@ -243,46 +244,63 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
break;
}
log(`Loaded ${to} of ${total} — navigating to next page`);
// Derive the next page number from the "from-to" range so it can be used as a
// fallback click target. The `to >= total` case has already left the loop above,
// so this presumably runs only on full pages, where `to / pageSize` is the
// current page number.
const pageSize = to - from + 1;
const nextPageNum = Math.floor(to / pageSize) + 1;
// Click the "next page" control inside table.FdcLayListNav. The callback runs in
// the page context, so every helper it needs is defined inside it.
//   Strategy 1: link text is exactly ">" or ">>"
//   Strategy 2: title/alt contains a forward-navigation word
//   Strategy 3: link text is exactly the next page number (e.g. "2" on page 1)
// Resolves to true when something was clicked, false otherwise.
const nextClicked = await frame.evaluate((nextPg: number) => {
  const nav = document.querySelector('table.FdcLayListNav');
  if (!nav) return false;
  const links = Array.from(
    nav.querySelectorAll<HTMLElement>('a, input[type="button"], input[type="submit"]'),
  );

  // "vor" (forward) is also a substring of "vorherige"/"vorige" (previous); a bare
  // substring test would click the previous-page link, so those are excluded.
  const isForwardLabel = (label: string): boolean =>
    label.includes('nächst') ||
    label.includes('weiter') ||
    label.includes('next') ||
    (label.includes('vor') && !label.includes('vorherig') && !label.includes('vorig'));

  // The selector only yields <a>/<input>, which rarely carry `alt` themselves;
  // image links keep it on a nested <img>, so look there as well.
  const altOf = (el: Element): string =>
    (el.getAttribute('alt') ?? el.querySelector('img')?.getAttribute('alt') ?? '').toLowerCase();

  const textOf = (el: Element): string => (el.textContent ?? '').trim();

  const target =
    links.find((el) => {
      const text = textOf(el);
      const title = (el.getAttribute('title') ?? '').toLowerCase();
      return text === '>' || text === '>>' || isForwardLabel(title) || isForwardLabel(altOf(el));
    }) ??
    // Fallback: a link whose text is exactly the next page number.
    links.find((el) => textOf(el) === String(nextPg));

  if (!target) return false;
  target.click();
  return true;
}, nextPageNum);
if (!nextClicked) {
  // No strategy produced a click: capture the pager markup (whitespace collapsed)
  // so the log shows what the navigation table actually contains, then stop paging.
  const navMarkup = await frame.evaluate(() => {
    const navTable = document.querySelector('table.FdcLayListNav');
    const raw = navTable?.innerHTML;
    return raw == null ? '(not found)' : raw.replace(/\s+/g, ' ').trim();
  });
  log(`WARN: could not find next-page link (tried ">" and page "${nextPageNum}") — stopping pagination`);
  log(`FdcLayListNav HTML: ${navMarkup}`);
  break;
}
await frame.waitForLoadState('networkidle', { timeout: 30000 });
pageNum++;
} else {
// No pagination indicator found — assume single page
log('No pagination indicator found — assuming single page');
break;
}
// NOTE(review): this section looks like the pre-change side of the diff — it
// declares `nextClicked` again and has no page-number fallback. Confirm which
// variant is actually live before relying on it.
// Click the "next page" link in FdcLayListNav
// FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page
// The callback runs in the page context and returns true if an element was clicked.
const nextClicked = await frame.evaluate(() => {
const nav = document.querySelector('table.FdcLayListNav');
// No navigation table on the page: nothing to click.
if (!nav) return false;
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
// Look for next-page indicator: ">" alone, ">>" alone, or title/alt "weiter"/"next"
for (const el of links) {
const text = ((el as HTMLElement).textContent ?? '').trim();
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
// The selector above yields only <a>/<input> elements; `alt` is read from the
// matched element itself, not from any nested <img>.
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') ||
title.includes('next') || alt.includes('next') || alt.includes('weiter')) {
// First match in DOM order wins.
(el as HTMLElement).click();
return true;
}
}
return false;
});
if (!nextClicked) {
// Dump nav HTML to help diagnose the missing next-page link
const navHtml = await frame.evaluate(() => {
const nav = document.querySelector('table.FdcLayListNav');
// Collapse whitespace runs so the markup fits on one log line.
return nav?.innerHTML?.replace(/\s+/g, ' ').trim() ?? '(not found)';
});
log(`WARN: could not find next-page link — stopping pagination`);
log(`FdcLayListNav HTML: ${navHtml}`);
break;
}
// Wait (up to 30 s) for the frame to reach network idle after the click.
// NOTE(review): the click happens inside the page; confirm this wait cannot
// resolve before the postback request has started.
await frame.waitForLoadState('networkidle', { timeout: 30000 });
pageNum++;
}
} // end while
log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`);