update
This commit is contained in:
@@ -236,6 +236,7 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
// Parse "Datensatz X-Y von Z" to check if more pages exist
|
||||
const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
|
||||
if (pagMatch) {
|
||||
const from = parseInt(pagMatch[1], 10);
|
||||
const to = parseInt(pagMatch[2], 10);
|
||||
const total = parseInt(pagMatch[3], 10);
|
||||
if (to >= total) {
|
||||
@@ -243,46 +244,63 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
break;
|
||||
}
|
||||
log(`Loaded ${to} of ${total} — navigating to next page`);
|
||||
|
||||
// Calculate next page number to use as a fallback click target
|
||||
const pageSize = to - from + 1;
|
||||
const nextPageNum = Math.floor(to / pageSize) + 1;
|
||||
|
||||
// Click the "next page" link in FdcLayListNav.
|
||||
// Strategy 1: text ">" or ">>" (common in FDISK)
|
||||
// Strategy 2: title/alt containing navigation words
|
||||
// Strategy 3: link text is the next page number (e.g. "2" when on page 1)
|
||||
const nextClicked = await frame.evaluate((nextPg: number) => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
if (!nav) return false;
|
||||
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
|
||||
|
||||
for (const el of links) {
|
||||
const text = ((el as HTMLElement).textContent ?? '').trim();
|
||||
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
|
||||
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
|
||||
if (text === '>' || text === '>>' ||
|
||||
title.includes('nächst') || title.includes('weiter') || title.includes('next') || title.includes('vor') ||
|
||||
alt.includes('next') || alt.includes('weiter') || alt.includes('vor')) {
|
||||
(el as HTMLElement).click();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: find a link whose text is exactly the next page number
|
||||
for (const el of links) {
|
||||
const text = ((el as HTMLElement).textContent ?? '').trim();
|
||||
if (text === String(nextPg)) {
|
||||
(el as HTMLElement).click();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}, nextPageNum);
|
||||
|
||||
if (!nextClicked) {
|
||||
// Dump nav HTML to help diagnose the missing next-page link
|
||||
const navHtml = await frame.evaluate(() => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
return nav?.innerHTML?.replace(/\s+/g, ' ').trim() ?? '(not found)';
|
||||
});
|
||||
log(`WARN: could not find next-page link (tried ">" and page "${nextPageNum}") — stopping pagination`);
|
||||
log(`FdcLayListNav HTML: ${navHtml}`);
|
||||
break;
|
||||
}
|
||||
|
||||
await frame.waitForLoadState('networkidle', { timeout: 30000 });
|
||||
pageNum++;
|
||||
} else {
|
||||
// No pagination indicator found — assume single page
|
||||
log('No pagination indicator found — assuming single page');
|
||||
break;
|
||||
}
|
||||
|
||||
// Click the "next page" link in FdcLayListNav
|
||||
// FDISK uses __doPostBack links; find an <a> or <input> pointing to the next page
|
||||
const nextClicked = await frame.evaluate(() => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
if (!nav) return false;
|
||||
const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]'));
|
||||
// Look for next-page indicator: ">" alone, ">>" alone, or title/alt "weiter"/"next"
|
||||
for (const el of links) {
|
||||
const text = ((el as HTMLElement).textContent ?? '').trim();
|
||||
const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase();
|
||||
const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase();
|
||||
if (text === '>' || text === '>>' || title.includes('nächst') || title.includes('weiter') ||
|
||||
title.includes('next') || alt.includes('next') || alt.includes('weiter')) {
|
||||
(el as HTMLElement).click();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
if (!nextClicked) {
|
||||
// Dump nav HTML to help diagnose the missing next-page link
|
||||
const navHtml = await frame.evaluate(() => {
|
||||
const nav = document.querySelector('table.FdcLayListNav');
|
||||
return nav?.innerHTML?.replace(/\s+/g, ' ').trim() ?? '(not found)';
|
||||
});
|
||||
log(`WARN: could not find next-page link — stopping pagination`);
|
||||
log(`FdcLayListNav HTML: ${navHtml}`);
|
||||
break;
|
||||
}
|
||||
|
||||
await frame.waitForLoadState('networkidle', { timeout: 30000 });
|
||||
pageNum++;
|
||||
}
|
||||
} // end while
|
||||
|
||||
log(`Parsed ${allRows.length} rows total across ${pageNum} page(s)`);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user