diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index 1f9ab3b..45d681a 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -159,7 +159,9 @@ async function scrapeMembers(frame: Frame): Promise { el.value = ''; cleared.push(el.name || el.id); } - // Maximize page size: look for a select that controls rows per page + // Maximize page size: look for a select AND its paired hidden input + // FDISK uses a custom Dd widget where or similar. if ((name.includes('anzahl') || id.includes('anzahl') || name.includes('pagesize') || id.includes('pagesize') || name.includes('rows') || id.includes('rows')) && @@ -178,6 +180,16 @@ async function scrapeMembers(frame: Frame): Promise { if (bestOption) { select.value = bestOption.value; pageSizeSet = `${el.name || el.id}=${bestOption.value}`; + // Also update the paired hidden field used by the Dd custom widget. + // Common patterns: xDd_dd → xDd_id or xDd_hd + const baseName = (el.name || el.id).replace(/_dd$/i, ''); + for (const suffix of ['_id', '_hd', '_val']) { + const hidden = form.elements[baseName + suffix] as HTMLInputElement | undefined; + if (hidden && hidden.type === 'hidden') { + hidden.value = bestOption.value; + pageSizeSet += ` (also set ${baseName + suffix})`; + } + } } } } @@ -249,51 +261,76 @@ async function scrapeMembers(frame: Frame): Promise { const pageSize = to - from + 1; const nextPageNum = Math.floor(to / pageSize) + 1; - // Click the "next page" link in FdcLayListNav. - // Strategy 1: text ">" or ">>" (common in FDISK) - // Strategy 2: title/alt containing navigation words - // Strategy 3: link text is the next page number (e.g. "2" when on page 1) - const nextClicked = await frame.evaluate((nextPg: number) => { - const nav = document.querySelector('table.FdcLayListNav'); - if (!nav) return false; - const links = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"]')); + // Click the "next page" control in FdcLayListNav. + // FDISK renders pagination as plain inside (no wrappers). + // Use Playwright's click() which properly triggers JS event listeners attached via addEventListener. + // Try in order: b_next img → b_last img → any with ">" text → page-number link. + let nextClicked = false; - for (const el of links) { - const text = ((el as HTMLElement).textContent ?? '').trim(); - const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase(); - const alt = ((el as HTMLImageElement).alt ?? '').toLowerCase(); - if (text === '>' || text === '>>' || - title.includes('nächst') || title.includes('weiter') || title.includes('next') || title.includes('vor') || - alt.includes('next') || alt.includes('weiter') || alt.includes('vor')) { - (el as HTMLElement).click(); - return true; + const nextImg = frame.locator('table.FdcLayListNav td.Right img[src*="b_next"]'); + if (await nextImg.count() > 0) { + await nextImg.first().click({ timeout: 5000 }).catch(() => {}); + nextClicked = true; + } else { + // Fallback via evaluate for text/title/page-number patterns + const clicked = await frame.evaluate((nextPg: number) => { + const nav = document.querySelector('table.FdcLayListNav'); + if (!nav) return false; + const clickable = Array.from(nav.querySelectorAll('a, input[type="button"], input[type="submit"], td')); + for (const el of clickable) { + const text = ((el as HTMLElement).textContent ?? '').trim(); + const title = ((el as HTMLElement).getAttribute('title') ?? '').toLowerCase(); + const onclick = ((el as HTMLElement).getAttribute('onclick') ?? '').toLowerCase(); + if (text === '>' || text === '>>' || + title.includes('nächst') || title.includes('weiter') || title.includes('next') || + onclick.includes('next') || onclick.includes('weiter') || + text === String(nextPg)) { + (el as HTMLElement).click(); + return true; + } } - } - - // Fallback: find a link whose text is exactly the next page number - for (const el of links) { - const text = ((el as HTMLElement).textContent ?? '').trim(); - if (text === String(nextPg)) { - (el as HTMLElement).click(); - return true; - } - } - - return false; - }, nextPageNum); + return false; + }, nextPageNum); + nextClicked = clicked; + } if (!nextClicked) { - // Dump nav HTML to help diagnose the missing next-page link const navHtml = await frame.evaluate(() => { const nav = document.querySelector('table.FdcLayListNav'); return nav?.innerHTML?.replace(/\s+/g, ' ').trim() ?? '(not found)'; }); - log(`WARN: could not find next-page link (tried ">" and page "${nextPageNum}") — stopping pagination`); + log(`WARN: could not find next-page link — stopping pagination`); log(`FdcLayListNav HTML: ${navHtml}`); break; } await frame.waitForLoadState('networkidle', { timeout: 30000 }); + + // Verify we actually moved to the next page — if pagination didn't advance, stop + const newPagination = await frame.evaluate(() => + document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? '' + ); + if (newPagination === pagination) { + log(`WARN: pagination did not advance after click (still "${pagination}") — stopping`); + // Try clicking the Right td directly as a last resort + const tdClicked = await frame.evaluate(() => { + const td = document.querySelector('table.FdcLayListNav td.Right') as HTMLElement | null; + if (td) { td.click(); return true; } + return false; + }); + if (tdClicked) { + await frame.waitForLoadState('networkidle', { timeout: 30000 }); + const afterTdPagination = await frame.evaluate(() => + document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? '' + ); + if (afterTdPagination === pagination) { + log('WARN: td.Right click also did not advance — pagination is disabled, stopping'); + break; + } + } else { + break; + } + } pageNum++; } else { // No pagination indicator found — assume single page