import { chromium, Page, Frame } from '@playwright/test';
import {
  FdiskMember,
  FdiskAusbildung,
  FdiskBefoerderung,
  FdiskUntersuchung,
  FdiskFahrgenehmigung,
} from './types';

// Deployment settings; each value can be overridden through the environment.
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';

const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
const MEMBERS_URL = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/meine_Mitglieder.aspx`;

/** Write a timestamped, `[scraper]`-prefixed line to stdout. */
function log(msg: string) {
  console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
}

/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 * Returns null if empty or unparseable.
 *
 * Only the textual shape is checked (two digits, two digits, four digits);
 * calendar validity is not verified.
 */
function parseDate(raw: string | null | undefined): string | null {
  // An empty or whitespace-only input simply fails the pattern below.
  const match = (raw ?? '').trim().match(/^(\d{2})\.(\d{2})\.(\d{4})$/);
  if (!match) return null;
  const [, day, month, year] = match;
  return `${year}-${month}-${day}`;
}

/**
 * Extract text content from a cell, trimmed, or null if empty.
 */
function cellText(text: string | undefined | null): string | null {
  const t = (text ?? '').trim();
  return t || null;
}

/**
 * Log in to FDISK and scrape the member list plus, for every member, the
 * profile fields, Ausbildungen, Beförderungen, Untersuchungen and
 * Fahrgenehmigungen.
 *
 * A failure while scraping one member is logged and does not abort the run;
 * records collected for that member before the failure are kept.
 *
 * @param username - FDISK login name
 * @param password - FDISK password
 * @returns All scraped records, one array per record type.
 * @throws If launching the browser, logging in, or loading/scraping the member list fails.
 */
export async function scrapeAll(username: string, password: string): Promise<{
  members: FdiskMember[];
  ausbildungen: FdiskAusbildung[];
  befoerderungen: FdiskBefoerderung[];
  untersuchungen: FdiskUntersuchung[];
  fahrgenehmigungen: FdiskFahrgenehmigung[];
}> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--disable-gpu', '--disable-software-rasterizer'],
  });

  try {
    // Context and page are created inside the try so that a failure here
    // still reaches the finally block and closes the launched browser.
    const context = await browser.newContext({
      userAgent:
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    await login(page, username, password);

    // After login, page is on Start.aspx (frameset).
    // Author's note: direct navigation to MitgliedschaftenList.aspx causes a server
    // BLError because the server reads the org context from session variables.
    // navigateToMemberList therefore loads the list inside the frameset's mainFrame.
    const mainFrame = await navigateToMemberList(page);

    const members = await scrapeMembers(mainFrame);
    log(`Found ${members.length} members`);

    const ausbildungen: FdiskAusbildung[] = [];
    const befoerderungen: FdiskBefoerderung[] = [];
    const untersuchungen: FdiskUntersuchung[] = [];
    const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];

    for (const member of members) {
      try {
        // Navigate to member detail page — use direct URL if available, else search+click fallback
        let onDetail: boolean;
        if (member.detailUrl) {
          await frame_goto(mainFrame, member.detailUrl);
          onDetail = true;
        } else {
          onDetail = await navigateToMemberDetailBySearch(mainFrame, member.standesbuchNr);
        }

        if (!onDetail) {
          log(` SKIP ${member.vorname} ${member.zuname} (${member.standesbuchNr}): could not reach detail page`);
          continue;
        }

        // Scrape extra profile fields from the detail form
        const profileFields = await scrapeDetailProfileFields(mainFrame);
        member.geburtsort = profileFields.geburtsort;
        member.geschlecht = profileFields.geschlecht;
        member.beruf = profileFields.beruf;
        member.wohnort = profileFields.wohnort;
        member.plz = profileFields.plz;

        // Extract mitgliedschaft params from the current URL for constructing sub-section URLs.
        // PersonenForm.aspx is in the personen module; sub-sections are in mitgliedschaften module.
        // The links to Beförderungen/Untersuchungen/Fahrgenehmigungen live in the navigation
        // frame (not the content mainFrame), so we construct the URLs directly.
        const currentUrl = mainFrame.url();
        const urlObj = new URL(currentUrl);
        const idMitgliedschaft = urlObj.searchParams.get('id_mitgliedschaften');
        const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
        const idFeuerwehren = urlObj.searchParams.get('id_feuerwehren') ?? ID_FEUERWEHREN;

        if (!idMitgliedschaft) {
          // Without the membership id the sub-section URLs cannot be built; make that visible.
          log(` WARN: no id_mitgliedschaften in ${currentUrl} — skipping Beförderungen/Untersuchungen/Fahrgenehmigungen for ${member.vorname} ${member.zuname}`);
        }

        // Ausbildungen
        const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member);
        ausbildungen.push(...quals);

        // Beförderungen
        const befos = idMitgliedschaft
          ? await scrapeMemberBefoerderungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren)
          : [];
        befoerderungen.push(...befos);

        // Untersuchungen
        const unters = idMitgliedschaft
          ? await scrapeMemberUntersuchungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren)
          : [];
        untersuchungen.push(...unters);

        // Fahrgenehmigungen
        const fahrg = idMitgliedschaft
          ? await scrapeMemberFahrgenehmigungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren)
          : [];
        fahrgenehmigungen.push(...fahrg);

        log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen, ${befos.length} Beförderungen, ${unters.length} Untersuchungen, ${fahrg.length} Fahrgenehmigungen`);

        // Short pause between members.
        await page.waitForTimeout(500);
      } catch (err) {
        log(` WARN: could not scrape detail for ${member.vorname} ${member.zuname}: ${err}`);
      }
    }

    return { members, ausbildungen, befoerderungen, untersuchungen, fahrgenehmigungen };
  } finally {
    await browser.close();
  }
}

/**
 * Navigate a frame, waiting for networkidle. Wrapper to avoid repetition.
*/
async function frame_goto(frame: Frame, url: string): Promise<void> {
  await frame.goto(url, { waitUntil: 'networkidle' });
}

/**
 * Log in to FDISK through the login form.
 *
 * Loads LOGIN_URL first. If the resulting URL does not contain "login", the
 * session is treated as already authenticated and nothing is submitted.
 * Otherwise the credentials are entered and the function waits for the URL to
 * leave the login page.
 *
 * @param page - Page used for the login
 * @param username - Value typed into the `#login` field
 * @param password - Value typed into the `#password` field
 * @throws Error if the page URL still contains "login" after submitting.
 */
async function login(page: Page, username: string, password: string): Promise<void> {
  log(`Navigating to ${LOGIN_URL}`);
  await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForLoadState('networkidle');

  // Check if already logged in
  const currentUrlBefore = page.url();
  if (!currentUrlBefore.toLowerCase().includes('login')) {
    log(`Already logged in, on: ${currentUrlBefore}`);
    return;
  }

  // Exact selectors from the known login form HTML
  const usernameField = page.locator('#login');
  const passwordField = page.locator('#password');
  const submitButton = page.locator('#Submit2');

  await usernameField.waitFor({ state: 'visible', timeout: 10000 });
  await usernameField.fill(username);
  await passwordField.fill(password);
  await submitButton.click();

  // Wait for navigation away from the login page (up to 15s)
  try {
    await page.waitForURL(
      (url) => !url.toString().toLowerCase().includes('login'),
      { waitUntil: 'networkidle', timeout: 15000 },
    );
  } catch {
    // waitForURL timed out — fall through to the URL check below
  }

  // Verify we're logged in
  const currentUrl = page.url();
  if (currentUrl.toLowerCase().includes('login')) {
    throw new Error(`Login failed — still on login page: ${currentUrl}`);
  }
  log(`Logged in successfully, redirected to: ${currentUrl}`);
}

/**
 * Fallback navigation to a member's detail page when no direct URL is available.
 * Navigates to the member list, filters by exact standesbuchNr, then clicks the result row.
 * Returns true if we successfully landed on a detail page.
*/
async function navigateToMemberDetailBySearch(frame: Frame, standesbuchNr: string): Promise<boolean> {
  // Navigate to the member list
  await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await frame.waitForLoadState('networkidle');

  // Set exact standesbuchNr filter in the search form
  const formOk = await frame.evaluate((stNr) => {
    const form = (document as any).forms['frmsearch'];
    if (!form) return false;
    const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
    const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
    if (!fromFld || !toFld) return false;
    fromFld.value = stNr;
    toFld.value = stNr;
    return true;
  }, standesbuchNr);

  if (!formOk) {
    log(` WARN navigateToMemberDetailBySearch: search form not usable for StNr ${standesbuchNr}`);
    return false;
  }

  // Start waiting for the navigation before triggering the submit.
  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
  ]);

  // Click on the first data row — FDISK rows navigate to the detail page on click
  const firstRowLink = await frame.$('table.FdcLayList tbody tr:first-child a, table.FdcLayList tbody tr:first-child td');
  if (!firstRowLink) {
    log(` WARN navigateToMemberDetailBySearch: no result row for StNr ${standesbuchNr}`);
    return false;
  }

  try {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 15000 }),
      firstRowLink.click(),
    ]);
  } catch {
    // waitForNavigation may time out if click didn't navigate (e.g. onclick vs href)
    // Check whether the URL changed at all
  }

  const url = frame.url();
  const onDetailPage = !url.includes('MitgliedschaftenList') && !url.includes('meine_Mitglieder');
  if (onDetailPage) {
    log(` Navigated to detail via search+click: ${url}`);
  } else {
    log(` WARN navigateToMemberDetailBySearch: still on list page after click for StNr ${standesbuchNr}`);
  }
  return onDetailPage;
}

/**
 * Load the member list inside the frameset's `mainFrame` and return that frame.
 *
 * @param page - Page that currently shows the Start.aspx frameset
 * @returns The `mainFrame` frame, positioned on the member list.
 * @throws Error if no frame named `mainFrame` exists, or if the loaded URL
 *         contains "BLError", "support.aspx" or "Error".
 */
async function navigateToMemberList(page: Page): Promise<Frame> {
  const mainFrame = page.frame({ name: 'mainFrame' });
  if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');

  log(`Navigating mainFrame to: ${MEMBERS_URL}`);
  await mainFrame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await mainFrame.waitForLoadState('networkidle');

  const url = mainFrame.url();
  const title = await mainFrame.title();
  log(`mainFrame loaded: ${url} — title: "${title}"`);

  if (url.includes('BLError') || url.includes('support.aspx') || url.includes('Error')) {
    throw new Error(`Member list returned error page: ${url}`);
  }
  return mainFrame;
}

/**
 * Scrape the member list shown in `frame`.
 *
 * Steps:
 *  1. If the search form is present, clear the Standesbuchnummer filter, pick
 *     the largest page size offered, and submit the form.
 *  2. Parse the first result page and read the "x-y von z" pagination text.
 *  3. If fewer rows are shown than the reported total, collect the rest via
 *     Standesbuchnummer range queries (15 numbers per query).
 *
 * Rows lacking a Standesbuchnummer, Vorname or Zuname are dropped.
 *
 * @param frame - Frame positioned on the member list page
 * @returns One FdiskMember per usable row; profile fields are initialised to null.
 */
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
  log(`Scraping member list from: ${frame.url()}`);

  // Clear the Standesbuchnummer filter if the search form is present.
  // FDISK pre-fills the logged-in user's own Standesbuchnummer, which limits results to 1 member.
  // We clear it before submitting so all members of the fire station are returned.
  const hasForm = await frame.$('form[name="frmsearch"]') !== null;
  if (hasForm) {
    const fieldDump = await frame.evaluate(() => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return { cleared: [], pageSizeSet: null as string | null, allFields: [] };

      const cleared: string[] = [];
      const allFields: string[] = [];
      let pageSizeSet: string | null = null;

      for (const el of Array.from(form.elements) as HTMLInputElement[]) {
        if (el.type === 'hidden') continue;
        const name = (el.name ?? '').toLowerCase();
        const id = (el.id ?? '').toLowerCase();

        // Record every pre-filled field so the active filters can be logged.
        if (el.value) allFields.push(`${el.name || el.id}=${el.value}`);

        if (name.includes('standesbuch') || id.includes('standesbuch')) {
          el.value = '';
          cleared.push(el.name || el.id);
        }

        // Maximize page size: look for a <select> AND its paired hidden input.
        // NOTE(review): FDISK appears to use a custom "Dd" dropdown widget in which the
        // visible <select> is mirrored by a hidden input — confirm against the live markup.
        if ((name.includes('anzahl') || id.includes('anzahl') ||
             name.includes('pagesize') || id.includes('pagesize') ||
             name.includes('rows') || id.includes('rows')) && el.tagName === 'SELECT') {
          const select = el as unknown as HTMLSelectElement;

          // Pick the largest numeric option value, or the last option as fallback
          let bestOption: HTMLOptionElement | null = null;
          let bestVal = -1;
          for (const opt of Array.from(select.options)) {
            const n = parseInt(opt.value, 10);
            if (!isNaN(n) && n > bestVal) {
              bestVal = n;
              bestOption = opt;
            }
          }
          if (!bestOption && select.options.length > 0) {
            bestOption = select.options[select.options.length - 1];
          }

          if (bestOption) {
            select.value = bestOption.value;
            pageSizeSet = `${el.name || el.id}=${bestOption.value}`;

            // Also update the paired hidden field used by the Dd custom widget.
            // Common patterns: xDd_dd → xDd_id or xDd_hd
            const baseName = (el.name || el.id).replace(/_dd$/i, '');
            for (const suffix of ['_id', '_hd', '_val']) {
              const hidden = form.elements[baseName + suffix] as HTMLInputElement | undefined;
              if (hidden && hidden.type === 'hidden') {
                hidden.value = bestOption.value;
                pageSizeSet += ` (also set ${baseName + suffix})`;
              }
            }
          }
        }
      }
      return { cleared, pageSizeSet, allFields };
    });

    if (fieldDump.allFields.length > 0) {
      log(`Search form active filters before clear: ${fieldDump.allFields.join(', ')}`);
    }
    if (fieldDump.cleared.length > 0) {
      log(`Cleared Standesbuchnummer filter fields: ${fieldDump.cleared.join(', ')}`);
    } else {
      log('Search form found — no Standesbuchnummer field detected, submitting as-is');
    }
    if (fieldDump.pageSizeSet) {
      log(`Set page size: ${fieldDump.pageSizeSet}`);
    } else {
      log('No page size field found — will paginate through all results');
    }

    // Use Promise.all to start waiting for navigation BEFORE triggering the submit,
    // otherwise waitForLoadState resolves against the already-idle current page.
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
    ]);
    log(`After form submit: ${frame.url()}`);
  }

  // --- Phase 1: initial fetch (no StNr filter) to get the first batch and total count ---
  type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];

  await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
  const firstRows = await parseRowsFromTable(frame);
  log(`Initial fetch: ${firstRows.length} rows`);

  // Log href debug info for the first row to diagnose URL extraction
  const rowDebug = await frame.evaluate(() => (window as any).__fdiskFirstRowDebug ?? 'no debug info');
  log(`Row href debug: ${rowDebug}`);

  for (const row of firstRows) {
    log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
  }

  const pagination = await frame.evaluate(() =>
    document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? ''
  );
  log(`Pagination: "${pagination}"`);

  // Expected pagination text: "<first>-<last> von <total>".
  const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
  const totalExpected = pagMatch ? parseInt(pagMatch[3], 10) : null;
  const shownSoFar = pagMatch ? parseInt(pagMatch[2], 10) : null;

  const seenStNrs = new Set<string>(firstRows.map(r => r.standesbuchNr).filter(Boolean));
  const allRows: ParsedRow[] = [...firstRows];

  // --- Phase 2: if more members exist and pagination is disabled, use StNr range queries ---
  if (totalExpected && shownSoFar && shownSoFar < totalExpected) {
    log(`Pagination disabled (FDISK limitation). Switching to StNr range queries to fetch remaining ${totalExpected - seenStNrs.size} members.`);

    const BATCH = 15; // fetch 15 StNr slots at a time — safely under the 20-row page limit
    const MAX_STNR = 9999; // upper bound; we stop earlier if we have all members
    let startNr = 1;
    let consecutiveEmpty = 0;

    // Stop once everyone is collected, the upper bound is passed, or five
    // consecutive ranges yielded nothing new.
    while (seenStNrs.size < totalExpected && startNr <= MAX_STNR && consecutiveEmpty < 5) {
      const endNr = startNr + BATCH - 1;

      // Set StNr range in the search form and submit
      const formOk = await frame.evaluate(({ s, e }: { s: number; e: number }) => {
        const form = (document as any).forms['frmsearch'];
        if (!form) return false;
        const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement;
        const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement;
        if (!fromFld || !toFld) return false;
        fromFld.value = String(s);
        toFld.value = String(e);
        return true;
      }, { s: startNr, e: endNr });

      if (!formOk) {
        log('WARN: could not set StNr range fields — aborting range queries');
        break;
      }

      await Promise.all([
        frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
        frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
      ]);

      const rangeRows = await parseRowsFromTable(frame);
      const newRows = rangeRows.filter(r => r.standesbuchNr && !seenStNrs.has(r.standesbuchNr));
      newRows.forEach(r => { if (r.standesbuchNr) seenStNrs.add(r.standesbuchNr); });
      allRows.push(...newRows);

      log(`StNr ${startNr}–${endNr}: ${newRows.length} new members (collected ${seenStNrs.size}/${totalExpected})`);
      for (const row of newRows) {
        log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
      }

      consecutiveEmpty = newRows.length === 0 ? consecutiveEmpty + 1 : 0;
      startNr = endNr + 1;
    }

    log(`Range queries complete: ${seenStNrs.size} unique members collected (expected ${totalExpected})`);
  }

  log(`Parsed ${allRows.length} raw rows total`);

  const members: FdiskMember[] = [];
  for (const row of allRows) {
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      // Status is derived from the presence of an Abmeldedatum, not from the Status column.
      status: abmeldedatum ? 'ausgetreten' : 'aktiv',
      detailUrl: row.href,
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}

/**
 * Parse every `table.FdcLayList tbody tr` row of the member list into a plain object.
 *
 * Also stores a debug string for the first row in `window.__fdiskFirstRowDebug`
 * (inside the page) when that row has a Standesbuchnummer.
 *
 * @param frame - Frame showing the member list
 * @returns One object per table row with the raw cell strings and the detail URL (or null).
 */
async function parseRowsFromTable(frame: Frame) {
  // Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
  // 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
  // Each <td> contains an <a title="…"> — the title is the clean cell text.
  // Navigation may be via href or onclick handlers (FDISK uses both depending on version).
  return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
    trs.map((tr, rowIdx) => {
      const cells = Array.from(tr.querySelectorAll('td'));
      const val = (i: number) => {
        const a = cells[i]?.querySelector('a');
        const title = a?.getAttribute('title')?.trim();
        // Use title only if non-empty; otherwise fall back to textContent
        return (title || cells[i]?.textContent || '').trim();
      };

      // Extract detail URL — try multiple strategies:
      // 1. Standard <a href> pointing to a real target (not "#", empty or javascript:)
      // 2. onclick attribute on <tr>, <td>, or <a> containing an .aspx URL
      let href: string | null = null;
      let debugInfo = '';

      for (const a of Array.from(tr.querySelectorAll('a'))) {
        const rawHref = (a as Element).getAttribute('href') ?? '';
        debugInfo += `a.href="${rawHref}" `;
        if (rawHref && rawHref !== '#' && rawHref !== '' && !rawHref.startsWith('javascript:')) {
          href = (a as HTMLAnchorElement).href; // resolves relative → absolute
          break;
        }
      }

      if (!href) {
        // Scan onclick on the row itself plus its cells and anchors for .aspx URLs
        const candidates: Element[] = [tr, ...Array.from(tr.querySelectorAll('a, td'))];
        for (const el of candidates) {
          const onclick = el.getAttribute('onclick') ?? '';
          if (onclick) debugInfo += `onclick="${onclick}" `;
          const match = onclick.match(/['"]([^'"]*\.aspx[^'"]*)['"]/);
          if (match) {
            try {
              href = new URL(match[1], (window as Window).location.href).href;
            } catch {
              // Keep the raw value if it cannot be resolved against the page URL.
              href = match[1];
            }
            break;
          }
        }
      }

      // Log debug info for first data row to help diagnose href extraction issues
      if (rowIdx === 0 && val(2)) {
        (window as any).__fdiskFirstRowDebug = `StNr=${val(2)} href=${href} debug=${debugInfo}`;
      }

      return {
        status: val(1),
        standesbuchNr: val(2),
        dienstgrad: val(3),
        vorname: val(4),
        zuname: val(5),
        geburtsdatum: val(6),
        svnr: val(7),
        eintrittsdatum: val(8),
        abmeldedatum: val(9),
        href,
      };
    }),
  );
}

/**
 * Scrape additional profile fields from the member detail form.
 * Called while the frame is already on the member detail page.
*/
async function scrapeDetailProfileFields(frame: Frame): Promise<{
  geburtsort: string | null;
  geschlecht: string | null;
  beruf: string | null;
  wohnort: string | null;
  plz: string | null;
}> {
  return frame.evaluate(() => {
    // Current value of the first element matching `selector`;
    // null when the element is missing or its value is blank.
    const readControl = (selector: string): string | null => {
      const node = document.querySelector(selector) as HTMLInputElement | HTMLSelectElement | null;
      if (node === null) return null;

      if (node.tagName !== 'SELECT') {
        return (node as HTMLInputElement).value?.trim() || null;
      }

      // For dropdowns prefer the visible option text over its value.
      const dropdown = node as HTMLSelectElement;
      const chosen = dropdown.options[dropdown.selectedIndex];
      if (!chosen) return null;
      return (chosen.text || chosen.value || '').trim() || null;
    };

    // Try the selectors in order and return the first one that yields a value.
    const firstNonEmpty = (...selectors: string[]): string | null => {
      for (const selector of selectors) {
        const found = readControl(selector);
        if (found !== null) return found;
      }
      return null;
    };

    const geburtsort = firstNonEmpty('input[name="geburtsort"]', 'input[id*="geburtsort"]');
    const geschlecht = firstNonEmpty('select[name*="geschlecht"]', 'select[id*="geschlecht"]');
    const beruf = firstNonEmpty('input[name="beruf"]', 'input[id*="beruf"]');
    const wohnort = firstNonEmpty('input[name="ort"]', 'input[id*="_ort"]', 'input[name="wohnort"]');
    const plz = firstNonEmpty('input[name="plz"]', 'input[id*="plz"]');

    return { geburtsort, geschlecht, beruf, wohnort, plz };
  });
}

/**
 * Scrape Ausbildungen from the detail page (already loaded).
 * Navigates to the Ausbildung sub-page if needed.
*/
async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  // Look for Ausbildungsliste section — it's likely a table or list
  // NOTE(review): Playwright's `text=` engine does not split on commas, so this
  // selector probably searches for the literal text "Ausbildung, text=Ausbildungsliste"
  // and rarely matches. It is left unchanged because the link-click fallback below
  // is the path that currently runs — confirm before changing it.
  const ausbildungSection = frame.locator('text=Ausbildung, text=Ausbildungsliste').first();
  const hasSec = await ausbildungSection.isVisible().catch(() => false);

  if (!hasSec) {
    // Try navigating to an Ausbildung tab/link if present
    const ausbildungLink = frame.locator('a:has-text("Ausbildung")').first();
    const hasLink = await ausbildungLink.isVisible().catch(() => false);
    if (hasLink) {
      await ausbildungLink.click();
      await frame.waitForLoadState('networkidle').catch(() => {});
    }
  }

  // Parse the qualification table
  const tables = await frame.$$('table');
  const ausbildungen: FdiskAusbildung[] = [];

  for (const table of tables) {
    const rows = await table.$$eval('tr', (rows) => {
      return rows.map(row => ({
        cells: Array.from(row.querySelectorAll('td, th')).map(c => (c as Element).textContent?.trim() ?? ''),
      }));
    });

    // Need a header row plus at least one data row.
    if (rows.length < 2) continue;

    // The first row is treated as the header; a table qualifies when one of its
    // header cells mentions kurs/ausbildung/bezeichnung.
    const header = rows[0].cells.map(c => c.toLowerCase());
    const kursnameIdx = header.findIndex(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
    if (kursnameIdx < 0) continue;

    const datumIdx = header.findIndex(h => h.includes('datum') || h.includes('abschluss'));
    const ablaufIdx = header.findIndex(h => h.includes('ablauf') || h.includes('gültig'));
    const ortIdx = header.findIndex(h => h.includes('ort'));
    const bemIdx = header.findIndex(h => h.includes('bem') || h.includes('info'));

    for (const row of rows.slice(1)) {
      const kursname = cellText(row.cells[kursnameIdx]);
      if (!kursname) continue;

      const kursDatum = parseDate(datumIdx >= 0 ? row.cells[datumIdx] : null);
      const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;

      ausbildungen.push({
        standesbuchNr: member.standesbuchNr,
        kursname,
        kursDatum,
        ablaufdatum: parseDate(ablaufIdx >= 0 ? row.cells[ablaufIdx] : null),
        ort: ortIdx >= 0 ? cellText(row.cells[ortIdx]) : null,
        bemerkung: bemIdx >= 0 ? cellText(row.cells[bemIdx]) : null,
        syncKey,
      });
    }
    break; // only process the first Ausbildung table found
  }

  return ausbildungen;
}

/**
 * Navigate to the Beförderungen sub-page and scrape all promotions.
 * URL is constructed from the mitgliedschaft ID extracted from PersonenForm URL.
 *
 * A navigation failure propagates to the caller; a missing or unparsable table
 * is logged as a warning and yields the rows collected so far.
 *
 * @param frame - Frame to navigate
 * @param standesbuchNr - Member number stored on every result and in its syncKey
 * @param idMitgliedschaft - Value for the `id_mitgliedschaften` query parameter
 * @param idInstanzen - Value for the `id_instanzen` query parameter
 * @param idFeuerwehren - Value for the `id_feuerwehren` query parameter
 * @returns One entry per table row that has a Dienstgrad.
 */
async function scrapeMemberBefoerderungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskBefoerderung[]> {
  // URLSearchParams percent-encodes the values; plain numeric IDs are unaffected.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/befoerderungenList.aspx?${query.toString()}`;

  await frame_goto(frame, url);

  const results: FdiskBefoerderung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });

    // Columns read: 0 = Datum, 1 = Dienstgrad.
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        return { datum: cell(0), dienstgrad: cell(1) };
      })
    );

    for (const row of rows) {
      const dienstgrad = cellText(row.dienstgrad);
      if (!dienstgrad) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
      results.push({ standesbuchNr, datum, dienstgrad, syncKey });
    }

    log(` Beförderungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const b of results) {
      log(` ${b.datum ?? '—'} ${b.dienstgrad}`);
    }
  } catch {
    log(` WARN: could not parse Beförderungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return results;
}

/**
 * Navigate to the Untersuchungen sub-page and scrape all medical exams.
 * Keeps all rows (one per art+datum); DB stores all, queries filter latest per category.
*/
async function scrapeMemberUntersuchungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskUntersuchung[]> {
  // URLSearchParams percent-encodes the values; plain numeric IDs are unaffected.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/UntersuchungenList.aspx?${query.toString()}`;

  // A navigation failure propagates to the caller; only table parsing is best-effort.
  await frame_goto(frame, url);

  const results: FdiskUntersuchung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });

    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        // Columns: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
        return {
          datum: cell(0),
          anmerkungen: cell(1),
          art: cell(2),
          ergebnis: cell(3),
        };
      })
    );

    for (const row of rows) {
      // Rows without an Untersuchungsart are skipped.
      const art = cellText(row.art);
      if (!art) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
      results.push({
        standesbuchNr,
        datum,
        anmerkungen: cellText(row.anmerkungen),
        art,
        ergebnis: cellText(row.ergebnis),
        syncKey,
      });
    }

    log(` Untersuchungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const u of results) {
      log(` ${u.datum ?? '—'} [${u.art}] ${u.ergebnis ?? '—'} | ${u.anmerkungen ?? ''}`);
    }
  } catch {
    log(` WARN: could not parse Untersuchungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return results;
}

/**
 * Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
 * This is an inline-edit (ListEdit) page — values are in <input>/<select> fields.
*/
async function scrapeMemberFahrgenehmigungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskFahrgenehmigung[]> {
  // URLSearchParams percent-encodes the values; plain numeric IDs are unaffected.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/Ges_fahrgenehmigungenListEdit.aspx?${query.toString()}`;

  // A navigation failure propagates to the caller; only table parsing is best-effort.
  await frame_goto(frame, url);

  const results: FdiskFahrgenehmigung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });

    // ListEdit pages: each data row has inline <input>/<select> fields instead of plain text.
    // Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        // Value of cell i: text input first, then the selected option, then plain text.
        const cellVal = (i: number): string => {
          const cell = cells[i];
          if (!cell) return '';
          const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
          if (input) return input.value?.trim() ?? '';
          const select = cell.querySelector('select') as HTMLSelectElement | null;
          if (select) {
            const opt = select.options[select.selectedIndex];
            return (opt?.text || opt?.value || '').trim();
          }
          return cell.textContent?.trim() ?? '';
        };
        return {
          ausstellungsdatum: cellVal(0),
          gueltigBis: cellVal(1),
          behoerde: cellVal(2),
          nummer: cellVal(3),
          klasse: cellVal(4),
        };
      })
    );

    for (const row of rows) {
      // Rows without a Fahrgenehmigungsklasse are skipped.
      const klasse = cellText(row.klasse);
      if (!klasse) continue;
      const ausstellungsdatum = parseDate(row.ausstellungsdatum);
      const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
      results.push({
        standesbuchNr,
        ausstellungsdatum,
        gueltigBis: parseDate(row.gueltigBis),
        behoerde: cellText(row.behoerde),
        nummer: cellText(row.nummer),
        klasse,
        syncKey,
      });
    }

    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const f of results) {
      log(` ${f.ausstellungsdatum ?? '—'} [${f.klasse}] ${f.behoerde ?? ''} ${f.nummer ?? ''}`);
    }
  } catch {
    log(` WARN: could not parse Fahrgenehmigungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return results;
}

/**
 * Legacy export kept for compatibility — delegates to the new unified flow.
 *
 * @param frame - Frame to navigate to the member's detail page
 * @param member - Member whose Ausbildungen are scraped
 * @returns The member's Ausbildungen, or an empty array when the member has no detailUrl.
 */
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  if (!member.detailUrl) return [];
  await frame_goto(frame, member.detailUrl);
  return scrapeAusbildungenFromDetailPage(frame, member);
}