import { chromium, Page, Frame } from '@playwright/test';
import {
  FdiskMember,
  FdiskAusbildung,
  FdiskBefoerderung,
  FdiskUntersuchung,
  FdiskFahrgenehmigung,
} from './types';

// Deployment-specific settings; each can be overridden through the environment.
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';

const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
const MEMBERS_URL = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/meine_Mitglieder.aspx`;

/**
 * Maps a raw FDISK status string to a dashboard status value.
 * Returns null for unknown/unneeded statuses — those members should be skipped.
 *
 * @param raw Status text as shown in the FDISK member list (surrounding whitespace is ignored).
 */
function mapFdiskStatus(raw: string): 'aktiv' | 'kind' | 'jugend' | 'reserve' | null {
  switch (raw.trim()) {
    case 'Aktiv':
      return 'aktiv';
    case 'Kind':
      return 'kind';
    case 'Jugend':
      return 'jugend';
    case 'Reserve':
      return 'reserve';
    default:
      return null;
  }
}

/** Writes a timestamped, `[scraper]`-prefixed line to stdout. */
function log(msg: string) {
  console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
}

/** True when year/month/day name a real calendar day (month 1–12, day within that month). */
function isValidCalendarDate(year: number, month: number, day: number): boolean {
  if (year < 1 || month < 1 || month > 12 || day < 1) return false;
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysPerMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  return day <= daysPerMonth[month - 1];
}

/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 * Returns null if empty or unparseable — including values that look like a date
 * but do not exist on the calendar (e.g. "31.02.2020"), so no impossible ISO
 * date is ever handed to the caller.
 *
 * @param raw Date text, optionally followed by a time part which is ignored.
 */
function parseDate(raw: string | null | undefined): string | null {
  if (!raw) return null;
  const trimmed = raw.trim();
  if (!trimmed) return null;

  // Accept 1–2 digit day/month with optional trailing time (e.g. "10.9.2011 00:00:00")
  const match = trimmed.match(/^(\d{1,2})\.(\d{1,2})\.(\d{4})/);
  if (!match) return null;

  const [, dayRaw, monthRaw, yearRaw] = match;
  if (!isValidCalendarDate(Number(yearRaw), Number(monthRaw), Number(dayRaw))) return null;

  return `${yearRaw}-${monthRaw.padStart(2, '0')}-${dayRaw.padStart(2, '0')}`;
}

/**
 * Extract text content from a cell, trimmed, or null if empty.
 */
function cellText(text: string | undefined | null): string | null {
  const t = (text ?? '').trim();
  return t || null;
}

/**
 * Fetch only members we care about, rather than scraping the full member list.
 *
 * Phase 1: one search per known StNr (exact match).
* Phase 2: if knownNames is non-empty, a single unfiltered fetch (page 1 only)
*          to pick up members matched by name (first-time linking).
*
* Returns deduplicated FdiskMember[].
*
* @param frame      Frame that currently shows the member list with its `frmsearch` form.
* @param knownStNrs Standesbuchnummern to look up one by one.
* @param knownNames Lower-cased `vorname::zuname` keys to match on the unfiltered page.
*/
async function scrapeKnownMembers(
  frame: Frame,
  knownStNrs: Set<string>,
  knownNames: Set<string>,
): Promise<FdiskMember[]> {
  type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];

  const seenStNrs = new Set<string>();
  const allRows: ParsedRow[] = [];

  // --- Phase 1: fetch by exact StNr ---
  log(`scrapeKnownMembers: fetching ${knownStNrs.size} known StNrs`);
  for (const stNr of knownStNrs) {
    // Put the same StNr into the "from" and "to" filter fields for an exact match.
    const formOk = await frame.evaluate((sn) => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return false;
      const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
      const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
      if (!fromFld || !toFld) return false;
      fromFld.value = sn;
      toFld.value = sn;
      return true;
    }, stNr);

    if (!formOk) {
      log(` WARN: search form not usable for StNr ${stNr}`);
      continue;
    }

    // Start waiting for the navigation before triggering the submit.
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => {
        (document as any).forms['frmsearch'].submit();
      }),
    ]);

    const rows = await parseRowsFromTable(frame);
    for (const r of rows) {
      if (r.standesbuchNr && !seenStNrs.has(r.standesbuchNr)) {
        seenStNrs.add(r.standesbuchNr);
        allRows.push(r);
      }
    }
    log(` StNr ${stNr}: ${rows.length} row(s)`);

    // Be gentle on the server
    await frame.page().waitForTimeout(300);
  }

  // --- Phase 2: single unfiltered fetch for name-matching ---
  if (knownNames.size > 0) {
    log(`scrapeKnownMembers: unfiltered fetch for ${knownNames.size} name-based matches`);

    // Clear StNr filter
    await frame.evaluate(() => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return;
      const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
      const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
      if (fromFld) fromFld.value = '';
      if (toFld) toFld.value = '';
    });

    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => {
        (document as any).forms['frmsearch'].submit();
      }),
    ]);

    const rows = await parseRowsFromTable(frame);
    let matched = 0;
    for (const r of rows) {
      if (!r.standesbuchNr || seenStNrs.has(r.standesbuchNr)) continue;
      const nameKey = `${(r.vorname || '').toLowerCase()}::${(r.zuname || '').toLowerCase()}`;
      if (knownNames.has(nameKey)) {
        seenStNrs.add(r.standesbuchNr);
        allRows.push(r);
        matched++;
      }
    }
    log(` Unfiltered page: ${rows.length} total rows, ${matched} name-matched`);
  }

  log(`scrapeKnownMembers: ${allRows.length} members collected`);

  // Build FdiskMember objects
  const members: FdiskMember[] = [];
  for (const row of allRows) {
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const status = mapFdiskStatus(row.status);
    if (!status) continue; // skip members with non-synced statuses
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      status,
      detailUrl: row.href,
      // Profile fields are not part of the list table; they stay null here.
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}

/**
 * Log in to FDISK, scrape the full member list and then, member by member,
 * the profile fields, Ausbildungen, Beförderungen, Untersuchungen and
 * Fahrgenehmigungen from the detail sub-pages.
 *
 * A failure while scraping one member is logged and does not abort the run.
 * The browser is always closed, even when setup or login fails.
 *
 * @param username FDISK login name.
 * @param password FDISK password.
 * @returns All scraped records, grouped by kind.
 * @throws If login fails or the member list cannot be reached/scraped.
 */
export async function scrapeAll(username: string, password: string): Promise<{
  members: FdiskMember[];
  ausbildungen: FdiskAusbildung[];
  befoerderungen: FdiskBefoerderung[];
  untersuchungen: FdiskUntersuchung[];
  fahrgenehmigungen: FdiskFahrgenehmigung[];
}> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--disable-gpu', '--disable-software-rasterizer'],
  });

  // Everything after launch runs inside try/finally so a failure in
  // newContext()/newPage() cannot leave the browser process behind.
  try {
    const context = await browser.newContext({
      userAgent:
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    await login(page, username, password);

    // After login, page is on Start.aspx (frameset).
    // Direct navigation to MitgliedschaftenList.aspx causes a server BLError because
    // the server reads the org context from session variables set by the menu.
    // Navigate via the menu frame (left.aspx) to set session state properly.
    const mainFrame = await navigateToMemberList(page);

    const members = await scrapeMembers(mainFrame);
    log(`Found ${members.length} members (full scrape)`);

    const ausbildungen: FdiskAusbildung[] = [];
    const befoerderungen: FdiskBefoerderung[] = [];
    const untersuchungen: FdiskUntersuchung[] = [];
    const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];

    for (const member of members) {
      try {
        // Navigate to member detail page — use direct URL if available, else search+click fallback
        let onDetail: boolean;
        if (member.detailUrl) {
          await frame_goto(mainFrame, member.detailUrl);
          onDetail = true;
        } else {
          onDetail = await navigateToMemberDetailBySearch(mainFrame, member.standesbuchNr);
        }

        if (!onDetail) {
          log(` SKIP ${member.vorname} ${member.zuname} (${member.standesbuchNr}): could not reach detail page`);
          continue;
        }

        // Scrape extra profile fields from the detail form
        const profileFields = await scrapeDetailProfileFields(mainFrame);
        member.geburtsort = profileFields.geburtsort;
        member.geschlecht = profileFields.geschlecht;
        member.beruf = profileFields.beruf;
        member.wohnort = profileFields.wohnort;
        member.plz = profileFields.plz;

        // Extract mitgliedschaft + person params from the current URL for constructing sub-section URLs.
        // PersonenForm.aspx is in the personen module; sub-sections are each in their own module.
        // URL pattern: ?search=1&searchid_mitgliedschaften=X&id_personen=Y&id_mitgliedschaften=X&searchid_personen=Y&searchid_maskmode=
        const currentUrl = mainFrame.url();
        const urlObj = new URL(currentUrl);
        const idMitgliedschaft = urlObj.searchParams.get('id_mitgliedschaften');
        const idPersonen = urlObj.searchParams.get('id_personen');
        const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;

        // Ausbildungen
        if (idMitgliedschaft && idPersonen) {
          try {
            const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
            ausbildungen.push(...quals);
            log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
          } catch (err: unknown) {
            const reason = err instanceof Error ? err.message : String(err);
            log(` WARN: Ausbildungen scrape failed for ${member.vorname} ${member.zuname} (StNr ${member.standesbuchNr}): ${reason}`);
          }
        }

        // Beförderungen
        const befos = (idMitgliedschaft && idPersonen)
          ? await scrapeMemberBefoerderungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idPersonen)
          : [];
        befoerderungen.push(...befos);

        // Untersuchungen
        const unters = (idMitgliedschaft && idPersonen)
          ? await scrapeMemberUntersuchungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idPersonen)
          : [];
        untersuchungen.push(...unters);

        // Fahrgenehmigungen
        const fahrg = (idMitgliedschaft && idPersonen)
          ? await scrapeMemberFahrgenehmigungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idPersonen, idInstanzen)
          : [];
        fahrgenehmigungen.push(...fahrg);

        log(` ${member.vorname} ${member.zuname}: ${befos.length} Beförderungen, ${unters.length} Untersuchungen, ${fahrg.length} Fahrgenehmigungen`);

        // Short pause between members to keep the request rate low.
        await page.waitForTimeout(500);
      } catch (err) {
        log(` WARN: could not scrape detail for ${member.vorname} ${member.zuname}: ${err}`);
      }
    }

    return { members, ausbildungen, befoerderungen, untersuchungen, fahrgenehmigungen };
  } finally {
    await browser.close();
  }
}

/** Navigate a frame, waiting for networkidle. Wrapper to avoid repetition. */
async function frame_goto(frame: Frame, url: string): Promise<void> {
  await frame.goto(url, { waitUntil: 'networkidle' });
}

/** Select "Alle" in the anzeige_count dropdown to show all rows, then wait for reload.
*/
async function selectAlleAnzeige(frame: Frame): Promise<void> {
  try {
    // Locator actions are strict: if the page renders the dropdown more than once,
    // inputValue()/selectOption() on the bare locator throw. Target the first match.
    const sel = frame.locator('select[name="anzeige_count"], select#anzeige_count').first();
    if (await sel.count() === 0) return; // dropdown does not exist on this page

    const current = await sel.inputValue().catch(() => '');
    if (current === 'ALLE') return; // already showing all

    await sel.selectOption('ALLE');
    await frame.waitForLoadState('networkidle').catch(() => {});
  } catch (err: unknown) {
    // Best effort only: callers continue with whatever rows are visible.
    // Log the reason so a silently truncated list can be diagnosed.
    const reason = err instanceof Error ? err.message : String(err);
    log(` WARN selectAlleAnzeige: could not switch anzeige_count to ALLE: ${reason}`);
  }
}

/**
 * Open the FDISK login page and sign in with the given credentials.
 * Returns immediately if the browser is redirected away from the login URL
 * (i.e. a session already exists).
 *
 * @throws If the page URL still contains "login" after submitting the form.
 */
async function login(page: Page, username: string, password: string): Promise<void> {
  log(`Navigating to ${LOGIN_URL}`);
  await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForLoadState('networkidle');

  // Check if already logged in
  const currentUrlBefore = page.url();
  if (!currentUrlBefore.toLowerCase().includes('login')) {
    log(`Already logged in, on: ${currentUrlBefore}`);
    return;
  }

  // Exact selectors from the known login form HTML
  const usernameField = page.locator('#login');
  const passwordField = page.locator('#password');
  const submitButton = page.locator('#Submit2');

  await usernameField.waitFor({ state: 'visible', timeout: 10000 });
  await usernameField.fill(username);
  await passwordField.fill(password);
  await submitButton.click();

  // Wait for navigation away from the login page (up to 15s)
  try {
    await page.waitForURL(
      (url) => !url.toString().toLowerCase().includes('login'),
      { waitUntil: 'networkidle', timeout: 15000 },
    );
  } catch {
    // waitForURL timed out — fall through to the URL check below
  }

  // Verify we're logged in
  const currentUrl = page.url();
  if (currentUrl.toLowerCase().includes('login')) {
    throw new Error(`Login failed — still on login page: ${currentUrl}`);
  }
  log(`Logged in successfully, redirected to: ${currentUrl}`);
}

/**
 * Fallback navigation to a member's detail page when no direct URL is available.
 * Navigates to the member list, filters by exact standesbuchNr, then clicks the result row.
 * Returns true if we successfully landed on a detail page.
*/
async function navigateToMemberDetailBySearch(frame: Frame, standesbuchNr: string): Promise<boolean> {
  // Navigate to the member list
  await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await frame.waitForLoadState('networkidle');

  // Set exact standesbuchNr filter in the search form
  const formOk = await frame.evaluate((stNr) => {
    const form = (document as any).forms['frmsearch'];
    if (!form) return false;
    const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
    const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
    if (!fromFld || !toFld) return false;
    fromFld.value = stNr;
    toFld.value = stNr;
    return true;
  }, standesbuchNr);

  if (!formOk) {
    log(` WARN navigateToMemberDetailBySearch: search form not usable for StNr ${standesbuchNr}`);
    return false;
  }

  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => {
      (document as any).forms['frmsearch'].submit();
    }),
  ]);

  // Click on the first data row — FDISK rows navigate to the detail page on click
  const firstRowLink = await frame.$(
    'table.FdcLayList tbody tr:first-child a, table.FdcLayList tbody tr:first-child td',
  );
  if (!firstRowLink) {
    log(` WARN navigateToMemberDetailBySearch: no result row for StNr ${standesbuchNr}`);
    return false;
  }

  try {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 15000 }),
      firstRowLink.click(),
    ]);
  } catch {
    // waitForNavigation may time out if click didn't navigate (e.g. onclick vs href)
    // Check whether the URL changed at all
  }

  const url = frame.url();
  const onDetailPage = !url.includes('MitgliedschaftenList') && !url.includes('meine_Mitglieder');
  if (onDetailPage) {
    log(` Navigated to detail via search+click: ${url}`);
  } else {
    log(` WARN navigateToMemberDetailBySearch: still on list page after click for StNr ${standesbuchNr}`);
  }
  return onDetailPage;
}

/**
 * Point the frameset's `mainFrame` at the member list and return that frame.
 *
 * @throws If the frame named `mainFrame` does not exist, or the resulting URL
 *   looks like an FDISK error page.
 */
async function navigateToMemberList(page: Page): Promise<Frame> {
  const mainFrame = page.frame({ name: 'mainFrame' });
  if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');

  log(`Navigating mainFrame to: ${MEMBERS_URL}`);
  await mainFrame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await mainFrame.waitForLoadState('networkidle');

  const url = mainFrame.url();
  const title = await mainFrame.title();
  log(`mainFrame loaded: ${url} — title: "${title}"`);

  if (url.includes('BLError') || url.includes('support.aspx') || url.includes('Error')) {
    throw new Error(`Member list returned error page: ${url}`);
  }

  return mainFrame;
}

/**
 * Scrape the complete member list shown in `frame`.
 *
 * Clears the Standesbuchnummer filter, tries to maximise the page size, submits
 * the search form and parses the result table. If the pagination text reports
 * more members than are shown, the remaining ones are collected through
 * consecutive StNr range queries. Rows without StNr/name or with a status that
 * `mapFdiskStatus` does not map are dropped.
 *
 * @returns One FdiskMember per accepted row; profile fields are left null.
 */
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
  type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];

  const logRow = (row: ParsedRow): void => {
    log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
  };

  log(`Scraping member list from: ${frame.url()}`);

  // Clear the Standesbuchnummer filter if the search form is present.
  // FDISK pre-fills the logged-in user's own Standesbuchnummer, which limits results to 1 member.
  // We clear it before submitting so all members of the fire station are returned.
  const hasForm = await frame.$('form[name="frmsearch"]') !== null;
  if (hasForm) {
    const fieldDump = await frame.evaluate(() => {
      const form = (document as any).forms['frmsearch'];
      if (!form) {
        return { cleared: [] as string[], pageSizeSet: null as string | null, allFields: [] as string[] };
      }

      const cleared: string[] = [];
      const allFields: string[] = [];
      let pageSizeSet: string | null = null;

      for (const el of Array.from(form.elements) as HTMLInputElement[]) {
        if (el.type === 'hidden') continue;
        const name = (el.name ?? '').toLowerCase();
        const id = (el.id ?? '').toLowerCase();

        // Record every pre-filled visible field for the diagnostic log.
        if (el.value) allFields.push(`${el.name || el.id}=${el.value}`);

        if (name.includes('standesbuch') || id.includes('standesbuch')) {
          el.value = '';
          cleared.push(el.name || el.id);
        }

        // Maximize page size: look for a <select> whose name/id suggests a row count,
        // and also update a hidden input paired with it (see below).
        const looksLikePageSize =
          name.includes('anzahl') || id.includes('anzahl') ||
          name.includes('pagesize') || id.includes('pagesize') ||
          name.includes('rows') || id.includes('rows');
        if (looksLikePageSize && el.tagName === 'SELECT') {
          const select = el as unknown as HTMLSelectElement;

          // Pick the largest numeric option value, or the last option as fallback
          let bestOption: HTMLOptionElement | null = null;
          let bestVal = -1;
          for (const opt of Array.from(select.options)) {
            const n = parseInt(opt.value, 10);
            if (!isNaN(n) && n > bestVal) {
              bestVal = n;
              bestOption = opt;
            }
          }
          if (!bestOption && select.options.length > 0) {
            bestOption = select.options[select.options.length - 1];
          }

          if (bestOption) {
            select.value = bestOption.value;
            pageSizeSet = `${el.name || el.id}=${bestOption.value}`;

            // Also update the paired hidden field used by the Dd custom widget.
            // Common patterns: xDd_dd → xDd_id or xDd_hd
            const baseName = (el.name || el.id).replace(/_dd$/i, '');
            for (const suffix of ['_id', '_hd', '_val']) {
              const hidden = form.elements[baseName + suffix] as HTMLInputElement | undefined;
              if (hidden && hidden.type === 'hidden') {
                hidden.value = bestOption.value;
                pageSizeSet += ` (also set ${baseName + suffix})`;
              }
            }
          }
        }
      }

      return { cleared, pageSizeSet, allFields };
    });

    if (fieldDump.allFields.length > 0) {
      log(`Search form active filters before clear: ${fieldDump.allFields.join(', ')}`);
    }
    if (fieldDump.cleared.length > 0) {
      log(`Cleared Standesbuchnummer filter fields: ${fieldDump.cleared.join(', ')}`);
    } else {
      log('Search form found — no Standesbuchnummer field detected, submitting as-is');
    }
    if (fieldDump.pageSizeSet) {
      log(`Set page size: ${fieldDump.pageSizeSet}`);
    } else {
      log('No page size field found — will paginate through all results');
    }

    // Use Promise.all to start waiting for navigation BEFORE triggering the submit,
    // otherwise waitForLoadState resolves against the already-idle current page.
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => {
        (document as any).forms['frmsearch'].submit();
      }),
    ]);
    log(`After form submit: ${frame.url()}`);
  }

  // --- Phase 1: initial fetch (no StNr filter) to get the first batch and total count ---
  await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
  const firstRows = await parseRowsFromTable(frame);
  log(`Initial fetch: ${firstRows.length} rows`);

  // Log href debug info for the first row to diagnose URL extraction
  const rowDebug = await frame.evaluate(() => (window as any).__fdiskFirstRowDebug ?? 'no debug info');
  log(`Row href debug: ${rowDebug}`);
  firstRows.forEach(logRow);

  // The nav table text is expected to contain "<from>-<to> von <total>".
  const pagination = await frame.evaluate(() =>
    document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? ''
  );
  log(`Pagination: "${pagination}"`);
  const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
  const totalExpected = pagMatch ? parseInt(pagMatch[3], 10) : null;
  const shownSoFar = pagMatch ? parseInt(pagMatch[2], 10) : null;

  const seenStNrs = new Set<string>(firstRows.map(r => r.standesbuchNr).filter(Boolean));
  const allRows: ParsedRow[] = [...firstRows];

  // --- Phase 2: if more members exist and pagination is disabled, use StNr range queries ---
  if (totalExpected && shownSoFar && shownSoFar < totalExpected) {
    log(`Pagination disabled (FDISK limitation). Switching to StNr range queries to fetch remaining ${totalExpected - seenStNrs.size} members.`);

    const BATCH = 15; // fetch 15 StNr slots at a time — safely under the 20-row page limit
    const MAX_STNR = 9999; // upper bound; we stop earlier if we have all members
    let startNr = 1;
    let consecutiveEmpty = 0;

    // Stops when everything is collected, the upper bound is reached, or five
    // ranges in a row contributed no new member.
    while (seenStNrs.size < totalExpected && startNr <= MAX_STNR && consecutiveEmpty < 5) {
      const endNr = startNr + BATCH - 1;

      // Set StNr range in the search form and submit
      const formOk = await frame.evaluate(({ s, e }: { s: number; e: number }) => {
        const form = (document as any).forms['frmsearch'];
        if (!form) return false;
        const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement;
        const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement;
        if (!fromFld || !toFld) return false;
        fromFld.value = String(s);
        toFld.value = String(e);
        return true;
      }, { s: startNr, e: endNr });

      if (!formOk) {
        log('WARN: could not set StNr range fields — aborting range queries');
        break;
      }

      await Promise.all([
        frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
        frame.evaluate(() => {
          (document as any).forms['frmsearch'].submit();
        }),
      ]);

      const rangeRows = await parseRowsFromTable(frame);
      const newRows = rangeRows.filter(r => r.standesbuchNr && !seenStNrs.has(r.standesbuchNr));
      newRows.forEach(r => {
        if (r.standesbuchNr) seenStNrs.add(r.standesbuchNr);
      });
      allRows.push(...newRows);

      log(`StNr ${startNr}–${endNr}: ${newRows.length} new members (collected ${seenStNrs.size}/${totalExpected})`);
      newRows.forEach(logRow);

      consecutiveEmpty = newRows.length === 0 ? consecutiveEmpty + 1 : 0;
      startNr = endNr + 1;
    }

    log(`Range queries complete: ${seenStNrs.size} unique members collected (expected ${totalExpected})`);
  }

  log(`Parsed ${allRows.length} raw rows total`);

  const members: FdiskMember[] = [];
  for (const row of allRows) {
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const status = mapFdiskStatus(row.status);
    if (!status) continue; // skip members with non-synced statuses
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      status,
      detailUrl: row.href,
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}

/**
 * Parse every `table.FdcLayList tbody tr` of the frame into a plain row object.
 *
 * Besides the text columns, each row carries `href`: the absolute detail-page
 * URL if one could be extracted, otherwise null. As a side effect, debug
 * information about the first row is stored on `window.__fdiskFirstRowDebug`
 * inside the page.
 */
async function parseRowsFromTable(frame: Frame) {
  // Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
  // 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
  // Each <td> contains an <a>; its title attribute is the clean cell text.
  // Navigation may be via href or onclick handlers (FDISK uses both depending on version).
  return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
    trs.map((tr, rowIdx) => {
      const cells = Array.from(tr.querySelectorAll('td'));
      const val = (i: number) => {
        const a = cells[i]?.querySelector('a');
        const title = a?.getAttribute('title')?.trim();
        // Use title only if non-empty; otherwise fall back to textContent
        return (title || cells[i]?.textContent || '').trim();
      };

      // Extract detail URL — try multiple strategies:
      // 1. A standard <a href> that is not empty, "#" or a javascript: pseudo-URL
      // 2. An onclick attribute on the <tr>, an <a> or a <td> containing a quoted .aspx URL
      let href: string | null = null;
      let debugInfo = '';

      for (const a of Array.from(tr.querySelectorAll('a'))) {
        const rawHref = (a as Element).getAttribute('href') ?? '';
        debugInfo += `a.href="${rawHref}" `;
        if (rawHref && rawHref !== '#' && !rawHref.startsWith('javascript:')) {
          href = (a as HTMLAnchorElement).href; // resolves relative → absolute
          break;
        }
      }

      if (!href) {
        // Scan onclick on the row itself plus its cells and anchors for .aspx URLs
        const candidates: Element[] = [tr, ...Array.from(tr.querySelectorAll('a, td'))];
        for (const el of candidates) {
          const onclick = el.getAttribute('onclick') ?? '';
          if (onclick) debugInfo += `onclick="${onclick}" `;
          const match = onclick.match(/['"]([^'"]*\.aspx[^'"]*)['"]/);
          if (match) {
            try {
              href = new URL(match[1], (window as Window).location.href).href;
            } catch {
              // Not resolvable against the page URL — keep the raw value.
              href = match[1];
            }
            break;
          }
        }
      }

      // Log debug info for first data row to help diagnose href extraction issues
      if (rowIdx === 0 && val(2)) {
        (window as any).__fdiskFirstRowDebug = `StNr=${val(2)} href=${href} debug=${debugInfo}`;
      }

      return {
        status: val(1),
        standesbuchNr: val(2),
        dienstgrad: val(3),
        vorname: val(4),
        zuname: val(5),
        geburtsdatum: val(6),
        svnr: val(7),
        eintrittsdatum: val(8),
        abmeldedatum: val(9),
        href,
      };
    }),
  );
}

/**
 * Scrape additional profile fields from the member detail form.
 * Called while the frame is already on the member detail page.
*/
async function scrapeDetailProfileFields(frame: Frame): Promise<{
  geburtsort: string | null;
  geschlecht: string | null;
  beruf: string | null;
  wohnort: string | null;
  plz: string | null;
}> {
  return frame.evaluate(() => {
    // Current value of the first element matching `selector`: the selected
    // option's label (or value) for a <select>, the input value otherwise.
    // Missing elements and blank values both yield null.
    const readField = (selector: string): string | null => {
      const node = document.querySelector(selector) as HTMLInputElement | HTMLSelectElement | null;
      if (!node) return null;

      if (node.tagName !== 'SELECT') {
        return (node as HTMLInputElement).value?.trim() || null;
      }

      const dropdown = node as HTMLSelectElement;
      const chosen = dropdown.options[dropdown.selectedIndex];
      if (!chosen) return null;
      return (chosen.text || chosen.value || '').trim() || null;
    };

    // Try the selectors in order and stop at the first one that yields a value.
    const firstOf = (...selectors: string[]): string | null => {
      for (const selector of selectors) {
        const found = readField(selector);
        if (found !== null) return found;
      }
      return null;
    };

    const geburtsort = firstOf('input[name="geburtsort"]', 'input[id*="geburtsort"]');
    const geschlecht = firstOf('select[name*="geschlecht"]', 'select[id*="geschlecht"]');
    const beruf = firstOf('input[name="beruf"]', 'input[id*="beruf"]');
    const wohnort = firstOf('input[name="ort"]', 'input[id*="_ort"]', 'input[name="wohnort"]');
    const plz = firstOf('input[name="plz"]', 'input[id*="plz"]');

    return { geburtsort, geschlecht, beruf, wohnort, plz };
  });
}

/**
 * Scrape Kurse (courses) by navigating to the KursteilnehmerListEdit.aspx page.
 * This page uses indexed hidden form fields (kursart_bez_N, datum_von_N, etc.)
 * which are far more reliable than heuristic table column detection.
*/
async function scrapeAusbildungenFromDetailPage(
  frame: Frame,
  member: FdiskMember,
  idMitgliedschaft?: string | null,
  idPersonen?: string | null,
): Promise<FdiskAusbildung[]> {
  /** One course row as read inside the page, before the date is converted to ISO. */
  type RawKursRow = {
    standesbuchNr: string;
    kursname: string;
    kursnummer: string | null;
    kurzbezeichnung: string | null;
    erfolgscode: string | null;
    kursDatum: string | null;
  };

  if (!idMitgliedschaft || !idPersonen) {
    log(` Kurse for StNr ${member.standesbuchNr}: missing mitgliedschaft/personen IDs, skipping`);
    return [];
  }

  const url = `${BASE_URL}/fdisk/module/mgvw/kursteilnehmer/KursteilnehmerListEdit.aspx`
    + `?search=1&searchid_personen=${idPersonen}&searchid_mitgliedschaften=${idMitgliedschaft}`
    + `&id_personen=${idPersonen}&id_mitgliedschaften=${idMitgliedschaft}`
    + `&anzeige_count=ALLE`;

  await frame_goto(frame, url);

  const landed = frame.url();
  const title = await frame.title().catch(() => '');
  if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
    log(` → Kurse ERROR page: ${landed}`);
    return [];
  }

  // Ensure all rows are visible (the URL param should already set this, but belt-and-suspenders)
  await selectAlleAnzeige(frame);

  // Read the indexed form fields. A failing evaluate is logged and treated as
  // "no rows" so one broken page does not abort the caller.
  let rawRows: RawKursRow[] = [];
  try {
    rawRows = await frame.evaluate((stNr: string) => {
      const rows: Array<{
        standesbuchNr: string;
        kursname: string;
        kursnummer: string | null;
        kurzbezeichnung: string | null;
        erfolgscode: string | null;
        kursDatum: string | null;
      }> = [];

      for (let i = 0; i < 500; i++) {
        // kursart_bez is the sentinel — if it doesn't exist, we've passed all rows
        const kursartBezEl = document.querySelector(`input[name="kursart_bez_${i}"]`) as HTMLInputElement | null;
        if (!kursartBezEl) break;

        const kursname = kursartBezEl.value?.trim() || '';
        if (!kursname) continue; // row exists but carries no course name

        const kursnummerEl = document.querySelector(`input[name="kursnummer_${i}"]`) as HTMLInputElement | null;
        const datumVonEl = document.querySelector(`input[name="datum_von_${i}"]`) as HTMLInputElement | null;
        const leistungsartEl = document.querySelector(`input[name="leistungsart_${i}"]`) as HTMLInputElement | null;

        const kursnummer = kursnummerEl?.value?.trim() || null;
        // datum_von format: "D.M.YYYY HH:MM:SS" — pass raw, parseDate handles it
        const kursDatum = datumVonEl?.value?.trim() || null;
        const erfolgscode = leistungsartEl?.value?.trim() || null;

        // Kurzbezeichnung: text of the first <nobr> in the table row that holds
        // the kursnummer input (non-breaking spaces normalised to plain spaces).
        let kurzbezeichnung: string | null = null;
        const row = kursnummerEl?.closest('tr');
        if (row) {
          const nobrs = row.querySelectorAll('nobr');
          if (nobrs.length >= 1) {
            kurzbezeichnung = nobrs[0].textContent?.replace(/\u00A0/g, ' ').trim() || null;
          }
        }

        rows.push({ standesbuchNr: stNr, kursname, kursnummer, kurzbezeichnung, erfolgscode, kursDatum });
      }

      return rows;
    }, member.standesbuchNr);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    log(` WARN: Kurse form-field extraction failed for StNr ${member.standesbuchNr}: ${reason}`);
  }

  log(` → Kurse form-field extraction: ${rawRows.length} rows found`);

  // Post-process: parse dates and build syncKeys from the ISO dates
  const results: FdiskAusbildung[] = rawRows.map(a => {
    const kursDatum = parseDate(a.kursDatum);
    return {
      standesbuchNr: a.standesbuchNr,
      kursname: a.kursname,
      kursnummer: a.kursnummer,
      kurzbezeichnung: a.kurzbezeichnung,
      erfolgscode: a.erfolgscode,
      kursDatum,
      ablaufdatum: null,
      ort: null,
      bemerkung: null,
      syncKey: `${a.standesbuchNr}::${a.kursname}::${kursDatum ?? ''}`,
    };
  });

  return results;
}

/**
 * Navigate to a sub-section URL and collect the data rows of its tables.
 * Logs the tables found on the page so wrong-page issues are visible.
 * Returns null if FDISK answered with an error page. Otherwise returns the rows
 * (from FdcLayList tables when present, else from all tables) that carry a
 * DD.MM.YYYY date in the detected date column, together with that column index.
*/
async function navigateAndGetTableRows(
  frame: Frame,
  url: string,
): Promise<{ rows: Array<{ cells: string[] }>; dateColIdx: number } | null> {
  await frame_goto(frame, url);

  const landed = frame.url();
  const title = await frame.title().catch(() => '');

  // Check for FDISK error pages
  if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
    log(` → ERROR page: ${landed}`);
    return null;
  }

  // Show all rows (default is 10)
  await selectAlleAnzeige(frame);

  // Log class, id and row count of every table on the page for diagnostics
  const tableInfo = await frame.evaluate(() => {
    return Array.from(document.querySelectorAll('table')).map((t, i) => {
      const cls = t.className || '(no class)';
      const id = t.id || '';
      const rowCount = t.querySelectorAll('tr').length;
      return `${i}:cls="${cls}"${id ? ` id="${id}"` : ''} rows=${rowCount}`;
    }).join(' | ');
  }).catch(() => 'N/A');
  log(` → tables: ${tableInfo}`);

  // Collect rows from ALL tables, reading input/select values for inline-edit pages
  const allRows = await frame.evaluate(() => {
    const results: Array<{ cells: string[]; tableClass: string }> = [];
    for (const table of Array.from(document.querySelectorAll('table'))) {
      const cls = table.className || '';
      for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
        // Skip rows that are nested inside a child table
        if (tr.closest('table') !== table) continue;
        const tds = Array.from(tr.querySelectorAll('td'));
        if (tds.length < 2) continue; // skip single-cell nav/header rows
        results.push({
          tableClass: cls,
          cells: tds.map(td => {
            // Precedence per cell: non-empty text input, then <select>,
            // then the title of an <a>, then the plain text content.
            const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
            if (input && input.value?.trim()) return input.value.trim();
            const sel = td.querySelector('select') as HTMLSelectElement | null;
            if (sel) {
              const opt = sel.options[sel.selectedIndex];
              return (opt?.text || opt?.value || '').trim();
            }
            // For FDISK list tables, the value is in the title attribute of the <a> inside each cell
            const anchor = td.querySelector('a');
            const atitle = anchor?.getAttribute('title')?.trim();
            if (atitle) return atitle;
            return td.textContent?.trim() ?? '';
          }),
        });
      }
    }
    return results;
  }).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);

  // Prefer rows from FdcLayList-class tables
  const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
  const resultRows = fdcRows.length > 0 ? fdcRows : allRows;

  // Strip \u00A0 (non-breaking space) from all cell values and trim
  const mapped = resultRows.map(r => ({
    cells: r.cells.map(c => c.replace(/\u00A0/g, ' ').trim()),
  }));

  // Find date column dynamically: count date matches per column across ALL rows
  // and pick the column with the MOST matches (avoids picking stray date in nav tables)
  const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
  const dateCountByCol = new Map<number, number>();
  for (const r of mapped) {
    r.cells.forEach((cell, ci) => {
      if (datePattern.test(cell ?? '')) {
        dateCountByCol.set(ci, (dateCountByCol.get(ci) ?? 0) + 1);
      }
    });
  }

  // Highest count wins; on a tie the lowest column index wins.
  let dateColIdx = -1;
  let maxCount = 0;
  for (const [colNum, count] of dateCountByCol) {
    if (count > maxCount || (count === maxCount && (dateColIdx === -1 || colNum < dateColIdx))) {
      dateColIdx = colNum;
      maxCount = count;
    }
  }

  // Only rows with a date in the chosen column count as data rows.
  const dataRows = dateColIdx >= 0
    ? mapped.filter(r => datePattern.test(r.cells[dateColIdx] ?? ''))
    : [];

  log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (date in col ${dateColIdx})`);
  return { rows: dataRows, dateColIdx };
}

/**
 * Navigate to the Beförderungen sub-page and scrape all promotions.
*/
// Parameters of scrapeMemberBefoerderungen:
//   frame            - frame used to load the Beförderungen list page.
//   standesbuchNr    - copied into every result and used as the sync-key prefix.
//   idMitgliedschaft - FDISK membership id, interpolated into the list URL.
//   idPersonen       - FDISK person id, interpolated into the list URL.
//   Returns one entry per data row; an empty array when the page could not be read.
async function scrapeMemberBefoerderungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idPersonen: string,
): Promise<FdiskBefoerderung[]> {
  const url = `${BASE_URL}/fdisk/module/mgvw/befoerderungen/befoerderungenList.aspx`
    + `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
    + `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;

  const result = await navigateAndGetTableRows(frame, url);
  if (!result) return [];

  const { rows, dateColIdx } = result;

  const results: FdiskBefoerderung[] = rows.map((row) => {
    const datum = parseDate(row.cells[dateColIdx]);
    // The next non-empty column after the date holds the Dienstgrad.
    // NOTE(review): rows without any such column are still emitted with an
    // empty dienstgrad — confirm downstream consumers expect that.
    const dienstgrad = row.cells
      .slice(dateColIdx + 1)
      .map((c) => cellText(c))
      .find((v): v is string => v !== null) ?? '';
    const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
    return { standesbuchNr, datum, dienstgrad, syncKey };
  });

  log(` Beförderungen for StNr ${standesbuchNr}: ${results.length} rows`);
  for (const b of results) {
    log(` ${b.datum ?? '—'} ${b.dienstgrad}`);
  }
  return results;
}

/**
 * Navigate to the Untersuchungen sub-page and scrape all medical exams.
*/
// Parameters of scrapeMemberUntersuchungen:
//   frame            - frame used to load the Untersuchungen list page.
//   standesbuchNr    - copied into every result and used as the sync-key prefix.
//   idMitgliedschaft - FDISK membership id, interpolated into the list URL.
//   idPersonen       - FDISK person id, interpolated into the list URL.
//   Returns one entry per data row that has an exam type; an empty array when
//   the navigation lands on an error page.
async function scrapeMemberUntersuchungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idPersonen: string,
): Promise<FdiskUntersuchung[]> {
  // One table row as collected inside the page, tagged with its table's class.
  type RawRow = { cells: string[]; tableClass: string };

  const url = `${BASE_URL}/fdisk/module/mgvw/untersuchungen/UntersuchungenList.aspx`
    + `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
    + `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;

  await frame_goto(frame, url);

  const landed = frame.url();
  const title = await frame.title().catch(() => '');
  if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
    log(` → Untersuchungen ERROR page: ${landed}`);
    return [];
  }

  // Show all rows
  await selectAlleAnzeige(frame);

  // Look for a "Verlauf" / "Historie" / "Alle anzeigen" control; the list page
  // may show only the most recent entry per exam type.
  // NOTE(review): <input type="button"> elements carry their label in `value`,
  // not in textContent, so they can only match via their title attribute here.
  const HISTORY_SELECTOR = 'a, input[type="button"], button';
  const historyTarget = await frame
    .evaluate((selector) => {
      const candidates = Array.from(document.querySelectorAll(selector));
      for (let i = 0; i < candidates.length; i++) {
        const el = candidates[i];
        const text = (el.textContent || '').toLowerCase();
        const elTitle = (el.getAttribute('title') || '').toLowerCase();
        const matches =
          text.includes('verlauf') || text.includes('historie') || text.includes('alle anzeigen') ||
          elTitle.includes('verlauf') || elTitle.includes('historie');
        if (!matches) continue;
        const href = (el as HTMLAnchorElement).href || null;
        // The index lets a later in-page call address the very same element
        return { index: i, href, label: (el as HTMLElement).id || href || text };
      }
      return null;
    }, HISTORY_SELECTOR)
    .catch(() => null);

  if (historyTarget) {
    log(` → Found history link: ${historyTarget.label}, navigating...`);
    try {
      if (historyTarget.href) {
        await frame_goto(frame, historyTarget.href);
      } else {
        // Arm the navigation wait BEFORE clicking; waiting only after the click
        // can miss a navigation that has already started.
        await Promise.all([
          frame.waitForNavigation({ timeout: 5000 }).catch(() => {}),
          frame
            .evaluate(({ selector, index }) => {
              const el = document.querySelectorAll(selector)[index] as HTMLElement | undefined;
              el?.click();
            }, { selector: HISTORY_SELECTOR, index: historyTarget.index })
            // The click may tear down the execution context mid-call
            .catch(() => undefined),
        ]);
      }
      await selectAlleAnzeige(frame);
    } catch (e) {
      log(` → Failed to follow history link: ${e}`);
    }
  }

  // Collect rows from the page that is loaded now (list or history view)
  const allRows = await frame
    .evaluate(() => {
      const collected: Array<{ cells: string[]; tableClass: string }> = [];
      for (const table of Array.from(document.querySelectorAll('table'))) {
        const tableClass = table.className || '';
        for (const tr of Array.from(table.querySelectorAll('tr'))) {
          // Skip rows that belong to a nested child table
          if (tr.closest('table') !== table) continue;
          const tds = Array.from(tr.querySelectorAll('td'));
          if (tds.length < 2) continue;
          collected.push({
            tableClass,
            cells: tds.map((td) => {
              // A text input decides the cell value even when it is empty
              const input = td.querySelector<HTMLInputElement>('input[type="text"], input:not([type])');
              if (input) return input.value?.trim() ?? '';
              const sel = td.querySelector('select');
              if (sel) {
                const opt = sel.options[sel.selectedIndex];
                return (opt?.text || opt?.value || '').trim();
              }
              const anchorTitle = td.querySelector('a')?.getAttribute('title')?.trim();
              if (anchorTitle) return anchorTitle;
              return td.textContent?.trim() ?? '';
            }),
          });
        }
      }
      return collected;
    })
    .catch((e: unknown): RawRow[] => {
      log(` → WARN: reading Untersuchungen rows failed: ${e instanceof Error ? e.message : String(e)}`);
      return [];
    });

  // Prefer rows from FdcLayList-class tables
  const fdcRows = allRows.filter((r) => r.tableClass.includes('FdcLayList'));
  const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
  const mapped = resultRows.map((r) => ({
    cells: r.cells.map((c) => c.replace(/\u00A0/g, ' ').trim()),
  }));

  // Find the date column: the column with the most DD.MM.YYYY cells wins, ties
  // go to the lowest index. This matches navigateAndGetTableRows and keeps a
  // stray date in an unrelated row from deciding the column.
  const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
  const dateCountByCol = new Map<number, number>();
  for (const row of mapped) {
    row.cells.forEach((cell, ci) => {
      if (datePattern.test(cell)) {
        dateCountByCol.set(ci, (dateCountByCol.get(ci) ?? 0) + 1);
      }
    });
  }
  let dateColIdx = -1;
  let maxCount = 0;
  for (const [col, count] of dateCountByCol) {
    if (count > maxCount || (count === maxCount && col < dateColIdx)) {
      dateColIdx = col;
      maxCount = count;
    }
  }

  const dataRows = dateColIdx >= 0
    ? mapped.filter((r) => datePattern.test(r.cells[dateColIdx] ?? ''))
    : [];

  log(` → Untersuchungen: ${allRows.length} total rows, ${dataRows.length} data rows (date in col ${dateColIdx})`);

  const results: FdiskUntersuchung[] = [];
  for (const row of dataRows) {
    // Non-empty cells after the date, in order: Anmerkungen, Art, Ergebnis.
    // NOTE(review): empty cells are dropped before the positional mapping, so a
    // row with an empty Anmerkungen cell shifts Art/Ergebnis one slot to the
    // left — verify against the real column layout.
    const valueCols = row.cells
      .slice(dateColIdx + 1)
      .map((c) => cellText(c))
      .filter((v): v is string => v !== null);
    const anmerkungen = valueCols[0] ?? null;
    const art = valueCols[1] ?? null;
    const ergebnis = valueCols[2] ?? null;

    // Rows without an exam type are not usable
    if (!art) continue;

    const datum = parseDate(row.cells[dateColIdx]);
    const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
    results.push({ standesbuchNr, datum, anmerkungen, art, ergebnis, syncKey });
  }

  log(` Untersuchungen for StNr ${standesbuchNr}: ${results.length} rows`);
  for (const u of results) {
    log(` ${u.datum ?? '—'} [${u.art}] ${u.ergebnis ?? '—'} | ${u.anmerkungen ?? ''}`);
  }
  return results;
}

/**
 * Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
 * This page is a ListEdit page with form fields named by row index pattern:
 * ausstellungsdatum_{i}, gueltig_bis_{i}, behoerde_{i}, nummer_{i}, id_fahrgenehmigungsklassen_{i}
 * Falls back to table-based parsing if field IDs are not found.
*/
// Parameters of scrapeMemberFahrgenehmigungen:
//   frame            - frame used to load the ListEdit page.
//   standesbuchNr    - copied into every result and used as the sync-key prefix.
//   idMitgliedschaft - FDISK membership id, interpolated into the URL.
//   idPersonen       - FDISK person id, interpolated into the URL.
//   idInstanzen      - FDISK instance id, interpolated into the URL.
//   Returns the scraped entries; an empty array on an error page or when no
//   usable data could be located.
async function scrapeMemberFahrgenehmigungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idPersonen: string,
  idInstanzen: string,
): Promise<FdiskFahrgenehmigung[]> {
  type RawFormRow = {
    ausstellungsdatum: string;
    gueltigBis: string;
    behoerde: string;
    nummer: string;
    klasse: string;
  };
  type ScrapedTable = {
    tableClass: string;
    headers: string[];
    rows: Array<{ cells: string[] }>;
  };

  // Upper bound on the row indices probed on the ListEdit form
  const MAX_FORM_ROWS = 100;

  // Select option text can carry a prefix such as "KFZ-Führerschein / B";
  // keep only the part after the last " / ".
  const stripKlassePrefix = (raw: string): string => {
    const parts = raw.split(' / ');
    return (parts[parts.length - 1] ?? raw).trim();
  };

  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const logResults = (entries: FdiskFahrgenehmigung[]): void => {
    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${entries.length} rows`);
    for (const f of entries) {
      log(` ${f.ausstellungsdatum ?? '—'} [${f.klasse}] ${f.behoerde ?? ''} ${f.nummer ?? ''}`);
    }
  };

  const url = `${BASE_URL}/fdisk/module/mgvw/ges_fahrgenehmigungen/Ges_fahrgenehmigungenListEdit.aspx`
    + `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
    + `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`
    + `&searchid_instanzen=${idInstanzen}`;

  await frame_goto(frame, url);

  const landed = frame.url();
  const title = await frame.title().catch(() => '');
  if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
    log(` → Fahrgenehmigungen ERROR page: ${landed}`);
    return [];
  }

  // Show all rows (default is 10)
  await selectAlleAnzeige(frame);

  // Read form fields by name/id pattern: {fieldname}_{rowIndex}
  const rawRows = await frame
    .evaluate((maxRows) => {
      const rows: Array<{
        ausstellungsdatum: string;
        gueltigBis: string;
        behoerde: string;
        nummer: string;
        klasse: string;
      }> = [];

      for (let i = 0; i < maxRows; i++) {
        // Look a field up by either its name or its id for the current row index
        const field = <T extends Element>(tag: string, base: string): T | null =>
          document.querySelector<T>(`${tag}[name="${base}_${i}"], ${tag}[id="${base}_${i}"]`);

        const ausstellungEl = field<HTMLInputElement>('input', 'ausstellungsdatum');
        const gueltigEl = field<HTMLInputElement>('input', 'gueltig_bis');
        const behoerdeEl = field<HTMLInputElement>('input', 'behoerde');
        const nummerEl = field<HTMLInputElement>('input', 'nummer');
        const klasseEl = field<HTMLSelectElement>('select', 'id_fahrgenehmigungsklassen');

        // No field at all for this index: we have passed the last row
        if (!ausstellungEl && !gueltigEl && !behoerdeEl && !nummerEl && !klasseEl) break;

        // Read klasse from the select: selectedIndex first, then the [selected]
        // attribute, then the select's raw value
        let klasse = '';
        if (klasseEl) {
          const chosen = klasseEl.options[klasseEl.selectedIndex];
          if (chosen) {
            klasse = (chosen.text || chosen.value || '').trim();
          }
          if (!klasse) {
            const selectedOpt = klasseEl.querySelector<HTMLOptionElement>('option[selected]');
            if (selectedOpt) {
              klasse = (selectedOpt.text || selectedOpt.value || '').trim();
            }
          }
          if (!klasse && klasseEl.value?.trim()) {
            klasse = klasseEl.value.trim();
          }
        }

        rows.push({
          ausstellungsdatum: ausstellungEl?.value?.trim() ?? '',
          gueltigBis: gueltigEl?.value?.trim() ?? '',
          behoerde: behoerdeEl?.value?.trim() ?? '',
          nummer: nummerEl?.value?.trim() ?? '',
          klasse,
        });
      }
      return rows;
    }, MAX_FORM_ROWS)
    .catch((e: unknown): RawFormRow[] => {
      log(` → WARN: Fahrgenehmigungen form-field extraction failed: ${describeError(e)}`);
      return [];
    });

  log(` → Fahrgenehmigungen form-field extraction: ${rawRows.length} rows found`);
  if (rawRows.length >= MAX_FORM_ROWS) {
    // The probe stops at the cap, so anything beyond it was not read
    log(` → WARN: Fahrgenehmigungen row cap (${MAX_FORM_ROWS}) reached, later rows may be missing`);
  }

  // If the form-field approach found rows, use them
  if (rawRows.length > 0) {
    const VALID_LICENSE_CLASSES = new Set([
      'A', 'A1', 'A2', 'AM', 'B', 'B1', 'BE', 'C', 'C1', 'CE', 'C1E',
      'D', 'D1', 'DE', 'D1E', 'F', 'G', 'L', 'T',
    ]);

    const results: FdiskFahrgenehmigung[] = [];
    for (const row of rawRows) {
      const rawKlasse = cellText(row.klasse);
      if (!rawKlasse) continue;

      const klasse = stripKlassePrefix(rawKlasse);

      // Validate klasse against the whitelist — skip non-class data
      if (!VALID_LICENSE_CLASSES.has(klasse.toUpperCase())) {
        log(` → Skipping invalid klasse: "${klasse}"`);
        continue;
      }

      const ausstellungsdatum = parseDate(row.ausstellungsdatum);
      const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
      results.push({
        standesbuchNr,
        ausstellungsdatum,
        gueltigBis: parseDate(row.gueltigBis),
        behoerde: cellText(row.behoerde),
        nummer: cellText(row.nummer),
        klasse,
        syncKey,
      });
    }

    logResults(results);
    return results;
  }

  // Fallback: table-based parsing
  log(` → Fahrgenehmigungen: no form fields found, falling back to table parsing`);

  const pageData = await frame
    .evaluate(() => {
      // Cell value priority: non-empty text input, select (selected option text,
      // [selected] attribute, raw value), anchor title, plain text
      const extractCellValue = (cell: Element): string => {
        const input = cell.querySelector<HTMLInputElement>('input[type="text"], input:not([type])');
        if (input && input.value?.trim()) return input.value.trim();

        const sel = cell.querySelector('select');
        if (sel) {
          const chosen = sel.options[sel.selectedIndex];
          if (chosen) {
            const t = (chosen.text || chosen.value || '').trim();
            if (t) return t;
          }
          // Fallback: read the selected attribute directly from the HTML
          const selectedOpt = sel.querySelector<HTMLOptionElement>('option[selected]');
          if (selectedOpt) {
            const t = (selectedOpt.text || selectedOpt.value || '').trim();
            if (t) return t;
          }
          if (sel.value?.trim()) return sel.value.trim();
        }

        const anchorTitle = cell.querySelector('a')?.getAttribute('title')?.trim();
        if (anchorTitle) return anchorTitle;
        return cell.textContent?.trim() ?? '';
      };

      const tables: Array<{
        tableClass: string;
        headers: string[];
        rows: Array<{ cells: string[] }>;
      }> = [];

      for (const table of Array.from(document.querySelectorAll('table'))) {
        const headers = Array.from(table.querySelectorAll('thead th, tr th')).map((th) => extractCellValue(th));

        const dataRows: Array<{ cells: string[] }> = [];
        for (const tr of Array.from(table.querySelectorAll('tr'))) {
          // Skip rows of nested tables, single-cell rows and header rows
          if (tr.closest('table') !== table) continue;
          const tds = Array.from(tr.querySelectorAll('td'));
          if (tds.length < 2) continue;
          if (tr.querySelectorAll('th').length > 0) continue;
          dataRows.push({ cells: tds.map((td) => extractCellValue(td)) });
        }

        tables.push({ tableClass: table.className || '', headers, rows: dataRows });
      }
      return tables;
    })
    .catch((e: unknown): ScrapedTable[] => {
      log(` → WARN: Fahrgenehmigungen table extraction failed: ${describeError(e)}`);
      return [];
    });

  // Diagnostic: log all tables found, with a preview of the first 8 cells per row
  pageData.forEach((t, ti) => {
    log(` → table ${ti}: cls="${t.tableClass}" headers=[${t.headers.join(', ')}] dataRows=${t.rows.length}`);
    t.rows.forEach((row, ri) => {
      const preview = row.cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' ');
      log(` row ${ri}: ${preview}`);
    });
  });

  // Prefer a non-empty FdcLayList table; otherwise take the table with the most rows
  const bestTable =
    pageData.find((t) => t.tableClass.includes('FdcLayList') && t.rows.length > 0)
    ?? pageData.filter((t) => t.rows.length > 0).sort((a, b) => b.rows.length - a.rows.length)[0];

  if (!bestTable || bestTable.rows.length === 0) {
    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: no data table found`);
    return [];
  }

  const headers = bestTable.headers.map((h) => h.toLowerCase());
  log(` Fahrgenehmigungen headers: [${headers.join(', ')}]`);

  let klasseIdx = headers.findIndex((h) => h.includes('klasse') || h.includes('fahrgenehmigung'));
  let ausstellungIdx = headers.findIndex((h) => h.includes('ausstellung'));
  const gueltigIdx = headers.findIndex((h) => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
  const behoerdeIdx = headers.findIndex((h) => h.includes('behörde') || h.includes('behorde'));
  const nummerIdx = headers.findIndex((h) => h.includes('nummer') || h.includes('nr'));

  // NOTE(review): this set differs from VALID_LICENSE_CLASSES used on the
  // form-field path; it is only used to locate the Klasse column by content.
  const KNOWN_KLASSEN = new Set([
    'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
    'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
  ]);
  const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
  const sampleRows = bestTable.rows.slice(0, 3);

  // Headers did not identify the Klasse column: inspect the first rows' data
  if (klasseIdx === -1) {
    for (const row of sampleRows) {
      klasseIdx = row.cells.findIndex((cell) => {
        const val = (cell ?? '').trim();
        // Match known klassen or values containing "Führerschein" etc.
        return KNOWN_KLASSEN.has(val.toUpperCase()) || /führerschein|lenkberechtigung/i.test(val);
      });
      if (klasseIdx >= 0) {
        log(` Fahrgenehmigungen: found Klasse in column ${klasseIdx} by data inspection`);
        break;
      }
    }
  }

  // Headers did not identify the issue-date column: take the first date-looking
  // cell (other than the Klasse column) in the first rows
  if (ausstellungIdx === -1) {
    for (const row of sampleRows) {
      ausstellungIdx = row.cells.findIndex(
        (cell, ci) => ci !== klasseIdx && datePattern.test((cell ?? '').trim()),
      );
      if (ausstellungIdx >= 0) break;
    }
  }

  log(` Fahrgenehmigungen column map: klasse=${klasseIdx} ausstellung=${ausstellungIdx} gueltig=${gueltigIdx} behoerde=${behoerdeIdx} nummer=${nummerIdx}`);

  if (klasseIdx === -1) {
    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: could not determine Klasse column. Returning empty.`);
    return [];
  }

  // Returns the cell at `idx`, or undefined when the column was not located
  const cellAt = (row: { cells: string[] }, idx: number): string | undefined =>
    idx >= 0 ? row.cells[idx] : undefined;

  const results: FdiskFahrgenehmigung[] = [];
  for (const row of bestTable.rows) {
    const rawKlasse = cellText(row.cells[klasseIdx]);
    if (!rawKlasse) continue;
    // Skip header/navigation text and stray dates that ended up in the Klasse column
    if (/klasse|fahrgenehmigung|ausstellung|datensätze|information|tiefennavigation/i.test(rawKlasse)) continue;
    if (datePattern.test(rawKlasse)) continue;

    // Normalise exactly like the form-field path so both paths yield the same
    // klasse (and therefore the same syncKey) for the same record
    const klasse = stripKlassePrefix(rawKlasse);
    if (!klasse) continue;

    const ausstellungsdatum = parseDate(cellAt(row, ausstellungIdx));
    const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
    results.push({
      standesbuchNr,
      ausstellungsdatum,
      gueltigBis: parseDate(cellAt(row, gueltigIdx)),
      behoerde: cellText(cellAt(row, behoerdeIdx)),
      nummer: cellText(cellAt(row, nummerIdx)),
      klasse,
      syncKey,
    });
  }

  logResults(results);
  return results;
}

// Legacy export kept for compatibility — delegates to the new unified flow.
// Returns an empty array when the member has no detail URL; otherwise loads the
// detail page and hands over to scrapeAusbildungenFromDetailPage.
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  if (!member.detailUrl) return [];

  await frame_goto(frame, member.detailUrl);

  // Try to extract IDs from the detail URL; either may be null when the
  // corresponding query parameter is absent
  const urlObj = new URL(member.detailUrl, frame.url());
  const idMitgliedschaft = urlObj.searchParams.get('id_mitgliedschaften');
  const idPersonen = urlObj.searchParams.get('id_personen');

  return scrapeAusbildungenFromDetailPage(frame, member, idMitgliedschaft, idPersonen);
}