From cf6b3ad2d698a1cc7d89cde7acf610820937cf3c Mon Sep 17 00:00:00 2001 From: Matthias Hochmeister Date: Sat, 14 Mar 2026 13:45:53 +0100 Subject: [PATCH] update --- sync/src/db.ts | 8 ++++ sync/src/scraper.ts | 100 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 12 deletions(-) diff --git a/sync/src/db.ts b/sync/src/db.ts index e424e8f..1cfdfac 100644 --- a/sync/src/db.ts +++ b/sync/src/db.ts @@ -352,6 +352,14 @@ async function syncFahrgenehmigungen( ): Promise<{ neu: number; updated: number; skipped: number }> { let neu = 0, updated = 0, skipped = 0; + // One-time cleanup: remove wrongly-stored records from broken parsing + const cleaned = await client.query( + `DELETE FROM fahrgenehmigungen WHERE klasse = 'Ausstellungsdatum'` + ); + if (cleaned.rowCount && cleaned.rowCount > 0) { + log(`Cleaned up ${cleaned.rowCount} invalid Fahrgenehmigung records (klasse='Ausstellungsdatum')`); + } + for (const f of fahrgenehmigungen) { const result = await client.query<{ user_id: string }>( `SELECT user_id FROM mitglieder_profile WHERE fdisk_standesbuch_nr = $1`, diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index 666f39a..06abd65 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -630,6 +630,7 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe async function navigateAndGetTableRows( frame: Frame, url: string, + opts?: { skipDateFilter?: boolean }, ): Promise | null> { await frame_goto(frame, url); @@ -689,11 +690,21 @@ async function navigateAndGetTableRows( const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList')); const resultRows = fdcRows.length > 0 ? fdcRows : allRows; + const mapped = resultRows.map(r => ({ cells: r.cells })); + + if (opts?.skipDateFilter) { + // Diagnostic: log all row contents for pages where cells[0] is not a date + for (let i = 0; i < mapped.length; i++) { + const preview = mapped[i].cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' '); + log(` → row ${i}: ${preview}`); + } + log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, returning all ${mapped.length} rows (skipDateFilter)`); + return mapped; + } + // Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date const datePattern = /^\d{2}\.\d{2}\.\d{4}$/; - const dataRows = resultRows - .map(r => ({ cells: r.cells })) - .filter(r => datePattern.test(r.cells[0]?.trim() ?? '')); + const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? '')); log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`); @@ -767,6 +778,8 @@ async function scrapeMemberUntersuchungen( /** * Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries. + * Uses header detection to find column indices dynamically, since this is a + * ListEdit page where cells[0] is NOT a date (it's the Klasse name). */ async function scrapeMemberFahrgenehmigungen( frame: Frame, @@ -780,22 +793,85 @@ async function scrapeMemberFahrgenehmigungen( + `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=` + `&searchid_instanzen=${idInstanzen}`; - const rows = await navigateAndGetTableRows(frame, url); - if (!rows) return []; + const rows = await navigateAndGetTableRows(frame, url, { skipDateFilter: true }); + if (!rows || rows.length === 0) return []; + + // Known Führerscheinklassen for validation + const KNOWN_KLASSEN = new Set([ + 'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE', + 'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E', + ]); + + // Try header detection: find a row where cells contain keywords + let klasseIdx = -1, ausstellungIdx = -1, gueltigIdx = -1, behoerdeIdx = -1, nummerIdx = -1; + let headerRowIdx = -1; + + for (let i = 0; i < Math.min(rows.length, 3); i++) { + const lower = rows[i].cells.map(c => c.toLowerCase()); + const hasKlasse = lower.some(h => h.includes('klasse') || h.includes('fahrgenehmigung')); + const hasDatum = lower.some(h => h.includes('ausstellung') || h.includes('datum')); + if (hasKlasse || hasDatum) { + headerRowIdx = i; + klasseIdx = lower.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung')); + ausstellungIdx = lower.findIndex(h => h.includes('ausstellung')); + gueltigIdx = lower.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf')); + behoerdeIdx = lower.findIndex(h => h.includes('behörde') || h.includes('behorde')); + nummerIdx = lower.findIndex(h => h.includes('nummer') || h.includes('nr')); + log(` Fahrgenehmigungen header detected at row ${i}: klasse=${klasseIdx} ausstellung=${ausstellungIdx} gueltig=${gueltigIdx} behoerde=${behoerdeIdx} nummer=${nummerIdx}`); + break; + } + } + + // If no header found, try positional detection from first data row + if (headerRowIdx === -1) { + // Check if first row's cells[0] looks like a Klasse (not a date) + const first = rows[0].cells[0]?.trim().toUpperCase() ?? ''; + if (KNOWN_KLASSEN.has(first)) { + // Layout: 0=Klasse, 1=Ausstellungsdatum, 2=Gültig bis, 3=Behörde, 4=Nummer + klasseIdx = 0; ausstellungIdx = 1; gueltigIdx = 2; behoerdeIdx = 3; nummerIdx = 4; + log(` Fahrgenehmigungen: no header, but cells[0]="${first}" is a known Klasse → positional layout A`); + } else if (/^\d{2}\.\d{2}\.\d{4}$/.test(rows[0].cells[0]?.trim() ?? '')) { + // Original layout: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Klasse + klasseIdx = 4; ausstellungIdx = 0; gueltigIdx = 1; behoerdeIdx = 2; nummerIdx = 3; + log(` Fahrgenehmigungen: no header, cells[0] is a date → original positional layout B`); + } else { + // Unknown layout — log and try to find a column with a known Klasse + for (let ci = 0; ci < (rows[0]?.cells.length ?? 0); ci++) { + if (KNOWN_KLASSEN.has(rows[0].cells[ci]?.trim().toUpperCase() ?? '')) { + klasseIdx = ci; + log(` Fahrgenehmigungen: found known Klasse in column ${ci} → using that as klasseIdx`); + break; + } + } + if (klasseIdx === -1) { + log(` Fahrgenehmigungen: unknown layout, cannot determine columns. Returning empty.`); + return []; + } + // Guess remaining columns relative to klasseIdx + ausstellungIdx = klasseIdx + 1; + gueltigIdx = klasseIdx + 2; + behoerdeIdx = klasseIdx + 3; + nummerIdx = klasseIdx + 4; + } + } + + const dataRows = headerRowIdx >= 0 ? rows.slice(headerRowIdx + 1) : rows; const results: FdiskFahrgenehmigung[] = []; - for (const row of rows) { - // Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse - const klasse = cellText(row.cells[4]); + for (const row of dataRows) { + const klasse = cellText(klasseIdx >= 0 ? row.cells[klasseIdx] : undefined); if (!klasse) continue; - const ausstellungsdatum = parseDate(row.cells[0]); + // Skip rows that look like headers (contain "klasse", "ausstellung", etc.) + if (/klasse|fahrgenehmigung|ausstellung/i.test(klasse)) continue; + + const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined); const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`; results.push({ standesbuchNr, ausstellungsdatum, - gueltigBis: parseDate(row.cells[1]), - behoerde: cellText(row.cells[2]), - nummer: cellText(row.cells[3]), + gueltigBis: parseDate(gueltigIdx >= 0 ? row.cells[gueltigIdx] : undefined), + behoerde: cellText(behoerdeIdx >= 0 ? row.cells[behoerdeIdx] : undefined), + nummer: cellText(nummerIdx >= 0 ? row.cells[nummerIdx] : undefined), klasse, syncKey, });