From 992ca8e104c33a33811d7a203e766866ba41ab4a Mon Sep 17 00:00:00 2001 From: Matthias Hochmeister Date: Sat, 14 Mar 2026 13:54:49 +0100 Subject: [PATCH] update --- sync/src/db.ts | 5 +- sync/src/scraper.ts | 183 ++++++++++++++++++++++++++++---------------- 2 files changed, 119 insertions(+), 69 deletions(-) diff --git a/sync/src/db.ts b/sync/src/db.ts index 1cfdfac..d4b78c1 100644 --- a/sync/src/db.ts +++ b/sync/src/db.ts @@ -353,11 +353,12 @@ async function syncFahrgenehmigungen( let neu = 0, updated = 0, skipped = 0; // One-time cleanup: remove wrongly-stored records from broken parsing + // Includes klasse='Ausstellungsdatum' and any klasse that looks like a date (DD.MM.YYYY) const cleaned = await client.query( - `DELETE FROM fahrgenehmigungen WHERE klasse = 'Ausstellungsdatum'` + `DELETE FROM fahrgenehmigungen WHERE klasse = 'Ausstellungsdatum' OR klasse ~ '^\\d{2}\\.\\d{2}\\.\\d{4}$'` ); if (cleaned.rowCount && cleaned.rowCount > 0) { - log(`Cleaned up ${cleaned.rowCount} invalid Fahrgenehmigung records (klasse='Ausstellungsdatum')`); + log(`Cleaned up ${cleaned.rowCount} invalid Fahrgenehmigung records (wrong klasse values)`); } for (const f of fahrgenehmigungen) { diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index 06abd65..8a6ab4e 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -630,7 +630,6 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe async function navigateAndGetTableRows( frame: Frame, url: string, - opts?: { skipDateFilter?: boolean }, ): Promise | null> { await frame_goto(frame, url); @@ -692,16 +691,6 @@ async function navigateAndGetTableRows( const mapped = resultRows.map(r => ({ cells: r.cells })); - if (opts?.skipDateFilter) { - // Diagnostic: log all row contents for pages where cells[0] is not a date - for (let i = 0; i < mapped.length; i++) { - const preview = mapped[i].cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' '); - log(` → row ${i}: ${preview}`); - } - log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, returning all ${mapped.length} rows (skipDateFilter)`); - return mapped; - } - // Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date const datePattern = /^\d{2}\.\d{2}\.\d{4}$/; const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? '')); @@ -778,8 +767,8 @@ async function scrapeMemberUntersuchungen( /** * Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries. - * Uses header detection to find column indices dynamically, since this is a - * ListEdit page where cells[0] is NOT a date (it's the Klasse name). + * This page is a ListEdit page with a different structure than normal list pages. + * Uses its own page evaluation to read headers + //