From 3171fe1ce53a2854268f44c6f68b4609a3e95742 Mon Sep 17 00:00:00 2001 From: Matthias Hochmeister Date: Fri, 13 Mar 2026 21:41:25 +0100 Subject: [PATCH] update --- sync/src/scraper.ts | 87 ++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index f387328..382e6b2 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -628,45 +628,69 @@ async function navigateAndGetTableRows( const landed = frame.url(); const title = await frame.title().catch(() => ''); - log(` → landed: ${landed} | title: "${title}"`); // Check for FDISK error pages if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) { - log(` → ERROR page, skipping`); + log(` → ERROR page: ${landed}`); return null; } - // Try table.FdcLayList first, then any table with tbody rows - const selectors = ['table.FdcLayList', 'table']; - for (const sel of selectors) { - const exists = await frame.$(sel).then(el => !!el).catch(() => false); - if (!exists) continue; + // Log all table classes on the page for diagnostics (first time only) + const tableInfo = await frame.evaluate(() => { + return Array.from(document.querySelectorAll('table')).map((t, i) => { + const cls = t.className || '(no class)'; + const id = t.id || ''; + const rowCount = t.querySelectorAll('tr').length; + return `${i}:cls="${cls}"${id ? ` id="${id}"` : ''} rows=${rowCount}`; + }).join(' | '); + }).catch(() => 'N/A'); + log(` → tables: ${tableInfo}`); - const rows = await frame.$$eval(`${sel} tbody tr`, (trs) => - trs.map((tr) => ({ - cells: Array.from(tr.querySelectorAll('td')).map(td => { - const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null; - if (input) return input.value?.trim() ?? 
''; - const select = td.querySelector('select') as HTMLSelectElement | null; - if (select) { - const opt = select.options[select.selectedIndex]; - return (opt?.text || opt?.value || '').trim(); - } - return td.textContent?.trim() ?? ''; - }), - })) - ).catch(() => [] as Array<{ cells: string[] }>); - - if (rows.length > 0) { - log(` → found ${rows.length} rows via "${sel}"`); - return rows; + // Collect rows from ALL tables, reading input/select values for inline-edit pages + const allRows = await frame.evaluate(() => { + const results: Array<{ cells: string[]; tableClass: string }> = []; + for (const table of Array.from(document.querySelectorAll('table'))) { + const cls = table.className || ''; + for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) { + // Skip rows that are nested inside a child table + if (tr.closest('table') !== table) continue; + const tds = Array.from(tr.querySelectorAll('td')); + if (tds.length < 2) continue; // skip single-cell nav/header rows + results.push({ + tableClass: cls, + cells: tds.map(td => { + const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null; + if (input) return input.value?.trim() ?? ''; + const sel = td.querySelector('select') as HTMLSelectElement | null; + if (sel) { + const opt = sel.options[sel.selectedIndex]; + return (opt?.text || opt?.value || '').trim(); + } + // For FDISK list tables, the value is in the title attribute of the <a> inside each cell + const anchor = td.querySelector('a'); + const atitle = anchor?.getAttribute('title')?.trim(); + if (atitle) return atitle; + return td.textContent?.trim() ?? ''; + }), + }); + } } - } + return results; + }).catch(() => [] as Array<{ cells: string[]; tableClass: string }>); - // No table rows found — page might be empty or structured differently - const bodyText = await frame.evaluate(() => document.body?.textContent?.slice(0, 300) ?? '').catch(() => ''); - log(` → no table rows found. 
Body preview: ${bodyText.replace(/\s+/g, ' ')}`); - return []; + // Prefer rows from FdcLayList-class tables + const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList')); + const resultRows = fdcRows.length > 0 ? fdcRows : allRows; + + // Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date + const datePattern = /^\d{2}\.\d{2}\.\d{4}$/; + const dataRows = resultRows + .map(r => ({ cells: r.cells })) + .filter(r => datePattern.test(r.cells[0]?.trim() ?? '')); + + log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`); + + return dataRows; } /** @@ -687,9 +711,8 @@ async function scrapeMemberBefoerderungen( const results: FdiskBefoerderung[] = []; for (const row of rows) { - const dienstgrad = cellText(row.cells[1]); - if (!dienstgrad) continue; const datum = parseDate(row.cells[0]); + const dienstgrad = cellText(row.cells[1]) ?? ''; const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`; results.push({ standesbuchNr, datum, dienstgrad, syncKey }); }