update
This commit is contained in:
@@ -353,11 +353,12 @@ async function syncFahrgenehmigungen(
|
|||||||
let neu = 0, updated = 0, skipped = 0;
|
let neu = 0, updated = 0, skipped = 0;
|
||||||
|
|
||||||
// One-time cleanup: remove wrongly-stored records from broken parsing
|
// One-time cleanup: remove wrongly-stored records from broken parsing
|
||||||
|
// Includes klasse='Ausstellungsdatum' and any klasse that looks like a date (DD.MM.YYYY)
|
||||||
const cleaned = await client.query(
|
const cleaned = await client.query(
|
||||||
`DELETE FROM fahrgenehmigungen WHERE klasse = 'Ausstellungsdatum'`
|
`DELETE FROM fahrgenehmigungen WHERE klasse = 'Ausstellungsdatum' OR klasse ~ '^\\d{2}\\.\\d{2}\\.\\d{4}$'`
|
||||||
);
|
);
|
||||||
if (cleaned.rowCount && cleaned.rowCount > 0) {
|
if (cleaned.rowCount && cleaned.rowCount > 0) {
|
||||||
log(`Cleaned up ${cleaned.rowCount} invalid Fahrgenehmigung records (klasse='Ausstellungsdatum')`);
|
log(`Cleaned up ${cleaned.rowCount} invalid Fahrgenehmigung records (wrong klasse values)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const f of fahrgenehmigungen) {
|
for (const f of fahrgenehmigungen) {
|
||||||
|
|||||||
@@ -630,7 +630,6 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe
|
|||||||
async function navigateAndGetTableRows(
|
async function navigateAndGetTableRows(
|
||||||
frame: Frame,
|
frame: Frame,
|
||||||
url: string,
|
url: string,
|
||||||
opts?: { skipDateFilter?: boolean },
|
|
||||||
): Promise<Array<{ cells: string[] }> | null> {
|
): Promise<Array<{ cells: string[] }> | null> {
|
||||||
await frame_goto(frame, url);
|
await frame_goto(frame, url);
|
||||||
|
|
||||||
@@ -692,16 +691,6 @@ async function navigateAndGetTableRows(
|
|||||||
|
|
||||||
const mapped = resultRows.map(r => ({ cells: r.cells }));
|
const mapped = resultRows.map(r => ({ cells: r.cells }));
|
||||||
|
|
||||||
if (opts?.skipDateFilter) {
|
|
||||||
// Diagnostic: log all row contents for pages where cells[0] is not a date
|
|
||||||
for (let i = 0; i < mapped.length; i++) {
|
|
||||||
const preview = mapped[i].cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' ');
|
|
||||||
log(` → row ${i}: ${preview}`);
|
|
||||||
}
|
|
||||||
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, returning all ${mapped.length} rows (skipDateFilter)`);
|
|
||||||
return mapped;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
||||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
||||||
@@ -778,8 +767,8 @@ async function scrapeMemberUntersuchungen(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
|
* Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
|
||||||
* Uses header detection to find column indices dynamically, since this is a
|
* This page is a ListEdit page with a different structure than normal list pages.
|
||||||
* ListEdit page where cells[0] is NOT a date (it's the Klasse name).
|
* Uses its own page evaluation to read <th> headers + <td>/<input>/<select> data.
|
||||||
*/
|
*/
|
||||||
async function scrapeMemberFahrgenehmigungen(
|
async function scrapeMemberFahrgenehmigungen(
|
||||||
frame: Frame,
|
frame: Frame,
|
||||||
@@ -793,76 +782,136 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`
|
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`
|
||||||
+ `&searchid_instanzen=${idInstanzen}`;
|
+ `&searchid_instanzen=${idInstanzen}`;
|
||||||
|
|
||||||
const rows = await navigateAndGetTableRows(frame, url, { skipDateFilter: true });
|
await frame_goto(frame, url);
|
||||||
if (!rows || rows.length === 0) return [];
|
|
||||||
|
|
||||||
// Known Führerscheinklassen for validation
|
const landed = frame.url();
|
||||||
|
const title = await frame.title().catch(() => '');
|
||||||
|
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||||
|
log(` → Fahrgenehmigungen ERROR page: ${landed}`);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom page evaluation: extract headers (<th>) and data rows (<td> with input/select)
|
||||||
|
const pageData = await frame.evaluate(() => {
|
||||||
|
const extractCellValue = (cell: Element): string => {
|
||||||
|
const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||||
|
if (input) return input.value?.trim() ?? '';
|
||||||
|
const sel = cell.querySelector('select') as HTMLSelectElement | null;
|
||||||
|
if (sel) {
|
||||||
|
const opt = sel.options[sel.selectedIndex];
|
||||||
|
return (opt?.text || opt?.value || '').trim();
|
||||||
|
}
|
||||||
|
const anchor = cell.querySelector('a');
|
||||||
|
const atitle = anchor?.getAttribute('title')?.trim();
|
||||||
|
if (atitle) return atitle;
|
||||||
|
return cell.textContent?.trim() ?? '';
|
||||||
|
};
|
||||||
|
|
||||||
|
const tables: Array<{
|
||||||
|
tableClass: string;
|
||||||
|
headers: string[];
|
||||||
|
rows: Array<{ cells: string[] }>;
|
||||||
|
}> = [];
|
||||||
|
|
||||||
|
for (const table of Array.from(document.querySelectorAll('table'))) {
|
||||||
|
const cls = table.className || '';
|
||||||
|
// Extract headers from <th> elements
|
||||||
|
const thElements = Array.from(table.querySelectorAll('thead th, tr th'));
|
||||||
|
const headers = thElements.map(th => extractCellValue(th));
|
||||||
|
// Extract data from <td> elements
|
||||||
|
const dataRows: Array<{ cells: string[] }> = [];
|
||||||
|
for (const tr of Array.from(table.querySelectorAll('tr'))) {
|
||||||
|
if (tr.closest('table') !== table) continue;
|
||||||
|
const tds = Array.from(tr.querySelectorAll('td'));
|
||||||
|
if (tds.length < 2) continue;
|
||||||
|
// Skip rows that contain <th> (header rows)
|
||||||
|
if (tr.querySelectorAll('th').length > 0) continue;
|
||||||
|
dataRows.push({ cells: tds.map(td => extractCellValue(td)) });
|
||||||
|
}
|
||||||
|
tables.push({ tableClass: cls, headers, rows: dataRows });
|
||||||
|
}
|
||||||
|
return tables;
|
||||||
|
}).catch(() => [] as Array<{ tableClass: string; headers: string[]; rows: Array<{ cells: string[] }> }>);
|
||||||
|
|
||||||
|
// Diagnostic: log all tables found
|
||||||
|
for (let ti = 0; ti < pageData.length; ti++) {
|
||||||
|
const t = pageData[ti];
|
||||||
|
log(` → table ${ti}: cls="${t.tableClass}" headers=[${t.headers.join(', ')}] dataRows=${t.rows.length}`);
|
||||||
|
for (let ri = 0; ri < t.rows.length; ri++) {
|
||||||
|
const preview = t.rows[ri].cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' ');
|
||||||
|
log(` row ${ri}: ${preview}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick the best table: prefer FdcLayList tables, then largest table with data
|
||||||
|
const bestTable = pageData.find(t => t.tableClass.includes('FdcLayList') && t.rows.length > 0)
|
||||||
|
|| pageData.filter(t => t.rows.length > 0).sort((a, b) => b.rows.length - a.rows.length)[0];
|
||||||
|
|
||||||
|
if (!bestTable || bestTable.rows.length === 0) {
|
||||||
|
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: no data table found`);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const headers = bestTable.headers.map(h => h.toLowerCase());
|
||||||
|
log(` Fahrgenehmigungen headers: [${headers.join(', ')}]`);
|
||||||
|
|
||||||
|
// Map column indices from headers
|
||||||
|
let klasseIdx = headers.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
||||||
|
let ausstellungIdx = headers.findIndex(h => h.includes('ausstellung'));
|
||||||
|
let gueltigIdx = headers.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
|
||||||
|
let behoerdeIdx = headers.findIndex(h => h.includes('behörde') || h.includes('behorde'));
|
||||||
|
let nummerIdx = headers.findIndex(h => h.includes('nummer') || h.includes('nr'));
|
||||||
|
|
||||||
|
// If headers didn't help, try scanning data rows for known Führerscheinklassen
|
||||||
const KNOWN_KLASSEN = new Set([
|
const KNOWN_KLASSEN = new Set([
|
||||||
'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
|
'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
|
||||||
'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
|
'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Try header detection: find a row where cells contain keywords
|
|
||||||
let klasseIdx = -1, ausstellungIdx = -1, gueltigIdx = -1, behoerdeIdx = -1, nummerIdx = -1;
|
|
||||||
let headerRowIdx = -1;
|
|
||||||
|
|
||||||
for (let i = 0; i < Math.min(rows.length, 3); i++) {
|
|
||||||
const lower = rows[i].cells.map(c => c.toLowerCase());
|
|
||||||
const hasKlasse = lower.some(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
|
||||||
const hasDatum = lower.some(h => h.includes('ausstellung') || h.includes('datum'));
|
|
||||||
if (hasKlasse || hasDatum) {
|
|
||||||
headerRowIdx = i;
|
|
||||||
klasseIdx = lower.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
|
||||||
ausstellungIdx = lower.findIndex(h => h.includes('ausstellung'));
|
|
||||||
gueltigIdx = lower.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
|
|
||||||
behoerdeIdx = lower.findIndex(h => h.includes('behörde') || h.includes('behorde'));
|
|
||||||
nummerIdx = lower.findIndex(h => h.includes('nummer') || h.includes('nr'));
|
|
||||||
log(` Fahrgenehmigungen header detected at row ${i}: klasse=${klasseIdx} ausstellung=${ausstellungIdx} gueltig=${gueltigIdx} behoerde=${behoerdeIdx} nummer=${nummerIdx}`);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no header found, try positional detection from first data row
|
|
||||||
if (headerRowIdx === -1) {
|
|
||||||
// Check if first row's cells[0] looks like a Klasse (not a date)
|
|
||||||
const first = rows[0].cells[0]?.trim().toUpperCase() ?? '';
|
|
||||||
if (KNOWN_KLASSEN.has(first)) {
|
|
||||||
// Layout: 0=Klasse, 1=Ausstellungsdatum, 2=Gültig bis, 3=Behörde, 4=Nummer
|
|
||||||
klasseIdx = 0; ausstellungIdx = 1; gueltigIdx = 2; behoerdeIdx = 3; nummerIdx = 4;
|
|
||||||
log(` Fahrgenehmigungen: no header, but cells[0]="${first}" is a known Klasse → positional layout A`);
|
|
||||||
} else if (/^\d{2}\.\d{2}\.\d{4}$/.test(rows[0].cells[0]?.trim() ?? '')) {
|
|
||||||
// Original layout: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Klasse
|
|
||||||
klasseIdx = 4; ausstellungIdx = 0; gueltigIdx = 1; behoerdeIdx = 2; nummerIdx = 3;
|
|
||||||
log(` Fahrgenehmigungen: no header, cells[0] is a date → original positional layout B`);
|
|
||||||
} else {
|
|
||||||
// Unknown layout — log and try to find a column with a known Klasse
|
|
||||||
for (let ci = 0; ci < (rows[0]?.cells.length ?? 0); ci++) {
|
|
||||||
if (KNOWN_KLASSEN.has(rows[0].cells[ci]?.trim().toUpperCase() ?? '')) {
|
|
||||||
klasseIdx = ci;
|
|
||||||
log(` Fahrgenehmigungen: found known Klasse in column ${ci} → using that as klasseIdx`);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (klasseIdx === -1) {
|
if (klasseIdx === -1) {
|
||||||
log(` Fahrgenehmigungen: unknown layout, cannot determine columns. Returning empty.`);
|
// Scan first 3 data rows to find which column contains a known Klasse
|
||||||
|
for (const row of bestTable.rows.slice(0, 3)) {
|
||||||
|
for (let ci = 0; ci < row.cells.length; ci++) {
|
||||||
|
if (KNOWN_KLASSEN.has(row.cells[ci]?.trim().toUpperCase())) {
|
||||||
|
klasseIdx = ci;
|
||||||
|
log(` Fahrgenehmigungen: found Klasse in column ${ci} by data inspection`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (klasseIdx >= 0) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If still no klasse column found, also try matching date columns for Ausstellung
|
||||||
|
if (ausstellungIdx === -1) {
|
||||||
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
|
for (const row of bestTable.rows.slice(0, 3)) {
|
||||||
|
for (let ci = 0; ci < row.cells.length; ci++) {
|
||||||
|
if (ci === klasseIdx) continue;
|
||||||
|
if (datePattern.test(row.cells[ci]?.trim())) {
|
||||||
|
ausstellungIdx = ci;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ausstellungIdx >= 0) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log(` Fahrgenehmigungen column map: klasse=${klasseIdx} ausstellung=${ausstellungIdx} gueltig=${gueltigIdx} behoerde=${behoerdeIdx} nummer=${nummerIdx}`);
|
||||||
|
|
||||||
|
if (klasseIdx === -1) {
|
||||||
|
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: could not determine Klasse column. Returning empty.`);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
// Guess remaining columns relative to klasseIdx
|
|
||||||
ausstellungIdx = klasseIdx + 1;
|
|
||||||
gueltigIdx = klasseIdx + 2;
|
|
||||||
behoerdeIdx = klasseIdx + 3;
|
|
||||||
nummerIdx = klasseIdx + 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const dataRows = headerRowIdx >= 0 ? rows.slice(headerRowIdx + 1) : rows;
|
|
||||||
|
|
||||||
const results: FdiskFahrgenehmigung[] = [];
|
const results: FdiskFahrgenehmigung[] = [];
|
||||||
for (const row of dataRows) {
|
for (const row of bestTable.rows) {
|
||||||
const klasse = cellText(klasseIdx >= 0 ? row.cells[klasseIdx] : undefined);
|
const klasse = cellText(row.cells[klasseIdx]);
|
||||||
if (!klasse) continue;
|
if (!klasse) continue;
|
||||||
// Skip rows that look like headers (contain "klasse", "ausstellung", etc.)
|
// Skip non-data rows (pagination, info text, header-like rows)
|
||||||
if (/klasse|fahrgenehmigung|ausstellung/i.test(klasse)) continue;
|
if (/klasse|fahrgenehmigung|ausstellung|datensätze|information|tiefennavigation/i.test(klasse)) continue;
|
||||||
|
// Skip rows where klasse looks like a date (clearly wrong column)
|
||||||
|
if (/^\d{2}\.\d{2}\.\d{4}$/.test(klasse)) continue;
|
||||||
|
|
||||||
const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined);
|
const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined);
|
||||||
const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
|
const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
|
||||||
|
|||||||
Reference in New Issue
Block a user