update
This commit is contained in:
@@ -630,6 +630,7 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe
|
||||
async function navigateAndGetTableRows(
|
||||
frame: Frame,
|
||||
url: string,
|
||||
opts?: { skipDateFilter?: boolean },
|
||||
): Promise<Array<{ cells: string[] }> | null> {
|
||||
await frame_goto(frame, url);
|
||||
|
||||
@@ -689,11 +690,21 @@ async function navigateAndGetTableRows(
|
||||
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
||||
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
||||
|
||||
const mapped = resultRows.map(r => ({ cells: r.cells }));
|
||||
|
||||
if (opts?.skipDateFilter) {
|
||||
// Diagnostic: log all row contents for pages where cells[0] is not a date
|
||||
for (let i = 0; i < mapped.length; i++) {
|
||||
const preview = mapped[i].cells.slice(0, 8).map((c, j) => `[${j}]="${c}"`).join(' ');
|
||||
log(` → row ${i}: ${preview}`);
|
||||
}
|
||||
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, returning all ${mapped.length} rows (skipDateFilter)`);
|
||||
return mapped;
|
||||
}
|
||||
|
||||
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||
const dataRows = resultRows
|
||||
.map(r => ({ cells: r.cells }))
|
||||
.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
||||
const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
||||
|
||||
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
|
||||
|
||||
@@ -767,6 +778,8 @@ async function scrapeMemberUntersuchungen(
|
||||
|
||||
/**
|
||||
* Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
|
||||
* Uses header detection to find column indices dynamically, since this is a
|
||||
* ListEdit page where cells[0] is NOT a date (it's the Klasse name).
|
||||
*/
|
||||
async function scrapeMemberFahrgenehmigungen(
|
||||
frame: Frame,
|
||||
@@ -780,22 +793,85 @@ async function scrapeMemberFahrgenehmigungen(
|
||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`
|
||||
+ `&searchid_instanzen=${idInstanzen}`;
|
||||
|
||||
const rows = await navigateAndGetTableRows(frame, url);
|
||||
if (!rows) return [];
|
||||
const rows = await navigateAndGetTableRows(frame, url, { skipDateFilter: true });
|
||||
if (!rows || rows.length === 0) return [];
|
||||
|
||||
// Known Führerscheinklassen for validation
|
||||
const KNOWN_KLASSEN = new Set([
|
||||
'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
|
||||
'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
|
||||
]);
|
||||
|
||||
// Try header detection: find a row where cells contain keywords
|
||||
let klasseIdx = -1, ausstellungIdx = -1, gueltigIdx = -1, behoerdeIdx = -1, nummerIdx = -1;
|
||||
let headerRowIdx = -1;
|
||||
|
||||
for (let i = 0; i < Math.min(rows.length, 3); i++) {
|
||||
const lower = rows[i].cells.map(c => c.toLowerCase());
|
||||
const hasKlasse = lower.some(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
||||
const hasDatum = lower.some(h => h.includes('ausstellung') || h.includes('datum'));
|
||||
if (hasKlasse || hasDatum) {
|
||||
headerRowIdx = i;
|
||||
klasseIdx = lower.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
||||
ausstellungIdx = lower.findIndex(h => h.includes('ausstellung'));
|
||||
gueltigIdx = lower.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
|
||||
behoerdeIdx = lower.findIndex(h => h.includes('behörde') || h.includes('behorde'));
|
||||
nummerIdx = lower.findIndex(h => h.includes('nummer') || h.includes('nr'));
|
||||
log(` Fahrgenehmigungen header detected at row ${i}: klasse=${klasseIdx} ausstellung=${ausstellungIdx} gueltig=${gueltigIdx} behoerde=${behoerdeIdx} nummer=${nummerIdx}`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If no header found, try positional detection from first data row
|
||||
if (headerRowIdx === -1) {
|
||||
// Check if first row's cells[0] looks like a Klasse (not a date)
|
||||
const first = rows[0].cells[0]?.trim().toUpperCase() ?? '';
|
||||
if (KNOWN_KLASSEN.has(first)) {
|
||||
// Layout: 0=Klasse, 1=Ausstellungsdatum, 2=Gültig bis, 3=Behörde, 4=Nummer
|
||||
klasseIdx = 0; ausstellungIdx = 1; gueltigIdx = 2; behoerdeIdx = 3; nummerIdx = 4;
|
||||
log(` Fahrgenehmigungen: no header, but cells[0]="${first}" is a known Klasse → positional layout A`);
|
||||
} else if (/^\d{2}\.\d{2}\.\d{4}$/.test(rows[0].cells[0]?.trim() ?? '')) {
|
||||
// Original layout: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Klasse
|
||||
klasseIdx = 4; ausstellungIdx = 0; gueltigIdx = 1; behoerdeIdx = 2; nummerIdx = 3;
|
||||
log(` Fahrgenehmigungen: no header, cells[0] is a date → original positional layout B`);
|
||||
} else {
|
||||
// Unknown layout — log and try to find a column with a known Klasse
|
||||
for (let ci = 0; ci < (rows[0]?.cells.length ?? 0); ci++) {
|
||||
if (KNOWN_KLASSEN.has(rows[0].cells[ci]?.trim().toUpperCase() ?? '')) {
|
||||
klasseIdx = ci;
|
||||
log(` Fahrgenehmigungen: found known Klasse in column ${ci} → using that as klasseIdx`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (klasseIdx === -1) {
|
||||
log(` Fahrgenehmigungen: unknown layout, cannot determine columns. Returning empty.`);
|
||||
return [];
|
||||
}
|
||||
// Guess remaining columns relative to klasseIdx
|
||||
ausstellungIdx = klasseIdx + 1;
|
||||
gueltigIdx = klasseIdx + 2;
|
||||
behoerdeIdx = klasseIdx + 3;
|
||||
nummerIdx = klasseIdx + 4;
|
||||
}
|
||||
}
|
||||
|
||||
const dataRows = headerRowIdx >= 0 ? rows.slice(headerRowIdx + 1) : rows;
|
||||
|
||||
const results: FdiskFahrgenehmigung[] = [];
|
||||
for (const row of rows) {
|
||||
// Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse
|
||||
const klasse = cellText(row.cells[4]);
|
||||
for (const row of dataRows) {
|
||||
const klasse = cellText(klasseIdx >= 0 ? row.cells[klasseIdx] : undefined);
|
||||
if (!klasse) continue;
|
||||
const ausstellungsdatum = parseDate(row.cells[0]);
|
||||
// Skip rows that look like headers (contain "klasse", "ausstellung", etc.)
|
||||
if (/klasse|fahrgenehmigung|ausstellung/i.test(klasse)) continue;
|
||||
|
||||
const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined);
|
||||
const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
|
||||
results.push({
|
||||
standesbuchNr,
|
||||
ausstellungsdatum,
|
||||
gueltigBis: parseDate(row.cells[1]),
|
||||
behoerde: cellText(row.cells[2]),
|
||||
nummer: cellText(row.cells[3]),
|
||||
gueltigBis: parseDate(gueltigIdx >= 0 ? row.cells[gueltigIdx] : undefined),
|
||||
behoerde: cellText(behoerdeIdx >= 0 ? row.cells[behoerdeIdx] : undefined),
|
||||
nummer: cellText(nummerIdx >= 0 ? row.cells[nummerIdx] : undefined),
|
||||
klasse,
|
||||
syncKey,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user