update
This commit is contained in:
@@ -242,7 +242,7 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
|
|||||||
const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
|
const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
|
||||||
|
|
||||||
// Ausbildungen
|
// Ausbildungen
|
||||||
const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member);
|
const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
|
||||||
ausbildungen.push(...quals);
|
ausbildungen.push(...quals);
|
||||||
|
|
||||||
// Beförderungen
|
// Beförderungen
|
||||||
@@ -688,76 +688,186 @@ async function scrapeDetailProfileFields(frame: Frame): Promise<{
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrape Ausbildungen from the detail page (already loaded).
|
* Scrape Ausbildungen by navigating to the AusbildungenListEdit.aspx page.
|
||||||
* Navigates to the Ausbildung sub-page if needed.
|
* This is a ListEdit page (like Fahrgenehmigungen) with <input>/<select> elements.
|
||||||
*/
|
*/
|
||||||
async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
async function scrapeAusbildungenFromDetailPage(
|
||||||
// Look for Ausbildungsliste section — it's likely a table or list
|
frame: Frame,
|
||||||
const ausbildungSection = frame.locator('text=Ausbildung, text=Ausbildungsliste').first();
|
member: FdiskMember,
|
||||||
const hasSec = await ausbildungSection.isVisible().catch(() => false);
|
idMitgliedschaft?: string | null,
|
||||||
|
idPersonen?: string | null,
|
||||||
if (!hasSec) {
|
): Promise<FdiskAusbildung[]> {
|
||||||
// Try navigating to an Ausbildung tab/link if present
|
// If we don't have the IDs, we cannot navigate to the Ausbildungen page
|
||||||
const ausbildungLink = frame.locator('a:has-text("Ausbildung")').first();
|
if (!idMitgliedschaft || !idPersonen) {
|
||||||
const hasLink = await ausbildungLink.isVisible().catch(() => false);
|
log(` Ausbildungen for StNr ${member.standesbuchNr}: missing mitgliedschaft/personen IDs, skipping`);
|
||||||
if (hasLink) {
|
return [];
|
||||||
await ausbildungLink.click();
|
|
||||||
await frame.waitForLoadState('networkidle').catch(() => {});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the qualification table
|
const url = `${BASE_URL}/fdisk/module/mgvw/ausbildungen/AusbildungenListEdit.aspx`
|
||||||
const tables = await frame.$$('table');
|
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
||||||
const ausbildungen: FdiskAusbildung[] = [];
|
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
||||||
|
|
||||||
|
await frame_goto(frame, url);
|
||||||
|
|
||||||
|
const landed = frame.url();
|
||||||
|
const title = await frame.title().catch(() => '');
|
||||||
|
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||||
|
log(` → Ausbildungen ERROR page: ${landed}`);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dump HTML for debugging
|
||||||
|
await dumpHtml(frame, `ausbildungen_StNr${member.standesbuchNr}`);
|
||||||
|
|
||||||
|
// This is a ListEdit page — read form fields by ID pattern or parse table with extractCellValue
|
||||||
|
const ausbildungen = await frame.evaluate((stNr: string) => {
|
||||||
|
const extractCellValue = (cell: Element): string => {
|
||||||
|
const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||||
|
if (input && input.value?.trim()) return input.value.trim();
|
||||||
|
const sel = cell.querySelector('select') as HTMLSelectElement | null;
|
||||||
|
if (sel) {
|
||||||
|
// Try selectedIndex first
|
||||||
|
const idx = sel.selectedIndex;
|
||||||
|
if (idx >= 0 && sel.options[idx]) {
|
||||||
|
const t = (sel.options[idx].text || sel.options[idx].value || '').trim();
|
||||||
|
if (t) return t;
|
||||||
|
}
|
||||||
|
// Fallback: read the selected attribute directly from HTML
|
||||||
|
const selectedOpt = sel.querySelector('option[selected]') as HTMLOptionElement | null;
|
||||||
|
if (selectedOpt) {
|
||||||
|
const t = (selectedOpt.text || selectedOpt.value || '').trim();
|
||||||
|
if (t) return t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const anchor = cell.querySelector('a');
|
||||||
|
const atitle = anchor?.getAttribute('title')?.trim();
|
||||||
|
if (atitle) return atitle;
|
||||||
|
return cell.textContent?.replace(/\u00A0/g, ' ').trim() ?? '';
|
||||||
|
};
|
||||||
|
|
||||||
|
const results: Array<{
|
||||||
|
standesbuchNr: string;
|
||||||
|
kursname: string | null;
|
||||||
|
kursDatum: string | null;
|
||||||
|
ablaufdatum: string | null;
|
||||||
|
ort: string | null;
|
||||||
|
bemerkung: string | null;
|
||||||
|
syncKey: string;
|
||||||
|
}> = [];
|
||||||
|
|
||||||
|
// Collect rows from all tables, find the data table
|
||||||
|
const tables = Array.from(document.querySelectorAll('table'));
|
||||||
|
let bestRows: Array<{ cells: string[] }> = [];
|
||||||
|
let bestHeaders: string[] = [];
|
||||||
|
|
||||||
for (const table of tables) {
|
for (const table of tables) {
|
||||||
const rows = await table.$$eval('tr', (rows) => {
|
const rows: Array<{ cells: string[] }> = [];
|
||||||
return rows.map(row => ({
|
const headerCells: string[] = [];
|
||||||
cells: Array.from(row.querySelectorAll('td, th')).map(c => (c as Element).textContent?.trim() ?? ''),
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
|
|
||||||
if (rows.length < 2) continue;
|
// Get headers
|
||||||
|
for (const th of Array.from(table.querySelectorAll('thead th, tr:first-child th'))) {
|
||||||
|
headerCells.push(extractCellValue(th));
|
||||||
|
}
|
||||||
|
|
||||||
const header = rows[0].cells.map(c => c.toLowerCase());
|
// Get data rows
|
||||||
const isAusbildungTable =
|
for (const tr of Array.from(table.querySelectorAll('tr'))) {
|
||||||
header.some(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
|
if (tr.closest('table') !== table) continue;
|
||||||
|
const tds = Array.from(tr.querySelectorAll('td'));
|
||||||
|
if (tds.length < 2) continue;
|
||||||
|
if (tr.querySelectorAll('th').length > 0) continue;
|
||||||
|
rows.push({ cells: tds.map(td => extractCellValue(td)) });
|
||||||
|
}
|
||||||
|
|
||||||
if (!isAusbildungTable) continue;
|
if (rows.length > bestRows.length) {
|
||||||
|
bestRows = rows;
|
||||||
|
bestHeaders = headerCells;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const kursnameIdx = header.findIndex(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
|
if (bestRows.length === 0) return results;
|
||||||
const datumIdx = header.findIndex(h => h.includes('datum') || h.includes('abschluss'));
|
|
||||||
const ablaufIdx = header.findIndex(h => h.includes('ablauf') || h.includes('gültig'));
|
|
||||||
const ortIdx = header.findIndex(h => h.includes('ort'));
|
|
||||||
const bemIdx = header.findIndex(h => h.includes('bem') || h.includes('info'));
|
|
||||||
|
|
||||||
for (const row of rows.slice(1)) {
|
// Try to find column indices from headers
|
||||||
const kursname = cellText(row.cells[kursnameIdx >= 0 ? kursnameIdx : 0]);
|
const hdr = bestHeaders.map(h => h.toLowerCase());
|
||||||
|
let kursnameIdx = hdr.findIndex(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
|
||||||
|
let datumIdx = hdr.findIndex(h => h.includes('datum') || h.includes('abschluss'));
|
||||||
|
let ablaufIdx = hdr.findIndex(h => h.includes('ablauf') || h.includes('gültig'));
|
||||||
|
let ortIdx = hdr.findIndex(h => h.includes('ort'));
|
||||||
|
let bemIdx = hdr.findIndex(h => h.includes('bem') || h.includes('info'));
|
||||||
|
|
||||||
|
// If headers didn't help, scan data for date-like columns and text columns
|
||||||
|
if (kursnameIdx === -1 && bestRows.length > 0) {
|
||||||
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
|
// Find date columns
|
||||||
|
const dateCols = new Set<number>();
|
||||||
|
const textCols: number[] = [];
|
||||||
|
for (const row of bestRows.slice(0, 3)) {
|
||||||
|
for (let ci = 0; ci < row.cells.length; ci++) {
|
||||||
|
const v = row.cells[ci]?.trim();
|
||||||
|
if (!v) continue;
|
||||||
|
if (datePattern.test(v)) dateCols.add(ci);
|
||||||
|
else if (v.length > 2 && !/^[\d.,]+$/.test(v)) textCols.push(ci);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The longest text column is likely the Kursname
|
||||||
|
if (textCols.length > 0) {
|
||||||
|
let maxLen = 0;
|
||||||
|
for (const ci of textCols) {
|
||||||
|
const len = (bestRows[0]?.cells[ci] ?? '').length;
|
||||||
|
if (len > maxLen) { maxLen = len; kursnameIdx = ci; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// First date column is Datum, second is Ablaufdatum
|
||||||
|
const sortedDates = Array.from(dateCols).sort((a, b) => a - b);
|
||||||
|
if (sortedDates.length > 0 && datumIdx === -1) datumIdx = sortedDates[0];
|
||||||
|
if (sortedDates.length > 1 && ablaufIdx === -1) ablaufIdx = sortedDates[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const row of bestRows) {
|
||||||
|
const kursname = (kursnameIdx >= 0 ? row.cells[kursnameIdx] : row.cells[0])?.trim() || null;
|
||||||
if (!kursname) continue;
|
if (!kursname) continue;
|
||||||
|
// Skip header-like rows
|
||||||
|
if (/kurs|ausbildung|bezeichnung|datensätze|tiefennavigation/i.test(kursname)) continue;
|
||||||
|
|
||||||
const kursDatum = parseDate(datumIdx >= 0 ? row.cells[datumIdx] : null);
|
const rawDatum = datumIdx >= 0 ? row.cells[datumIdx]?.trim() : null;
|
||||||
const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;
|
const rawAblauf = ablaufIdx >= 0 ? row.cells[ablaufIdx]?.trim() : null;
|
||||||
|
const rawOrt = ortIdx >= 0 ? row.cells[ortIdx]?.trim() || null : null;
|
||||||
|
const rawBem = bemIdx >= 0 ? row.cells[bemIdx]?.trim() || null : null;
|
||||||
|
|
||||||
ausbildungen.push({
|
// parseDate is not available inside evaluate; return raw values
|
||||||
standesbuchNr: member.standesbuchNr,
|
results.push({
|
||||||
|
standesbuchNr: stNr,
|
||||||
kursname,
|
kursname,
|
||||||
kursDatum,
|
kursDatum: rawDatum || null,
|
||||||
ablaufdatum: parseDate(ablaufIdx >= 0 ? row.cells[ablaufIdx] : null),
|
ablaufdatum: rawAblauf || null,
|
||||||
ort: ortIdx >= 0 ? cellText(row.cells[ortIdx]) : null,
|
ort: rawOrt,
|
||||||
bemerkung: bemIdx >= 0 ? cellText(row.cells[bemIdx]) : null,
|
bemerkung: rawBem,
|
||||||
syncKey,
|
syncKey: `${stNr}::${kursname}::${rawDatum ?? ''}`,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
break; // only process the first Ausbildung table found
|
return results;
|
||||||
}
|
}, member.standesbuchNr).catch(() => [] as FdiskAusbildung[]);
|
||||||
|
|
||||||
|
// Post-process: parse dates and rebuild syncKeys
|
||||||
|
const results: FdiskAusbildung[] = ausbildungen.map(a => {
|
||||||
|
const kursDatum = parseDate(a.kursDatum);
|
||||||
|
return {
|
||||||
|
standesbuchNr: a.standesbuchNr,
|
||||||
|
kursname: a.kursname,
|
||||||
|
kursDatum,
|
||||||
|
ablaufdatum: parseDate(a.ablaufdatum),
|
||||||
|
ort: a.ort,
|
||||||
|
bemerkung: a.bemerkung,
|
||||||
|
syncKey: `${a.standesbuchNr}::${a.kursname}::${kursDatum ?? ''}`,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
// Debug: dump HTML when no Ausbildungen found
|
// Debug: dump HTML when no Ausbildungen found
|
||||||
if (ausbildungen.length === 0) {
|
if (results.length === 0) {
|
||||||
await dumpHtml(frame, `ausbildungen_StNr${member.standesbuchNr}`);
|
await dumpHtml(frame, `ausbildungen_empty_StNr${member.standesbuchNr}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ausbildungen;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -768,7 +878,7 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe
|
|||||||
async function navigateAndGetTableRows(
|
async function navigateAndGetTableRows(
|
||||||
frame: Frame,
|
frame: Frame,
|
||||||
url: string,
|
url: string,
|
||||||
): Promise<Array<{ cells: string[] }> | null> {
|
): Promise<{ rows: Array<{ cells: string[] }>; dateColIdx: number } | null> {
|
||||||
await frame_goto(frame, url);
|
await frame_goto(frame, url);
|
||||||
|
|
||||||
const landed = frame.url();
|
const landed = frame.url();
|
||||||
@@ -827,13 +937,29 @@ async function navigateAndGetTableRows(
|
|||||||
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
||||||
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
||||||
|
|
||||||
const mapped = resultRows.map(r => ({ cells: r.cells }));
|
// Strip \u00A0 (non-breaking space) from all cell values and trim
|
||||||
|
const mapped = resultRows.map(r => ({
|
||||||
|
cells: r.cells.map(c => c.replace(/\u00A0/g, ' ').trim()),
|
||||||
|
}));
|
||||||
|
|
||||||
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
// Find date column dynamically: look for a DD.MM.YYYY pattern in any column
|
||||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
const dataRows = mapped.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
let dateColIdx = -1;
|
||||||
|
for (const r of mapped) {
|
||||||
|
for (let ci = 0; ci < r.cells.length; ci++) {
|
||||||
|
if (datePattern.test(r.cells[ci] ?? '')) {
|
||||||
|
dateColIdx = ci;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (dateColIdx >= 0) break;
|
||||||
|
}
|
||||||
|
|
||||||
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
|
const dataRows = dateColIdx >= 0
|
||||||
|
? mapped.filter(r => datePattern.test(r.cells[dateColIdx] ?? ''))
|
||||||
|
: [];
|
||||||
|
|
||||||
|
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (date in col ${dateColIdx})`);
|
||||||
|
|
||||||
// Debug: dump HTML when no data rows found
|
// Debug: dump HTML when no data rows found
|
||||||
if (dataRows.length === 0) {
|
if (dataRows.length === 0) {
|
||||||
@@ -841,7 +967,7 @@ async function navigateAndGetTableRows(
|
|||||||
await dumpHtml(frame, `navigateAndGetTableRows_${urlSlug}`);
|
await dumpHtml(frame, `navigateAndGetTableRows_${urlSlug}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return dataRows;
|
return { rows: dataRows, dateColIdx };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -857,13 +983,19 @@ async function scrapeMemberBefoerderungen(
|
|||||||
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
||||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
||||||
|
|
||||||
const rows = await navigateAndGetTableRows(frame, url);
|
const result = await navigateAndGetTableRows(frame, url);
|
||||||
if (!rows) return [];
|
if (!result) return [];
|
||||||
|
|
||||||
|
const { rows, dateColIdx } = result;
|
||||||
const results: FdiskBefoerderung[] = [];
|
const results: FdiskBefoerderung[] = [];
|
||||||
for (const row of rows) {
|
for (const row of rows) {
|
||||||
const datum = parseDate(row.cells[0]);
|
const datum = parseDate(row.cells[dateColIdx]);
|
||||||
const dienstgrad = cellText(row.cells[1]) ?? '';
|
// The next non-empty column after the date holds the Dienstgrad
|
||||||
|
let dienstgrad = '';
|
||||||
|
for (let ci = dateColIdx + 1; ci < row.cells.length; ci++) {
|
||||||
|
const v = cellText(row.cells[ci]);
|
||||||
|
if (v) { dienstgrad = v; break; }
|
||||||
|
}
|
||||||
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
|
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
|
||||||
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
|
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
|
||||||
}
|
}
|
||||||
@@ -885,22 +1017,32 @@ async function scrapeMemberUntersuchungen(
|
|||||||
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
||||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
||||||
|
|
||||||
const rows = await navigateAndGetTableRows(frame, url);
|
const result = await navigateAndGetTableRows(frame, url);
|
||||||
if (!rows) return [];
|
if (!result) return [];
|
||||||
|
|
||||||
|
const { rows, dateColIdx } = result;
|
||||||
const results: FdiskUntersuchung[] = [];
|
const results: FdiskUntersuchung[] = [];
|
||||||
for (const row of rows) {
|
for (const row of rows) {
|
||||||
// Columns: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
|
// Collect non-empty values from columns after the date column
|
||||||
const art = cellText(row.cells[2]);
|
const valueCols: string[] = [];
|
||||||
|
for (let ci = dateColIdx + 1; ci < row.cells.length; ci++) {
|
||||||
|
const v = cellText(row.cells[ci]);
|
||||||
|
if (v !== null) valueCols.push(v);
|
||||||
|
}
|
||||||
|
// Original layout: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
|
||||||
|
// With spacer columns the date may not be at 0; use relative offsets from collected values
|
||||||
|
const anmerkungen = valueCols[0] ?? null;
|
||||||
|
const art = valueCols[1] ?? null;
|
||||||
|
const ergebnis = valueCols[2] ?? null;
|
||||||
if (!art) continue;
|
if (!art) continue;
|
||||||
const datum = parseDate(row.cells[0]);
|
const datum = parseDate(row.cells[dateColIdx]);
|
||||||
const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
|
const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
|
||||||
results.push({
|
results.push({
|
||||||
standesbuchNr,
|
standesbuchNr,
|
||||||
datum,
|
datum,
|
||||||
anmerkungen: cellText(row.cells[1]),
|
anmerkungen,
|
||||||
art,
|
art,
|
||||||
ergebnis: cellText(row.cells[3]),
|
ergebnis,
|
||||||
syncKey,
|
syncKey,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -911,8 +1053,9 @@ async function scrapeMemberUntersuchungen(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
|
* Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
|
||||||
* This page is a ListEdit page with a different structure than normal list pages.
|
* This page is a ListEdit page with form fields named by row index pattern:
|
||||||
* Uses its own page evaluation to read <th> headers + <td>/<input>/<select> data.
|
* ausstellungsdatum_{i}, gueltig_bis_{i}, behoerde_{i}, nummer_{i}, id_fahrgenehmigungsklassen_{i}
|
||||||
|
* Falls back to table-based parsing if field IDs are not found.
|
||||||
*/
|
*/
|
||||||
async function scrapeMemberFahrgenehmigungen(
|
async function scrapeMemberFahrgenehmigungen(
|
||||||
frame: Frame,
|
frame: Frame,
|
||||||
@@ -935,7 +1078,88 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Custom page evaluation: extract headers (<th>) and data rows (<td> with input/select)
|
// Dump HTML for diagnostics
|
||||||
|
await dumpHtml(frame, `fahrgenehmigungen_StNr${standesbuchNr}`);
|
||||||
|
|
||||||
|
// Read form fields by ID pattern: {fieldname}_{rowIndex}
|
||||||
|
const rawRows = await frame.evaluate(() => {
|
||||||
|
const rows: Array<{
|
||||||
|
ausstellungsdatum: string;
|
||||||
|
gueltigBis: string;
|
||||||
|
behoerde: string;
|
||||||
|
nummer: string;
|
||||||
|
klasse: string;
|
||||||
|
}> = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < 100; i++) {
|
||||||
|
// Try to find any field for this row index — if none exist, we've passed all rows
|
||||||
|
const ausstellungEl = document.querySelector(`input[name="ausstellungsdatum_${i}"], input[id="ausstellungsdatum_${i}"]`) as HTMLInputElement | null;
|
||||||
|
const gueltigEl = document.querySelector(`input[name="gueltig_bis_${i}"], input[id="gueltig_bis_${i}"]`) as HTMLInputElement | null;
|
||||||
|
const behoerdeEl = document.querySelector(`input[name="behoerde_${i}"], input[id="behoerde_${i}"]`) as HTMLInputElement | null;
|
||||||
|
const nummerEl = document.querySelector(`input[name="nummer_${i}"], input[id="nummer_${i}"]`) as HTMLInputElement | null;
|
||||||
|
const klasseEl = document.querySelector(`select[name="id_fahrgenehmigungsklassen_${i}"], select[id="id_fahrgenehmigungsklassen_${i}"]`) as HTMLSelectElement | null;
|
||||||
|
|
||||||
|
// If no field found at all, stop
|
||||||
|
if (!ausstellungEl && !gueltigEl && !behoerdeEl && !nummerEl && !klasseEl) break;
|
||||||
|
|
||||||
|
// Read klasse from select: try selectedIndex, then fallback to [selected] attribute
|
||||||
|
let klasse = '';
|
||||||
|
if (klasseEl) {
|
||||||
|
const idx = klasseEl.selectedIndex;
|
||||||
|
if (idx >= 0 && klasseEl.options[idx]) {
|
||||||
|
klasse = (klasseEl.options[idx].text || klasseEl.options[idx].value || '').trim();
|
||||||
|
}
|
||||||
|
if (!klasse) {
|
||||||
|
const selectedOpt = klasseEl.querySelector('option[selected]') as HTMLOptionElement | null;
|
||||||
|
if (selectedOpt) {
|
||||||
|
klasse = (selectedOpt.text || selectedOpt.value || '').trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!klasse && klasseEl.value?.trim()) {
|
||||||
|
klasse = klasseEl.value.trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rows.push({
|
||||||
|
ausstellungsdatum: ausstellungEl?.value?.trim() ?? '',
|
||||||
|
gueltigBis: gueltigEl?.value?.trim() ?? '',
|
||||||
|
behoerde: behoerdeEl?.value?.trim() ?? '',
|
||||||
|
nummer: nummerEl?.value?.trim() ?? '',
|
||||||
|
klasse,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return rows;
|
||||||
|
}).catch(() => [] as Array<{ ausstellungsdatum: string; gueltigBis: string; behoerde: string; nummer: string; klasse: string }>);
|
||||||
|
|
||||||
|
log(` → Fahrgenehmigungen form-field extraction: ${rawRows.length} rows found`);
|
||||||
|
|
||||||
|
// If form-field approach found rows, use them
|
||||||
|
if (rawRows.length > 0) {
|
||||||
|
const results: FdiskFahrgenehmigung[] = [];
|
||||||
|
for (const row of rawRows) {
|
||||||
|
const klasse = cellText(row.klasse);
|
||||||
|
if (!klasse) continue;
|
||||||
|
const ausstellungsdatum = parseDate(row.ausstellungsdatum);
|
||||||
|
const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
|
||||||
|
results.push({
|
||||||
|
standesbuchNr,
|
||||||
|
ausstellungsdatum,
|
||||||
|
gueltigBis: parseDate(row.gueltigBis),
|
||||||
|
behoerde: cellText(row.behoerde),
|
||||||
|
nummer: cellText(row.nummer),
|
||||||
|
klasse,
|
||||||
|
syncKey,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${results.length} rows`);
|
||||||
|
for (const f of results) log(` ${f.ausstellungsdatum ?? '—'} [${f.klasse}] ${f.behoerde ?? ''} ${f.nummer ?? ''}`);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: table-based parsing (original approach with extractCellValue)
|
||||||
|
log(` → Fahrgenehmigungen: no form fields found, falling back to table parsing`);
|
||||||
|
|
||||||
const pageData = await frame.evaluate(() => {
|
const pageData = await frame.evaluate(() => {
|
||||||
const extractCellValue = (cell: Element): string => {
|
const extractCellValue = (cell: Element): string => {
|
||||||
const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||||
@@ -947,13 +1171,13 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
const t = (sel.options[idx].text || sel.options[idx].value || '').trim();
|
const t = (sel.options[idx].text || sel.options[idx].value || '').trim();
|
||||||
if (t) return t;
|
if (t) return t;
|
||||||
}
|
}
|
||||||
if (sel.value?.trim()) return sel.value.trim();
|
// Fallback: read the selected attribute directly from HTML
|
||||||
const selectedOpt = sel.querySelector('option[selected]') as HTMLOptionElement | null;
|
const selectedOpt = sel.querySelector('option[selected]') as HTMLOptionElement | null;
|
||||||
if (selectedOpt) {
|
if (selectedOpt) {
|
||||||
const t = (selectedOpt.text || selectedOpt.value || '').trim();
|
const t = (selectedOpt.text || selectedOpt.value || '').trim();
|
||||||
if (t) return t;
|
if (t) return t;
|
||||||
}
|
}
|
||||||
// fall through to textContent if select is empty
|
if (sel.value?.trim()) return sel.value.trim();
|
||||||
}
|
}
|
||||||
const anchor = cell.querySelector('a');
|
const anchor = cell.querySelector('a');
|
||||||
const atitle = anchor?.getAttribute('title')?.trim();
|
const atitle = anchor?.getAttribute('title')?.trim();
|
||||||
@@ -969,16 +1193,13 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
|
|
||||||
for (const table of Array.from(document.querySelectorAll('table'))) {
|
for (const table of Array.from(document.querySelectorAll('table'))) {
|
||||||
const cls = table.className || '';
|
const cls = table.className || '';
|
||||||
// Extract headers from <th> elements
|
|
||||||
const thElements = Array.from(table.querySelectorAll('thead th, tr th'));
|
const thElements = Array.from(table.querySelectorAll('thead th, tr th'));
|
||||||
const headers = thElements.map(th => extractCellValue(th));
|
const headers = thElements.map(th => extractCellValue(th));
|
||||||
// Extract data from <td> elements
|
|
||||||
const dataRows: Array<{ cells: string[] }> = [];
|
const dataRows: Array<{ cells: string[] }> = [];
|
||||||
for (const tr of Array.from(table.querySelectorAll('tr'))) {
|
for (const tr of Array.from(table.querySelectorAll('tr'))) {
|
||||||
if (tr.closest('table') !== table) continue;
|
if (tr.closest('table') !== table) continue;
|
||||||
const tds = Array.from(tr.querySelectorAll('td'));
|
const tds = Array.from(tr.querySelectorAll('td'));
|
||||||
if (tds.length < 2) continue;
|
if (tds.length < 2) continue;
|
||||||
// Skip rows that contain <th> (header rows)
|
|
||||||
if (tr.querySelectorAll('th').length > 0) continue;
|
if (tr.querySelectorAll('th').length > 0) continue;
|
||||||
dataRows.push({ cells: tds.map(td => extractCellValue(td)) });
|
dataRows.push({ cells: tds.map(td => extractCellValue(td)) });
|
||||||
}
|
}
|
||||||
@@ -997,7 +1218,6 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pick the best table: prefer FdcLayList tables, then largest table with data
|
|
||||||
const bestTable = pageData.find(t => t.tableClass.includes('FdcLayList') && t.rows.length > 0)
|
const bestTable = pageData.find(t => t.tableClass.includes('FdcLayList') && t.rows.length > 0)
|
||||||
|| pageData.filter(t => t.rows.length > 0).sort((a, b) => b.rows.length - a.rows.length)[0];
|
|| pageData.filter(t => t.rows.length > 0).sort((a, b) => b.rows.length - a.rows.length)[0];
|
||||||
|
|
||||||
@@ -1009,24 +1229,23 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
const headers = bestTable.headers.map(h => h.toLowerCase());
|
const headers = bestTable.headers.map(h => h.toLowerCase());
|
||||||
log(` Fahrgenehmigungen headers: [${headers.join(', ')}]`);
|
log(` Fahrgenehmigungen headers: [${headers.join(', ')}]`);
|
||||||
|
|
||||||
// Map column indices from headers
|
|
||||||
let klasseIdx = headers.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
let klasseIdx = headers.findIndex(h => h.includes('klasse') || h.includes('fahrgenehmigung'));
|
||||||
let ausstellungIdx = headers.findIndex(h => h.includes('ausstellung'));
|
let ausstellungIdx = headers.findIndex(h => h.includes('ausstellung'));
|
||||||
let gueltigIdx = headers.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
|
let gueltigIdx = headers.findIndex(h => h.includes('gültig') || h.includes('gultig') || h.includes('ablauf'));
|
||||||
let behoerdeIdx = headers.findIndex(h => h.includes('behörde') || h.includes('behorde'));
|
let behoerdeIdx = headers.findIndex(h => h.includes('behörde') || h.includes('behorde'));
|
||||||
let nummerIdx = headers.findIndex(h => h.includes('nummer') || h.includes('nr'));
|
let nummerIdx = headers.findIndex(h => h.includes('nummer') || h.includes('nr'));
|
||||||
|
|
||||||
// If headers didn't help, try scanning data rows for known Führerscheinklassen
|
|
||||||
const KNOWN_KLASSEN = new Set([
|
const KNOWN_KLASSEN = new Set([
|
||||||
'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
|
'AM', 'A1', 'A2', 'A', 'B', 'BE', 'C1', 'C1E', 'C', 'CE',
|
||||||
'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
|
'D1', 'D1E', 'D', 'DE', 'F', 'L', 'L17', 'B+E', 'C+E', 'D+E',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
if (klasseIdx === -1) {
|
if (klasseIdx === -1) {
|
||||||
// Scan first 3 data rows to find which column contains a known Klasse
|
|
||||||
for (const row of bestTable.rows.slice(0, 3)) {
|
for (const row of bestTable.rows.slice(0, 3)) {
|
||||||
for (let ci = 0; ci < row.cells.length; ci++) {
|
for (let ci = 0; ci < row.cells.length; ci++) {
|
||||||
if (KNOWN_KLASSEN.has(row.cells[ci]?.trim().toUpperCase())) {
|
const val = row.cells[ci]?.trim();
|
||||||
|
// Match known klassen or values containing "Führerschein" etc.
|
||||||
|
if (KNOWN_KLASSEN.has(val.toUpperCase()) || /führerschein|lenkberechtigung/i.test(val)) {
|
||||||
klasseIdx = ci;
|
klasseIdx = ci;
|
||||||
log(` Fahrgenehmigungen: found Klasse in column ${ci} by data inspection`);
|
log(` Fahrgenehmigungen: found Klasse in column ${ci} by data inspection`);
|
||||||
break;
|
break;
|
||||||
@@ -1036,7 +1255,6 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If still no klasse column found, also try matching date columns for Ausstellung
|
|
||||||
if (ausstellungIdx === -1) {
|
if (ausstellungIdx === -1) {
|
||||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
for (const row of bestTable.rows.slice(0, 3)) {
|
for (const row of bestTable.rows.slice(0, 3)) {
|
||||||
@@ -1055,7 +1273,7 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
|
|
||||||
if (klasseIdx === -1) {
|
if (klasseIdx === -1) {
|
||||||
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: could not determine Klasse column. Returning empty.`);
|
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: could not determine Klasse column. Returning empty.`);
|
||||||
await dumpHtml(frame, `fahrgenehmigungen_StNr${standesbuchNr}`);
|
await dumpHtml(frame, `fahrgenehmigungen_fallback_StNr${standesbuchNr}`);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1063,9 +1281,7 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
for (const row of bestTable.rows) {
|
for (const row of bestTable.rows) {
|
||||||
const klasse = cellText(row.cells[klasseIdx]);
|
const klasse = cellText(row.cells[klasseIdx]);
|
||||||
if (!klasse) continue;
|
if (!klasse) continue;
|
||||||
// Skip non-data rows (pagination, info text, header-like rows)
|
|
||||||
if (/klasse|fahrgenehmigung|ausstellung|datensätze|information|tiefennavigation/i.test(klasse)) continue;
|
if (/klasse|fahrgenehmigung|ausstellung|datensätze|information|tiefennavigation/i.test(klasse)) continue;
|
||||||
// Skip rows where klasse looks like a date (clearly wrong column)
|
|
||||||
if (/^\d{2}\.\d{2}\.\d{4}$/.test(klasse)) continue;
|
if (/^\d{2}\.\d{2}\.\d{4}$/.test(klasse)) continue;
|
||||||
|
|
||||||
const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined);
|
const ausstellungsdatum = parseDate(ausstellungIdx >= 0 ? row.cells[ausstellungIdx] : undefined);
|
||||||
@@ -1089,5 +1305,9 @@ async function scrapeMemberFahrgenehmigungen(
|
|||||||
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
||||||
if (!member.detailUrl) return [];
|
if (!member.detailUrl) return [];
|
||||||
await frame_goto(frame, member.detailUrl);
|
await frame_goto(frame, member.detailUrl);
|
||||||
return scrapeAusbildungenFromDetailPage(frame, member);
|
// Try to extract IDs from the detail URL
|
||||||
|
const urlObj = new URL(member.detailUrl, frame.url());
|
||||||
|
const idMitgliedschaft = urlObj.searchParams.get('id_mitgliedschaften');
|
||||||
|
const idPersonen = urlObj.searchParams.get('id_personen');
|
||||||
|
return scrapeAusbildungenFromDetailPage(frame, member, idMitgliedschaft, idPersonen);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user