fix(sync): switch FDISK course scraper from Ausbildungen to Kurse page
This commit is contained in:
@@ -597,6 +597,7 @@ function UebersichtTab({ haushaltsjahre, selectedJahrId, onJahrChange }: {
|
||||
queryKey: ['kontenTree', selectedJahrId],
|
||||
queryFn: () => buchhaltungApi.getKontenTree(selectedJahrId!),
|
||||
enabled: selectedJahrId != null,
|
||||
staleTime: 0,
|
||||
});
|
||||
const { data: kategorien = [] } = useQuery({
|
||||
queryKey: ['buchhaltung-kategorien', selectedJahrId],
|
||||
|
||||
@@ -59,9 +59,10 @@ function parseDate(raw: string | null | undefined): string | null {
|
||||
if (!raw) return null;
|
||||
const trimmed = raw.trim();
|
||||
if (!trimmed) return null;
|
||||
const match = trimmed.match(/^(\d{2})\.(\d{2})\.(\d{4})$/);
|
||||
// Accept 1–2 digit day/month with optional trailing time (e.g. "10.9.2011 00:00:00")
|
||||
const match = trimmed.match(/^(\d{1,2})\.(\d{1,2})\.(\d{4})/);
|
||||
if (!match) return null;
|
||||
return `${match[3]}-${match[2]}-${match[1]}`;
|
||||
return `${match[3]}-${match[2].padStart(2, '0')}-${match[1].padStart(2, '0')}`;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -736,8 +737,9 @@ async function scrapeDetailProfileFields(frame: Frame): Promise<{
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape Ausbildungen by navigating to the AusbildungenListEdit.aspx page.
|
||||
* This is a ListEdit page (like Fahrgenehmigungen) with <input>/<select> elements.
|
||||
* Scrape Kurse (courses) by navigating to the KursteilnehmerListEdit.aspx page.
|
||||
* This page uses indexed hidden form fields (kursart_bez_N, datum_von_N, etc.)
|
||||
* which are far more reliable than heuristic table column detection.
|
||||
*/
|
||||
async function scrapeAusbildungenFromDetailPage(
|
||||
frame: Frame,
|
||||
@@ -745,191 +747,110 @@ async function scrapeAusbildungenFromDetailPage(
|
||||
idMitgliedschaft?: string | null,
|
||||
idPersonen?: string | null,
|
||||
): Promise<FdiskAusbildung[]> {
|
||||
// Without both IDs we cannot build the URL of the Kurse page (KursteilnehmerListEdit.aspx)
|
||||
if (!idMitgliedschaft || !idPersonen) {
|
||||
log(` Ausbildungen for StNr ${member.standesbuchNr}: missing mitgliedschaft/personen IDs, skipping`);
|
||||
log(` Kurse for StNr ${member.standesbuchNr}: missing mitgliedschaft/personen IDs, skipping`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const url = `${BASE_URL}/fdisk/module/mgvw/ausbildungen/AusbildungenListEdit.aspx`
|
||||
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
||||
const url = `${BASE_URL}/fdisk/module/mgvw/kursteilnehmer/KursteilnehmerListEdit.aspx`
|
||||
+ `?search=1&searchid_personen=${idPersonen}&searchid_mitgliedschaften=${idMitgliedschaft}`
|
||||
+ `&id_personen=${idPersonen}&id_mitgliedschaften=${idMitgliedschaft}`
|
||||
+ `&anzeige_count=ALLE`;
|
||||
|
||||
await frame_goto(frame, url);
|
||||
|
||||
const landed = frame.url();
|
||||
const title = await frame.title().catch(() => '');
|
||||
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||
log(` → Ausbildungen ERROR page: ${landed}`);
|
||||
log(` → Kurse ERROR page: ${landed}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Show all rows (FDISK defaults to 10)
|
||||
// Ensure all rows are visible: the anzeige_count=ALLE URL parameter should already do this; selecting it again is a deliberate safeguard
|
||||
await selectAlleAnzeige(frame);
|
||||
|
||||
// Dump HTML for debugging
|
||||
await dumpHtml(frame, `ausbildungen_StNr${member.standesbuchNr}`);
|
||||
await dumpHtml(frame, `kurse_StNr${member.standesbuchNr}`);
|
||||
|
||||
// This is a ListEdit page — read form fields by ID pattern or parse table with extractCellValue
|
||||
const ausbildungen = await frame.evaluate((stNr: string) => {
|
||||
const extractCellValue = (cell: Element): string => {
|
||||
const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||
if (input && input.value?.trim()) return input.value.trim();
|
||||
const sel = cell.querySelector('select') as HTMLSelectElement | null;
|
||||
if (sel) {
|
||||
// Try selectedIndex first
|
||||
const idx = sel.selectedIndex;
|
||||
if (idx >= 0 && sel.options[idx]) {
|
||||
const t = (sel.options[idx].text || sel.options[idx].value || '').trim();
|
||||
if (t) return t;
|
||||
}
|
||||
// Fallback: read the selected attribute directly from HTML
|
||||
const selectedOpt = sel.querySelector('option[selected]') as HTMLOptionElement | null;
|
||||
if (selectedOpt) {
|
||||
const t = (selectedOpt.text || selectedOpt.value || '').trim();
|
||||
if (t) return t;
|
||||
}
|
||||
}
|
||||
const anchor = cell.querySelector('a');
|
||||
const atitle = anchor?.getAttribute('title')?.trim();
|
||||
if (atitle) return atitle;
|
||||
return cell.textContent?.replace(/\u00A0/g, ' ').trim() ?? '';
|
||||
};
|
||||
|
||||
const results: Array<{
|
||||
// Read indexed form fields — same pattern as scrapeMemberFahrgenehmigungen
|
||||
const rawRows = await frame.evaluate((stNr: string) => {
|
||||
const rows: Array<{
|
||||
standesbuchNr: string;
|
||||
kursname: string | null;
|
||||
kursname: string;
|
||||
kursnummer: string | null;
|
||||
kurzbezeichnung: string | null;
|
||||
erfolgscode: string | null;
|
||||
kursDatum: string | null;
|
||||
ablaufdatum: string | null;
|
||||
ort: string | null;
|
||||
bemerkung: string | null;
|
||||
syncKey: string;
|
||||
}> = [];
|
||||
|
||||
// Collect rows from all tables, find the data table
|
||||
const tables = Array.from(document.querySelectorAll('table'));
|
||||
let bestRows: Array<{ cells: string[] }> = [];
|
||||
let bestHeaders: string[] = [];
|
||||
for (let i = 0; i < 500; i++) {
|
||||
// kursart_bez is the sentinel — if it doesn't exist, we've passed all rows
|
||||
const kursartBezEl = document.querySelector(`input[name="kursart_bez_${i}"]`) as HTMLInputElement | null;
|
||||
if (!kursartBezEl) break;
|
||||
|
||||
for (const table of tables) {
|
||||
const rows: Array<{ cells: string[] }> = [];
|
||||
const headerCells: string[] = [];
|
||||
|
||||
// Get headers
|
||||
for (const th of Array.from(table.querySelectorAll('thead th, tr:first-child th'))) {
|
||||
headerCells.push(extractCellValue(th));
|
||||
}
|
||||
|
||||
// Get data rows
|
||||
for (const tr of Array.from(table.querySelectorAll('tr'))) {
|
||||
if (tr.closest('table') !== table) continue;
|
||||
const tds = Array.from(tr.querySelectorAll('td'));
|
||||
if (tds.length < 2) continue;
|
||||
if (tr.querySelectorAll('th').length > 0) continue;
|
||||
rows.push({ cells: tds.map(td => extractCellValue(td)) });
|
||||
}
|
||||
|
||||
if (rows.length > bestRows.length) {
|
||||
bestRows = rows;
|
||||
bestHeaders = headerCells;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestRows.length === 0) return results;
|
||||
|
||||
// Try to find column indices from headers
|
||||
const hdr = bestHeaders.map(h => h.toLowerCase());
|
||||
let kursnummerIdx = hdr.findIndex(h => h.includes('nummer'));
|
||||
let kurzIdx = hdr.findIndex(h => h === 'kurz' || (h.includes('kurz') && !h.includes('name')));
|
||||
// Exclude "kurzbezeichnung" from matching kursname — it already matches kurzIdx above
|
||||
let kursnameIdx = hdr.findIndex(h => !h.startsWith('kurz') && (h === 'kurs' || h.includes('ausbildung') || h.includes('bezeichnung')));
|
||||
let datumIdx = hdr.findIndex(h => h.includes('datum') || h.includes('abschluss'));
|
||||
let erfolgscodeIdx = hdr.findIndex(h => h.includes('erfolg') || h.includes('code'));
|
||||
let ablaufIdx = hdr.findIndex(h => h.includes('ablauf') || h.includes('gültig'));
|
||||
let ortIdx = hdr.findIndex(h => h.includes('ort'));
|
||||
let bemIdx = hdr.findIndex(h => h.includes('bem') || h.includes('info'));
|
||||
|
||||
// If headers didn't help, scan data for date-like columns and text columns
|
||||
if (kursnameIdx === -1 && bestRows.length > 0) {
|
||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||
// Find date columns
|
||||
const dateCols = new Set<number>();
|
||||
const textCols: number[] = [];
|
||||
for (const row of bestRows.slice(0, 3)) {
|
||||
for (let ci = 0; ci < row.cells.length; ci++) {
|
||||
const v = row.cells[ci]?.trim();
|
||||
if (!v) continue;
|
||||
if (datePattern.test(v)) dateCols.add(ci);
|
||||
else if (v.length > 2 && !/^[\d.,]+$/.test(v)) textCols.push(ci);
|
||||
}
|
||||
}
|
||||
// The longest text column is likely the Kursname
|
||||
if (textCols.length > 0) {
|
||||
let maxLen = 0;
|
||||
for (const ci of textCols) {
|
||||
const len = (bestRows[0]?.cells[ci] ?? '').length;
|
||||
if (len > maxLen) { maxLen = len; kursnameIdx = ci; }
|
||||
}
|
||||
}
|
||||
// First date column is Datum, second is Ablaufdatum
|
||||
const sortedDates = Array.from(dateCols).sort((a, b) => a - b);
|
||||
if (sortedDates.length > 0 && datumIdx === -1) datumIdx = sortedDates[0];
|
||||
if (sortedDates.length > 1 && ablaufIdx === -1) ablaufIdx = sortedDates[1];
|
||||
}
|
||||
|
||||
for (const row of bestRows) {
|
||||
const kursname = ((kursnameIdx >= 0 ? row.cells[kursnameIdx] : row.cells[0])?.trim()) || '';
|
||||
const kursname = kursartBezEl.value?.trim() || '';
|
||||
if (!kursname) continue;
|
||||
// Skip rows that are column headers or pagination entries (not real course data)
|
||||
if (/^(kurz|kurzbezeichnung|bezeichnung|tiefennavigation|anzahl)$/i.test(kursname)) continue;
|
||||
if (/datensätze\s*\d/i.test(kursname)) continue;
|
||||
|
||||
const rawDatum = datumIdx >= 0 ? row.cells[datumIdx]?.trim() : null;
|
||||
const rawAblauf = ablaufIdx >= 0 ? row.cells[ablaufIdx]?.trim() : null;
|
||||
const rawOrt = ortIdx >= 0 ? row.cells[ortIdx]?.trim() || null : null;
|
||||
const rawBem = bemIdx >= 0 ? row.cells[bemIdx]?.trim() || null : null;
|
||||
const kursnummerEl = document.querySelector(`input[name="kursnummer_${i}"]`) as HTMLInputElement | null;
|
||||
const datumVonEl = document.querySelector(`input[name="datum_von_${i}"]`) as HTMLInputElement | null;
|
||||
const leistungsartEl = document.querySelector(`input[name="leistungsart_${i}"]`) as HTMLInputElement | null;
|
||||
|
||||
// parseDate is not available inside evaluate; return raw values
|
||||
results.push({
|
||||
const kursnummer = kursnummerEl?.value?.trim() || null;
|
||||
// datum_von format: "D.M.YYYY HH:MM:SS" — pass raw, parseDate handles it
|
||||
const kursDatum = datumVonEl?.value?.trim() || null;
|
||||
const erfolgscode = leistungsartEl?.value?.trim() || null;
|
||||
|
||||
// Kurzbezeichnung: extract from <nobr> in the same table row as the kursnummer input
|
||||
let kurzbezeichnung: string | null = null;
|
||||
const row = kursnummerEl?.closest('tr');
|
||||
if (row) {
|
||||
const nobrs = row.querySelectorAll('nobr');
|
||||
// First <nobr> is kurzbezeichnung, second is kursname
|
||||
if (nobrs.length >= 1) {
|
||||
kurzbezeichnung = nobrs[0].textContent?.replace(/\u00A0/g, ' ').trim() || null;
|
||||
}
|
||||
}
|
||||
|
||||
rows.push({
|
||||
standesbuchNr: stNr,
|
||||
kursnummer: (kursnummerIdx >= 0 ? row.cells[kursnummerIdx] : null)?.trim() || null,
|
||||
kurzbezeichnung: (kurzIdx >= 0 ? row.cells[kurzIdx] : null)?.trim() || null,
|
||||
kursname,
|
||||
kursDatum: rawDatum || null,
|
||||
ablaufdatum: rawAblauf || null,
|
||||
ort: rawOrt,
|
||||
bemerkung: rawBem,
|
||||
erfolgscode: (erfolgscodeIdx >= 0 ? row.cells[erfolgscodeIdx] : null)?.trim() || null,
|
||||
syncKey: `${stNr}::${kursname}::${rawDatum ?? ''}`,
|
||||
kursnummer,
|
||||
kurzbezeichnung,
|
||||
erfolgscode,
|
||||
kursDatum,
|
||||
syncKey: `${stNr}::${kursname}::${kursDatum ?? ''}`,
|
||||
});
|
||||
}
|
||||
|
||||
return results;
|
||||
}, member.standesbuchNr).catch(() => [] as FdiskAusbildung[]);
|
||||
return rows;
|
||||
}, member.standesbuchNr).catch(() => [] as Array<{
|
||||
standesbuchNr: string; kursname: string; kursnummer: string | null;
|
||||
kurzbezeichnung: string | null; erfolgscode: string | null; kursDatum: string | null;
|
||||
syncKey: string;
|
||||
}>);
|
||||
|
||||
// Post-process: parse dates and rebuild syncKeys
|
||||
const results: FdiskAusbildung[] = ausbildungen.filter(a => !!a.kursname).map(a => {
|
||||
log(` → Kurse form-field extraction: ${rawRows.length} rows found`);
|
||||
|
||||
// Post-process: parse dates and rebuild syncKeys with ISO dates
|
||||
const results: FdiskAusbildung[] = rawRows.map(a => {
|
||||
const kursDatum = parseDate(a.kursDatum);
|
||||
return {
|
||||
standesbuchNr: a.standesbuchNr,
|
||||
kursname: a.kursname as string,
|
||||
kursname: a.kursname,
|
||||
kursnummer: a.kursnummer,
|
||||
kurzbezeichnung: a.kurzbezeichnung,
|
||||
erfolgscode: a.erfolgscode,
|
||||
kursDatum,
|
||||
ablaufdatum: parseDate(a.ablaufdatum),
|
||||
ort: a.ort,
|
||||
bemerkung: a.bemerkung,
|
||||
ablaufdatum: null,
|
||||
ort: null,
|
||||
bemerkung: null,
|
||||
syncKey: `${a.standesbuchNr}::${a.kursname}::${kursDatum ?? ''}`,
|
||||
};
|
||||
});
|
||||
|
||||
// Debug: dump the page HTML when no Kurse rows were extracted
|
||||
if (results.length === 0) {
|
||||
await dumpHtml(frame, `ausbildungen_empty_StNr${member.standesbuchNr}`);
|
||||
await dumpHtml(frame, `kurse_empty_StNr${member.standesbuchNr}`);
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
Reference in New Issue
Block a user