new features

This commit is contained in:
Matthias Hochmeister
2026-03-23 14:01:39 +01:00
parent d2dc64d54a
commit 3326156b15
35 changed files with 1341 additions and 257 deletions

View File

@@ -241,9 +241,25 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
const idPersonen = urlObj.searchParams.get('id_personen');
const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
// Ausbildungen — disabled: requires different page/approach (TODO)
// const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
// ausbildungen.push(...quals);
// Ausbildungen
if (idMitgliedschaft && idPersonen) {
try {
const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
ausbildungen.push(...quals);
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
} catch (err: any) {
log(` WARN: Ausbildungen scrape failed for ${member.vorname} ${member.zuname} (StNr ${member.standesbuchNr}): ${err.message}`);
// Always dump HTML on failure for diagnosis
try {
const debugDir = path.resolve(process.cwd(), 'debug');
fs.mkdirSync(debugDir, { recursive: true });
const html = await mainFrame.content();
const filePath = path.join(debugDir, `ausbildungen_error_StNr${member.standesbuchNr}.html`);
fs.writeFileSync(filePath, html, 'utf-8');
log(` [debug] saved error HTML → ${filePath}`);
} catch { /* ignore dump errors */ }
}
}
// Beförderungen
const befos = (idMitgliedschaft && idPersonen)
@@ -1034,20 +1050,110 @@ async function scrapeMemberUntersuchungen(
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
const result = await navigateAndGetTableRows(frame, url);
if (!result) return [];
// Always dump for diagnosis when debug is on
await frame_goto(frame, url);
const landed = frame.url();
const title = await frame.title().catch(() => '');
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
log(` → Untersuchungen ERROR page: ${landed}`);
await dumpHtml(frame, `untersuchungen_error_StNr${standesbuchNr}`);
return [];
}
// Show all rows
await selectAlleAnzeige(frame);
// Dump HTML for diagnosis (always when debug enabled)
await dumpHtml(frame, `untersuchungen_StNr${standesbuchNr}`);
// Try to navigate to history/detail view if available
// FDISK may show only the most recent per exam type on the list page.
// Look for a "Verlauf" or "Detail" or "Alle anzeigen" link/button
const hasHistoryLink = await frame.evaluate(() => {
const links = Array.from(document.querySelectorAll('a, input[type="button"], button'));
for (const el of links) {
const text = (el.textContent || '').toLowerCase();
const title = (el.getAttribute('title') || '').toLowerCase();
if (text.includes('verlauf') || text.includes('historie') || text.includes('alle anzeigen')
|| title.includes('verlauf') || title.includes('historie')) {
return (el as HTMLElement).id || (el as HTMLAnchorElement).href || text;
}
}
return null;
}).catch(() => null);
if (hasHistoryLink) {
log(` → Found history link: ${hasHistoryLink}`);
}
// Parse the table using navigateAndGetTableRows logic (reuse existing page state)
// Re-collect rows from the already-loaded page
const allRows = await frame.evaluate(() => {
const results: Array<{ cells: string[]; tableClass: string }> = [];
for (const table of Array.from(document.querySelectorAll('table'))) {
const cls = table.className || '';
for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
if (tr.closest('table') !== table) continue;
const tds = Array.from(tr.querySelectorAll('td'));
if (tds.length < 2) continue;
results.push({
tableClass: cls,
cells: tds.map(td => {
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
if (input) return input.value?.trim() ?? '';
const sel = td.querySelector('select') as HTMLSelectElement | null;
if (sel) {
const opt = sel.options[sel.selectedIndex];
return (opt?.text || opt?.value || '').trim();
}
const anchor = td.querySelector('a');
const atitle = anchor?.getAttribute('title')?.trim();
if (atitle) return atitle;
return td.textContent?.trim() ?? '';
}),
});
}
}
return results;
}).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
const mapped = resultRows.map(r => ({
cells: r.cells.map(c => c.replace(/\u00A0/g, ' ').trim()),
}));
// Find date column
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
let dateColIdx = -1;
for (const r of mapped) {
for (let ci = 0; ci < r.cells.length; ci++) {
if (datePattern.test(r.cells[ci] ?? '')) {
dateColIdx = ci;
break;
}
}
if (dateColIdx >= 0) break;
}
const dataRows = dateColIdx >= 0
? mapped.filter(r => datePattern.test(r.cells[dateColIdx] ?? ''))
: [];
log(` → Untersuchungen: ${allRows.length} total rows, ${dataRows.length} data rows (date in col ${dateColIdx})`);
if (dataRows.length === 0) {
await dumpHtml(frame, `untersuchungen_empty_StNr${standesbuchNr}`);
}
const { rows, dateColIdx } = result;
const results: FdiskUntersuchung[] = [];
for (const row of rows) {
// Collect non-empty values from columns after the date column
for (const row of dataRows) {
const valueCols: string[] = [];
for (let ci = dateColIdx + 1; ci < row.cells.length; ci++) {
const v = cellText(row.cells[ci]);
if (v !== null) valueCols.push(v);
}
// Original layout: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
// With spacer columns the date may not be at 0; use relative offsets from collected values
const anmerkungen = valueCols[0] ?? null;
const art = valueCols[1] ?? null;
const ergebnis = valueCols[2] ?? null;