new features
This commit is contained in:
@@ -65,6 +65,23 @@ function mapDienstgrad(raw: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Austrian/EU driving license classes accepted during sync.
 *
 * Serves as an allow-list that filters out non-class data the scraper may
 * pick up from FDISK form fields. Entries are stored upper-case, so lookups
 * must upper-case their input before testing membership.
 *
 * Typed as `ReadonlySet` so the allow-list cannot be mutated by accident.
 */
const VALID_LICENSE_CLASSES: ReadonlySet<string> = new Set([
  'A', 'A1', 'A2', 'AM',
  'B', 'B1', 'BE',
  'C', 'C1', 'CE', 'C1E',
  'D', 'D1', 'DE', 'D1E',
  'F', 'G', 'L', 'T',
]);

/**
 * Checks whether a scraped value is one of the known driving license classes.
 *
 * @param klasse - Raw class text. Leading/trailing whitespace and letter case
 *   are ignored; whitespace inside the value is NOT removed.
 * @returns `true` when the trimmed, upper-cased value is in
 *   `VALID_LICENSE_CLASSES`, otherwise `false`. Non-string values yield
 *   `false` instead of throwing.
 */
function isValidLicenseClass(klasse: string): boolean {
  // The value originates from scraped data, so the static type is not a
  // runtime guarantee: reject null/undefined/non-strings rather than crash
  // on `.trim()`.
  if (typeof klasse !== 'string') return false;

  return VALID_LICENSE_CLASSES.has(klasse.trim().toUpperCase());
}
|
||||
|
||||
export async function syncToDatabase(
|
||||
pool: Pool,
|
||||
members: FdiskMember[],
|
||||
@@ -362,6 +379,13 @@ async function syncFahrgenehmigungen(
|
||||
}
|
||||
|
||||
for (const f of fahrgenehmigungen) {
|
||||
// J2: Filter out non-class data that the scraper may pick up
|
||||
if (!f.klasse || !isValidLicenseClass(f.klasse)) {
|
||||
log(`Skipping Fahrgenehmigung: invalid klasse "${f.klasse}" for StNr ${f.standesbuchNr}`);
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const result = await client.query<{ user_id: string }>(
|
||||
`SELECT user_id FROM mitglieder_profile WHERE fdisk_standesbuch_nr = $1`,
|
||||
[f.standesbuchNr]
|
||||
|
||||
@@ -241,9 +241,25 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
|
||||
const idPersonen = urlObj.searchParams.get('id_personen');
|
||||
const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
|
||||
|
||||
// Ausbildungen — disabled: requires different page/approach (TODO)
|
||||
// const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
|
||||
// ausbildungen.push(...quals);
|
||||
// Ausbildungen
|
||||
if (idMitgliedschaft && idPersonen) {
|
||||
try {
|
||||
const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member, idMitgliedschaft, idPersonen);
|
||||
ausbildungen.push(...quals);
|
||||
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
|
||||
} catch (err: any) {
|
||||
log(` WARN: Ausbildungen scrape failed for ${member.vorname} ${member.zuname} (StNr ${member.standesbuchNr}): ${err.message}`);
|
||||
// Always dump HTML on failure for diagnosis
|
||||
try {
|
||||
const debugDir = path.resolve(process.cwd(), 'debug');
|
||||
fs.mkdirSync(debugDir, { recursive: true });
|
||||
const html = await mainFrame.content();
|
||||
const filePath = path.join(debugDir, `ausbildungen_error_StNr${member.standesbuchNr}.html`);
|
||||
fs.writeFileSync(filePath, html, 'utf-8');
|
||||
log(` [debug] saved error HTML → ${filePath}`);
|
||||
} catch { /* ignore dump errors */ }
|
||||
}
|
||||
}
|
||||
|
||||
// Beförderungen
|
||||
const befos = (idMitgliedschaft && idPersonen)
|
||||
@@ -1034,20 +1050,110 @@ async function scrapeMemberUntersuchungen(
|
||||
+ `?search=1&searchid_mitgliedschaften=${idMitgliedschaft}&id_personen=${idPersonen}`
|
||||
+ `&id_mitgliedschaften=${idMitgliedschaft}&searchid_personen=${idPersonen}&searchid_maskmode=`;
|
||||
|
||||
const result = await navigateAndGetTableRows(frame, url);
|
||||
if (!result) return [];
|
||||
// Always dump for diagnosis when debug is on
|
||||
await frame_goto(frame, url);
|
||||
|
||||
const landed = frame.url();
|
||||
const title = await frame.title().catch(() => '');
|
||||
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||
log(` → Untersuchungen ERROR page: ${landed}`);
|
||||
await dumpHtml(frame, `untersuchungen_error_StNr${standesbuchNr}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Show all rows
|
||||
await selectAlleAnzeige(frame);
|
||||
|
||||
// Dump HTML for diagnosis (always when debug enabled)
|
||||
await dumpHtml(frame, `untersuchungen_StNr${standesbuchNr}`);
|
||||
|
||||
// Try to navigate to history/detail view if available
|
||||
// FDISK may show only the most recent per exam type on the list page.
|
||||
// Look for a "Verlauf" or "Detail" or "Alle anzeigen" link/button
|
||||
const hasHistoryLink = await frame.evaluate(() => {
|
||||
const links = Array.from(document.querySelectorAll('a, input[type="button"], button'));
|
||||
for (const el of links) {
|
||||
const text = (el.textContent || '').toLowerCase();
|
||||
const title = (el.getAttribute('title') || '').toLowerCase();
|
||||
if (text.includes('verlauf') || text.includes('historie') || text.includes('alle anzeigen')
|
||||
|| title.includes('verlauf') || title.includes('historie')) {
|
||||
return (el as HTMLElement).id || (el as HTMLAnchorElement).href || text;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}).catch(() => null);
|
||||
|
||||
if (hasHistoryLink) {
|
||||
log(` → Found history link: ${hasHistoryLink}`);
|
||||
}
|
||||
|
||||
// Parse the table using navigateAndGetTableRows logic (reuse existing page state)
|
||||
// Re-collect rows from the already-loaded page
|
||||
const allRows = await frame.evaluate(() => {
|
||||
const results: Array<{ cells: string[]; tableClass: string }> = [];
|
||||
for (const table of Array.from(document.querySelectorAll('table'))) {
|
||||
const cls = table.className || '';
|
||||
for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
|
||||
if (tr.closest('table') !== table) continue;
|
||||
const tds = Array.from(tr.querySelectorAll('td'));
|
||||
if (tds.length < 2) continue;
|
||||
results.push({
|
||||
tableClass: cls,
|
||||
cells: tds.map(td => {
|
||||
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||
if (input) return input.value?.trim() ?? '';
|
||||
const sel = td.querySelector('select') as HTMLSelectElement | null;
|
||||
if (sel) {
|
||||
const opt = sel.options[sel.selectedIndex];
|
||||
return (opt?.text || opt?.value || '').trim();
|
||||
}
|
||||
const anchor = td.querySelector('a');
|
||||
const atitle = anchor?.getAttribute('title')?.trim();
|
||||
if (atitle) return atitle;
|
||||
return td.textContent?.trim() ?? '';
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);
|
||||
|
||||
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
||||
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
||||
const mapped = resultRows.map(r => ({
|
||||
cells: r.cells.map(c => c.replace(/\u00A0/g, ' ').trim()),
|
||||
}));
|
||||
|
||||
// Find date column
|
||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||
let dateColIdx = -1;
|
||||
for (const r of mapped) {
|
||||
for (let ci = 0; ci < r.cells.length; ci++) {
|
||||
if (datePattern.test(r.cells[ci] ?? '')) {
|
||||
dateColIdx = ci;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (dateColIdx >= 0) break;
|
||||
}
|
||||
|
||||
const dataRows = dateColIdx >= 0
|
||||
? mapped.filter(r => datePattern.test(r.cells[dateColIdx] ?? ''))
|
||||
: [];
|
||||
|
||||
log(` → Untersuchungen: ${allRows.length} total rows, ${dataRows.length} data rows (date in col ${dateColIdx})`);
|
||||
|
||||
if (dataRows.length === 0) {
|
||||
await dumpHtml(frame, `untersuchungen_empty_StNr${standesbuchNr}`);
|
||||
}
|
||||
|
||||
const { rows, dateColIdx } = result;
|
||||
const results: FdiskUntersuchung[] = [];
|
||||
for (const row of rows) {
|
||||
// Collect non-empty values from columns after the date column
|
||||
for (const row of dataRows) {
|
||||
const valueCols: string[] = [];
|
||||
for (let ci = dateColIdx + 1; ci < row.cells.length; ci++) {
|
||||
const v = cellText(row.cells[ci]);
|
||||
if (v !== null) valueCols.push(v);
|
||||
}
|
||||
// Original layout: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
|
||||
// With spacer columns the date may not be at 0; use relative offsets from collected values
|
||||
const anmerkungen = valueCols[0] ?? null;
|
||||
const art = valueCols[1] ?? null;
|
||||
const ergebnis = valueCols[2] ?? null;
|
||||
|
||||
Reference in New Issue
Block a user