This commit is contained in:
Matthias Hochmeister
2026-03-13 21:41:25 +01:00
parent 8941dc7e09
commit 3171fe1ce5

View File

@@ -628,45 +628,69 @@ async function navigateAndGetTableRows(
const landed = frame.url();
const title = await frame.title().catch(() => '');
log(` → landed: ${landed} | title: "${title}"`);
// Check for FDISK error pages
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
log(` → ERROR page, skipping`);
log(` → ERROR page: ${landed}`);
return null;
}
// Try table.FdcLayList first, then any table with tbody rows
const selectors = ['table.FdcLayList', 'table'];
for (const sel of selectors) {
const exists = await frame.$(sel).then(el => !!el).catch(() => false);
if (!exists) continue;
// Log all table classes on the page for diagnostics (first time only)
const tableInfo = await frame.evaluate(() => {
return Array.from(document.querySelectorAll('table')).map((t, i) => {
const cls = t.className || '(no class)';
const id = t.id || '';
const rowCount = t.querySelectorAll('tr').length;
return `${i}:cls="${cls}"${id ? ` id="${id}"` : ''} rows=${rowCount}`;
}).join(' | ');
}).catch(() => 'N/A');
log(` → tables: ${tableInfo}`);
const rows = await frame.$$eval(`${sel} tbody tr`, (trs) =>
trs.map((tr) => ({
cells: Array.from(tr.querySelectorAll('td')).map(td => {
// Collect rows from ALL tables, reading input/select values for inline-edit pages
const allRows = await frame.evaluate(() => {
const results: Array<{ cells: string[]; tableClass: string }> = [];
for (const table of Array.from(document.querySelectorAll('table'))) {
const cls = table.className || '';
for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
// Skip rows that are nested inside a child table
if (tr.closest('table') !== table) continue;
const tds = Array.from(tr.querySelectorAll('td'));
if (tds.length < 2) continue; // skip single-cell nav/header rows
results.push({
tableClass: cls,
cells: tds.map(td => {
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
if (input) return input.value?.trim() ?? '';
const select = td.querySelector('select') as HTMLSelectElement | null;
if (select) {
const opt = select.options[select.selectedIndex];
const sel = td.querySelector('select') as HTMLSelectElement | null;
if (sel) {
const opt = sel.options[sel.selectedIndex];
return (opt?.text || opt?.value || '').trim();
}
// For FDISK list tables, the value is in <a title="..."> inside each cell
const anchor = td.querySelector('a');
const atitle = anchor?.getAttribute('title')?.trim();
if (atitle) return atitle;
return td.textContent?.trim() ?? '';
}),
}))
).catch(() => [] as Array<{ cells: string[] }>);
if (rows.length > 0) {
log(` → found ${rows.length} rows via "${sel}"`);
return rows;
});
}
}
return results;
}).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);
// No table rows found — page might be empty or structured differently
const bodyText = await frame.evaluate(() => document.body?.textContent?.slice(0, 300) ?? '').catch(() => '');
log(` → no table rows found. Body preview: ${bodyText.replace(/\s+/g, ' ')}`);
return [];
// Prefer rows from FdcLayList-class tables
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
const dataRows = resultRows
.map(r => ({ cells: r.cells }))
.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
log(`${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
return dataRows;
}
/**
@@ -687,9 +711,8 @@ async function scrapeMemberBefoerderungen(
const results: FdiskBefoerderung[] = [];
for (const row of rows) {
const dienstgrad = cellText(row.cells[1]);
if (!dienstgrad) continue;
const datum = parseDate(row.cells[0]);
const dienstgrad = cellText(row.cells[1]) ?? '';
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
}