update
This commit is contained in:
@@ -628,45 +628,69 @@ async function navigateAndGetTableRows(
|
||||
|
||||
const landed = frame.url();
|
||||
const title = await frame.title().catch(() => '');
|
||||
log(` → landed: ${landed} | title: "${title}"`);
|
||||
|
||||
// Check for FDISK error pages
|
||||
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||
log(` → ERROR page, skipping`);
|
||||
log(` → ERROR page: ${landed}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Try table.FdcLayList first, then any table with tbody rows
|
||||
const selectors = ['table.FdcLayList', 'table'];
|
||||
for (const sel of selectors) {
|
||||
const exists = await frame.$(sel).then(el => !!el).catch(() => false);
|
||||
if (!exists) continue;
|
||||
// Log all table classes on the page for diagnostics (first time only)
|
||||
const tableInfo = await frame.evaluate(() => {
|
||||
return Array.from(document.querySelectorAll('table')).map((t, i) => {
|
||||
const cls = t.className || '(no class)';
|
||||
const id = t.id || '';
|
||||
const rowCount = t.querySelectorAll('tr').length;
|
||||
return `${i}:cls="${cls}"${id ? ` id="${id}"` : ''} rows=${rowCount}`;
|
||||
}).join(' | ');
|
||||
}).catch(() => 'N/A');
|
||||
log(` → tables: ${tableInfo}`);
|
||||
|
||||
const rows = await frame.$$eval(`${sel} tbody tr`, (trs) =>
|
||||
trs.map((tr) => ({
|
||||
cells: Array.from(tr.querySelectorAll('td')).map(td => {
|
||||
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||
if (input) return input.value?.trim() ?? '';
|
||||
const select = td.querySelector('select') as HTMLSelectElement | null;
|
||||
if (select) {
|
||||
const opt = select.options[select.selectedIndex];
|
||||
return (opt?.text || opt?.value || '').trim();
|
||||
}
|
||||
return td.textContent?.trim() ?? '';
|
||||
}),
|
||||
}))
|
||||
).catch(() => [] as Array<{ cells: string[] }>);
|
||||
|
||||
if (rows.length > 0) {
|
||||
log(` → found ${rows.length} rows via "${sel}"`);
|
||||
return rows;
|
||||
// Collect rows from ALL tables, reading input/select values for inline-edit pages
|
||||
const allRows = await frame.evaluate(() => {
|
||||
const results: Array<{ cells: string[]; tableClass: string }> = [];
|
||||
for (const table of Array.from(document.querySelectorAll('table'))) {
|
||||
const cls = table.className || '';
|
||||
for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
|
||||
// Skip rows that are nested inside a child table
|
||||
if (tr.closest('table') !== table) continue;
|
||||
const tds = Array.from(tr.querySelectorAll('td'));
|
||||
if (tds.length < 2) continue; // skip single-cell nav/header rows
|
||||
results.push({
|
||||
tableClass: cls,
|
||||
cells: tds.map(td => {
|
||||
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||
if (input) return input.value?.trim() ?? '';
|
||||
const sel = td.querySelector('select') as HTMLSelectElement | null;
|
||||
if (sel) {
|
||||
const opt = sel.options[sel.selectedIndex];
|
||||
return (opt?.text || opt?.value || '').trim();
|
||||
}
|
||||
// For FDISK list tables, the value is in <a title="..."> inside each cell
|
||||
const anchor = td.querySelector('a');
|
||||
const atitle = anchor?.getAttribute('title')?.trim();
|
||||
if (atitle) return atitle;
|
||||
return td.textContent?.trim() ?? '';
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);
|
||||
|
||||
// No table rows found — page might be empty or structured differently
|
||||
const bodyText = await frame.evaluate(() => document.body?.textContent?.slice(0, 300) ?? '').catch(() => '');
|
||||
log(` → no table rows found. Body preview: ${bodyText.replace(/\s+/g, ' ')}`);
|
||||
return [];
|
||||
// Prefer rows from FdcLayList-class tables
|
||||
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
||||
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
||||
|
||||
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
||||
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||
const dataRows = resultRows
|
||||
.map(r => ({ cells: r.cells }))
|
||||
.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
||||
|
||||
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
|
||||
|
||||
return dataRows;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -687,9 +711,8 @@ async function scrapeMemberBefoerderungen(
|
||||
|
||||
const results: FdiskBefoerderung[] = [];
|
||||
for (const row of rows) {
|
||||
const dienstgrad = cellText(row.cells[1]);
|
||||
if (!dienstgrad) continue;
|
||||
const datum = parseDate(row.cells[0]);
|
||||
const dienstgrad = cellText(row.cells[1]) ?? '';
|
||||
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
|
||||
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user