update
This commit is contained in:
@@ -628,45 +628,69 @@ async function navigateAndGetTableRows(
|
|||||||
|
|
||||||
const landed = frame.url();
|
const landed = frame.url();
|
||||||
const title = await frame.title().catch(() => '');
|
const title = await frame.title().catch(() => '');
|
||||||
log(` → landed: ${landed} | title: "${title}"`);
|
|
||||||
|
|
||||||
// Check for FDISK error pages
|
// Check for FDISK error pages
|
||||||
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
if (landed.includes('BLError') || landed.includes('support.aspx') || title.toLowerCase().includes('fehler')) {
|
||||||
log(` → ERROR page, skipping`);
|
log(` → ERROR page: ${landed}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try table.FdcLayList first, then any table with tbody rows
|
// Log every table's class, id and row count for diagnostics (runs on each call)
|
||||||
const selectors = ['table.FdcLayList', 'table'];
|
const tableInfo = await frame.evaluate(() => {
|
||||||
for (const sel of selectors) {
|
return Array.from(document.querySelectorAll('table')).map((t, i) => {
|
||||||
const exists = await frame.$(sel).then(el => !!el).catch(() => false);
|
const cls = t.className || '(no class)';
|
||||||
if (!exists) continue;
|
const id = t.id || '';
|
||||||
|
const rowCount = t.querySelectorAll('tr').length;
|
||||||
|
return `${i}:cls="${cls}"${id ? ` id="${id}"` : ''} rows=${rowCount}`;
|
||||||
|
}).join(' | ');
|
||||||
|
}).catch(() => 'N/A');
|
||||||
|
log(` → tables: ${tableInfo}`);
|
||||||
|
|
||||||
const rows = await frame.$$eval(`${sel} tbody tr`, (trs) =>
|
// Collect rows from ALL tables, reading input/select values for inline-edit pages
|
||||||
trs.map((tr) => ({
|
const allRows = await frame.evaluate(() => {
|
||||||
cells: Array.from(tr.querySelectorAll('td')).map(td => {
|
const results: Array<{ cells: string[]; tableClass: string }> = [];
|
||||||
|
for (const table of Array.from(document.querySelectorAll('table'))) {
|
||||||
|
const cls = table.className || '';
|
||||||
|
for (const tr of Array.from(table.querySelectorAll('tbody tr, tr'))) {
|
||||||
|
// Skip rows that are nested inside a child table
|
||||||
|
if (tr.closest('table') !== table) continue;
|
||||||
|
const tds = Array.from(tr.querySelectorAll('td'));
|
||||||
|
if (tds.length < 2) continue; // skip single-cell nav/header rows
|
||||||
|
results.push({
|
||||||
|
tableClass: cls,
|
||||||
|
cells: tds.map(td => {
|
||||||
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
const input = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
|
||||||
if (input) return input.value?.trim() ?? '';
|
if (input) return input.value?.trim() ?? '';
|
||||||
const select = td.querySelector('select') as HTMLSelectElement | null;
|
const sel = td.querySelector('select') as HTMLSelectElement | null;
|
||||||
if (select) {
|
if (sel) {
|
||||||
const opt = select.options[select.selectedIndex];
|
const opt = sel.options[sel.selectedIndex];
|
||||||
return (opt?.text || opt?.value || '').trim();
|
return (opt?.text || opt?.value || '').trim();
|
||||||
}
|
}
|
||||||
|
// For FDISK list tables, the value is in <a title="..."> inside each cell
|
||||||
|
const anchor = td.querySelector('a');
|
||||||
|
const atitle = anchor?.getAttribute('title')?.trim();
|
||||||
|
if (atitle) return atitle;
|
||||||
return td.textContent?.trim() ?? '';
|
return td.textContent?.trim() ?? '';
|
||||||
}),
|
}),
|
||||||
}))
|
});
|
||||||
).catch(() => [] as Array<{ cells: string[] }>);
|
|
||||||
|
|
||||||
if (rows.length > 0) {
|
|
||||||
log(` → found ${rows.length} rows via "${sel}"`);
|
|
||||||
return rows;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return results;
|
||||||
|
}).catch(() => [] as Array<{ cells: string[]; tableClass: string }>);
|
||||||
|
|
||||||
// No table rows found — page might be empty or structured differently
|
// Prefer rows from FdcLayList-class tables
|
||||||
const bodyText = await frame.evaluate(() => document.body?.textContent?.slice(0, 300) ?? '').catch(() => '');
|
const fdcRows = allRows.filter(r => r.tableClass.includes('FdcLayList'));
|
||||||
log(` → no table rows found. Body preview: ${bodyText.replace(/\s+/g, ' ')}`);
|
const resultRows = fdcRows.length > 0 ? fdcRows : allRows;
|
||||||
return [];
|
|
||||||
|
// Filter: only keep rows where cells[0] looks like a DD.MM.YYYY date
|
||||||
|
const datePattern = /^\d{2}\.\d{2}\.\d{4}$/;
|
||||||
|
const dataRows = resultRows
|
||||||
|
.map(r => ({ cells: r.cells }))
|
||||||
|
.filter(r => datePattern.test(r.cells[0]?.trim() ?? ''));
|
||||||
|
|
||||||
|
log(` → ${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
|
||||||
|
|
||||||
|
return dataRows;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -687,9 +711,8 @@ async function scrapeMemberBefoerderungen(
|
|||||||
|
|
||||||
const results: FdiskBefoerderung[] = [];
|
const results: FdiskBefoerderung[] = [];
|
||||||
for (const row of rows) {
|
for (const row of rows) {
|
||||||
const dienstgrad = cellText(row.cells[1]);
|
|
||||||
if (!dienstgrad) continue;
|
|
||||||
const datum = parseDate(row.cells[0]);
|
const datum = parseDate(row.cells[0]);
|
||||||
|
const dienstgrad = cellText(row.cells[1]) ?? '';
|
||||||
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
|
const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
|
||||||
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
|
results.push({ standesbuchNr, datum, dienstgrad, syncKey });
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user