This commit is contained in:
Matthias Hochmeister
2026-03-13 21:16:44 +01:00
parent 461d28fa0d
commit 0d4e7b480d
2 changed files with 115 additions and 18 deletions

View File

@@ -205,9 +205,7 @@ function Atemschutz() {
const q = search.toLowerCase(); const q = search.toLowerCase();
return traeger.filter((item) => { return traeger.filter((item) => {
const name = getDisplayName(item).toLowerCase(); const name = getDisplayName(item).toLowerCase();
const email = item.user_email.toLowerCase(); return name.includes(q);
const dienstgrad = (item.dienstgrad || '').toLowerCase();
return name.includes(q) || email.includes(q) || dienstgrad.includes(q);
}); });
}, [traeger, search]); }, [traeger, search]);
@@ -489,7 +487,6 @@ function Atemschutz() {
<TableHead> <TableHead>
<TableRow> <TableRow>
<TableCell>Name</TableCell> <TableCell>Name</TableCell>
<TableCell>Dienstgrad</TableCell>
<TableCell align="center">Lehrgang</TableCell> <TableCell align="center">Lehrgang</TableCell>
<TableCell>Untersuchung gültig bis</TableCell> <TableCell>Untersuchung gültig bis</TableCell>
<TableCell>Leistungstest gültig bis</TableCell> <TableCell>Leistungstest gültig bis</TableCell>
@@ -516,14 +513,6 @@ function Atemschutz() {
<Typography variant="body2" fontWeight={500}> <Typography variant="body2" fontWeight={500}>
{getDisplayName(item)} {getDisplayName(item)}
</Typography> </Typography>
<Typography variant="caption" color="text.secondary">
{item.user_email}
</Typography>
</TableCell>
<TableCell>
<Typography variant="body2">
{item.dienstgrad || '—'}
</Typography>
</TableCell> </TableCell>
<TableCell align="center"> <TableCell align="center">
{item.atemschutz_lehrgang ? ( {item.atemschutz_lehrgang ? (

View File

@@ -73,10 +73,16 @@ export async function scrapeAll(username: string, password: string): Promise<{
const fahrgenehmigungen: FdiskFahrgenehmigung[] = []; const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];
for (const member of members) { for (const member of members) {
if (!member.detailUrl) continue;
try { try {
// Navigate to detail page and scrape all sub-sections // Navigate to member detail page — use direct URL if available, else search+click fallback
await frame_goto(mainFrame, member.detailUrl); const onDetail = member.detailUrl
? (await frame_goto(mainFrame, member.detailUrl), true)
: await navigateToMemberDetailBySearch(mainFrame, member.standesbuchNr);
if (!onDetail) {
log(` SKIP ${member.vorname} ${member.zuname} (${member.standesbuchNr}): could not reach detail page`);
continue;
}
// Scrape extra profile fields from the detail form // Scrape extra profile fields from the detail form
const profileFields = await scrapeDetailProfileFields(mainFrame); const profileFields = await scrapeDetailProfileFields(mainFrame);
@@ -160,6 +166,65 @@ async function login(page: Page, username: string, password: string): Promise<vo
log(`Logged in successfully, redirected to: ${currentUrl}`); log(`Logged in successfully, redirected to: ${currentUrl}`);
} }
/**
 * Fallback navigation to a member's detail page when no direct URL is available.
 *
 * Loads the member list, sets both ends of the standesbuchNr range filter to the
 * same value, submits the search form, then clicks the first result row.
 *
 * @param frame - Frame in which the member list and the detail page are loaded.
 * @param standesbuchNr - Standesbuch number used as the exact search filter.
 * @returns `true` if, after the click, the frame URL no longer contains one of the
 *   list-page markers; `false` if the number is blank, the search form is not
 *   usable, no result row exists, or the frame is still on the list page.
 * @throws Errors from loading the member list or from waiting on the search-form
 *   submission are not caught here and propagate to the caller.
 */
async function navigateToMemberDetailBySearch(frame: Frame, standesbuchNr: string): Promise<boolean> {
  // A blank number would set an empty filter; the first row of that result is not
  // guaranteed to be the requested member, so refuse instead of guessing.
  if (!standesbuchNr || standesbuchNr.trim() === '') {
    log(` WARN navigateToMemberDetailBySearch: empty StNr, cannot search for member`);
    return false;
  }

  // Navigate to the member list
  await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await frame.waitForLoadState('networkidle');

  // Set exact standesbuchNr filter in the search form (from == to)
  const formOk = await frame.evaluate((stNr) => {
    const form = (document as any).forms['frmsearch'];
    if (!form) return false;
    const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
    const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
    if (!fromFld || !toFld) return false;
    fromFld.value = stNr;
    toFld.value = stNr;
    return true;
  }, standesbuchNr);

  if (!formOk) {
    log(` WARN navigateToMemberDetailBySearch: search form not usable for StNr ${standesbuchNr}`);
    return false;
  }

  // Start waiting for the navigation before triggering the submit so it cannot be missed.
  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
  ]);

  // Click on the first data row. Prefer an anchor and fall back to a plain cell.
  // The two lookups are separate on purpose: a combined selector list
  // ("… a, … td") is resolved in document order, so the first <td> would always
  // win over the <a> nested inside it.
  const firstRow = 'table.FdcLayList tbody tr:first-child';
  const firstRowLink = (await frame.$(`${firstRow} a`)) ?? (await frame.$(`${firstRow} td`));
  if (!firstRowLink) {
    log(` WARN navigateToMemberDetailBySearch: no result row for StNr ${standesbuchNr}`);
    return false;
  }

  try {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 15000 }),
      firstRowLink.click(),
    ]);
  } catch (err: unknown) {
    // Best effort: waitForNavigation may time out if the click didn't navigate
    // (e.g. onclick vs href). The URL check below decides the outcome; the reason
    // is logged so that other failures are not hidden.
    const reason = (err instanceof Error ? err.message : String(err)).split('\n')[0];
    log(` WARN navigateToMemberDetailBySearch: click/navigation wait failed for StNr ${standesbuchNr}: ${reason}`);
  }

  // Treat any URL without a list-page marker as the detail page.
  const url = frame.url();
  const onDetailPage = !url.includes('MitgliedschaftenList') && !url.includes('meine_Mitglieder');
  if (onDetailPage) {
    log(` Navigated to detail via search+click: ${url}`);
  } else {
    log(` WARN navigateToMemberDetailBySearch: still on list page after click for StNr ${standesbuchNr}`);
  }
  return onDetailPage;
}
async function navigateToMemberList(page: Page): Promise<Frame> { async function navigateToMemberList(page: Page): Promise<Frame> {
const mainFrame = page.frame({ name: 'mainFrame' }); const mainFrame = page.frame({ name: 'mainFrame' });
if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset'); if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');
@@ -266,6 +331,11 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 }); await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
const firstRows = await parseRowsFromTable(frame); const firstRows = await parseRowsFromTable(frame);
log(`Initial fetch: ${firstRows.length} rows`); log(`Initial fetch: ${firstRows.length} rows`);
// Log href debug info for the first row to diagnose URL extraction
const rowDebug = await frame.evaluate(() => (window as any).__fdiskFirstRowDebug ?? 'no debug info');
log(`Row href debug: ${rowDebug}`);
for (const row of firstRows) { for (const row of firstRows) {
log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`); log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
} }
@@ -364,9 +434,9 @@ async function parseRowsFromTable(frame: Frame) {
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad, // Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon // 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
// Each <td> contains an <a title="value"> — the title is the clean cell text. // Each <td> contains an <a title="value"> — the title is the clean cell text.
// The href on each <a> is the member detail URL (same link repeated across all cells in a row). // Navigation may be via href or onclick handlers (FDISK uses both depending on version).
return frame.$$eval('table.FdcLayList tbody tr', (trs) => return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
trs.map((tr) => { trs.map((tr, rowIdx) => {
const cells = Array.from(tr.querySelectorAll('td')); const cells = Array.from(tr.querySelectorAll('td'));
const val = (i: number) => { const val = (i: number) => {
const a = cells[i]?.querySelector('a'); const a = cells[i]?.querySelector('a');
@@ -374,7 +444,45 @@ async function parseRowsFromTable(frame: Frame) {
// Use title only if non-empty; otherwise fall back to textContent // Use title only if non-empty; otherwise fall back to textContent
return (title || cells[i]?.textContent || '').trim(); return (title || cells[i]?.textContent || '').trim();
}; };
const href = (tr.querySelector('a') as HTMLAnchorElement | null)?.href ?? null;
// Extract detail URL — try multiple strategies:
// 1. Standard <a href="..."> pointing to an .aspx page
// 2. onclick attribute on <a>, <td>, or <tr> containing an .aspx URL
let href: string | null = null;
let debugInfo = '';
for (const a of Array.from(tr.querySelectorAll('a'))) {
const rawHref = (a as Element).getAttribute('href') ?? '';
debugInfo += `a.href="${rawHref}" `;
if (rawHref && rawHref !== '#' && rawHref !== '' && !rawHref.startsWith('javascript:')) {
href = (a as HTMLAnchorElement).href; // resolves relative → absolute
break;
}
}
if (!href) {
// Scan onclick on all ancestors + cells + anchors for .aspx URLs
const candidates: Element[] = [tr, ...Array.from(tr.querySelectorAll('a, td'))];
for (const el of candidates) {
const onclick = el.getAttribute('onclick') ?? '';
if (onclick) debugInfo += `onclick="${onclick}" `;
const match = onclick.match(/['"]([^'"]*\.aspx[^'"]*)['"]/);
if (match) {
try {
href = new URL(match[1], (window as Window).location.href).href;
} catch {
href = match[1];
}
break;
}
}
}
// Log debug info for first data row to help diagnose href extraction issues
if (rowIdx === 0 && val(2)) {
(window as any).__fdiskFirstRowDebug = `StNr=${val(2)} href=${href} debug=${debugInfo}`;
}
return { return {
status: val(1), status: val(1),
standesbuchNr: val(2), standesbuchNr: val(2),