Files
dashboard/sync/src/scraper.ts
Matthias Hochmeister b3266afbf8 update
2026-03-13 21:27:07 +01:00

801 lines
31 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { chromium, Page, Frame } from '@playwright/test';
import {
FdiskMember,
FdiskAusbildung,
FdiskBefoerderung,
FdiskUntersuchung,
FdiskFahrgenehmigung,
} from './types';
// Root URL of the FDISK web application; overridable via the FDISK_BASE_URL environment variable.
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
// Fallback `id_feuerwehren` query value, used when the member detail URL does not carry one.
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
// Fallback `id_instanzen` query value, used when the member detail URL does not carry one.
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';
// Login page; login() opens it first and treats any URL still containing "login" as "not logged in".
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
// Member list page that is loaded inside the frameset's content frame.
const MEMBERS_URL = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/meine_Mitglieder.aspx`;
/** Write one line to stdout, tagged with "[scraper]" and the current ISO timestamp. */
function log(msg: string) {
  const timestamp = new Date().toISOString();
  console.log('[scraper] ' + timestamp + ' ' + msg);
}
/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 *
 * Surrounding whitespace is ignored. Returns null when the input is empty,
 * does not have the exact DD.MM.YYYY shape, or does not name a real calendar
 * day (e.g. "31.02.2024" or "00.00.0000"), so callers never receive an ISO
 * string that a date column would reject.
 *
 * @param raw - Cell text as scraped from the page; may be null or undefined.
 * @returns The ISO date string, or null if empty or unparseable.
 */
function parseDate(raw: string | null | undefined): string | null {
  if (!raw) return null;
  const trimmed = raw.trim();
  if (!trimmed) return null;
  const match = trimmed.match(/^(\d{2})\.(\d{2})\.(\d{4})$/);
  if (!match) return null;
  const [, dd, mm, yyyy] = match;
  const day = Number(dd);
  const month = Number(mm);
  const year = Number(yyyy);
  // Round-trip through Date to reject impossible days. setUTCFullYear is used
  // instead of Date.UTC because the latter remaps years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDay =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDay) return null;
  return `${yyyy}-${mm}-${dd}`;
}
/**
 * Normalise scraped cell text: strip surrounding whitespace and map
 * missing or blank values to null.
 */
function cellText(text: string | undefined | null): string | null {
  if (text == null) return null;
  const stripped = text.trim();
  return stripped.length > 0 ? stripped : null;
}
/**
 * Run a complete FDISK scrape in a fresh headless Chromium instance.
 *
 * Logs in, loads the member list, then visits every member's detail page to
 * collect profile fields, Ausbildungen, Beförderungen, Untersuchungen and
 * Fahrgenehmigungen. A failure while scraping one member is logged and does
 * not abort the run; a failure during login or member-list loading rejects.
 * The browser is always closed before the promise settles.
 *
 * @param username - FDISK login name.
 * @param password - FDISK password.
 * @returns All scraped members plus the per-member records, flattened into one array per record type.
 */
export async function scrapeAll(username: string, password: string): Promise<{
  members: FdiskMember[];
  ausbildungen: FdiskAusbildung[];
  befoerderungen: FdiskBefoerderung[];
  untersuchungen: FdiskUntersuchung[];
  fahrgenehmigungen: FdiskFahrgenehmigung[];
}> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--disable-gpu', '--disable-software-rasterizer'],
  });
  // Everything after launch runs inside the try so the browser is closed even
  // when creating the context or the page fails.
  try {
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    await login(page, username, password);
    // After login, page is on Start.aspx (frameset).
    // Direct navigation to MitgliedschaftenList.aspx causes a server BLError because
    // the server reads the org context from session variables set by the menu.
    // Navigate via the menu frame (left.aspx) to set session state properly.
    const mainFrame = await navigateToMemberList(page);
    const members = await scrapeMembers(mainFrame);
    log(`Found ${members.length} members`);

    const ausbildungen: FdiskAusbildung[] = [];
    const befoerderungen: FdiskBefoerderung[] = [];
    const untersuchungen: FdiskUntersuchung[] = [];
    const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];

    for (const member of members) {
      try {
        // Navigate to member detail page — use direct URL if available, else search+click fallback
        let onDetail: boolean;
        if (member.detailUrl) {
          await frame_goto(mainFrame, member.detailUrl);
          onDetail = true;
        } else {
          onDetail = await navigateToMemberDetailBySearch(mainFrame, member.standesbuchNr);
        }
        if (!onDetail) {
          log(` SKIP ${member.vorname} ${member.zuname} (${member.standesbuchNr}): could not reach detail page`);
          continue;
        }

        // Scrape extra profile fields from the detail form
        const profileFields = await scrapeDetailProfileFields(mainFrame);
        member.geburtsort = profileFields.geburtsort;
        member.geschlecht = profileFields.geschlecht;
        member.beruf = profileFields.beruf;
        member.wohnort = profileFields.wohnort;
        member.plz = profileFields.plz;

        // Extract mitgliedschaft params from the current URL for constructing sub-section URLs.
        // PersonenForm.aspx is in the personen module; sub-sections are in mitgliedschaften module.
        // The links to Beförderungen/Untersuchungen/Fahrgenehmigungen live in the navigation
        // frame (not the content mainFrame), so we construct the URLs directly.
        // This must be read before the Ausbildungen scrape, which may navigate the frame.
        const currentUrl = mainFrame.url();
        const urlObj = new URL(currentUrl);
        const idMitgliedschaft = urlObj.searchParams.get('id_mitgliedschaften');
        const idInstanzen = urlObj.searchParams.get('id_instanzen') ?? ID_INSTANZEN;
        const idFeuerwehren = urlObj.searchParams.get('id_feuerwehren') ?? ID_FEUERWEHREN;

        // Ausbildungen
        const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member);
        ausbildungen.push(...quals);

        let befos: FdiskBefoerderung[] = [];
        let unters: FdiskUntersuchung[] = [];
        let fahrg: FdiskFahrgenehmigung[] = [];
        if (idMitgliedschaft) {
          // Beförderungen
          befos = await scrapeMemberBefoerderungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          befoerderungen.push(...befos);
          // Untersuchungen
          unters = await scrapeMemberUntersuchungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          untersuchungen.push(...unters);
          // Fahrgenehmigungen
          fahrg = await scrapeMemberFahrgenehmigungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          fahrgenehmigungen.push(...fahrg);
        } else {
          // Without the membership id the sub-section URLs cannot be built; say so instead of
          // silently reporting zero rows.
          log(` WARN ${member.vorname} ${member.zuname} (${member.standesbuchNr}): no id_mitgliedschaften in ${currentUrl} — skipping Beförderungen/Untersuchungen/Fahrgenehmigungen`);
        }

        log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen, ${befos.length} Beförderungen, ${unters.length} Untersuchungen, ${fahrg.length} Fahrgenehmigungen`);
        // Short pause between members.
        await page.waitForTimeout(500);
      } catch (err) {
        log(` WARN: could not scrape detail for ${member.vorname} ${member.zuname}: ${err}`);
      }
    }

    return { members, ausbildungen, befoerderungen, untersuchungen, fahrgenehmigungen };
  } finally {
    await browser.close();
  }
}
/** Point `frame` at `url` and resolve once the load has reached the 'networkidle' state. */
async function frame_goto(frame: Frame, url: string): Promise<void> {
  const options = { waitUntil: 'networkidle' as const };
  await frame.goto(url, options);
}
/**
 * Open the FDISK login page and sign in with the given credentials.
 *
 * A URL that contains "login" (case-insensitive) is taken to mean "not logged
 * in". If the login page immediately redirects elsewhere the session is
 * treated as already authenticated and nothing is submitted.
 *
 * @throws Error when the page is still on a login URL after submitting.
 */
async function login(page: Page, username: string, password: string): Promise<void> {
  const isLoginAddress = (address: string): boolean => address.toLowerCase().includes('login');

  log(`Navigating to ${LOGIN_URL}`);
  await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForLoadState('networkidle');

  // Already authenticated? Then the server has redirected us away from the login page.
  const landedOn = page.url();
  if (!isLoginAddress(landedOn)) {
    log(`Already logged in, on: ${landedOn}`);
    return;
  }

  // Element ids taken from the known login form markup.
  const userInput = page.locator('#login');
  await userInput.waitFor({ state: 'visible', timeout: 10000 });
  await userInput.fill(username);
  await page.locator('#password').fill(password);
  await page.locator('#Submit2').click();

  // Give the redirect up to 15s; a failure here is ignored on purpose because
  // the URL check below decides whether the login worked.
  await page
    .waitForURL((url) => !isLoginAddress(url.toString()), { waitUntil: 'networkidle', timeout: 15000 })
    .catch(() => {});

  const finalUrl = page.url();
  if (isLoginAddress(finalUrl)) {
    throw new Error(`Login failed — still on login page: ${finalUrl}`);
  }
  log(`Logged in successfully, redirected to: ${finalUrl}`);
}
/**
 * Reach a member's detail page without a direct URL.
 *
 * Loads the member list, sets both ends of the Standesbuchnummer filter to the
 * given number, submits the search and clicks the first result row.
 *
 * @returns true when the frame ended up on a URL that is not one of the list pages.
 */
async function navigateToMemberDetailBySearch(frame: Frame, standesbuchNr: string): Promise<boolean> {
  // Start from the member list
  await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await frame.waitForLoadState('networkidle');

  // Narrow the search form to exactly this Standesbuchnummer
  const filterApplied = await frame.evaluate((wanted) => {
    const searchForm = (document as any).forms['frmsearch'];
    if (!searchForm) return false;
    const lower = searchForm.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
    const upper = searchForm.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
    if (!lower || !upper) return false;
    lower.value = wanted;
    upper.value = wanted;
    return true;
  }, standesbuchNr);
  if (!filterApplied) {
    log(` WARN navigateToMemberDetailBySearch: search form not usable for StNr ${standesbuchNr}`);
    return false;
  }

  // Register the navigation wait together with the submit so the navigation is not missed.
  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
  ]);

  // FDISK rows open the detail page when clicked; take the first match in the first row.
  const clickTarget = await frame.$('table.FdcLayList tbody tr:first-child a, table.FdcLayList tbody tr:first-child td');
  if (!clickTarget) {
    log(` WARN navigateToMemberDetailBySearch: no result row for StNr ${standesbuchNr}`);
    return false;
  }

  try {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 15000 }),
      clickTarget.click(),
    ]);
  } catch {
    // The wait may time out when the click did not trigger a navigation
    // (e.g. onclick vs href); the URL check below is the actual verdict.
  }

  const landedUrl = frame.url();
  const stillOnList = landedUrl.includes('MitgliedschaftenList') || landedUrl.includes('meine_Mitglieder');
  if (stillOnList) {
    log(` WARN navigateToMemberDetailBySearch: still on list page after click for StNr ${standesbuchNr}`);
    return false;
  }
  log(` Navigated to detail via search+click: ${landedUrl}`);
  return true;
}
/**
 * Load the member list inside the frameset's frame named "mainFrame" and return that frame.
 *
 * @throws Error when the frame does not exist or the resulting URL looks like an error page.
 */
async function navigateToMemberList(page: Page): Promise<Frame> {
  const contentFrame = page.frame({ name: 'mainFrame' });
  if (contentFrame == null) {
    throw new Error('mainFrame not found in Start.aspx frameset');
  }

  log(`Navigating mainFrame to: ${MEMBERS_URL}`);
  await contentFrame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await contentFrame.waitForLoadState('networkidle');

  const loadedUrl = contentFrame.url();
  const pageTitle = await contentFrame.title();
  log(`mainFrame loaded: ${loadedUrl} — title: "${pageTitle}"`);

  // URL fragments that mark a server-side error page.
  const errorMarkers = ['BLError', 'support.aspx', 'Error'];
  if (errorMarkers.some((marker) => loadedUrl.includes(marker))) {
    throw new Error(`Member list returned error page: ${loadedUrl}`);
  }
  return contentFrame;
}
/**
 * Collect all members from the member list currently loaded in `frame`.
 *
 * Steps:
 *  1. If the search form is present, blank every Standesbuchnummer field, pick the
 *     largest page-size option that can be found, and submit the form.
 *  2. Parse the first result page and read the "x-y von z" pagination text.
 *  3. If the page shows fewer rows than the reported total, query the list in
 *     Standesbuchnummer ranges until all members were seen, the upper bound is
 *     reached, or five consecutive ranges produced nothing new.
 *
 * Rows without Standesbuchnummer, Vorname or Zuname are dropped. The extra profile
 * fields (geburtsort, geschlecht, beruf, wohnort, plz) are initialised to null here.
 *
 * @param frame - Frame that is already showing the member list page.
 * @returns One FdiskMember per usable row, de-duplicated by Standesbuchnummer in phase 3.
 */
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
  log(`Scraping member list from: ${frame.url()}`);

  // Clear the Standesbuchnummer filter if the search form is present.
  // FDISK pre-fills the logged-in user's own Standesbuchnummer, which limits results to 1 member.
  // We clear it before submitting so all members of the fire station are returned.
  const hasForm = await frame.$('form[name="frmsearch"]') !== null;
  if (hasForm) {
    const fieldDump = await frame.evaluate(() => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return { cleared: [], pageSizeSet: null as string | null, allFields: [] };
      const cleared: string[] = [];
      const allFields: string[] = [];
      let pageSizeSet: string | null = null;
      for (const el of Array.from(form.elements) as HTMLInputElement[]) {
        if (el.type === 'hidden') continue;
        const name = (el.name ?? '').toLowerCase();
        const id = (el.id ?? '').toLowerCase();
        if (el.value) allFields.push(`${el.name || el.id}=${el.value}`);
        if (name.includes('standesbuch') || id.includes('standesbuch')) {
          el.value = '';
          cleared.push(el.name || el.id);
        }
        // Maximize page size: look for a select AND its paired hidden input
        // FDISK uses a custom Dd widget where <select name="xDd_dd"> is the visible dropdown
        // but the actual POST value comes from <input type="hidden" name="xDd_id"> or similar.
        if ((name.includes('anzahl') || id.includes('anzahl') ||
             name.includes('pagesize') || id.includes('pagesize') ||
             name.includes('rows') || id.includes('rows')) &&
            el.tagName === 'SELECT') {
          const select = el as unknown as HTMLSelectElement;
          // Pick the largest numeric option value, or the last option as fallback
          let bestOption: HTMLOptionElement | null = null;
          let bestVal = -1;
          for (const opt of Array.from(select.options)) {
            const n = parseInt(opt.value, 10);
            if (!isNaN(n) && n > bestVal) { bestVal = n; bestOption = opt; }
          }
          if (!bestOption && select.options.length > 0) {
            bestOption = select.options[select.options.length - 1];
          }
          if (bestOption) {
            select.value = bestOption.value;
            pageSizeSet = `${el.name || el.id}=${bestOption.value}`;
            // Also update the paired hidden field used by the Dd custom widget.
            // Common patterns: xDd_dd → xDd_id or xDd_hd
            const baseName = (el.name || el.id).replace(/_dd$/i, '');
            for (const suffix of ['_id', '_hd', '_val']) {
              const hidden = form.elements[baseName + suffix] as HTMLInputElement | undefined;
              if (hidden && hidden.type === 'hidden') {
                hidden.value = bestOption.value;
                pageSizeSet += ` (also set ${baseName + suffix})`;
              }
            }
          }
        }
      }
      return { cleared, pageSizeSet, allFields };
    });

    if (fieldDump.allFields.length > 0) {
      log(`Search form active filters before clear: ${fieldDump.allFields.join(', ')}`);
    }
    if (fieldDump.cleared.length > 0) {
      log(`Cleared Standesbuchnummer filter fields: ${fieldDump.cleared.join(', ')}`);
    } else {
      log('Search form found — no Standesbuchnummer field detected, submitting as-is');
    }
    if (fieldDump.pageSizeSet) {
      log(`Set page size: ${fieldDump.pageSizeSet}`);
    } else {
      log('No page size field found — will paginate through all results');
    }

    // Use Promise.all to start waiting for navigation BEFORE triggering the submit,
    // otherwise waitForLoadState resolves against the already-idle current page.
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
    ]);
    log(`After form submit: ${frame.url()}`);
  }

  // --- Phase 1: initial fetch (no StNr filter) to get the first batch and total count ---
  type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];
  const logRow = (row: ParsedRow): void => {
    log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
  };

  await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
  const firstRows = await parseRowsFromTable(frame);
  log(`Initial fetch: ${firstRows.length} rows`);

  // Log href debug info for the first row to diagnose URL extraction
  // (parseRowsFromTable leaves it on window.__fdiskFirstRowDebug).
  const rowDebug = await frame.evaluate(() => (window as any).__fdiskFirstRowDebug ?? 'no debug info');
  log(`Row href debug: ${rowDebug}`);
  firstRows.forEach(logRow);

  const pagination = await frame.evaluate(() =>
    document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? ''
  );
  log(`Pagination: "${pagination}"`);
  // Pagination text has the shape "<from>-<to> von <total>".
  const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
  const totalExpected = pagMatch ? parseInt(pagMatch[3], 10) : null;
  const shownSoFar = pagMatch ? parseInt(pagMatch[2], 10) : null;

  const seenStNrs = new Set<string>(firstRows.map(r => r.standesbuchNr).filter(Boolean));
  const allRows: ParsedRow[] = [...firstRows];

  // --- Phase 2: if more members exist and pagination is disabled, use StNr range queries ---
  if (totalExpected && shownSoFar && shownSoFar < totalExpected) {
    log(`Pagination disabled (FDISK limitation). Switching to StNr range queries to fetch remaining ${totalExpected - seenStNrs.size} members.`);
    const BATCH = 15; // fetch 15 StNr slots at a time — safely under the 20-row page limit
    const MAX_STNR = 9999; // upper bound; we stop earlier if we have all members
    let startNr = 1;
    let consecutiveEmpty = 0;

    while (seenStNrs.size < totalExpected && startNr <= MAX_STNR && consecutiveEmpty < 5) {
      const endNr = startNr + BATCH - 1;

      // Set StNr range in the search form and submit
      const formOk = await frame.evaluate(({ s, e }: { s: number; e: number }) => {
        const form = (document as any).forms['frmsearch'];
        if (!form) return false;
        const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement;
        const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement;
        if (!fromFld || !toFld) return false;
        fromFld.value = String(s);
        toFld.value = String(e);
        return true;
      }, { s: startNr, e: endNr });
      if (!formOk) {
        log('WARN: could not set StNr range fields — aborting range queries');
        break;
      }

      await Promise.all([
        frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
        frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
      ]);

      const rangeRows = await parseRowsFromTable(frame);
      const newRows = rangeRows.filter(r => r.standesbuchNr && !seenStNrs.has(r.standesbuchNr));
      newRows.forEach(r => { if (r.standesbuchNr) seenStNrs.add(r.standesbuchNr); });
      allRows.push(...newRows);

      // Separate the two bounds; without the hyphen the range 1..15 reads as "115".
      log(`StNr ${startNr}-${endNr}: ${newRows.length} new members (collected ${seenStNrs.size}/${totalExpected})`);
      newRows.forEach(logRow);

      consecutiveEmpty = newRows.length === 0 ? consecutiveEmpty + 1 : 0;
      startNr = endNr + 1;
    }
    log(`Range queries complete: ${seenStNrs.size} unique members collected (expected ${totalExpected})`);
  }

  log(`Parsed ${allRows.length} raw rows total`);

  const members: FdiskMember[] = [];
  for (const row of allRows) {
    // Header/filler rows lack these fields; skip them.
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      // A parseable Abmeldedatum marks the member as having left.
      status: abmeldedatum ? 'ausgetreten' : 'aktiv',
      detailUrl: row.href,
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}
/**
 * Read every row of the member list table (`table.FdcLayList`) in `frame`.
 *
 * Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
 * 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon.
 * Each <td> contains an <a title="value">; the title is preferred as the clean
 * cell text, with the cell's text content as fallback.
 *
 * The detail URL is taken from the first usable <a href>, or else from the first
 * quoted ".aspx" URL found in an onclick attribute of the row, its anchors or cells.
 * As a side effect, a debug string for the first row is stored on
 * `window.__fdiskFirstRowDebug` inside the page.
 */
async function parseRowsFromTable(frame: Frame) {
  return frame.$$eval('table.FdcLayList tbody tr', (tableRows) =>
    tableRows.map((tableRow, position) => {
      const tds = Array.from(tableRow.querySelectorAll('td'));
      const read = (col: number) => {
        const anchorTitle = tds[col]?.querySelector('a')?.getAttribute('title')?.trim();
        // An empty title counts as missing, so the text content is used instead.
        return (anchorTitle || tds[col]?.textContent || '').trim();
      };

      let detailHref: string | null = null;
      let trace = '';

      // Strategy 1: a regular <a href="..."> that is neither empty, "#" nor a javascript: link.
      for (const anchor of Array.from(tableRow.querySelectorAll('a'))) {
        const attr = (anchor as Element).getAttribute('href') ?? '';
        trace += `a.href="${attr}" `;
        const usable = attr !== '' && attr !== '#' && !attr.startsWith('javascript:');
        if (usable) {
          detailHref = (anchor as HTMLAnchorElement).href; // the property is the absolute form
          break;
        }
      }

      // Strategy 2: an .aspx URL quoted inside an onclick handler on the row, an anchor or a cell.
      if (!detailHref) {
        const scanTargets: Element[] = [tableRow, ...Array.from(tableRow.querySelectorAll('a, td'))];
        for (const target of scanTargets) {
          const handler = target.getAttribute('onclick') ?? '';
          if (handler) trace += `onclick="${handler}" `;
          const found = handler.match(/['"]([^'"]*\.aspx[^'"]*)['"]/);
          if (!found) continue;
          try {
            detailHref = new URL(found[1], (window as Window).location.href).href;
          } catch {
            detailHref = found[1];
          }
          break;
        }
      }

      const standesbuchNr = read(2);
      // Keep diagnostics for the first row so the caller can log how the href was found.
      if (position === 0 && standesbuchNr) {
        (window as any).__fdiskFirstRowDebug = `StNr=${standesbuchNr} href=${detailHref} debug=${trace}`;
      }

      return {
        status: read(1),
        standesbuchNr,
        dienstgrad: read(3),
        vorname: read(4),
        zuname: read(5),
        geburtsdatum: read(6),
        svnr: read(7),
        eintrittsdatum: read(8),
        abmeldedatum: read(9),
        href: detailHref,
      };
    }),
  );
}
/**
 * Read the extra profile fields from the member detail form shown in `frame`.
 *
 * For each field a list of selectors is tried in order and the first one that
 * yields a non-empty value wins; a field with no match is null. For <select>
 * elements the selected option's text (or its value) is used.
 */
async function scrapeDetailProfileFields(frame: Frame): Promise<{
  geburtsort: string | null;
  geschlecht: string | null;
  beruf: string | null;
  wohnort: string | null;
  plz: string | null;
}> {
  return frame.evaluate(() => {
    const read = (selector: string): string | null => {
      const node = document.querySelector(selector) as HTMLInputElement | HTMLSelectElement | null;
      if (!node) return null;
      if (node.tagName !== 'SELECT') {
        return (node as HTMLInputElement).value?.trim() || null;
      }
      const dropdown = node as HTMLSelectElement;
      const chosen = dropdown.options[dropdown.selectedIndex];
      if (!chosen) return null;
      return (chosen.text || chosen.value || '').trim() || null;
    };

    // First selector that produces a value wins.
    const firstOf = (...selectors: string[]): string | null => {
      for (const selector of selectors) {
        const found = read(selector);
        if (found !== null) return found;
      }
      return null;
    };

    return {
      geburtsort: firstOf('input[name="geburtsort"]', 'input[id*="geburtsort"]'),
      geschlecht: firstOf('select[name*="geschlecht"]', 'select[id*="geschlecht"]'),
      beruf: firstOf('input[name="beruf"]', 'input[id*="beruf"]'),
      wohnort: firstOf('input[name="ort"]', 'input[id*="_ort"]', 'input[name="wohnort"]'),
      plz: firstOf('input[name="plz"]', 'input[id*="plz"]'),
    };
  });
}
/**
 * Scrape the Ausbildungen (training records) for `member` from the page loaded in `frame`.
 *
 * If no Ausbildung section is detected, a visible link containing "Ausbildung"
 * is clicked first. Afterwards the first table whose header row mentions
 * "kurs", "ausbildung" or "bezeichnung" is parsed; columns are located by
 * header keywords. Rows without a course name are skipped.
 */
async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  // NOTE(review): Playwright's `text=` engine may treat everything after "text=" as the
  // text to match rather than as a comma-separated list — confirm this selector can match.
  const sectionVisible = await frame
    .locator('text=Ausbildung, text=Ausbildungsliste')
    .first()
    .isVisible()
    .catch(() => false);

  if (!sectionVisible) {
    // Fall back to opening an Ausbildung tab/link when one is on the page.
    const tabLink = frame.locator('a:has-text("Ausbildung")').first();
    if (await tabLink.isVisible().catch(() => false)) {
      await tabLink.click();
      await frame.waitForLoadState('networkidle').catch(() => {});
    }
  }

  const namesCourse = (heading: string): boolean =>
    heading.includes('kurs') || heading.includes('ausbildung') || heading.includes('bezeichnung');

  for (const table of await frame.$$('table')) {
    // Each table becomes a grid of trimmed cell texts (td and th alike).
    const grid = await table.$$eval('tr', (trs) =>
      trs.map((tr) => Array.from(tr.querySelectorAll('td, th')).map((c) => (c as Element).textContent?.trim() ?? '')),
    );
    if (grid.length < 2) continue;

    const headings = grid[0].map((text) => text.toLowerCase());
    const nameCol = headings.findIndex(namesCourse);
    if (nameCol < 0) continue; // not an Ausbildung table

    const dateCol = headings.findIndex((h) => h.includes('datum') || h.includes('abschluss'));
    const expiryCol = headings.findIndex((h) => h.includes('ablauf') || h.includes('gültig'));
    const placeCol = headings.findIndex((h) => h.includes('ort'));
    const noteCol = headings.findIndex((h) => h.includes('bem') || h.includes('info'));

    const collected: FdiskAusbildung[] = [];
    for (const cells of grid.slice(1)) {
      const kursname = cellText(cells[nameCol]);
      if (!kursname) continue;
      const kursDatum = parseDate(dateCol >= 0 ? cells[dateCol] : null);
      collected.push({
        standesbuchNr: member.standesbuchNr,
        kursname,
        kursDatum,
        ablaufdatum: parseDate(expiryCol >= 0 ? cells[expiryCol] : null),
        ort: placeCol >= 0 ? cellText(cells[placeCol]) : null,
        bemerkung: noteCol >= 0 ? cellText(cells[noteCol]) : null,
        syncKey: `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`,
      });
    }
    // Only the first Ausbildung table is considered.
    return collected;
  }
  return [];
}
/**
 * Load the Beförderungen (promotions) list for one membership and scrape it.
 *
 * The URL is built from the membership/instance/fire-brigade ids. Table columns
 * used: 0=date, 1=Dienstgrad. Rows without a Dienstgrad are skipped. Any error
 * while waiting for or reading the table is logged as a warning and whatever
 * was collected so far is returned.
 */
async function scrapeMemberBefoerderungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskBefoerderung[]> {
  const query = `?id_mitgliedschaften=${idMitgliedschaft}&id_instanzen=${idInstanzen}&id_feuerwehren=${idFeuerwehren}`;
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/befoerderungenList.aspx${query}`;
  await frame_goto(frame, url);

  const promotions: FdiskBefoerderung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    const scraped = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const texts = Array.from(tr.querySelectorAll('td'), (td) => (td.textContent ?? '').trim());
        return { datum: texts[0] ?? '', dienstgrad: texts[1] ?? '' };
      }),
    );

    for (const entry of scraped) {
      const dienstgrad = cellText(entry.dienstgrad);
      if (dienstgrad === null) continue;
      const datum = parseDate(entry.datum);
      promotions.push({
        standesbuchNr,
        datum,
        dienstgrad,
        syncKey: `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`,
      });
    }

    log(` Beförderungen for StNr ${standesbuchNr}: ${promotions.length} rows`);
    promotions.forEach((p) => {
      log(` ${p.datum ?? '—'} ${p.dienstgrad}`);
    });
  } catch {
    log(` WARN: could not parse Beförderungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return promotions;
}
/**
 * Load the Untersuchungen (medical exams) list for one membership and scrape it.
 *
 * Every row with an Untersuchungsart is kept (one record per art+date); picking
 * the latest per category is left to the consumer. Errors while waiting for or
 * reading the table are logged as a warning and the rows collected so far are returned.
 */
async function scrapeMemberUntersuchungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskUntersuchung[]> {
  const query = `?id_mitgliedschaften=${idMitgliedschaft}&id_instanzen=${idInstanzen}&id_feuerwehren=${idFeuerwehren}`;
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/UntersuchungenList.aspx${query}`;
  await frame_goto(frame, url);

  const exams: FdiskUntersuchung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    const scraped = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        // Columns: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
        const texts = Array.from(tr.querySelectorAll('td'), (td) => (td.textContent ?? '').trim());
        return {
          datum: texts[0] ?? '',
          anmerkungen: texts[1] ?? '',
          art: texts[2] ?? '',
          ergebnis: texts[3] ?? '',
        };
      }),
    );

    for (const entry of scraped) {
      const art = cellText(entry.art);
      if (art === null) continue;
      const datum = parseDate(entry.datum);
      exams.push({
        standesbuchNr,
        datum,
        anmerkungen: cellText(entry.anmerkungen),
        art,
        ergebnis: cellText(entry.ergebnis),
        syncKey: `${standesbuchNr}::${art}::${datum ?? ''}`,
      });
    }

    log(` Untersuchungen for StNr ${standesbuchNr}: ${exams.length} rows`);
    exams.forEach((x) => {
      log(` ${x.datum ?? '—'} [${x.art}] ${x.ergebnis ?? '—'} | ${x.anmerkungen ?? ''}`);
    });
  } catch {
    log(` WARN: could not parse Untersuchungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return exams;
}
/**
 * Load the "Gesetzliche Fahrgenehmigungen" (driving permits) page for one membership and scrape it.
 *
 * This is an inline-edit (ListEdit) page, so a cell's value is taken from its
 * text <input> if present, else from the selected <select> option, else from
 * the cell text. Rows without a Fahrgenehmigungsklasse are skipped. Errors while
 * waiting for or reading the table are logged as a warning and the rows
 * collected so far are returned.
 */
async function scrapeMemberFahrgenehmigungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskFahrgenehmigung[]> {
  const query = `?id_mitgliedschaften=${idMitgliedschaft}&id_instanzen=${idInstanzen}&id_feuerwehren=${idFeuerwehren}`;
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/Ges_fahrgenehmigungenListEdit.aspx${query}`;
  await frame_goto(frame, url);

  const permits: FdiskFahrgenehmigung[] = [];
  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    // Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse
    const scraped = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const tds = Array.from(tr.querySelectorAll('td'));
        const readCell = (idx: number): string => {
          const td = tds[idx];
          if (!td) return '';
          const textInput = td.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
          if (textInput) return textInput.value?.trim() ?? '';
          const dropdown = td.querySelector('select') as HTMLSelectElement | null;
          if (!dropdown) return td.textContent?.trim() ?? '';
          const chosen = dropdown.options[dropdown.selectedIndex];
          return (chosen?.text || chosen?.value || '').trim();
        };
        return {
          ausstellungsdatum: readCell(0),
          gueltigBis: readCell(1),
          behoerde: readCell(2),
          nummer: readCell(3),
          klasse: readCell(4),
        };
      }),
    );

    for (const entry of scraped) {
      const klasse = cellText(entry.klasse);
      if (klasse === null) continue;
      const ausstellungsdatum = parseDate(entry.ausstellungsdatum);
      permits.push({
        standesbuchNr,
        ausstellungsdatum,
        gueltigBis: parseDate(entry.gueltigBis),
        behoerde: cellText(entry.behoerde),
        nummer: cellText(entry.nummer),
        klasse,
        syncKey: `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`,
      });
    }

    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${permits.length} rows`);
    permits.forEach((p) => {
      log(` ${p.ausstellungsdatum ?? '—'} [${p.klasse}] ${p.behoerde ?? ''} ${p.nummer ?? ''}`);
    });
  } catch {
    log(` WARN: could not parse Fahrgenehmigungen table for StNr ${standesbuchNr} (url: ${url})`);
  }
  return permits;
}
/**
 * Legacy entry point, kept so existing callers keep working.
 * Opens the member's detail URL in `frame` and hands over to the detail-page
 * Ausbildungen scraper; yields an empty list when the member has no detail URL.
 */
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  const target = member.detailUrl;
  if (!target) {
    return [];
  }
  await frame_goto(frame, target);
  return scrapeAusbildungenFromDetailPage(frame, member);
}