801 lines
31 KiB
TypeScript
801 lines
31 KiB
TypeScript
import { chromium, Page, Frame } from '@playwright/test';
|
||
import {
|
||
FdiskMember,
|
||
FdiskAusbildung,
|
||
FdiskBefoerderung,
|
||
FdiskUntersuchung,
|
||
FdiskFahrgenehmigung,
|
||
} from './types';
|
||
|
||
/**
 * Read an environment variable, falling back to `fallback` when the variable is
 * unset, empty, or whitespace-only. Surrounding whitespace is trimmed.
 */
function envOrDefault(name: string, fallback: string): string {
  const value = process.env[name]?.trim();
  return value ? value : fallback;
}

/** Root URL of the FDISK installation (override via FDISK_BASE_URL); trailing slashes are removed. */
const BASE_URL = envOrDefault('FDISK_BASE_URL', 'https://app.fdisk.at').replace(/\/+$/, '');
/** Default `id_feuerwehren` query value (override via FDISK_ID_FEUERWEHREN). */
const ID_FEUERWEHREN = envOrDefault('FDISK_ID_FEUERWEHREN', '164');
/** Default `id_instanzen` query value (override via FDISK_ID_INSTANZEN). */
const ID_INSTANZEN = envOrDefault('FDISK_ID_INSTANZEN', '2853');

/** Login form page. */
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
/** Member list page. */
const MEMBERS_URL = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/meine_Mitglieder.aspx`;
|
||
|
||
/**
 * Write one progress line to stdout, prefixed with `[scraper]` and the current
 * ISO-8601 timestamp.
 */
function log(msg: string): void {
  console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
}
|
||
|
||
/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 *
 * Surrounding whitespace is ignored. Returns null when the input is empty, is
 * not in DD.MM.YYYY form, or does not name a real calendar date (for example
 * `31.02.2020`, `99.99.2020` or the placeholder `00.00.0000`).
 *
 * @param raw Raw cell text, possibly null/undefined.
 * @returns The ISO date string, or null.
 */
function parseDate(raw: string | null | undefined): string | null {
  const trimmed = raw?.trim();
  if (!trimmed) return null;

  const match = /^(\d{2})\.(\d{2})\.(\d{4})$/.exec(trimmed);
  if (!match) return null;

  const day = Number(match[1]);
  const month = Number(match[2]);
  const year = Number(match[3]);
  if (year < 1 || month < 1 || month > 12 || day < 1) return null;

  // Validate the day against the month length so impossible dates are rejected
  // instead of being emitted as syntactically ISO-looking garbage.
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  const maxDay = daysInMonth[month - 1] ?? 0;
  if (day > maxDay) return null;

  return `${match[3]}-${match[2]}-${match[1]}`;
}
|
||
|
||
/**
 * Normalize raw cell text: trim surrounding whitespace and return null when the
 * input is null/undefined or nothing remains after trimming.
 */
function cellText(text: string | undefined | null): string | null {
  const trimmed = text?.trim();
  return trimmed ? trimmed : null;
}
|
||
|
||
/**
 * Log in to FDISK and scrape the complete member data set.
 *
 * Launches headless Chromium, logs in, loads the member list and then visits
 * every member's detail page to collect profile fields, Ausbildungen,
 * Beförderungen, Untersuchungen and Fahrgenehmigungen. An error while scraping
 * one member is logged and the loop continues with the next member. The
 * browser is closed in all cases.
 *
 * @param username FDISK login name.
 * @param password FDISK password.
 * @returns The members (mutated in place with the detail-page profile fields)
 *   and the flat per-member record lists.
 * @throws If the browser cannot be started, login fails, or the member list
 *   cannot be loaded.
 */
export async function scrapeAll(username: string, password: string): Promise<{
  members: FdiskMember[];
  ausbildungen: FdiskAusbildung[];
  befoerderungen: FdiskBefoerderung[];
  untersuchungen: FdiskUntersuchung[];
  fahrgenehmigungen: FdiskFahrgenehmigung[];
}> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--disable-gpu', '--disable-software-rasterizer'],
  });

  // Everything after launch runs inside try/finally so the browser process is
  // closed even when creating the context or the page fails.
  try {
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    await login(page, username, password);

    // The member list is loaded inside the frame named "mainFrame" of the
    // post-login page; the returned frame is reused for all further navigation.
    const mainFrame = await navigateToMemberList(page);

    const members = await scrapeMembers(mainFrame);
    log(`Found ${members.length} members`);

    const ausbildungen: FdiskAusbildung[] = [];
    const befoerderungen: FdiskBefoerderung[] = [];
    const untersuchungen: FdiskUntersuchung[] = [];
    const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];

    for (const member of members) {
      try {
        // Navigate to the member detail page — direct URL if available, else search+click fallback.
        let onDetail: boolean;
        if (member.detailUrl) {
          await frame_goto(mainFrame, member.detailUrl);
          onDetail = true;
        } else {
          onDetail = await navigateToMemberDetailBySearch(mainFrame, member.standesbuchNr);
        }

        if (!onDetail) {
          log(` SKIP ${member.vorname} ${member.zuname} (${member.standesbuchNr}): could not reach detail page`);
          continue;
        }

        // Extra profile fields from the detail form.
        const profileFields = await scrapeDetailProfileFields(mainFrame);
        member.geburtsort = profileFields.geburtsort;
        member.geschlecht = profileFields.geschlecht;
        member.beruf = profileFields.beruf;
        member.wohnort = profileFields.wohnort;
        member.plz = profileFields.plz;

        // Read the IDs from the detail URL now, before the Ausbildung scrape
        // possibly navigates the frame elsewhere. The sub-section URLs are
        // constructed from these IDs rather than discovered via links.
        const detailParams = new URL(mainFrame.url()).searchParams;
        const idMitgliedschaft = detailParams.get('id_mitgliedschaften');
        const idInstanzen = detailParams.get('id_instanzen') ?? ID_INSTANZEN;
        const idFeuerwehren = detailParams.get('id_feuerwehren') ?? ID_FEUERWEHREN;

        // Ausbildungen
        const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member);
        ausbildungen.push(...quals);

        let befos: FdiskBefoerderung[] = [];
        let unters: FdiskUntersuchung[] = [];
        let fahrg: FdiskFahrgenehmigung[] = [];

        if (idMitgliedschaft) {
          // Results are pushed right after each scrape so that a failure in a
          // later section does not discard the sections already collected.
          befos = await scrapeMemberBefoerderungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          befoerderungen.push(...befos);

          unters = await scrapeMemberUntersuchungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          untersuchungen.push(...unters);

          fahrg = await scrapeMemberFahrgenehmigungen(mainFrame, member.standesbuchNr, idMitgliedschaft, idInstanzen, idFeuerwehren);
          fahrgenehmigungen.push(...fahrg);
        } else {
          log(` WARN: no id_mitgliedschaften in detail URL for StNr ${member.standesbuchNr} — skipping Beförderungen, Untersuchungen and Fahrgenehmigungen`);
        }

        log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen, ${befos.length} Beförderungen, ${unters.length} Untersuchungen, ${fahrg.length} Fahrgenehmigungen`);
        // Short pause between members.
        await page.waitForTimeout(500);
      } catch (err) {
        log(` WARN: could not scrape detail for ${member.vorname} ${member.zuname}: ${err}`);
      }
    }

    return { members, ausbildungen, befoerderungen, untersuchungen, fahrgenehmigungen };
  } finally {
    await browser.close();
  }
}
|
||
|
||
/**
 * Navigate `frame` to `url` and resolve once the network is idle.
 * Thin wrapper so callers do not repeat the wait option.
 */
async function frame_goto(frame: Frame, url: string): Promise<void> {
  await frame.goto(url, { waitUntil: 'networkidle' });
}
|
||
|
||
/**
 * Open the FDISK login page and sign in with the given credentials.
 *
 * If, after loading LOGIN_URL, the page URL does not contain "login"
 * (case-insensitive), the session is treated as already authenticated and no
 * form is submitted.
 *
 * @param page Page to drive.
 * @param username Value for the `#login` field.
 * @param password Value for the `#password` field.
 * @throws Error if the page URL still contains "login" after submitting, or if
 *   the login form does not become visible within 10 seconds.
 */
async function login(page: Page, username: string, password: string): Promise<void> {
  const isLoginUrl = (url: string): boolean => url.toLowerCase().includes('login');

  log(`Navigating to ${LOGIN_URL}`);
  await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded' });
  await page.waitForLoadState('networkidle');

  // Check if already logged in
  const urlBeforeLogin = page.url();
  if (!isLoginUrl(urlBeforeLogin)) {
    log(`Already logged in, on: ${urlBeforeLogin}`);
    return;
  }

  // Fixed element IDs of the login form.
  const usernameField = page.locator('#login');
  const passwordField = page.locator('#password');
  const submitButton = page.locator('#Submit2');

  await usernameField.waitFor({ state: 'visible', timeout: 10000 });
  await usernameField.fill(username);
  await passwordField.fill(password);
  await submitButton.click();

  // Wait for navigation away from the login page (up to 15s).
  try {
    await page.waitForURL(
      (url) => !isLoginUrl(url.toString()),
      { waitUntil: 'networkidle', timeout: 15000 },
    );
  } catch (err) {
    // Best effort only: the URL check below decides whether login succeeded.
    log(`waitForURL after login submit did not complete: ${err}`);
  }

  // Verify we're logged in
  const urlAfterLogin = page.url();
  if (isLoginUrl(urlAfterLogin)) {
    throw new Error(`Login failed — still on login page: ${urlAfterLogin}`);
  }
  log(`Logged in successfully, redirected to: ${urlAfterLogin}`);
}
|
||
|
||
/**
 * Fallback navigation to a member's detail page when no direct URL is available.
 *
 * Loads the member list, sets both Standesbuchnummer range fields of the
 * `frmsearch` form to `standesbuchNr`, submits, and clicks the first result row.
 *
 * @param frame Frame to navigate.
 * @param standesbuchNr Standesbuchnummer to search for.
 * @returns true if, after the click, the frame URL is no longer a member-list
 *   URL; false if the form or a result row is missing or the frame stayed on
 *   the list.
 */
async function navigateToMemberDetailBySearch(frame: Frame, standesbuchNr: string): Promise<boolean> {
  // Navigate to the member list
  await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await frame.waitForLoadState('networkidle');

  // Set from/to filter to the same number so only that StNr matches.
  const formOk = await frame.evaluate((stNr) => {
    const form = (document as any).forms['frmsearch'];
    if (!form) return false;
    const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
    const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
    if (!fromFld || !toFld) return false;
    fromFld.value = stNr;
    toFld.value = stNr;
    return true;
  }, standesbuchNr);

  if (!formOk) {
    log(` WARN navigateToMemberDetailBySearch: search form not usable for StNr ${standesbuchNr}`);
    return false;
  }

  // Start waiting for the navigation before triggering the submit.
  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
  ]);

  // Click target: a link in the first row, or failing that the first row's cell.
  const firstRowLink = await frame.$('table.FdcLayList tbody tr:first-child a, table.FdcLayList tbody tr:first-child td');
  if (!firstRowLink) {
    log(` WARN navigateToMemberDetailBySearch: no result row for StNr ${standesbuchNr}`);
    return false;
  }

  try {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 15000 }),
      firstRowLink.click(),
    ]);
  } catch (err) {
    // The click may not have produced a navigation event within the timeout;
    // the URL check below decides the outcome.
    log(` navigateToMemberDetailBySearch: click/navigation wait failed for StNr ${standesbuchNr}: ${err}`);
  }

  const url = frame.url();
  const onDetailPage = !url.includes('MitgliedschaftenList') && !url.includes('meine_Mitglieder');
  if (onDetailPage) {
    log(` Navigated to detail via search+click: ${url}`);
  } else {
    log(` WARN navigateToMemberDetailBySearch: still on list page after click for StNr ${standesbuchNr}`);
  }
  return onDetailPage;
}
|
||
|
||
/**
 * Load the member list inside the page's frame named "mainFrame".
 *
 * @param page Logged-in page containing the frameset.
 * @returns The frame, positioned on the member list.
 * @throws Error if no frame named "mainFrame" exists, or if the resulting URL
 *   contains "BLError", "support.aspx" or "Error".
 */
async function navigateToMemberList(page: Page): Promise<Frame> {
  const mainFrame = page.frame({ name: 'mainFrame' });
  if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');

  log(`Navigating mainFrame to: ${MEMBERS_URL}`);
  await mainFrame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
  await mainFrame.waitForLoadState('networkidle');

  const url = mainFrame.url();
  const title = await mainFrame.title();
  log(`mainFrame loaded: ${url} — title: "${title}"`);

  // URL fragments treated as a sign that an error page was loaded instead of the list.
  const errorMarkers = ['BLError', 'support.aspx', 'Error'];
  if (errorMarkers.some((marker) => url.includes(marker))) {
    throw new Error(`Member list returned error page: ${url}`);
  }

  return mainFrame;
}
|
||
|
||
/** One row as produced by parseRowsFromTable for the member list table. */
type MemberListRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];

/** Submit the `frmsearch` form inside the frame and wait for the resulting navigation. */
async function submitMemberSearchForm(frame: Frame): Promise<void> {
  // Start waiting for the navigation BEFORE triggering the submit; otherwise the
  // wait could resolve against the already-idle current page.
  await Promise.all([
    frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
    frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
  ]);
}

/** Log the key columns of one parsed member-list row. */
function logMemberListRow(row: MemberListRow): void {
  log(` Row: StNr="${row.standesbuchNr}" Vorname="${row.vorname}" Zuname="${row.zuname}" Status="${row.status}" Dienstgrad="${row.dienstgrad}"`);
}

/**
 * In the `frmsearch` form: clear every non-hidden field whose name/id contains
 * "standesbuch", and set any page-size `<select>` to its largest option.
 * Returns what was changed plus a dump of the non-empty fields seen.
 */
async function resetMemberSearchFilters(frame: Frame): Promise<{
  cleared: string[];
  pageSizeSet: string | null;
  allFields: string[];
}> {
  return frame.evaluate(() => {
    const cleared: string[] = [];
    const allFields: string[] = [];
    let pageSizeSet: string | null = null;

    const form = (document as any).forms['frmsearch'];
    if (!form) return { cleared, pageSizeSet, allFields };

    for (const el of Array.from(form.elements) as HTMLInputElement[]) {
      if (el.type === 'hidden') continue;
      const name = (el.name ?? '').toLowerCase();
      const id = (el.id ?? '').toLowerCase();
      // Record the value before it is possibly cleared below.
      if (el.value) allFields.push(`${el.name || el.id}=${el.value}`);

      if (name.includes('standesbuch') || id.includes('standesbuch')) {
        el.value = '';
        cleared.push(el.name || el.id);
      }

      // Maximize page size: look for a select AND its paired hidden input.
      // Per the original author's note, FDISK uses a custom Dd widget where
      // <select name="xDd_dd"> is the visible dropdown but the actual POST value
      // comes from <input type="hidden" name="xDd_id"> or similar.
      const looksLikePageSize =
        name.includes('anzahl') || id.includes('anzahl') ||
        name.includes('pagesize') || id.includes('pagesize') ||
        name.includes('rows') || id.includes('rows');
      if (looksLikePageSize && el.tagName === 'SELECT') {
        const select = el as unknown as HTMLSelectElement;

        // Pick the largest numeric option value, or the last option as fallback.
        let bestOption: HTMLOptionElement | null = null;
        let bestVal = -1;
        for (const opt of Array.from(select.options)) {
          const n = parseInt(opt.value, 10);
          if (!isNaN(n) && n > bestVal) { bestVal = n; bestOption = opt; }
        }
        if (!bestOption && select.options.length > 0) {
          bestOption = select.options[select.options.length - 1];
        }

        if (bestOption) {
          select.value = bestOption.value;
          pageSizeSet = `${el.name || el.id}=${bestOption.value}`;

          // Also update the paired hidden field used by the Dd custom widget.
          // Patterns tried: xDd_dd → xDd_id, xDd_hd, xDd_val.
          const baseName = (el.name || el.id).replace(/_dd$/i, '');
          for (const suffix of ['_id', '_hd', '_val']) {
            const hidden = form.elements[baseName + suffix] as HTMLInputElement | undefined;
            if (hidden && hidden.type === 'hidden') {
              hidden.value = bestOption.value;
              pageSizeSet += ` (also set ${baseName + suffix})`;
            }
          }
        }
      }
    }

    return { cleared, pageSizeSet, allFields };
  });
}

/**
 * Query the member list in consecutive Standesbuchnummer ranges and append rows
 * with not-yet-seen numbers to `allRows` (also recording them in `seenStNrs`).
 * Stops when `totalExpected` members are collected, the StNr upper bound is
 * exceeded, five consecutive ranges yield nothing new, or the range fields
 * cannot be set.
 */
async function collectMembersByStNrRanges(
  frame: Frame,
  totalExpected: number,
  seenStNrs: Set<string>,
  allRows: MemberListRow[],
): Promise<void> {
  const BATCH = 15; // fetch 15 StNr slots at a time — safely under the 20-row page limit
  const MAX_STNR = 9999; // upper bound; we stop earlier if we have all members
  let startNr = 1;
  let consecutiveEmpty = 0;

  while (seenStNrs.size < totalExpected && startNr <= MAX_STNR && consecutiveEmpty < 5) {
    const endNr = startNr + BATCH - 1;

    // Set StNr range in the search form and submit
    const formOk = await frame.evaluate(({ s, e }: { s: number; e: number }) => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return false;
      const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement;
      const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement;
      if (!fromFld || !toFld) return false;
      fromFld.value = String(s);
      toFld.value = String(e);
      return true;
    }, { s: startNr, e: endNr });

    if (!formOk) {
      log('WARN: could not set StNr range fields — aborting range queries');
      break;
    }

    await submitMemberSearchForm(frame);

    const rangeRows = await parseRowsFromTable(frame);
    const newRows = rangeRows.filter((r) => r.standesbuchNr && !seenStNrs.has(r.standesbuchNr));
    for (const r of newRows) {
      if (r.standesbuchNr) seenStNrs.add(r.standesbuchNr);
    }
    allRows.push(...newRows);

    log(`StNr ${startNr}–${endNr}: ${newRows.length} new members (collected ${seenStNrs.size}/${totalExpected})`);
    newRows.forEach(logMemberListRow);

    consecutiveEmpty = newRows.length === 0 ? consecutiveEmpty + 1 : 0;
    startNr = endNr + 1;
  }
}

/**
 * Scrape the member list currently loaded in `frame`.
 *
 * If the `frmsearch` form is present, Standesbuchnummer filters are cleared and
 * the page size maximized before the form is submitted. Per the original
 * author's note, FDISK pre-fills the logged-in user's own Standesbuchnummer,
 * which would limit the result to one member. When the pagination text
 * ("a-b von n") shows that not all members are displayed, the remaining ones
 * are fetched through Standesbuchnummer range queries.
 *
 * Rows lacking Standesbuchnummer, Vorname or Zuname are dropped. `status` is
 * derived from the Abmeldedatum: 'ausgetreten' when one parses, else 'aktiv'.
 * The detail-page profile fields are initialised to null.
 */
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
  log(`Scraping member list from: ${frame.url()}`);

  const hasForm = (await frame.$('form[name="frmsearch"]')) !== null;
  if (hasForm) {
    const fieldDump = await resetMemberSearchFilters(frame);

    if (fieldDump.allFields.length > 0) {
      log(`Search form active filters before clear: ${fieldDump.allFields.join(', ')}`);
    }
    if (fieldDump.cleared.length > 0) {
      log(`Cleared Standesbuchnummer filter fields: ${fieldDump.cleared.join(', ')}`);
    } else {
      log('Search form found — no Standesbuchnummer field detected, submitting as-is');
    }
    if (fieldDump.pageSizeSet) {
      log(`Set page size: ${fieldDump.pageSizeSet}`);
    } else {
      log('No page size field found — will paginate through all results');
    }

    await submitMemberSearchForm(frame);
    log(`After form submit: ${frame.url()}`);
  }

  // --- Phase 1: initial fetch (no StNr filter) to get the first batch and total count ---
  await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
  const firstRows = await parseRowsFromTable(frame);
  log(`Initial fetch: ${firstRows.length} rows`);

  // parseRowsFromTable stores href-extraction diagnostics for the first row on `window`.
  const rowDebug = await frame.evaluate(() => (window as any).__fdiskFirstRowDebug ?? 'no debug info');
  log(`Row href debug: ${rowDebug}`);

  firstRows.forEach(logMemberListRow);

  const pagination = await frame.evaluate(() =>
    document.querySelector('table.FdcLayListNav')?.textContent?.trim() ?? ''
  );
  log(`Pagination: "${pagination}"`);

  // Expected shape: "<first>-<last> von <total>".
  const pagMatch = pagination.match(/(\d+)-(\d+)\s+von\s+(\d+)/i);
  const totalExpected = pagMatch ? parseInt(pagMatch[3], 10) : null;
  const shownSoFar = pagMatch ? parseInt(pagMatch[2], 10) : null;

  const seenStNrs = new Set<string>(firstRows.map((r) => r.standesbuchNr).filter(Boolean));
  const allRows: MemberListRow[] = [...firstRows];

  // --- Phase 2: if more members exist than are shown, use StNr range queries ---
  if (totalExpected && shownSoFar && shownSoFar < totalExpected) {
    log(`Pagination disabled (FDISK limitation). Switching to StNr range queries to fetch remaining ${totalExpected - seenStNrs.size} members.`);
    await collectMembersByStNrRanges(frame, totalExpected, seenStNrs, allRows);
    log(`Range queries complete: ${seenStNrs.size} unique members collected (expected ${totalExpected})`);
  }

  log(`Parsed ${allRows.length} raw rows total`);

  const members: FdiskMember[] = [];
  for (const row of allRows) {
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      status: abmeldedatum ? 'ausgetreten' : 'aktiv',
      detailUrl: row.href,
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}
|
||
|
||
/**
 * Parse every `table.FdcLayList tbody tr` of the member list into a plain object.
 *
 * Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
 * 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon.
 * A cell's value is the `title` of its first `<a>` when non-empty, otherwise the
 * cell's text content.
 *
 * `href` is the row's detail URL, or null when none could be extracted. As a
 * side effect, diagnostics for the first row are stored in
 * `window.__fdiskFirstRowDebug` inside the page.
 */
async function parseRowsFromTable(frame: Frame) {
  return frame.$$eval('table.FdcLayList tbody tr', (trs) =>
    trs.map((tr, rowIdx) => {
      const cells = Array.from(tr.querySelectorAll('td'));
      const val = (i: number) => {
        const anchor = cells[i]?.querySelector('a');
        const title = anchor?.getAttribute('title')?.trim();
        // Use title only if non-empty; otherwise fall back to textContent.
        return (title || cells[i]?.textContent || '').trim();
      };

      // Extract detail URL — two strategies:
      //   1. first <a> with a real href (not empty, "#", or javascript:)
      //   2. an .aspx URL quoted inside an onclick attribute on the <tr>, an <a> or a <td>
      let href: string | null = null;
      let debugInfo = '';

      for (const a of Array.from(tr.querySelectorAll('a'))) {
        const rawHref = (a as Element).getAttribute('href') ?? '';
        debugInfo += `a.href="${rawHref}" `;
        const normalized = rawHref.trim().toLowerCase();
        if (normalized && normalized !== '#' && !normalized.startsWith('javascript:')) {
          href = (a as HTMLAnchorElement).href; // resolves relative → absolute
          break;
        }
      }

      if (!href) {
        const candidates: Element[] = [tr, ...Array.from(tr.querySelectorAll('a, td'))];
        for (const el of candidates) {
          const onclick = el.getAttribute('onclick') ?? '';
          if (onclick) debugInfo += `onclick="${onclick}" `;
          const match = onclick.match(/['"]([^'"]*\.aspx[^'"]*)['"]/);
          if (match) {
            try {
              href = new URL(match[1], (window as Window).location.href).href;
            } catch {
              // Not resolvable against the page URL — keep the raw value.
              href = match[1];
            }
            break;
          }
        }
      }

      // Keep debug info for the first data row to help diagnose href extraction issues.
      if (rowIdx === 0 && val(2)) {
        (window as any).__fdiskFirstRowDebug = `StNr=${val(2)} href=${href} debug=${debugInfo}`;
      }

      return {
        status: val(1),
        standesbuchNr: val(2),
        dienstgrad: val(3),
        vorname: val(4),
        zuname: val(5),
        geburtsdatum: val(6),
        svnr: val(7),
        eintrittsdatum: val(8),
        abmeldedatum: val(9),
        href,
      };
    }),
  );
}
|
||
|
||
/**
 * Scrape additional profile fields from the member detail form.
 * Expects the frame to already be on the member detail page.
 *
 * Each field is read from the first matching form control: for `<select>` the
 * selected option's text (or value), otherwise the control's value. A field is
 * null when no selector yields a non-empty value.
 */
async function scrapeDetailProfileFields(frame: Frame): Promise<{
  geburtsort: string | null;
  geschlecht: string | null;
  beruf: string | null;
  wohnort: string | null;
  plz: string | null;
}> {
  return frame.evaluate(() => {
    const val = (selector: string): string | null => {
      const el = document.querySelector(selector) as HTMLInputElement | HTMLSelectElement | null;
      if (!el) return null;
      if (el.tagName === 'SELECT') {
        const sel = el as HTMLSelectElement;
        const opt = sel.options[sel.selectedIndex];
        return opt ? (opt.text || opt.value || '').trim() || null : null;
      }
      return (el as HTMLInputElement).value?.trim() || null;
    };

    // Selectors are tried left to right; the first non-null value wins.
    return {
      geburtsort: val('input[name="geburtsort"]') ?? val('input[id*="geburtsort"]'),
      geschlecht: val('select[name*="geschlecht"]') ?? val('select[id*="geschlecht"]'),
      beruf: val('input[name="beruf"]') ?? val('input[id*="beruf"]'),
      wohnort: val('input[name="ort"]') ?? val('input[id*="_ort"]') ?? val('input[name="wohnort"]'),
      plz: val('input[name="plz"]') ?? val('input[id*="plz"]'),
    };
  });
}
|
||
|
||
/**
 * Scrape Ausbildungen starting from the member detail page (already loaded).
 *
 * If a visible link containing the text "Ausbildung" exists, it is clicked
 * first. Then the first table on the page whose header row has a cell
 * containing "kurs", "ausbildung" or "bezeichnung" is parsed; rows without a
 * course name are skipped.
 *
 * @param frame Frame positioned on the member detail page.
 * @param member Member the records belong to (only `standesbuchNr` is read).
 * @returns One record per data row; empty when no matching table exists.
 */
async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  // NOTE: an earlier "is the section already visible?" probe used the selector
  // 'text=Ausbildung, text=Ausbildungsliste'. The text engine treats everything
  // after "text=" as literal text rather than as a selector list, so that probe
  // is not expected to have matched and the link was effectively always tried.
  // That effective behaviour is kept here.
  const ausbildungLink = frame.locator('a:has-text("Ausbildung")').first();
  const hasLink = await ausbildungLink.isVisible().catch(() => false);
  if (hasLink) {
    await ausbildungLink.click();
    await frame.waitForLoadState('networkidle').catch(() => {});
  }

  // Read all tables in one round trip: tables → rows → trimmed cell texts.
  const tables = await frame.$$eval('table', (tableEls) =>
    tableEls.map((tableEl) =>
      Array.from(tableEl.querySelectorAll('tr')).map((tr) =>
        Array.from(tr.querySelectorAll('td, th')).map((c) => (c.textContent ?? '').trim()),
      ),
    ),
  );

  const isNameHeader = (h: string): boolean =>
    h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung');

  const ausbildungen: FdiskAusbildung[] = [];

  for (const rows of tables) {
    if (rows.length < 2) continue;

    // Columns are located by keyword in the lower-cased first row.
    const header = rows[0].map((c) => c.toLowerCase());
    const kursnameIdx = header.findIndex(isNameHeader);
    if (kursnameIdx < 0) continue; // not an Ausbildung table

    const datumIdx = header.findIndex((h) => h.includes('datum') || h.includes('abschluss'));
    const ablaufIdx = header.findIndex((h) => h.includes('ablauf') || h.includes('gültig'));
    const ortIdx = header.findIndex((h) => h.includes('ort'));
    const bemIdx = header.findIndex((h) => h.includes('bem') || h.includes('info'));

    for (const cells of rows.slice(1)) {
      const kursname = cellText(cells[kursnameIdx]);
      if (!kursname) continue;

      const kursDatum = parseDate(datumIdx >= 0 ? cells[datumIdx] : null);
      const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;

      ausbildungen.push({
        standesbuchNr: member.standesbuchNr,
        kursname,
        kursDatum,
        ablaufdatum: parseDate(ablaufIdx >= 0 ? cells[ablaufIdx] : null),
        ort: ortIdx >= 0 ? cellText(cells[ortIdx]) : null,
        bemerkung: bemIdx >= 0 ? cellText(cells[bemIdx]) : null,
        syncKey,
      });
    }

    break; // only process the first Ausbildung table found
  }

  return ausbildungen;
}
|
||
|
||
/**
 * Navigate to the Beförderungen sub-page and scrape all promotions.
 *
 * The URL is built from the given IDs (URL-encoded). Rows without a Dienstgrad
 * are skipped. If the list table does not appear or cannot be parsed, a warning
 * is logged and the rows collected so far (possibly none) are returned.
 * Navigation errors are not caught and propagate to the caller.
 *
 * @param frame Frame to navigate.
 * @param standesbuchNr Member number stored on each record and in its syncKey.
 * @param idMitgliedschaft Value for `id_mitgliedschaften`.
 * @param idInstanzen Value for `id_instanzen`.
 * @param idFeuerwehren Value for `id_feuerwehren`.
 */
async function scrapeMemberBefoerderungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskBefoerderung[]> {
  // URLSearchParams percent-encodes the values instead of splicing them in raw.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/befoerderungenList.aspx?${query.toString()}`;
  await frame_goto(frame, url);

  const results: FdiskBefoerderung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    // Columns read: 0=Datum, 1=Dienstgrad.
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        return { datum: cell(0), dienstgrad: cell(1) };
      })
    );

    for (const row of rows) {
      const dienstgrad = cellText(row.dienstgrad);
      if (!dienstgrad) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
      results.push({ standesbuchNr, datum, dienstgrad, syncKey });
    }

    log(` Beförderungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const b of results) {
      log(` ${b.datum ?? '—'} ${b.dienstgrad}`);
    }
  } catch (err) {
    log(` WARN: could not parse Beförderungen table for StNr ${standesbuchNr} (url: ${url}): ${err}`);
  }

  return results;
}
|
||
|
||
/**
 * Navigate to the Untersuchungen sub-page and scrape all medical exams.
 *
 * Every row with a non-empty Untersuchungsart is returned; no per-category
 * "latest" filtering happens here. The URL is built from the given IDs
 * (URL-encoded). If the list table does not appear or cannot be parsed, a
 * warning is logged and the rows collected so far (possibly none) are returned.
 * Navigation errors are not caught and propagate to the caller.
 *
 * @param frame Frame to navigate.
 * @param standesbuchNr Member number stored on each record and in its syncKey.
 * @param idMitgliedschaft Value for `id_mitgliedschaften`.
 * @param idInstanzen Value for `id_instanzen`.
 * @param idFeuerwehren Value for `id_feuerwehren`.
 */
async function scrapeMemberUntersuchungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskUntersuchung[]> {
  // URLSearchParams percent-encodes the values instead of splicing them in raw.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/UntersuchungenList.aspx?${query.toString()}`;
  await frame_goto(frame, url);

  const results: FdiskUntersuchung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        // Columns: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
        return {
          datum: cell(0),
          anmerkungen: cell(1),
          art: cell(2),
          ergebnis: cell(3),
        };
      })
    );

    for (const row of rows) {
      const art = cellText(row.art);
      if (!art) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
      results.push({
        standesbuchNr,
        datum,
        anmerkungen: cellText(row.anmerkungen),
        art,
        ergebnis: cellText(row.ergebnis),
        syncKey,
      });
    }

    log(` Untersuchungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const u of results) {
      log(` ${u.datum ?? '—'} [${u.art}] ${u.ergebnis ?? '—'} | ${u.anmerkungen ?? ''}`);
    }
  } catch (err) {
    log(` WARN: could not parse Untersuchungen table for StNr ${standesbuchNr} (url: ${url}): ${err}`);
  }

  return results;
}
|
||
|
||
/**
 * Navigate to the Gesetzliche Fahrgenehmigungen sub-page and scrape all entries.
 *
 * This is an inline-edit (ListEdit) page, so a cell's value is taken from its
 * text `<input>` if present, else from the selected `<select>` option, else
 * from the cell text. Rows without a Fahrgenehmigungsklasse are skipped. The
 * URL is built from the given IDs (URL-encoded). If the list table does not
 * appear or cannot be parsed, a warning is logged and the rows collected so far
 * (possibly none) are returned. Navigation errors are not caught and propagate
 * to the caller.
 *
 * @param frame Frame to navigate.
 * @param standesbuchNr Member number stored on each record and in its syncKey.
 * @param idMitgliedschaft Value for `id_mitgliedschaften`.
 * @param idInstanzen Value for `id_instanzen`.
 * @param idFeuerwehren Value for `id_feuerwehren`.
 */
async function scrapeMemberFahrgenehmigungen(
  frame: Frame,
  standesbuchNr: string,
  idMitgliedschaft: string,
  idInstanzen: string,
  idFeuerwehren: string,
): Promise<FdiskFahrgenehmigung[]> {
  // URLSearchParams percent-encodes the values instead of splicing them in raw.
  const query = new URLSearchParams({
    id_mitgliedschaften: idMitgliedschaft,
    id_instanzen: idInstanzen,
    id_feuerwehren: idFeuerwehren,
  });
  const url = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/Ges_fahrgenehmigungenListEdit.aspx?${query.toString()}`;
  await frame_goto(frame, url);

  const results: FdiskFahrgenehmigung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });

    // Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cellVal = (i: number): string => {
          const cell = cells[i];
          if (!cell) return '';
          const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
          if (input) return input.value?.trim() ?? '';
          const select = cell.querySelector('select') as HTMLSelectElement | null;
          if (select) {
            const opt = select.options[select.selectedIndex];
            return (opt?.text || opt?.value || '').trim();
          }
          return cell.textContent?.trim() ?? '';
        };
        return {
          ausstellungsdatum: cellVal(0),
          gueltigBis: cellVal(1),
          behoerde: cellVal(2),
          nummer: cellVal(3),
          klasse: cellVal(4),
        };
      })
    );

    for (const row of rows) {
      const klasse = cellText(row.klasse);
      if (!klasse) continue;
      const ausstellungsdatum = parseDate(row.ausstellungsdatum);
      const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
      results.push({
        standesbuchNr,
        ausstellungsdatum,
        gueltigBis: parseDate(row.gueltigBis),
        behoerde: cellText(row.behoerde),
        nummer: cellText(row.nummer),
        klasse,
        syncKey,
      });
    }

    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const f of results) {
      log(` ${f.ausstellungsdatum ?? '—'} [${f.klasse}] ${f.behoerde ?? ''} ${f.nummer ?? ''}`);
    }
  } catch (err) {
    log(` WARN: could not parse Fahrgenehmigungen table for StNr ${standesbuchNr} (url: ${url}): ${err}`);
  }

  return results;
}
|
||
|
||
/**
 * Legacy export kept for compatibility — delegates to the unified flow.
 *
 * Navigates `frame` to the member's detail URL and scrapes the Ausbildungen
 * from there. Returns an empty list when the member has no `detailUrl`.
 */
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  if (!member.detailUrl) return [];
  await frame_goto(frame, member.detailUrl);
  return scrapeAusbildungenFromDetailPage(frame, member);
}
|