update
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
import { chromium, Page, Frame } from '@playwright/test';
|
||||
import { FdiskMember, FdiskAusbildung } from './types';
|
||||
import {
|
||||
FdiskMember,
|
||||
FdiskAusbildung,
|
||||
FdiskBefoerderung,
|
||||
FdiskUntersuchung,
|
||||
FdiskFahrgenehmigung,
|
||||
} from './types';
|
||||
|
||||
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
|
||||
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
|
||||
@@ -36,6 +42,9 @@ function cellText(text: string | undefined | null): string | null {
|
||||
export async function scrapeAll(username: string, password: string): Promise<{
|
||||
members: FdiskMember[];
|
||||
ausbildungen: FdiskAusbildung[];
|
||||
befoerderungen: FdiskBefoerderung[];
|
||||
untersuchungen: FdiskUntersuchung[];
|
||||
fahrgenehmigungen: FdiskFahrgenehmigung[];
|
||||
}> {
|
||||
const browser = await chromium.launch({
|
||||
headless: true,
|
||||
@@ -59,24 +68,58 @@ export async function scrapeAll(username: string, password: string): Promise<{
|
||||
log(`Found ${members.length} members`);
|
||||
|
||||
const ausbildungen: FdiskAusbildung[] = [];
|
||||
const befoerderungen: FdiskBefoerderung[] = [];
|
||||
const untersuchungen: FdiskUntersuchung[] = [];
|
||||
const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];
|
||||
|
||||
for (const member of members) {
|
||||
if (!member.detailUrl) continue;
|
||||
try {
|
||||
const quals = await scrapeMemberAusbildung(mainFrame, member);
|
||||
// Navigate to detail page and scrape all sub-sections
|
||||
await frame_goto(mainFrame, member.detailUrl);
|
||||
|
||||
// Scrape extra profile fields from the detail form
|
||||
const profileFields = await scrapeDetailProfileFields(mainFrame);
|
||||
member.geburtsort = profileFields.geburtsort;
|
||||
member.geschlecht = profileFields.geschlecht;
|
||||
member.beruf = profileFields.beruf;
|
||||
member.wohnort = profileFields.wohnort;
|
||||
member.plz = profileFields.plz;
|
||||
|
||||
// Ausbildungen
|
||||
const quals = await scrapeAusbildungenFromDetailPage(mainFrame, member);
|
||||
ausbildungen.push(...quals);
|
||||
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
|
||||
|
||||
// Beförderungen
|
||||
const befos = await scrapeMemberBefoerderungen(mainFrame, member.standesbuchNr);
|
||||
befoerderungen.push(...befos);
|
||||
|
||||
// Untersuchungen
|
||||
const unters = await scrapeMemberUntersuchungen(mainFrame, member.standesbuchNr);
|
||||
untersuchungen.push(...unters);
|
||||
|
||||
// Fahrgenehmigungen
|
||||
const fahrg = await scrapeMemberFahrgenehmigungen(mainFrame, member.standesbuchNr);
|
||||
fahrgenehmigungen.push(...fahrg);
|
||||
|
||||
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen, ${befos.length} Beförderungen, ${unters.length} Untersuchungen, ${fahrg.length} Fahrgenehmigungen`);
|
||||
await page.waitForTimeout(500);
|
||||
} catch (err) {
|
||||
log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
|
||||
log(` WARN: could not scrape detail for ${member.vorname} ${member.zuname}: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
return { members, ausbildungen };
|
||||
return { members, ausbildungen, befoerderungen, untersuchungen, fahrgenehmigungen };
|
||||
} finally {
|
||||
await browser.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Point `frame` at `url` and resolve once the navigation has reached
 * Playwright's `networkidle` state. Exists so the scrapers in this file
 * do not have to repeat the wait option at every call site.
 *
 * @param frame - Frame to navigate.
 * @param url - Destination URL.
 */
async function frame_goto(frame: Frame, url: string): Promise<void> {
  const navigation = frame.goto(url, { waitUntil: 'networkidle' });
  await navigation;
}
|
||||
|
||||
async function login(page: Page, username: string, password: string): Promise<void> {
|
||||
log(`Navigating to ${LOGIN_URL}`);
|
||||
await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded' });
|
||||
@@ -307,6 +350,11 @@ async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
abmeldedatum,
|
||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||
detailUrl: row.href,
|
||||
geburtsort: null,
|
||||
geschlecht: null,
|
||||
beruf: null,
|
||||
wohnort: null,
|
||||
plz: null,
|
||||
});
|
||||
}
|
||||
return members;
|
||||
@@ -343,11 +391,44 @@ async function parseRowsFromTable(frame: Frame) {
|
||||
);
|
||||
}
|
||||
|
||||
async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
||||
if (!member.detailUrl) return [];
|
||||
/**
 * Read the additional profile fields (birthplace, gender, occupation, town,
 * postal code) from the member detail form.
 *
 * The frame must already be showing the member detail page: this function
 * does not navigate. All lookups run inside the page via `frame.evaluate`,
 * so the callback may only use browser globals and its own locals.
 *
 * @param frame - Frame currently displaying the member detail form.
 * @returns One entry per field; `null` when no matching control is found or
 *   its value is empty after trimming.
 */
async function scrapeDetailProfileFields(frame: Frame): Promise<{
  geburtsort: string | null;
  geschlecht: string | null;
  beruf: string | null;
  wohnort: string | null;
  plz: string | null;
}> {
  return frame.evaluate(() => {
    // Trimmed value of the first element matching `selector`, or null.
    // For <select> elements the visible option text wins over its value.
    const val = (selector: string): string | null => {
      const el = document.querySelector(selector) as HTMLInputElement | HTMLSelectElement | null;
      if (!el) return null;
      if (el.tagName === 'SELECT') {
        const sel = el as HTMLSelectElement;
        // selectedIndex is -1 when nothing is selected, which yields undefined here.
        const opt = sel.options[sel.selectedIndex];
        return opt ? (opt.text || opt.value || '').trim() || null : null;
      }
      return (el as HTMLInputElement).value?.trim() || null;
    };

    // Each field tries an exact `name` match first and then looser fallbacks;
    // an empty value counts as "not found", so the next selector is tried.
    return {
      geburtsort: val('input[name="geburtsort"]') ?? val('input[id*="geburtsort"]'),
      geschlecht: val('select[name*="geschlecht"]') ?? val('select[id*="geschlecht"]'),
      beruf: val('input[name="beruf"]') ?? val('input[id*="beruf"]'),
      wohnort: val('input[name="ort"]') ?? val('input[id*="_ort"]') ?? val('input[name="wohnort"]'),
      plz: val('input[name="plz"]') ?? val('input[id*="plz"]'),
    };
  });
}
|
||||
|
||||
/**
|
||||
* Scrape Ausbildungen from the detail page (already loaded).
|
||||
* Navigates to the Ausbildung sub-page if needed.
|
||||
*/
|
||||
async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
|
||||
// Look for Ausbildungsliste section — it's likely a table or list
|
||||
const ausbildungSection = frame.locator('text=Ausbildung, text=Ausbildungsliste').first();
|
||||
const hasSec = await ausbildungSection.isVisible().catch(() => false);
|
||||
@@ -363,7 +444,6 @@ async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promis
|
||||
}
|
||||
|
||||
// Parse the qualification table
|
||||
// Expected columns: Kursname, Datum, Ablaufdatum, Ort, Bemerkung (may vary)
|
||||
const tables = await frame.$$('table');
|
||||
const ausbildungen: FdiskAusbildung[] = [];
|
||||
|
||||
@@ -376,7 +456,6 @@ async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promis
|
||||
|
||||
if (rows.length < 2) continue;
|
||||
|
||||
// Detect if this looks like an Ausbildung table
|
||||
const header = rows[0].cells.map(c => c.toLowerCase());
|
||||
const isAusbildungTable =
|
||||
header.some(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
|
||||
@@ -412,3 +491,197 @@ async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promis
|
||||
|
||||
return ausbildungen;
|
||||
}
|
||||
|
||||
/**
 * Follow the Beförderungen (promotions) link found on the current page and
 * scrape every promotion row from the resulting list.
 *
 * NOTE: the frame is left on the Beförderungen list page. This function does
 * not navigate back to the member detail page, so callers that need the
 * detail page afterwards must navigate there themselves.
 *
 * @param frame - Frame showing a page that contains the Beförderungen link.
 * @param standesbuchNr - Member number; copied into every result and its syncKey.
 * @returns The parsed promotions. Empty when the link is missing or has no
 *   href, or when the list table could not be read (a warning is logged).
 */
async function scrapeMemberBefoerderungen(frame: Frame, standesbuchNr: string): Promise<FdiskBefoerderung[]> {
  // Both spellings of the page name are matched because attribute selectors are case-sensitive.
  const link = frame.locator('a[href*="befoerderungenList.aspx"], a[href*="BefoerderungenList.aspx"]').first();
  const hasLink = await link.isVisible().catch(() => false);
  if (!hasLink) {
    log(` No Beförderungen link for StNr ${standesbuchNr}`);
    return [];
  }

  const href = await link.getAttribute('href');
  if (!href) return [];

  // Relative hrefs are resolved against the page the link was found on.
  const url = href.startsWith('http') ? href : new URL(href, frame.url()).toString();
  await frame_goto(frame, url);

  const results: FdiskBefoerderung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    // Columns: 0=Datum, 1=Dienstgrad
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        return { datum: cell(0), dienstgrad: cell(1) };
      })
    );

    for (const row of rows) {
      const dienstgrad = cellText(row.dienstgrad);
      // Rows without a rank are skipped.
      if (!dienstgrad) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${dienstgrad}::${datum ?? ''}`;
      results.push({ standesbuchNr, datum, dienstgrad, syncKey });
    }
    log(` Beförderungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const b of results) {
      log(` ${b.datum ?? '—'} ${b.dienstgrad}`);
    }
  } catch (err) {
    // Best effort: keep whatever was collected, but report why parsing stopped.
    log(` WARN: could not parse Beförderungen table for StNr ${standesbuchNr}: ${err}`);
  }

  return results;
}
|
||||
|
||||
/**
 * Follow the Untersuchungen (medical examinations) link found on the current
 * page and scrape every examination row from the resulting list.
 *
 * Rows are not de-duplicated here: each row with a non-empty examination type
 * becomes one result. (The original note says storage keeps all rows and
 * queries pick the latest per category; that code is not visible here.)
 *
 * NOTE: the frame is left on the Untersuchungen list page afterwards.
 *
 * @param frame - Frame showing a page that contains the Untersuchungen link.
 * @param standesbuchNr - Member number; copied into every result and its syncKey.
 * @returns The parsed examinations. Empty when the link is missing or has no
 *   href, or when the list table could not be read (a warning is logged).
 */
async function scrapeMemberUntersuchungen(frame: Frame, standesbuchNr: string): Promise<FdiskUntersuchung[]> {
  const link = frame.locator('a[href*="UntersuchungenList.aspx"]').first();
  const hasLink = await link.isVisible().catch(() => false);
  if (!hasLink) {
    log(` No Untersuchungen link for StNr ${standesbuchNr}`);
    return [];
  }

  const href = await link.getAttribute('href');
  if (!href) return [];

  // Relative hrefs are resolved against the page the link was found on.
  const url = href.startsWith('http') ? href : new URL(href, frame.url()).toString();
  await frame_goto(frame, url);

  const results: FdiskUntersuchung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cell = (i: number) => (cells[i]?.textContent ?? '').trim();
        // Columns: 0=Datum, 1=Anmerkungen, 2=Untersuchungsart, 3=Tauglichkeitsstufe
        return {
          datum: cell(0),
          anmerkungen: cell(1),
          art: cell(2),
          ergebnis: cell(3),
        };
      })
    );

    for (const row of rows) {
      const art = cellText(row.art);
      // Rows without an examination type are skipped.
      if (!art) continue;
      const datum = parseDate(row.datum);
      const syncKey = `${standesbuchNr}::${art}::${datum ?? ''}`;
      results.push({
        standesbuchNr,
        datum,
        anmerkungen: cellText(row.anmerkungen),
        art,
        ergebnis: cellText(row.ergebnis),
        syncKey,
      });
    }
    log(` Untersuchungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const u of results) {
      log(` ${u.datum ?? '—'} [${u.art}] ${u.ergebnis ?? '—'} | ${u.anmerkungen ?? ''}`);
    }
  } catch (err) {
    // Best effort: keep whatever was collected, but report why parsing stopped.
    log(` WARN: could not parse Untersuchungen table for StNr ${standesbuchNr}: ${err}`);
  }

  return results;
}
|
||||
|
||||
/**
 * Follow the Gesetzliche Fahrgenehmigungen (statutory driving permits) link
 * found on the current page and scrape every permit row.
 *
 * The target is an inline-edit (ListEdit) page, so cell values are read from
 * a text <input> first, then from the selected <select> option, and only
 * then from the cell's plain text.
 *
 * NOTE: the frame is left on the Fahrgenehmigungen page afterwards.
 *
 * @param frame - Frame showing a page that contains the Fahrgenehmigungen link.
 * @param standesbuchNr - Member number; copied into every result and its syncKey.
 * @returns The parsed permits. Empty when the link is missing or has no href,
 *   or when the list table could not be read (a warning is logged).
 */
async function scrapeMemberFahrgenehmigungen(frame: Frame, standesbuchNr: string): Promise<FdiskFahrgenehmigung[]> {
  // Both spellings of the page name are matched because attribute selectors are case-sensitive.
  const link = frame.locator('a[href*="Ges_fahrgenehmigungenListEdit.aspx"], a[href*="ges_fahrgenehmigungenListEdit.aspx"]').first();
  const hasLink = await link.isVisible().catch(() => false);
  if (!hasLink) {
    log(` No Fahrgenehmigungen link for StNr ${standesbuchNr}`);
    return [];
  }

  const href = await link.getAttribute('href');
  if (!href) return [];

  // Relative hrefs are resolved against the page the link was found on.
  const url = href.startsWith('http') ? href : new URL(href, frame.url()).toString();
  await frame_goto(frame, url);

  const results: FdiskFahrgenehmigung[] = [];

  try {
    await frame.waitForSelector('table.FdcLayList', { timeout: 10000 });

    // ListEdit pages: each data row has inline <input> fields instead of plain text.
    // Columns: 0=Ausstellungsdatum, 1=Gültig bis, 2=Behörde, 3=Nummer, 4=Fahrgenehmigungsklasse
    const rows = await frame.$$eval('table.FdcLayList tbody tr', (trs) =>
      trs.map((tr) => {
        const cells = Array.from(tr.querySelectorAll('td'));
        const cellVal = (i: number): string => {
          const cell = cells[i];
          if (!cell) return '';
          // Prefer input value, then select text, then textContent
          const input = cell.querySelector('input[type="text"], input:not([type])') as HTMLInputElement | null;
          if (input) return input.value?.trim() ?? '';
          const select = cell.querySelector('select') as HTMLSelectElement | null;
          if (select) {
            const opt = select.options[select.selectedIndex];
            return (opt?.text || opt?.value || '').trim();
          }
          return cell.textContent?.trim() ?? '';
        };
        return {
          ausstellungsdatum: cellVal(0),
          gueltigBis: cellVal(1),
          behoerde: cellVal(2),
          nummer: cellVal(3),
          klasse: cellVal(4),
        };
      })
    );

    for (const row of rows) {
      const klasse = cellText(row.klasse);
      // Rows without a permit class are skipped.
      if (!klasse) continue;
      const ausstellungsdatum = parseDate(row.ausstellungsdatum);
      const syncKey = `${standesbuchNr}::${klasse}::${ausstellungsdatum ?? ''}`;
      results.push({
        standesbuchNr,
        ausstellungsdatum,
        gueltigBis: parseDate(row.gueltigBis),
        behoerde: cellText(row.behoerde),
        nummer: cellText(row.nummer),
        klasse,
        syncKey,
      });
    }
    log(` Fahrgenehmigungen for StNr ${standesbuchNr}: ${results.length} rows`);
    for (const f of results) {
      log(` ${f.ausstellungsdatum ?? '—'} [${f.klasse}] ${f.behoerde ?? ''} ${f.nummer ?? ''}`);
    }
  } catch (err) {
    // Best effort: keep whatever was collected, but report why parsing stopped.
    log(` WARN: could not parse Fahrgenehmigungen table for StNr ${standesbuchNr}: ${err}`);
  }

  return results;
}
|
||||
|
||||
/**
 * Legacy entry point kept so existing importers keep working.
 *
 * Opens the member's detail page and hands over to
 * `scrapeAusbildungenFromDetailPage`. Members without a detail URL yield an
 * empty list and no navigation takes place.
 */
export async function scrapeMemberAusbildung(frame: Frame, member: FdiskMember): Promise<FdiskAusbildung[]> {
  const { detailUrl } = member;
  if (detailUrl) {
    await frame_goto(frame, detailUrl);
    return scrapeAusbildungenFromDetailPage(frame, member);
  }
  return [];
}
|
||||
|
||||
Reference in New Issue
Block a user