This commit is contained in:
Matthias Hochmeister
2026-03-16 15:17:28 +01:00
parent d780a284d3
commit 023bd7acbb
2 changed files with 37 additions and 1 deletions

View File

@@ -1,4 +1,6 @@
import { chromium, Page, Frame } from '@playwright/test';
import * as fs from 'fs';
import * as path from 'path';
import {
FdiskMember,
FdiskAusbildung,
@@ -10,10 +12,27 @@ import {
// Base URL that the page URLs below are built from; overridable via FDISK_BASE_URL.
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
// Identifier values with built-in defaults, overridable via environment variables.
// NOTE(review): presumably the fire-department and instance IDs used in FDISK queries — confirm against usage.
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';
// Debug HTML dumping is on only when FDISK_DEBUG_HTML is exactly '1' or 'true' (case-sensitive).
const DEBUG_HTML = process.env.FDISK_DEBUG_HTML === '1' || process.env.FDISK_DEBUG_HTML === 'true';
// Absolute page URLs derived from BASE_URL.
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
const MEMBERS_URL = `${BASE_URL}/fdisk/module/mgvw/mitgliedschaften/meine_Mitglieder.aspx`;
/**
 * Save a frame's current HTML into the `debug/` folder (relative to the
 * process working directory) for offline inspection.
 *
 * Does nothing unless DEBUG_HTML is set (FDISK_DEBUG_HTML=1 or "true").
 * Best-effort: any failure is logged and swallowed, so a failed dump never
 * interrupts the calling scrape step.
 *
 * @param frame - Frame whose serialized content is written out.
 * @param label - Name for the dump; every character outside `[a-zA-Z0-9_-]`
 *   is replaced with `_` to form the file name (`<label>.html`). A dump with
 *   the same sanitized label overwrites the previous file.
 * @returns A promise that resolves once the dump has been written or the
 *   failure has been logged; it does not reject.
 */
async function dumpHtml(frame: Frame, label: string): Promise<void> {
  if (!DEBUG_HTML) return;
  try {
    const debugDir = path.resolve(process.cwd(), 'debug');
    // Async fs calls keep the event loop free while the dump is written.
    await fs.promises.mkdir(debugDir, { recursive: true });
    const html = await frame.content();
    // An empty label would otherwise yield a hidden ".html" file.
    const safeName = label.replace(/[^a-zA-Z0-9_-]/g, '_') || 'unnamed';
    const filePath = path.join(debugDir, `${safeName}.html`);
    await fs.promises.writeFile(filePath, html, 'utf-8');
    // Size is derived from the string length, so it is approximate for non-ASCII pages.
    log(` [debug] saved HTML → ${filePath} (${(html.length / 1024).toFixed(1)} KB)`);
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are guaranteed to carry a message.
    const reason = err instanceof Error ? err.message : String(err);
    log(` [debug] failed to save HTML for "${label}": ${reason}`);
  }
}
/**
 * Print a scraper log line to stdout.
 *
 * Each line is prefixed with the "[scraper]" tag and the current time as an
 * ISO-8601 timestamp, followed by the message.
 *
 * @param msg - Text to append after the tag and timestamp.
 */
function log(msg: string) {
  const timestamp = new Date().toISOString();
  const line = ['[scraper]', timestamp, msg].join(' ');
  console.log(line);
}
@@ -183,6 +202,7 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
const members = await scrapeKnownMembers(mainFrame, knownStNrs, knownNames);
log(`Found ${members.length} members (targeted query)`);
if (DEBUG_HTML) log(`[debug] HTML dump mode ON — saving pages to debug/`);
const ausbildungen: FdiskAusbildung[] = [];
const befoerderungen: FdiskBefoerderung[] = [];
@@ -209,6 +229,9 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
member.wohnort = profileFields.wohnort;
member.plz = profileFields.plz;
// Debug: dump the member detail page (Ausbildungen are scraped from here)
await dumpHtml(mainFrame, `detail_StNr${member.standesbuchNr}`);
// Extract mitgliedschaft + person params from the current URL for constructing sub-section URLs.
// PersonenForm.aspx is in the personen module; sub-sections are each in their own module.
// URL pattern: ?search=1&searchid_mitgliedschaften=X&id_personen=Y&id_mitgliedschaften=X&searchid_personen=Y&searchid_maskmode=
@@ -729,6 +752,11 @@ async function scrapeAusbildungenFromDetailPage(frame: Frame, member: FdiskMembe
break; // only process the first Ausbildung table found
}
// Debug: dump HTML when no Ausbildungen found
if (ausbildungen.length === 0) {
await dumpHtml(frame, `ausbildungen_StNr${member.standesbuchNr}`);
}
return ausbildungen;
}
@@ -807,6 +835,12 @@ async function navigateAndGetTableRows(
log(`${allRows.length} total rows, ${fdcRows.length} FdcLayList rows, ${dataRows.length} data rows (with date in cells[0])`);
// Debug: dump HTML when no data rows found
if (dataRows.length === 0) {
const urlSlug = url.split('/').pop()?.split('?')[0] ?? 'unknown';
await dumpHtml(frame, `navigateAndGetTableRows_${urlSlug}`);
}
return dataRows;
}
@@ -1021,6 +1055,7 @@ async function scrapeMemberFahrgenehmigungen(
if (klasseIdx === -1) {
log(` Fahrgenehmigungen for StNr ${standesbuchNr}: could not determine Klasse column. Returning empty.`);
await dumpHtml(frame, `fahrgenehmigungen_StNr${standesbuchNr}`);
return [];
}