This commit is contained in:
Matthias Hochmeister
2026-03-14 14:10:05 +01:00
parent 992ca8e104
commit 8d03c13bee
7 changed files with 227 additions and 10 deletions

View File

@@ -39,6 +39,123 @@ function cellText(text: string | undefined | null): string | null {
return t || null;
}
/**
 * Fetch only the members we care about, rather than scraping the full member list.
 *
 * Phase 1: one search per known StNr. Both the "from" and "to" StNr filter
 *          fields of the `frmsearch` form are set to the same value so the
 *          search is an exact match. If the form or either field is missing,
 *          a warning is logged and that StNr is skipped.
 * Phase 2: if `knownNames` is non-empty, the StNr filter is cleared and the
 *          form is submitted once. No pagination is performed here, so only
 *          the rows `parseRowsFromTable` yields for that single result page
 *          are considered. Rows are kept when their
 *          `vorname::zuname` key (lower-cased) is in `knownNames`
 *          (first-time linking). If the search form is missing, a warning is
 *          logged and the phase is skipped so Phase 1 results are not lost.
 *
 * Rows are deduplicated by StNr across both phases; rows lacking a StNr,
 * first name or last name are dropped when building the result.
 *
 * @param frame      Frame that currently shows the member list / search form.
 * @param knownStNrs StNrs to look up individually.
 * @param knownNames Name keys of the form `vorname::zuname`, lower-cased.
 * @returns Deduplicated members. Detail-only fields (geburtsort, geschlecht,
 *          beruf, wohnort, plz) are always `null` here.
 *
 * Errors raised by the form submit / navigation wait (e.g. the 30 s
 * navigation timeout) are not caught and propagate to the caller.
 */
async function scrapeKnownMembers(
  frame: Frame,
  knownStNrs: Set<string>,
  knownNames: Set<string>,
): Promise<FdiskMember[]> {
  type ParsedRow = Awaited<ReturnType<typeof parseRowsFromTable>>[number];
  const seenStNrs = new Set<string>();
  const allRows: ParsedRow[] = [];

  // Submit the search form and wait for the resulting navigation to settle.
  // The navigation wait is registered before the submit fires so the
  // navigation cannot be missed.
  const submitSearch = async (): Promise<void> => {
    await Promise.all([
      frame.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }),
      frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }),
    ]);
  };

  // --- Phase 1: fetch by exact StNr ---
  log(`scrapeKnownMembers: fetching ${knownStNrs.size} known StNrs`);
  for (const stNr of knownStNrs) {
    // Set from == to so the range filter matches exactly one StNr.
    const formOk = await frame.evaluate((sn) => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return false;
      const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
      const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
      if (!fromFld || !toFld) return false;
      fromFld.value = sn;
      toFld.value = sn;
      return true;
    }, stNr);
    if (!formOk) {
      log(` WARN: search form not usable for StNr ${stNr}`);
      continue;
    }

    await submitSearch();

    const rows = await parseRowsFromTable(frame);
    for (const r of rows) {
      if (r.standesbuchNr && !seenStNrs.has(r.standesbuchNr)) {
        seenStNrs.add(r.standesbuchNr);
        allRows.push(r);
      }
    }
    log(` StNr ${stNr}: ${rows.length} row(s)`);

    // Be gentle on the server
    await frame.page().waitForTimeout(300);
  }

  // --- Phase 2: single unfiltered fetch for name-matching ---
  if (knownNames.size > 0) {
    log(`scrapeKnownMembers: unfiltered fetch for ${knownNames.size} name-based matches`);

    // Clear the StNr filter. Report whether the form exists at all: submitting
    // a missing form would throw and discard everything Phase 1 collected.
    const formPresent = await frame.evaluate(() => {
      const form = (document as any).forms['frmsearch'];
      if (!form) return false;
      const fromFld = form.elements['ListFilter$searchstandesbuchnummer'] as HTMLInputElement | null;
      const toFld = form.elements['ListFilter$searchstandesbuchnummer_bis'] as HTMLInputElement | null;
      if (fromFld) fromFld.value = '';
      if (toFld) toFld.value = '';
      return true;
    });

    if (!formPresent) {
      log(' WARN: search form not usable for unfiltered fetch; skipping name-based matching');
    } else {
      await submitSearch();

      const rows = await parseRowsFromTable(frame);
      let matched = 0;
      for (const r of rows) {
        // Skip rows without a StNr and rows already collected in Phase 1.
        if (!r.standesbuchNr || seenStNrs.has(r.standesbuchNr)) continue;
        const nameKey = `${(r.vorname || '').toLowerCase()}::${(r.zuname || '').toLowerCase()}`;
        if (knownNames.has(nameKey)) {
          seenStNrs.add(r.standesbuchNr);
          allRows.push(r);
          matched++;
        }
      }
      log(` Unfiltered page: ${rows.length} total rows, ${matched} name-matched`);
    }
  }

  log(`scrapeKnownMembers: ${allRows.length} members collected`);

  // Build FdiskMember objects
  const members: FdiskMember[] = [];
  for (const row of allRows) {
    if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
    const abmeldedatum = parseDate(row.abmeldedatum);
    members.push({
      standesbuchNr: row.standesbuchNr,
      dienstgrad: row.dienstgrad,
      vorname: row.vorname,
      zuname: row.zuname,
      geburtsdatum: parseDate(row.geburtsdatum),
      svnr: row.svnr || null,
      eintrittsdatum: parseDate(row.eintrittsdatum),
      abmeldedatum,
      // A parsed deregistration date marks the member as having left.
      status: abmeldedatum ? 'ausgetreten' : 'aktiv',
      detailUrl: row.href,
      // Not available from the list view; left null in this function.
      geburtsort: null,
      geschlecht: null,
      beruf: null,
      wohnort: null,
      plz: null,
    });
  }
  return members;
}
export async function scrapeAll(username: string, password: string, knownStNrs: Set<string>, knownNames: Set<string>): Promise<{
members: FdiskMember[];
ausbildungen: FdiskAusbildung[];
@@ -64,8 +181,8 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
// Navigate via the menu frame (left.aspx) to set session state properly.
const mainFrame = await navigateToMemberList(page);
const members = await scrapeMembers(mainFrame);
log(`Found ${members.length} members`);
const members = await scrapeKnownMembers(mainFrame, knownStNrs, knownNames);
log(`Found ${members.length} members (targeted query)`);
const ausbildungen: FdiskAusbildung[] = [];
const befoerderungen: FdiskBefoerderung[] = [];
@@ -73,13 +190,6 @@ export async function scrapeAll(username: string, password: string, knownStNrs:
const fahrgenehmigungen: FdiskFahrgenehmigung[] = [];
for (const member of members) {
// Only scrape detail pages for members with a dashboard account
// (matched by standesbuchNr or by name for first-time linking)
const nameKey = `${member.vorname.toLowerCase()}::${member.zuname.toLowerCase()}`;
if (!knownStNrs.has(member.standesbuchNr) && !knownNames.has(nameKey)) {
continue;
}
try {
// Navigate to member detail page — use direct URL if available, else search+click fallback
const onDetail = member.detailUrl