This commit is contained in:
Matthias Hochmeister
2026-03-13 13:21:19 +01:00
parent 072713ca3d
commit 86bb8a45c1

View File

@@ -48,12 +48,11 @@ export async function scrapeAll(username: string, password: string): Promise<{
try { try {
await login(page, username, password); await login(page, username, password);
// After login, page is on Start.aspx (frameset with top.topFrame etc.). // After login, page is on Start.aspx (frameset).
// The member list page runs alterBreadcrumbs() on load, which accesses top.topFrame. // Direct navigation to MitgliedschaftenList.aspx causes a server BLError because
// Navigating the whole page away from the frameset breaks that check → NoTabsAllowed redirect. // the server reads the org context from session variables set by the menu.
// Instead, navigate the mainFrame (inner frame) so the frameset context stays intact. // Navigate via the menu frame (left.aspx) to set session state properly.
const mainFrame = page.frame({ name: 'mainFrame' }); const mainFrame = await navigateToMemberList(page);
if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');
const members = await scrapeMembers(mainFrame); const members = await scrapeMembers(mainFrame);
log(`Found ${members.length} members`); log(`Found ${members.length} members`);
@@ -65,7 +64,6 @@ export async function scrapeAll(username: string, password: string): Promise<{
const quals = await scrapeMemberAusbildung(mainFrame, member); const quals = await scrapeMemberAusbildung(mainFrame, member);
ausbildungen.push(...quals); ausbildungen.push(...quals);
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`); log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
// polite delay between requests
await page.waitForTimeout(500); await page.waitForTimeout(500);
} catch (err) { } catch (err) {
log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`); log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
@@ -118,34 +116,52 @@ async function login(page: Page, username: string, password: string): Promise<vo
log(`Logged in successfully, redirected to: ${currentUrl}`); log(`Logged in successfully, redirected to: ${currentUrl}`);
} }
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> { async function navigateToMemberList(page: Page): Promise<Frame> {
log(`Navigating to members list: ${MEMBERS_URL}`); const menuFrame = page.frame({ name: 'menu' });
await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' }); if (!menuFrame) throw new Error('Menu frame (left.aspx) not found in Start.aspx frameset');
await frame.waitForLoadState('networkidle');
const frameUrl = frame.url(); await menuFrame.waitForLoadState('networkidle');
const frameTitle = await frame.title();
log(`Members frame loaded: ${frameUrl} — title: "${frameTitle}"`);
// The member list requires a POST (form submit) to show results. // Log all menu links for diagnostics
// Navigate to the base page first (which loads VIEWSTATE etc.), then submit the search form. const menuLinks = await menuFrame.$$eval('a', (as) =>
const formExists = await frame.$('form[name="frmsearch"]') !== null; as.map((a) => ({ text: (a.textContent ?? '').trim(), href: a.href })).filter((l) => l.href),
log(`Search form found: ${formExists}`); );
log(`Menu links (${menuLinks.length}): ${JSON.stringify(menuLinks.slice(0, 20))}`);
if (!formExists) { // Find the Mitgliedschaften link and click it — this sets the server-side session
const tableClasses = await frame.$$eval('table', (ts) => // context and navigates mainFrame to the correct URL
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`), const mitgliedLink = menuFrame.locator('a[href*="MitgliedschaftenList"], a[href*="mitglied" i]').first();
); const found = await mitgliedLink.count() > 0;
log(`Tables in frame: ${tableClasses.join(', ') || 'none'}`); if (!found) {
throw new Error(`frmsearch form not found on ${frameUrl} — cannot load member list`); throw new Error('Could not find Mitgliedschaften link in menu frame — check menu link log above');
} }
// Submit the form as-is (no filters) to get all members const linkHref = await mitgliedLink.getAttribute('href');
await frame.evaluate(() => { log(`Clicking menu link: ${linkHref}`);
(document as any).forms['frmsearch'].submit(); await mitgliedLink.click();
});
await frame.waitForLoadState('networkidle'); // Wait for mainFrame to load the member list
log(`After form submit: ${frame.url()}`); await page.waitForLoadState('networkidle');
const mainFrame = page.frame({ name: 'mainFrame' });
if (!mainFrame) throw new Error('mainFrame not found after menu navigation');
log(`mainFrame after menu click: ${mainFrame.url()}`);
return mainFrame;
}
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
log(`Scraping member list from: ${frame.url()}`);
// If the page landed on a search form (not results yet), submit it
const hasForm = await frame.$('form[name="frmsearch"]') !== null;
const hasTable = await frame.$('table.FdcLayList') !== null;
if (hasForm && !hasTable) {
log('Search form found without results — submitting...');
await frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); });
await frame.waitForLoadState('networkidle');
log(`After form submit: ${frame.url()}`);
}
// The member table uses class FdcLayList // The member table uses class FdcLayList
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 }); await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });