diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index 3c6515a..283fc0c 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -48,12 +48,11 @@ export async function scrapeAll(username: string, password: string): Promise<{ try { await login(page, username, password); - // After login, page is on Start.aspx (frameset with top.topFrame etc.). - // The member list page runs alterBreadcrumbs() on load, which accesses top.topFrame. - // Navigating the whole page away from the frameset breaks that check → NoTabsAllowed redirect. - // Instead, navigate the mainFrame (inner frame) so the frameset context stays intact. - const mainFrame = page.frame({ name: 'mainFrame' }); - if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset'); + // After login, page is on Start.aspx (frameset). + // Direct navigation to MitgliedschaftenList.aspx causes a server BLError because + // the server reads the org context from session variables set by the menu. + // Navigate via the menu frame (left.aspx) to set session state properly. + const mainFrame = await navigateToMemberList(page); const members = await scrapeMembers(mainFrame); log(`Found ${members.length} members`); @@ -65,7 +64,6 @@ export async function scrapeAll(username: string, password: string): Promise<{ const quals = await scrapeMemberAusbildung(mainFrame, member); ausbildungen.push(...quals); log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`); - // polite delay between requests await page.waitForTimeout(500); } catch (err) { log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`); @@ -118,34 +116,52 @@ async function login(page: Page, username: string, password: string): Promise { - log(`Navigating to members list: ${MEMBERS_URL}`); - await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' }); - await frame.waitForLoadState('networkidle'); +async function navigateToMemberList(page: Page): Promise { + const menuFrame = page.frame({ name: 'menu' }); + if (!menuFrame) throw new Error('Menu frame (left.aspx) not found in Start.aspx frameset'); - const frameUrl = frame.url(); - const frameTitle = await frame.title(); - log(`Members frame loaded: ${frameUrl} — title: "${frameTitle}"`); + await menuFrame.waitForLoadState('networkidle'); - // The member list requires a POST (form submit) to show results. - // Navigate to the base page first (which loads VIEWSTATE etc.), then submit the search form. - const formExists = await frame.$('form[name="frmsearch"]') !== null; - log(`Search form found: ${formExists}`); + // Log all menu links for diagnostics + const menuLinks = await menuFrame.$$eval('a', (as) => + as.map((a) => ({ text: (a.textContent ?? '').trim(), href: a.href })).filter((l) => l.href), + ); + log(`Menu links (${menuLinks.length}): ${JSON.stringify(menuLinks.slice(0, 20))}`); - if (!formExists) { - const tableClasses = await frame.$$eval('table', (ts) => - ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`), - ); - log(`Tables in frame: ${tableClasses.join(', ') || 'none'}`); - throw new Error(`frmsearch form not found on ${frameUrl} — cannot load member list`); + // Find the Mitgliedschaften link and click it — this sets the server-side session + // context and navigates mainFrame to the correct URL + const mitgliedLink = menuFrame.locator('a[href*="MitgliedschaftenList"], a[href*="mitglied" i]').first(); + const found = await mitgliedLink.count() > 0; + if (!found) { + throw new Error('Could not find Mitgliedschaften link in menu frame — check menu link log above'); } - // Submit the form as-is (no filters) to get all members - await frame.evaluate(() => { - (document as any).forms['frmsearch'].submit(); - }); - await frame.waitForLoadState('networkidle'); - log(`After form submit: ${frame.url()}`); + const linkHref = await mitgliedLink.getAttribute('href'); + log(`Clicking menu link: ${linkHref}`); + await mitgliedLink.click(); + + // Wait for mainFrame to load the member list + await page.waitForLoadState('networkidle'); + + const mainFrame = page.frame({ name: 'mainFrame' }); + if (!mainFrame) throw new Error('mainFrame not found after menu navigation'); + + log(`mainFrame after menu click: ${mainFrame.url()}`); + return mainFrame; +} + +async function scrapeMembers(frame: Frame): Promise { + log(`Scraping member list from: ${frame.url()}`); + + // If the page landed on a search form (not results yet), submit it + const hasForm = await frame.$('form[name="frmsearch"]') !== null; + const hasTable = await frame.$('table.FdcLayList') !== null; + if (hasForm && !hasTable) { + log('Search form found without results — submitting...'); + await frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); }); + await frame.waitForLoadState('networkidle'); + log(`After form submit: ${frame.url()}`); + } // The member table uses class FdcLayList await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });