update
This commit is contained in:
@@ -48,12 +48,11 @@ export async function scrapeAll(username: string, password: string): Promise<{
|
||||
try {
|
||||
await login(page, username, password);
|
||||
|
||||
// After login, page is on Start.aspx (frameset with top.topFrame etc.).
|
||||
// The member list page runs alterBreadcrumbs() on load, which accesses top.topFrame.
|
||||
// Navigating the whole page away from the frameset breaks that check → NoTabsAllowed redirect.
|
||||
// Instead, navigate the mainFrame (inner frame) so the frameset context stays intact.
|
||||
const mainFrame = page.frame({ name: 'mainFrame' });
|
||||
if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');
|
||||
// After login, page is on Start.aspx (frameset).
|
||||
// Direct navigation to MitgliedschaftenList.aspx causes a server BLError because
|
||||
// the server reads the org context from session variables set by the menu.
|
||||
// Navigate via the menu frame (left.aspx) to set session state properly.
|
||||
const mainFrame = await navigateToMemberList(page);
|
||||
|
||||
const members = await scrapeMembers(mainFrame);
|
||||
log(`Found ${members.length} members`);
|
||||
@@ -65,7 +64,6 @@ export async function scrapeAll(username: string, password: string): Promise<{
|
||||
const quals = await scrapeMemberAusbildung(mainFrame, member);
|
||||
ausbildungen.push(...quals);
|
||||
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
|
||||
// polite delay between requests
|
||||
await page.waitForTimeout(500);
|
||||
} catch (err) {
|
||||
log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
|
||||
@@ -118,34 +116,52 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
||||
log(`Logged in successfully, redirected to: ${currentUrl}`);
|
||||
}
|
||||
|
||||
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
log(`Navigating to members list: ${MEMBERS_URL}`);
|
||||
await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
|
||||
await frame.waitForLoadState('networkidle');
|
||||
async function navigateToMemberList(page: Page): Promise<Frame> {
|
||||
const menuFrame = page.frame({ name: 'menu' });
|
||||
if (!menuFrame) throw new Error('Menu frame (left.aspx) not found in Start.aspx frameset');
|
||||
|
||||
const frameUrl = frame.url();
|
||||
const frameTitle = await frame.title();
|
||||
log(`Members frame loaded: ${frameUrl} — title: "${frameTitle}"`);
|
||||
await menuFrame.waitForLoadState('networkidle');
|
||||
|
||||
// The member list requires a POST (form submit) to show results.
|
||||
// Navigate to the base page first (which loads VIEWSTATE etc.), then submit the search form.
|
||||
const formExists = await frame.$('form[name="frmsearch"]') !== null;
|
||||
log(`Search form found: ${formExists}`);
|
||||
// Log all menu links for diagnostics
|
||||
const menuLinks = await menuFrame.$$eval('a', (as) =>
|
||||
as.map((a) => ({ text: (a.textContent ?? '').trim(), href: a.href })).filter((l) => l.href),
|
||||
);
|
||||
log(`Menu links (${menuLinks.length}): ${JSON.stringify(menuLinks.slice(0, 20))}`);
|
||||
|
||||
if (!formExists) {
|
||||
const tableClasses = await frame.$$eval('table', (ts) =>
|
||||
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
|
||||
);
|
||||
log(`Tables in frame: ${tableClasses.join(', ') || 'none'}`);
|
||||
throw new Error(`frmsearch form not found on ${frameUrl} — cannot load member list`);
|
||||
// Find the Mitgliedschaften link and click it — this sets the server-side session
|
||||
// context and navigates mainFrame to the correct URL
|
||||
const mitgliedLink = menuFrame.locator('a[href*="MitgliedschaftenList"], a[href*="mitglied" i]').first();
|
||||
const found = await mitgliedLink.count() > 0;
|
||||
if (!found) {
|
||||
throw new Error('Could not find Mitgliedschaften link in menu frame — check menu link log above');
|
||||
}
|
||||
|
||||
// Submit the form as-is (no filters) to get all members
|
||||
await frame.evaluate(() => {
|
||||
(document as any).forms['frmsearch'].submit();
|
||||
});
|
||||
await frame.waitForLoadState('networkidle');
|
||||
log(`After form submit: ${frame.url()}`);
|
||||
const linkHref = await mitgliedLink.getAttribute('href');
|
||||
log(`Clicking menu link: ${linkHref}`);
|
||||
await mitgliedLink.click();
|
||||
|
||||
// Wait for mainFrame to load the member list
|
||||
await page.waitForLoadState('networkidle');
|
||||
|
||||
const mainFrame = page.frame({ name: 'mainFrame' });
|
||||
if (!mainFrame) throw new Error('mainFrame not found after menu navigation');
|
||||
|
||||
log(`mainFrame after menu click: ${mainFrame.url()}`);
|
||||
return mainFrame;
|
||||
}
|
||||
|
||||
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||
log(`Scraping member list from: ${frame.url()}`);
|
||||
|
||||
// If the page landed on a search form (not results yet), submit it
|
||||
const hasForm = await frame.$('form[name="frmsearch"]') !== null;
|
||||
const hasTable = await frame.$('table.FdcLayList') !== null;
|
||||
if (hasForm && !hasTable) {
|
||||
log('Search form found without results — submitting...');
|
||||
await frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); });
|
||||
await frame.waitForLoadState('networkidle');
|
||||
log(`After form submit: ${frame.url()}`);
|
||||
}
|
||||
|
||||
// The member table uses class FdcLayList
|
||||
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||
|
||||
Reference in New Issue
Block a user