update
This commit is contained in:
@@ -48,12 +48,11 @@ export async function scrapeAll(username: string, password: string): Promise<{
|
|||||||
try {
|
try {
|
||||||
await login(page, username, password);
|
await login(page, username, password);
|
||||||
|
|
||||||
// After login, page is on Start.aspx (frameset with top.topFrame etc.).
|
// After login, page is on Start.aspx (frameset).
|
||||||
// The member list page runs alterBreadcrumbs() on load, which accesses top.topFrame.
|
// Direct navigation to MitgliedschaftenList.aspx causes a server BLError because
|
||||||
// Navigating the whole page away from the frameset breaks that check → NoTabsAllowed redirect.
|
// the server reads the org context from session variables set by the menu.
|
||||||
// Instead, navigate the mainFrame (inner frame) so the frameset context stays intact.
|
// Navigate via the menu frame (left.aspx) to set session state properly.
|
||||||
const mainFrame = page.frame({ name: 'mainFrame' });
|
const mainFrame = await navigateToMemberList(page);
|
||||||
if (!mainFrame) throw new Error('mainFrame not found in Start.aspx frameset');
|
|
||||||
|
|
||||||
const members = await scrapeMembers(mainFrame);
|
const members = await scrapeMembers(mainFrame);
|
||||||
log(`Found ${members.length} members`);
|
log(`Found ${members.length} members`);
|
||||||
@@ -65,7 +64,6 @@ export async function scrapeAll(username: string, password: string): Promise<{
|
|||||||
const quals = await scrapeMemberAusbildung(mainFrame, member);
|
const quals = await scrapeMemberAusbildung(mainFrame, member);
|
||||||
ausbildungen.push(...quals);
|
ausbildungen.push(...quals);
|
||||||
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
|
log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
|
||||||
// polite delay between requests
|
|
||||||
await page.waitForTimeout(500);
|
await page.waitForTimeout(500);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
|
log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
|
||||||
@@ -118,34 +116,52 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
|||||||
log(`Logged in successfully, redirected to: ${currentUrl}`);
|
log(`Logged in successfully, redirected to: ${currentUrl}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
async function navigateToMemberList(page: Page): Promise<Frame> {
|
||||||
log(`Navigating to members list: ${MEMBERS_URL}`);
|
const menuFrame = page.frame({ name: 'menu' });
|
||||||
await frame.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
|
if (!menuFrame) throw new Error('Menu frame (left.aspx) not found in Start.aspx frameset');
|
||||||
await frame.waitForLoadState('networkidle');
|
|
||||||
|
|
||||||
const frameUrl = frame.url();
|
await menuFrame.waitForLoadState('networkidle');
|
||||||
const frameTitle = await frame.title();
|
|
||||||
log(`Members frame loaded: ${frameUrl} — title: "${frameTitle}"`);
|
|
||||||
|
|
||||||
// The member list requires a POST (form submit) to show results.
|
// Log all menu links for diagnostics
|
||||||
// Navigate to the base page first (which loads VIEWSTATE etc.), then submit the search form.
|
const menuLinks = await menuFrame.$$eval('a', (as) =>
|
||||||
const formExists = await frame.$('form[name="frmsearch"]') !== null;
|
as.map((a) => ({ text: (a.textContent ?? '').trim(), href: a.href })).filter((l) => l.href),
|
||||||
log(`Search form found: ${formExists}`);
|
);
|
||||||
|
log(`Menu links (${menuLinks.length}): ${JSON.stringify(menuLinks.slice(0, 20))}`);
|
||||||
|
|
||||||
if (!formExists) {
|
// Find the Mitgliedschaften link and click it — this sets the server-side session
|
||||||
const tableClasses = await frame.$$eval('table', (ts) =>
|
// context and navigates mainFrame to the correct URL
|
||||||
ts.map((t) => `${t.className || '(no-class)'}[${t.querySelectorAll('tr').length}rows]`),
|
const mitgliedLink = menuFrame.locator('a[href*="MitgliedschaftenList"], a[href*="mitglied" i]').first();
|
||||||
);
|
const found = await mitgliedLink.count() > 0;
|
||||||
log(`Tables in frame: ${tableClasses.join(', ') || 'none'}`);
|
if (!found) {
|
||||||
throw new Error(`frmsearch form not found on ${frameUrl} — cannot load member list`);
|
throw new Error('Could not find Mitgliedschaften link in menu frame — check menu link log above');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Submit the form as-is (no filters) to get all members
|
const linkHref = await mitgliedLink.getAttribute('href');
|
||||||
await frame.evaluate(() => {
|
log(`Clicking menu link: ${linkHref}`);
|
||||||
(document as any).forms['frmsearch'].submit();
|
await mitgliedLink.click();
|
||||||
});
|
|
||||||
await frame.waitForLoadState('networkidle');
|
// Wait for mainFrame to load the member list
|
||||||
log(`After form submit: ${frame.url()}`);
|
await page.waitForLoadState('networkidle');
|
||||||
|
|
||||||
|
const mainFrame = page.frame({ name: 'mainFrame' });
|
||||||
|
if (!mainFrame) throw new Error('mainFrame not found after menu navigation');
|
||||||
|
|
||||||
|
log(`mainFrame after menu click: ${mainFrame.url()}`);
|
||||||
|
return mainFrame;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeMembers(frame: Frame): Promise<FdiskMember[]> {
|
||||||
|
log(`Scraping member list from: ${frame.url()}`);
|
||||||
|
|
||||||
|
// If the page landed on a search form (not results yet), submit it
|
||||||
|
const hasForm = await frame.$('form[name="frmsearch"]') !== null;
|
||||||
|
const hasTable = await frame.$('table.FdcLayList') !== null;
|
||||||
|
if (hasForm && !hasTable) {
|
||||||
|
log('Search form found without results — submitting...');
|
||||||
|
await frame.evaluate(() => { (document as any).forms['frmsearch'].submit(); });
|
||||||
|
await frame.waitForLoadState('networkidle');
|
||||||
|
log(`After form submit: ${frame.url()}`);
|
||||||
|
}
|
||||||
|
|
||||||
// The member table uses class FdcLayList
|
// The member table uses class FdcLayList
|
||||||
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
await frame.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||||
|
|||||||
Reference in New Issue
Block a user