"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.scrapeAll = scrapeAll;
const test_1 = require("@playwright/test");
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
const LOGIN_URL = `${BASE_URL}/fdisk/`;
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`;
/**
 * Write a timestamped, `[scraper]`-prefixed line to stdout.
 *
 * @param {string} msg - Message to log.
 */
function log(msg) {
    console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
}
/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 * Returns null if empty or unparseable, including strings that have the
 * right shape but do not name a real calendar date (e.g. "31.02.2020").
 *
 * @param {string | null | undefined} raw - Raw cell text.
 * @returns {string | null} ISO date string, or null.
 */
function parseDate(raw) {
    if (!raw)
        return null;
    const trimmed = raw.trim();
    if (!trimmed)
        return null;
    const match = trimmed.match(/^(\d{2})\.(\d{2})\.(\d{4})$/);
    if (!match)
        return null;
    const [, dd, mm, yyyy] = match;
    const day = Number(dd);
    const month = Number(mm);
    const year = Number(yyyy);
    if (month < 1 || month > 12)
        return null;
    // Validate the day by hand rather than via `Date`, which silently rolls
    // over out-of-range values and remaps years below 100.
    const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
    const daysPerMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    if (day < 1 || day > daysPerMonth[month - 1])
        return null;
    return `${yyyy}-${mm}-${dd}`;
}
/**
 * Extract text content from a cell, trimmed, or null if empty.
 *
 * @param {string | null | undefined} text - Raw cell text.
 * @returns {string | null} Trimmed text, or null when nothing remains.
 */
function cellText(text) {
    const t = (text ?? '').trim();
    return t || null;
}
/**
 * Log in to FDISK with a headless Chromium, scrape the member list and then
 * each member's Ausbildung (qualification) table.
 *
 * A failure while scraping one member's Ausbildung is logged and skipped;
 * failures during login or the member-list scrape propagate to the caller.
 * The browser is closed in all cases.
 *
 * @param {string} username - FDISK login name.
 * @param {string} password - FDISK password.
 * @returns {Promise<{ members: object[], ausbildungen: object[] }>}
 */
async function scrapeAll(username, password) {
    const browser = await test_1.chromium.launch({ headless: true });
    try {
        // Context and page are created inside the try block so the browser is
        // still closed if either of these calls throws.
        const context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        });
        const page = await context.newPage();
        await login(page, username, password);
        const members = await scrapeMembers(page);
        log(`Found ${members.length} members`);
        const ausbildungen = [];
        for (const member of members) {
            if (!member.detailUrl)
                continue;
            try {
                const quals = await scrapeMemberAusbildung(page, member);
                ausbildungen.push(...quals);
                log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
                // polite delay between requests
                await page.waitForTimeout(500);
            }
            catch (err) {
                // Best effort: one broken detail page must not abort the whole run.
                log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
            }
        }
        return { members, ausbildungen };
    }
    finally {
        await browser.close();
    }
}
/**
 * Fill in and submit the FDISK login form, then verify that the browser
 * left the login page.
 *
 * @param {object} page - Playwright page.
 * @param {string} username - FDISK login name.
 * @param {string} password - FDISK password.
 * @returns {Promise<void>}
 * @throws {Error} If the URL after submitting still looks like the login page.
 */
async function login(page, username, password) {
    log(`Navigating to ${LOGIN_URL}`);
    await page.goto(LOGIN_URL, { waitUntil: 'networkidle' });
    // ASP.NET WebForms login — try common selector patterns
    // Adjust these selectors if login fails
    const usernameField = page.locator('input[type="text"], input[name*="user"], input[name*="User"], input[id*="user"], input[id*="User"]').first();
    const passwordField = page.locator('input[type="password"]').first();
    await usernameField.fill(username);
    await passwordField.fill(password);
    // Submit — look for a login/submit button
    const submitButton = page.locator('input[type="submit"], button[type="submit"]').first();
    // The navigation wait is started together with the click so that a fast
    // redirect cannot complete before we begin listening for it.
    await Promise.all([
        page.waitForNavigation({ waitUntil: 'networkidle' }),
        submitButton.click(),
    ]);
    // Verify we're logged in by checking we're not still on the login page
    const currentUrl = page.url();
    if (currentUrl.includes('login') || currentUrl.includes('Login') || currentUrl === LOGIN_URL) {
        throw new Error(`Login failed — still on login page: ${currentUrl}`);
    }
    log(`Logged in successfully, redirected to: ${currentUrl}`);
}
/**
 * Scrape the member list page into plain member records.
 *
 * Column positions are detected from header text and fall back to fixed
 * positions when a header is not found. Rows with fewer than five cells or
 * without Standesbuch-Nr., Vorname and Zuname are skipped.
 *
 * @param {object} page - Playwright page of a logged-in session.
 * @returns {Promise<object[]>} Records with standesbuchNr, dienstgrad,
 *   vorname, zuname, geburtsdatum, svnr, eintrittsdatum, abmeldedatum,
 *   status and detailUrl.
 */
async function scrapeMembers(page) {
    log(`Navigating to members list: ${MEMBERS_URL}`);
    await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' });
    // Wait for the member table to appear
    // ASP.NET GridView renders as an HTML table
    await page.waitForSelector('table', { timeout: 15000 });
    // Collect every row of every table on the page; rows that are not member
    // rows are filtered out further down.
    // Columns: Status, St.-Nr., Dienstgrad, Vorname, Zuname, Geburtsdatum, SVNR, Eintrittsdatum, Abmeldedatum
    // The callback runs inside the browser, so it must stay self-contained.
    const rows = await page.$$eval('table tr', (rows) => {
        return rows.map(row => {
            const cells = Array.from(row.querySelectorAll('td'));
            const link = row.querySelector('a');
            return {
                cells: cells.map(c => c.textContent?.trim() ?? ''),
                href: link?.href ?? null,
                // Captured alongside href but not consumed by the code below.
                onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null,
            };
        });
    });
    // Find the header row to determine column positions.
    // NOTE(review): this reads the first element matching `table tr:first-child`;
    // if another table precedes the member grid, these may not be the grid's
    // headers — confirm against the live page.
    const headerRow = await page.$eval('table tr:first-child', (row) => {
        const cells = Array.from(row.querySelectorAll('th, td'));
        return cells.map(c => c.textContent?.trim().toLowerCase() ?? '');
    });
    const isSvnrHeader = (h) => h.includes('svnr') || h.includes('sv-nr');
    // Detect column indices from headers
    const colIdx = {
        status: headerRow.findIndex(h => h.includes('status')),
        // The loose 'nr' match would also hit the SVNR header, so SV-number
        // headers are excluded explicitly.
        standesbuchNr: headerRow.findIndex(h => !isSvnrHeader(h) && (h.includes('st.-nr') || h.includes('stnr') || h.includes('nr'))),
        dienstgrad: headerRow.findIndex(h => h.includes('dienstgrad')),
        vorname: headerRow.findIndex(h => h.includes('vorname')),
        zuname: headerRow.findIndex(h => h.includes('zuname') || h.includes('nachname')),
        geburtsdatum: headerRow.findIndex(h => h.includes('geburt')),
        svnr: headerRow.findIndex(isSvnrHeader),
        eintrittsdatum: headerRow.findIndex(h => h.includes('eintritt')),
        abmeldedatum: headerRow.findIndex(h => h.includes('abmeld')),
    };
    log(`Detected columns: ${JSON.stringify(colIdx)}`);
    // Fallback to positional columns if detection failed
    // Based on screenshot: Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4),
    // Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8)
    if (colIdx.standesbuchNr === -1)
        colIdx.standesbuchNr = 1;
    if (colIdx.dienstgrad === -1)
        colIdx.dienstgrad = 2;
    if (colIdx.vorname === -1)
        colIdx.vorname = 3;
    if (colIdx.zuname === -1)
        colIdx.zuname = 4;
    if (colIdx.geburtsdatum === -1)
        colIdx.geburtsdatum = 5;
    if (colIdx.svnr === -1)
        colIdx.svnr = 6;
    if (colIdx.eintrittsdatum === -1)
        colIdx.eintrittsdatum = 7;
    if (colIdx.abmeldedatum === -1)
        colIdx.abmeldedatum = 8;
    const members = [];
    for (const row of rows) {
        const { cells, href } = row;
        // Skip header rows and empty rows
        if (cells.length < 5)
            continue;
        const stnr = cellText(cells[colIdx.standesbuchNr]);
        const vorname = cellText(cells[colIdx.vorname]);
        const zuname = cellText(cells[colIdx.zuname]);
        if (!stnr || !vorname || !zuname)
            continue;
        const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]);
        members.push({
            standesbuchNr: stnr,
            dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '',
            vorname,
            zuname,
            geburtsdatum: parseDate(cells[colIdx.geburtsdatum]),
            svnr: cellText(cells[colIdx.svnr]),
            eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]),
            abmeldedatum,
            // Status is derived from the presence of a parseable Abmeldedatum,
            // not read from the Status column.
            status: abmeldedatum ? 'ausgetreten' : 'aktiv',
            detailUrl: href,
        });
    }
    return members;
}
/**
 * Open a member's detail page and scrape the first table whose header row
 * looks like an Ausbildung (qualification) table.
 *
 * @param {object} page - Playwright page of a logged-in session.
 * @param {object} member - Member record; `detailUrl` and `standesbuchNr` are read.
 * @returns {Promise<object[]>} Records with standesbuchNr, kursname,
 *   kursDatum, ablaufdatum, ort, bemerkung and syncKey. Empty when the member
 *   has no detail URL or no matching table is found.
 */
async function scrapeMemberAusbildung(page, member) {
    if (!member.detailUrl)
        return [];
    await page.goto(member.detailUrl, { waitUntil: 'networkidle' });
    // Look for Ausbildungsliste section — it's likely a table or list
    // Try to find it by heading text
    // NOTE(review): Playwright's `text=` engine appears to treat everything
    // after `text=` as the text to match, so this comma-separated form likely
    // never matches and the link fallback below always runs — confirm before
    // changing, since a fix alters which path is taken.
    const ausbildungSection = page.locator('text=Ausbildung, text=Ausbildungsliste').first();
    const hasSec = await ausbildungSection.isVisible().catch(() => false);
    if (!hasSec) {
        // Try navigating to an Ausbildung tab/link if present
        const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
        const hasLink = await ausbildungLink.isVisible().catch(() => false);
        if (hasLink) {
            await Promise.all([
                // Deliberately best effort: the click may update the page
                // without a navigation, in which case the wait rejects and is
                // ignored.
                page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => { }),
                ausbildungLink.click(),
            ]);
        }
    }
    // Parse the qualification table
    // Expected columns: Kursname, Datum, Ablaufdatum, Ort, Bemerkung (may vary)
    const isKursHeader = (h) => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung');
    const isAblaufHeader = (h) => h.includes('ablauf') || h.includes('gültig');
    const tables = await page.$$('table');
    const ausbildungen = [];
    for (const table of tables) {
        const rows = await table.$$eval('tr', (rows) => {
            return rows.map(row => ({
                cells: Array.from(row.querySelectorAll('td, th')).map(c => c.textContent?.trim() ?? ''),
            }));
        });
        if (rows.length < 2)
            continue;
        // Detect if this looks like an Ausbildung table
        const header = rows[0].cells.map(c => c.toLowerCase());
        const kursnameIdx = header.findIndex(isKursHeader);
        if (kursnameIdx === -1)
            continue;
        // 'ablaufdatum' also contains 'datum'; exclude expiry headers so the
        // course date can never be read from the expiry column.
        const datumIdx = header.findIndex(h => !isAblaufHeader(h) && (h.includes('datum') || h.includes('abschluss')));
        const ablaufIdx = header.findIndex(isAblaufHeader);
        const ortIdx = header.findIndex(h => h.includes('ort'));
        const bemIdx = header.findIndex(h => h.includes('bem') || h.includes('info'));
        for (const row of rows.slice(1)) {
            const kursname = cellText(row.cells[kursnameIdx]);
            if (!kursname)
                continue;
            const kursDatum = parseDate(datumIdx >= 0 ? row.cells[datumIdx] : null);
            const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;
            ausbildungen.push({
                standesbuchNr: member.standesbuchNr,
                kursname,
                kursDatum,
                ablaufdatum: parseDate(ablaufIdx >= 0 ? row.cells[ablaufIdx] : null),
                ort: ortIdx >= 0 ? cellText(row.cells[ortIdx]) : null,
                bemerkung: bemIdx >= 0 ? cellText(row.cells[bemIdx]) : null,
                syncKey,
            });
        }
        break; // only process the first Ausbildung table found
    }
    return ausbildungen;
}