From 618f1d4996698b303a9d4cc612ea71b420e635f9 Mon Sep 17 00:00:00 2001 From: Matthias Hochmeister Date: Fri, 13 Mar 2026 12:54:48 +0100 Subject: [PATCH] update sync --- docker-compose.yml | 2 + sync/src/scraper.ts | 141 ++++++++++++++++++-------------------------- 2 files changed, 61 insertions(+), 82 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index fce0650..27109dc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -101,6 +101,8 @@ services: environment: FDISK_USERNAME: ${FDISK_USERNAME:?FDISK_USERNAME is required} FDISK_PASSWORD: ${FDISK_PASSWORD:?FDISK_PASSWORD is required} + FDISK_ID_FEUERWEHREN: ${FDISK_ID_FEUERWEHREN:-164} + FDISK_ID_INSTANZEN: ${FDISK_ID_INSTANZEN:-2853} DB_HOST: postgres DB_PORT: 5432 DB_NAME: ${POSTGRES_DB:-feuerwehr_prod} diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts index 6667259..a80a7b1 100644 --- a/sync/src/scraper.ts +++ b/sync/src/scraper.ts @@ -1,9 +1,18 @@ import { chromium, Page } from '@playwright/test'; import { FdiskMember, FdiskAusbildung } from './types'; -const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at'; -const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`; -const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`; +const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at'; +const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164'; +const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? 
'2853'; + +const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`; +const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/vws/MitgliedschaftenList.aspx` + + `?search=1` + + `&searchid_feuerwehren=${ID_FEUERWEHREN}` + + `&searchid_instanzen=${ID_INSTANZEN}` + + `&id_instanzen=${ID_INSTANZEN}` + + `&searchid_instanzen_besitzer=${ID_INSTANZEN}` + + `&searchid_fahrzeugstatus=1`; function log(msg: string) { console.log(`[scraper] ${new Date().toISOString()} ${msg}`); @@ -77,16 +86,14 @@ async function login(page: Page, username: string, password: string): Promise { log(`Navigating to members list: ${MEMBERS_URL}`); - await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' }); + await page.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' }); + await page.waitForLoadState('networkidle'); - // Wait for the member table to appear - // ASP.NET GridView renders as an HTML table — find the data table - await page.waitForSelector('table', { timeout: 15000 }); + // The member table uses class FdcLayList + await page.waitForSelector('table.FdcLayList', { timeout: 20000 }); - // Find the main data table (likely the one with the most rows) - // Columns: Status, St.-Nr., Dienstgrad, Vorname, Zuname, Geburtsdatum, SVNR, Eintrittsdatum, Abmeldedatum - const rows = await page.$$eval('table tr', (rows) => { - return rows.map(row => { - const cells = Array.from(row.querySelectorAll('td')); - const link = row.querySelector('a'); - return { - cells: cells.map(c => (c as Element).textContent?.trim() ?? ''), - href: link?.href ?? null, - onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null, + // Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad, + // 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon + // Each <td> contains an <a title="..."> — the title is the clean cell text. + // The href on each <a> is the member detail URL (same link repeated across all cells in a row). 
+ const rows = await page.$$eval('table.FdcLayList tbody tr', (trs) => + trs.map((tr) => { + const cells = Array.from(tr.querySelectorAll('td')); + const val = (i: number) => { + const a = cells[i]?.querySelector('a'); + return (a?.getAttribute('title') ?? cells[i]?.textContent ?? '').trim(); }; - }); - }); + const href = (tr.querySelector('a') as HTMLAnchorElement | null)?.href ?? null; + return { + status: val(1), + standesbuchNr: val(2), + dienstgrad: val(3), + vorname: val(4), + zuname: val(5), + geburtsdatum: val(6), + svnr: val(7), + eintrittsdatum: val(8), + abmeldedatum: val(9), + href, + }; + }), + ); - // Find the header row to determine column positions - const headerRow = await page.$eval('table tr:first-child', (row) => { - const cells = Array.from(row.querySelectorAll('th, td')); - return cells.map(c => (c as Element).textContent?.trim().toLowerCase() ?? ''); - }); - - // Detect column indices from headers - const colIdx = { - status: headerRow.findIndex(h => h.includes('status')), - standesbuchNr: headerRow.findIndex(h => h.includes('st.-nr') || h.includes('stnr') || h.includes('nr')), - dienstgrad: headerRow.findIndex(h => h.includes('dienstgrad')), - vorname: headerRow.findIndex(h => h.includes('vorname')), - zuname: headerRow.findIndex(h => h.includes('zuname') || h.includes('nachname')), - geburtsdatum: headerRow.findIndex(h => h.includes('geburt')), - svnr: headerRow.findIndex(h => h.includes('svnr') || h.includes('sv-nr')), - eintrittsdatum: headerRow.findIndex(h => h.includes('eintritt')), - abmeldedatum: headerRow.findIndex(h => h.includes('abmeld')), - }; - - log(`Detected columns: ${JSON.stringify(colIdx)}`); - - // Fallback to positional columns if detection failed - // Based on screenshot: Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4), - // Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8) - if (colIdx.standesbuchNr === -1) colIdx.standesbuchNr = 1; - if (colIdx.dienstgrad === -1) colIdx.dienstgrad = 2; - if 
(colIdx.vorname === -1) colIdx.vorname = 3; - if (colIdx.zuname === -1) colIdx.zuname = 4; - if (colIdx.geburtsdatum === -1) colIdx.geburtsdatum = 5; - if (colIdx.svnr === -1) colIdx.svnr = 6; - if (colIdx.eintrittsdatum === -1) colIdx.eintrittsdatum = 7; - if (colIdx.abmeldedatum === -1) colIdx.abmeldedatum = 8; + log(`Parsed ${rows.length} rows from member table`); const members: FdiskMember[] = []; - for (const row of rows) { - const { cells, href, onclick } = row; - // Skip header rows and empty rows - if (cells.length < 5) continue; - const stnr = cellText(cells[colIdx.standesbuchNr]); - const vorname = cellText(cells[colIdx.vorname]); - const zuname = cellText(cells[colIdx.zuname]); - if (!stnr || !vorname || !zuname) continue; - - const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]); - + if (!row.standesbuchNr || !row.vorname || !row.zuname) continue; + const abmeldedatum = parseDate(row.abmeldedatum); members.push({ - standesbuchNr: stnr, - dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '', - vorname, - zuname, - geburtsdatum: parseDate(cells[colIdx.geburtsdatum]), - svnr: cellText(cells[colIdx.svnr]), - eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]), + standesbuchNr: row.standesbuchNr, + dienstgrad: row.dienstgrad, + vorname: row.vorname, + zuname: row.zuname, + geburtsdatum: parseDate(row.geburtsdatum), + svnr: row.svnr || null, + eintrittsdatum: parseDate(row.eintrittsdatum), abmeldedatum, - status: abmeldedatum ? 'ausgetreten' : 'aktiv', - detailUrl: href, + status: abmeldedatum ? 
'ausgetreten' : 'aktiv', + detailUrl: row.href, }); } - return members; } @@ -207,10 +186,8 @@ async function scrapeMemberAusbildung(page: Page, member: FdiskMember): Promise< const ausbildungLink = page.locator('a:has-text("Ausbildung")').first(); const hasLink = await ausbildungLink.isVisible().catch(() => false); if (hasLink) { - await Promise.all([ - page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => {}), - ausbildungLink.click(), - ]); + await ausbildungLink.click(); + await page.waitForLoadState('networkidle').catch(() => {}); } }