update sync
This commit is contained in:
@@ -101,6 +101,8 @@ services:
|
||||
environment:
|
||||
FDISK_USERNAME: ${FDISK_USERNAME:?FDISK_USERNAME is required}
|
||||
FDISK_PASSWORD: ${FDISK_PASSWORD:?FDISK_PASSWORD is required}
|
||||
FDISK_ID_FEUERWEHREN: ${FDISK_ID_FEUERWEHREN:-164}
|
||||
FDISK_ID_INSTANZEN: ${FDISK_ID_INSTANZEN:-2853}
|
||||
DB_HOST: postgres
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${POSTGRES_DB:-feuerwehr_prod}
|
||||
|
||||
@@ -2,8 +2,17 @@ import { chromium, Page } from '@playwright/test';
|
||||
import { FdiskMember, FdiskAusbildung } from './types';
|
||||
|
||||
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
|
||||
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
|
||||
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';
|
||||
|
||||
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
|
||||
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`;
|
||||
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/vws/MitgliedschaftenList.aspx`
|
||||
+ `?search=1`
|
||||
+ `&searchid_feuerwehren=${ID_FEUERWEHREN}`
|
||||
+ `&searchid_instanzen=${ID_INSTANZEN}`
|
||||
+ `&id_instanzen=${ID_INSTANZEN}`
|
||||
+ `&searchid_instanzen_besitzer=${ID_INSTANZEN}`
|
||||
+ `&searchid_fahrzeugstatus=1`;
|
||||
|
||||
function log(msg: string) {
|
||||
console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
|
||||
@@ -77,16 +86,14 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
||||
return;
|
||||
}
|
||||
|
||||
// ASP.NET WebForms login — try common selector patterns
|
||||
const usernameField = page.locator('input[type="text"], input[name*="user"], input[name*="User"], input[id*="user"], input[id*="User"]').first();
|
||||
const passwordField = page.locator('input[type="password"]').first();
|
||||
// Exact selectors from the known login form HTML
|
||||
const usernameField = page.locator('#login');
|
||||
const passwordField = page.locator('#password');
|
||||
const submitButton = page.locator('#Submit2');
|
||||
|
||||
await usernameField.waitFor({ state: 'visible', timeout: 10000 });
|
||||
await usernameField.fill(username);
|
||||
await passwordField.fill(password);
|
||||
|
||||
// Submit — ASP.NET WebForms may use __doPostBack; click submit and wait for URL change
|
||||
const submitButton = page.locator('input[type="submit"], button[type="submit"]').first();
|
||||
await submitButton.click();
|
||||
|
||||
// Wait for navigation away from the login page (up to 15s)
|
||||
@@ -109,86 +116,58 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
||||
|
||||
async function scrapeMembers(page: Page): Promise<FdiskMember[]> {
|
||||
log(`Navigating to members list: ${MEMBERS_URL}`);
|
||||
await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' });
|
||||
await page.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
|
||||
await page.waitForLoadState('networkidle');
|
||||
|
||||
// Wait for the member table to appear
|
||||
// ASP.NET GridView renders as an HTML table — find the data table
|
||||
await page.waitForSelector('table', { timeout: 15000 });
|
||||
// The member table uses class FdcLayList
|
||||
await page.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||
|
||||
// Find the main data table (likely the one with the most rows)
|
||||
// Columns: Status, St.-Nr., Dienstgrad, Vorname, Zuname, Geburtsdatum, SVNR, Eintrittsdatum, Abmeldedatum
|
||||
const rows = await page.$$eval('table tr', (rows) => {
|
||||
return rows.map(row => {
|
||||
const cells = Array.from(row.querySelectorAll('td'));
|
||||
const link = row.querySelector('a');
|
||||
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
|
||||
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
|
||||
// Each <td> contains an <a title="value"> — the title is the clean cell text.
|
||||
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
|
||||
const rows = await page.$$eval('table.FdcLayList tbody tr', (trs) =>
|
||||
trs.map((tr) => {
|
||||
const cells = Array.from(tr.querySelectorAll('td'));
|
||||
const val = (i: number) => {
|
||||
const a = cells[i]?.querySelector('a');
|
||||
return (a?.getAttribute('title') ?? cells[i]?.textContent ?? '').trim();
|
||||
};
|
||||
const href = (tr.querySelector('a') as HTMLAnchorElement | null)?.href ?? null;
|
||||
return {
|
||||
cells: cells.map(c => (c as Element).textContent?.trim() ?? ''),
|
||||
href: link?.href ?? null,
|
||||
onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null,
|
||||
status: val(1),
|
||||
standesbuchNr: val(2),
|
||||
dienstgrad: val(3),
|
||||
vorname: val(4),
|
||||
zuname: val(5),
|
||||
geburtsdatum: val(6),
|
||||
svnr: val(7),
|
||||
eintrittsdatum: val(8),
|
||||
abmeldedatum: val(9),
|
||||
href,
|
||||
};
|
||||
});
|
||||
});
|
||||
}),
|
||||
);
|
||||
|
||||
// Find the header row to determine column positions
|
||||
const headerRow = await page.$eval('table tr:first-child', (row) => {
|
||||
const cells = Array.from(row.querySelectorAll('th, td'));
|
||||
return cells.map(c => (c as Element).textContent?.trim().toLowerCase() ?? '');
|
||||
});
|
||||
|
||||
// Detect column indices from headers
|
||||
const colIdx = {
|
||||
status: headerRow.findIndex(h => h.includes('status')),
|
||||
standesbuchNr: headerRow.findIndex(h => h.includes('st.-nr') || h.includes('stnr') || h.includes('nr')),
|
||||
dienstgrad: headerRow.findIndex(h => h.includes('dienstgrad')),
|
||||
vorname: headerRow.findIndex(h => h.includes('vorname')),
|
||||
zuname: headerRow.findIndex(h => h.includes('zuname') || h.includes('nachname')),
|
||||
geburtsdatum: headerRow.findIndex(h => h.includes('geburt')),
|
||||
svnr: headerRow.findIndex(h => h.includes('svnr') || h.includes('sv-nr')),
|
||||
eintrittsdatum: headerRow.findIndex(h => h.includes('eintritt')),
|
||||
abmeldedatum: headerRow.findIndex(h => h.includes('abmeld')),
|
||||
};
|
||||
|
||||
log(`Detected columns: ${JSON.stringify(colIdx)}`);
|
||||
|
||||
// Fallback to positional columns if detection failed
|
||||
// Based on screenshot: Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4),
|
||||
// Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8)
|
||||
if (colIdx.standesbuchNr === -1) colIdx.standesbuchNr = 1;
|
||||
if (colIdx.dienstgrad === -1) colIdx.dienstgrad = 2;
|
||||
if (colIdx.vorname === -1) colIdx.vorname = 3;
|
||||
if (colIdx.zuname === -1) colIdx.zuname = 4;
|
||||
if (colIdx.geburtsdatum === -1) colIdx.geburtsdatum = 5;
|
||||
if (colIdx.svnr === -1) colIdx.svnr = 6;
|
||||
if (colIdx.eintrittsdatum === -1) colIdx.eintrittsdatum = 7;
|
||||
if (colIdx.abmeldedatum === -1) colIdx.abmeldedatum = 8;
|
||||
log(`Parsed ${rows.length} rows from member table`);
|
||||
|
||||
const members: FdiskMember[] = [];
|
||||
|
||||
for (const row of rows) {
|
||||
const { cells, href, onclick } = row;
|
||||
// Skip header rows and empty rows
|
||||
if (cells.length < 5) continue;
|
||||
const stnr = cellText(cells[colIdx.standesbuchNr]);
|
||||
const vorname = cellText(cells[colIdx.vorname]);
|
||||
const zuname = cellText(cells[colIdx.zuname]);
|
||||
if (!stnr || !vorname || !zuname) continue;
|
||||
|
||||
const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]);
|
||||
|
||||
if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
|
||||
const abmeldedatum = parseDate(row.abmeldedatum);
|
||||
members.push({
|
||||
standesbuchNr: stnr,
|
||||
dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '',
|
||||
vorname,
|
||||
zuname,
|
||||
geburtsdatum: parseDate(cells[colIdx.geburtsdatum]),
|
||||
svnr: cellText(cells[colIdx.svnr]),
|
||||
eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]),
|
||||
standesbuchNr: row.standesbuchNr,
|
||||
dienstgrad: row.dienstgrad,
|
||||
vorname: row.vorname,
|
||||
zuname: row.zuname,
|
||||
geburtsdatum: parseDate(row.geburtsdatum),
|
||||
svnr: row.svnr || null,
|
||||
eintrittsdatum: parseDate(row.eintrittsdatum),
|
||||
abmeldedatum,
|
||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||
detailUrl: href,
|
||||
detailUrl: row.href,
|
||||
});
|
||||
}
|
||||
|
||||
return members;
|
||||
}
|
||||
|
||||
@@ -207,10 +186,8 @@ async function scrapeMemberAusbildung(page: Page, member: FdiskMember): Promise<
|
||||
const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
|
||||
const hasLink = await ausbildungLink.isVisible().catch(() => false);
|
||||
if (hasLink) {
|
||||
await Promise.all([
|
||||
page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => {}),
|
||||
ausbildungLink.click(),
|
||||
]);
|
||||
await ausbildungLink.click();
|
||||
await page.waitForLoadState('networkidle').catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user