update sync
This commit is contained in:
@@ -101,6 +101,8 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
FDISK_USERNAME: ${FDISK_USERNAME:?FDISK_USERNAME is required}
|
FDISK_USERNAME: ${FDISK_USERNAME:?FDISK_USERNAME is required}
|
||||||
FDISK_PASSWORD: ${FDISK_PASSWORD:?FDISK_PASSWORD is required}
|
FDISK_PASSWORD: ${FDISK_PASSWORD:?FDISK_PASSWORD is required}
|
||||||
|
FDISK_ID_FEUERWEHREN: ${FDISK_ID_FEUERWEHREN:-164}
|
||||||
|
FDISK_ID_INSTANZEN: ${FDISK_ID_INSTANZEN:-2853}
|
||||||
DB_HOST: postgres
|
DB_HOST: postgres
|
||||||
DB_PORT: 5432
|
DB_PORT: 5432
|
||||||
DB_NAME: ${POSTGRES_DB:-feuerwehr_prod}
|
DB_NAME: ${POSTGRES_DB:-feuerwehr_prod}
|
||||||
|
|||||||
@@ -2,8 +2,17 @@ import { chromium, Page } from '@playwright/test';
|
|||||||
import { FdiskMember, FdiskAusbildung } from './types';
|
import { FdiskMember, FdiskAusbildung } from './types';
|
||||||
|
|
||||||
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
|
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
|
||||||
|
const ID_FEUERWEHREN = process.env.FDISK_ID_FEUERWEHREN ?? '164';
|
||||||
|
const ID_INSTANZEN = process.env.FDISK_ID_INSTANZEN ?? '2853';
|
||||||
|
|
||||||
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
|
const LOGIN_URL = `${BASE_URL}/fdisk/module/vws/logins/logins.aspx`;
|
||||||
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`;
|
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/vws/MitgliedschaftenList.aspx`
|
||||||
|
+ `?search=1`
|
||||||
|
+ `&searchid_feuerwehren=${ID_FEUERWEHREN}`
|
||||||
|
+ `&searchid_instanzen=${ID_INSTANZEN}`
|
||||||
|
+ `&id_instanzen=${ID_INSTANZEN}`
|
||||||
|
+ `&searchid_instanzen_besitzer=${ID_INSTANZEN}`
|
||||||
|
+ `&searchid_fahrzeugstatus=1`;
|
||||||
|
|
||||||
function log(msg: string) {
|
function log(msg: string) {
|
||||||
console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
|
console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
|
||||||
@@ -77,16 +86,14 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ASP.NET WebForms login — try common selector patterns
|
// Exact selectors from the known login form HTML
|
||||||
const usernameField = page.locator('input[type="text"], input[name*="user"], input[name*="User"], input[id*="user"], input[id*="User"]').first();
|
const usernameField = page.locator('#login');
|
||||||
const passwordField = page.locator('input[type="password"]').first();
|
const passwordField = page.locator('#password');
|
||||||
|
const submitButton = page.locator('#Submit2');
|
||||||
|
|
||||||
await usernameField.waitFor({ state: 'visible', timeout: 10000 });
|
await usernameField.waitFor({ state: 'visible', timeout: 10000 });
|
||||||
await usernameField.fill(username);
|
await usernameField.fill(username);
|
||||||
await passwordField.fill(password);
|
await passwordField.fill(password);
|
||||||
|
|
||||||
// Submit — ASP.NET WebForms may use __doPostBack; click submit and wait for URL change
|
|
||||||
const submitButton = page.locator('input[type="submit"], button[type="submit"]').first();
|
|
||||||
await submitButton.click();
|
await submitButton.click();
|
||||||
|
|
||||||
// Wait for navigation away from the login page (up to 15s)
|
// Wait for navigation away from the login page (up to 15s)
|
||||||
@@ -109,86 +116,58 @@ async function login(page: Page, username: string, password: string): Promise<vo
|
|||||||
|
|
||||||
async function scrapeMembers(page: Page): Promise<FdiskMember[]> {
|
async function scrapeMembers(page: Page): Promise<FdiskMember[]> {
|
||||||
log(`Navigating to members list: ${MEMBERS_URL}`);
|
log(`Navigating to members list: ${MEMBERS_URL}`);
|
||||||
await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' });
|
await page.goto(MEMBERS_URL, { waitUntil: 'domcontentloaded' });
|
||||||
|
await page.waitForLoadState('networkidle');
|
||||||
|
|
||||||
// Wait for the member table to appear
|
// The member table uses class FdcLayList
|
||||||
// ASP.NET GridView renders as an HTML table — find the data table
|
await page.waitForSelector('table.FdcLayList', { timeout: 20000 });
|
||||||
await page.waitForSelector('table', { timeout: 15000 });
|
|
||||||
|
|
||||||
// Find the main data table (likely the one with the most rows)
|
// Column layout (0-indexed td): 0=icon, 1=Status, 2=St.-Nr., 3=Dienstgrad,
|
||||||
// Columns: Status, St.-Nr., Dienstgrad, Vorname, Zuname, Geburtsdatum, SVNR, Eintrittsdatum, Abmeldedatum
|
// 4=Vorname, 5=Zuname, 6=Geburtsdatum, 7=SVNR, 8=Eintrittsdatum, 9=Abmeldedatum, 10=icon
|
||||||
const rows = await page.$$eval('table tr', (rows) => {
|
// Each <td> contains an <a title="value"> — the title is the clean cell text.
|
||||||
return rows.map(row => {
|
// The href on each <a> is the member detail URL (same link repeated across all cells in a row).
|
||||||
const cells = Array.from(row.querySelectorAll('td'));
|
const rows = await page.$$eval('table.FdcLayList tbody tr', (trs) =>
|
||||||
const link = row.querySelector('a');
|
trs.map((tr) => {
|
||||||
|
const cells = Array.from(tr.querySelectorAll('td'));
|
||||||
|
const val = (i: number) => {
|
||||||
|
const a = cells[i]?.querySelector('a');
|
||||||
|
return (a?.getAttribute('title') ?? cells[i]?.textContent ?? '').trim();
|
||||||
|
};
|
||||||
|
const href = (tr.querySelector('a') as HTMLAnchorElement | null)?.href ?? null;
|
||||||
return {
|
return {
|
||||||
cells: cells.map(c => (c as Element).textContent?.trim() ?? ''),
|
status: val(1),
|
||||||
href: link?.href ?? null,
|
standesbuchNr: val(2),
|
||||||
onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null,
|
dienstgrad: val(3),
|
||||||
|
vorname: val(4),
|
||||||
|
zuname: val(5),
|
||||||
|
geburtsdatum: val(6),
|
||||||
|
svnr: val(7),
|
||||||
|
eintrittsdatum: val(8),
|
||||||
|
abmeldedatum: val(9),
|
||||||
|
href,
|
||||||
};
|
};
|
||||||
});
|
}),
|
||||||
});
|
);
|
||||||
|
|
||||||
// Find the header row to determine column positions
|
log(`Parsed ${rows.length} rows from member table`);
|
||||||
const headerRow = await page.$eval('table tr:first-child', (row) => {
|
|
||||||
const cells = Array.from(row.querySelectorAll('th, td'));
|
|
||||||
return cells.map(c => (c as Element).textContent?.trim().toLowerCase() ?? '');
|
|
||||||
});
|
|
||||||
|
|
||||||
// Detect column indices from headers
|
|
||||||
const colIdx = {
|
|
||||||
status: headerRow.findIndex(h => h.includes('status')),
|
|
||||||
standesbuchNr: headerRow.findIndex(h => h.includes('st.-nr') || h.includes('stnr') || h.includes('nr')),
|
|
||||||
dienstgrad: headerRow.findIndex(h => h.includes('dienstgrad')),
|
|
||||||
vorname: headerRow.findIndex(h => h.includes('vorname')),
|
|
||||||
zuname: headerRow.findIndex(h => h.includes('zuname') || h.includes('nachname')),
|
|
||||||
geburtsdatum: headerRow.findIndex(h => h.includes('geburt')),
|
|
||||||
svnr: headerRow.findIndex(h => h.includes('svnr') || h.includes('sv-nr')),
|
|
||||||
eintrittsdatum: headerRow.findIndex(h => h.includes('eintritt')),
|
|
||||||
abmeldedatum: headerRow.findIndex(h => h.includes('abmeld')),
|
|
||||||
};
|
|
||||||
|
|
||||||
log(`Detected columns: ${JSON.stringify(colIdx)}`);
|
|
||||||
|
|
||||||
// Fallback to positional columns if detection failed
|
|
||||||
// Based on screenshot: Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4),
|
|
||||||
// Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8)
|
|
||||||
if (colIdx.standesbuchNr === -1) colIdx.standesbuchNr = 1;
|
|
||||||
if (colIdx.dienstgrad === -1) colIdx.dienstgrad = 2;
|
|
||||||
if (colIdx.vorname === -1) colIdx.vorname = 3;
|
|
||||||
if (colIdx.zuname === -1) colIdx.zuname = 4;
|
|
||||||
if (colIdx.geburtsdatum === -1) colIdx.geburtsdatum = 5;
|
|
||||||
if (colIdx.svnr === -1) colIdx.svnr = 6;
|
|
||||||
if (colIdx.eintrittsdatum === -1) colIdx.eintrittsdatum = 7;
|
|
||||||
if (colIdx.abmeldedatum === -1) colIdx.abmeldedatum = 8;
|
|
||||||
|
|
||||||
const members: FdiskMember[] = [];
|
const members: FdiskMember[] = [];
|
||||||
|
|
||||||
for (const row of rows) {
|
for (const row of rows) {
|
||||||
const { cells, href, onclick } = row;
|
if (!row.standesbuchNr || !row.vorname || !row.zuname) continue;
|
||||||
// Skip header rows and empty rows
|
const abmeldedatum = parseDate(row.abmeldedatum);
|
||||||
if (cells.length < 5) continue;
|
|
||||||
const stnr = cellText(cells[colIdx.standesbuchNr]);
|
|
||||||
const vorname = cellText(cells[colIdx.vorname]);
|
|
||||||
const zuname = cellText(cells[colIdx.zuname]);
|
|
||||||
if (!stnr || !vorname || !zuname) continue;
|
|
||||||
|
|
||||||
const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]);
|
|
||||||
|
|
||||||
members.push({
|
members.push({
|
||||||
standesbuchNr: stnr,
|
standesbuchNr: row.standesbuchNr,
|
||||||
dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '',
|
dienstgrad: row.dienstgrad,
|
||||||
vorname,
|
vorname: row.vorname,
|
||||||
zuname,
|
zuname: row.zuname,
|
||||||
geburtsdatum: parseDate(cells[colIdx.geburtsdatum]),
|
geburtsdatum: parseDate(row.geburtsdatum),
|
||||||
svnr: cellText(cells[colIdx.svnr]),
|
svnr: row.svnr || null,
|
||||||
eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]),
|
eintrittsdatum: parseDate(row.eintrittsdatum),
|
||||||
abmeldedatum,
|
abmeldedatum,
|
||||||
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
status: abmeldedatum ? 'ausgetreten' : 'aktiv',
|
||||||
detailUrl: href,
|
detailUrl: row.href,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return members;
|
return members;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -207,10 +186,8 @@ async function scrapeMemberAusbildung(page: Page, member: FdiskMember): Promise<
|
|||||||
const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
|
const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
|
||||||
const hasLink = await ausbildungLink.isVisible().catch(() => false);
|
const hasLink = await ausbildungLink.isVisible().catch(() => false);
|
||||||
if (hasLink) {
|
if (hasLink) {
|
||||||
await Promise.all([
|
await ausbildungLink.click();
|
||||||
page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => {}),
|
await page.waitForLoadState('networkidle').catch(() => {});
|
||||||
ausbildungLink.click(),
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user