add new features

This commit is contained in:
Matthias Hochmeister
2026-03-01 11:50:27 +01:00
parent 73ab6cea07
commit 681acd8203
25 changed files with 1518 additions and 4 deletions

251
sync/src/scraper.ts Normal file
View File

@@ -0,0 +1,251 @@
import { chromium, Page } from '@playwright/test';
import { FdiskMember, FdiskAusbildung } from './types';
// Root URL of the FDISK web application; the FDISK_BASE_URL environment variable overrides the default.
const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
// Page that login() opens first and compares against to detect a failed login.
const LOGIN_URL = `${BASE_URL}/fdisk/`;
// Page that scrapeMembers() opens to read the member table.
const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`;
/**
 * Print a message to stdout, prefixed with the `[scraper]` tag and the
 * current time as an ISO-8601 timestamp.
 */
function log(msg: string): void {
  const timestamp = new Date().toISOString();
  console.log(['[scraper]', timestamp, msg].join(' '));
}
/**
 * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
 *
 * Surrounding whitespace is ignored and single-digit day/month values
 * (e.g. `1.3.2024`) are accepted and zero-padded in the result.
 *
 * @param raw - Cell text as read from the page; may be missing.
 * @returns The ISO date string, or null if the input is empty, does not match
 *          the expected pattern, or is not a real calendar date
 *          (e.g. `31.02.2024`).
 */
function parseDate(raw: string | null | undefined): string | null {
  if (!raw) return null;
  const match = /^(\d{1,2})\.(\d{1,2})\.(\d{4})$/.exec(raw.trim());
  if (!match) return null;

  const day = Number(match[1]);
  const month = Number(match[2]);
  const year = Number(match[3]);

  // Round-trip through Date to reject impossible dates: out-of-range fields
  // roll over into a different day/month/year and no longer match the input.
  // setUTCFullYear is used (rather than Date.UTC) so that years below 100 are
  // taken literally instead of being mapped to 19xx.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDate) return null;

  const pad = (value: number, width: number): string => String(value).padStart(width, '0');
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}
/**
 * Normalise the text of a table cell.
 *
 * @param text - Raw cell text; may be missing.
 * @returns The text with surrounding whitespace removed, or null when the
 *          input is missing or contains nothing but whitespace.
 */
function cellText(text: string | undefined | null): string | null {
  if (text == null) return null;
  const stripped = text.trim();
  return stripped.length > 0 ? stripped : null;
}
/**
 * Log in to FDISK, read the member list, then visit each member's detail page
 * to collect their Ausbildung (qualification) records.
 *
 * A failure while scraping one member's Ausbildungen is logged and does not
 * abort the run; that member simply contributes no records.
 *
 * @param username - FDISK login name.
 * @param password - FDISK password.
 * @returns All scraped members plus the Ausbildung records that could be read.
 * @throws If launching the browser, logging in, or reading the member list fails.
 */
export async function scrapeAll(username: string, password: string): Promise<{
  members: FdiskMember[];
  ausbildungen: FdiskAusbildung[];
}> {
  // Pause between member detail requests, in milliseconds.
  const REQUEST_DELAY_MS = 500;

  const browser = await chromium.launch({ headless: true });
  try {
    // Context and page are created inside the try so that the browser is
    // closed even when one of these setup calls throws.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    await login(page, username, password);
    const members = await scrapeMembers(page);
    log(`Found ${members.length} members`);

    const ausbildungen: FdiskAusbildung[] = [];
    for (const member of members) {
      if (!member.detailUrl) continue;
      try {
        const quals = await scrapeMemberAusbildung(page, member);
        ausbildungen.push(...quals);
        log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
      } catch (err: unknown) {
        log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${String(err)}`);
      }
      // Polite delay between requests. It runs after failures too, so a run of
      // errors does not turn into back-to-back requests, and it uses a plain
      // timer so the pause itself cannot throw.
      await new Promise<void>((resolve) => setTimeout(resolve, REQUEST_DELAY_MS));
    }
    return { members, ausbildungen };
  } finally {
    await browser.close();
  }
}
/**
 * Open the login page, fill in the credentials and submit the form.
 *
 * The form fields are located heuristically (first text/user-like input, first
 * password input, first submit control) — adjust the selectors if login fails.
 *
 * @param page - Page to drive.
 * @param username - Value typed into the user name field.
 * @param password - Value typed into the password field.
 * @throws Error when, after submitting, the URL still contains `login`/`Login`
 *         or equals the login URL.
 */
async function login(page: Page, username: string, password: string): Promise<void> {
  log(`Navigating to ${LOGIN_URL}`);
  await page.goto(LOGIN_URL, { waitUntil: 'networkidle' });

  // ASP.NET WebForms login — candidate selectors for the user name input.
  const usernameSelector = [
    'input[type="text"]',
    'input[name*="user"]',
    'input[name*="User"]',
    'input[id*="user"]',
    'input[id*="User"]',
  ].join(', ');

  await page.locator(usernameSelector).first().fill(username);
  await page.locator('input[type="password"]').first().fill(password);

  // Start waiting for the navigation before clicking so the event is not missed.
  const submit = page.locator('input[type="submit"], button[type="submit"]').first();
  const navigation = page.waitForNavigation({ waitUntil: 'networkidle' });
  await Promise.all([navigation, submit.click()]);

  // A successful login is expected to leave the login page.
  const landedOn = page.url();
  const stillOnLoginPage =
    ['login', 'Login'].some((marker) => landedOn.includes(marker)) || landedOn === LOGIN_URL;
  if (stillOnLoginPage) {
    throw new Error(`Login failed — still on login page: ${landedOn}`);
  }
  log(`Logged in successfully, redirected to: ${landedOn}`);
}
/**
 * Open the member list page and convert its table rows into FdiskMember records.
 *
 * Column positions are detected from the header text; any column that cannot
 * be detected falls back to the fixed layout
 * Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4),
 * Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8).
 *
 * Skipped rows: fewer than five `<td>` cells, missing St.-Nr./Vorname/Zuname,
 * or a header row rendered with `<td>` cells (recognised by its labels).
 *
 * @param page - Page to drive; scrapeAll logs in on it before calling this.
 * @returns Members in page order. `status` is 'ausgetreten' when an
 *          Abmeldedatum could be parsed, otherwise 'aktiv'.
 */
async function scrapeMembers(page: Page): Promise<FdiskMember[]> {
  log(`Navigating to members list: ${MEMBERS_URL}`);
  await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' });

  // ASP.NET GridView renders as an HTML table — wait until one is present.
  await page.waitForSelector('table', { timeout: 15000 });

  // NOTE(review): 'table tr' collects rows from every table on the page, and
  // 'table tr:first-child' below resolves to the first such row in document
  // order. This presumably relies on the member grid being the first table —
  // confirm against the live page.
  const rows = await page.$$eval('table tr', (trs) =>
    trs.map((tr) => ({
      cells: Array.from(tr.querySelectorAll('td')).map((td) => td.textContent?.trim() ?? ''),
      href: tr.querySelector('a')?.href ?? null,
    })),
  );

  // Lower-cased header labels, used to determine column positions.
  const headerRow = await page.$eval('table tr:first-child', (row) => {
    const cells = Array.from(row.querySelectorAll('th, td'));
    return cells.map((c) => c.textContent?.trim().toLowerCase() ?? '');
  });

  const findCol = (...needles: string[]): number =>
    headerRow.findIndex((h) => needles.some((needle) => h.includes(needle)));
  const isSvnrHeader = (h: string): boolean => h.includes('svnr') || h.includes('sv-nr');

  // Prefer the specific St.-Nr. labels. The generic 'nr' match is only a last
  // resort and must not pick the SVNR column, whose label also contains 'nr'.
  let standesbuchNrIdx = findCol('st.-nr', 'stnr');
  if (standesbuchNrIdx === -1) {
    standesbuchNrIdx = headerRow.findIndex((h) => h.includes('nr') && !isSvnrHeader(h));
  }

  const colIdx = {
    status: findCol('status'),
    standesbuchNr: standesbuchNrIdx,
    dienstgrad: findCol('dienstgrad'),
    vorname: findCol('vorname'),
    zuname: findCol('zuname', 'nachname'),
    geburtsdatum: findCol('geburt'),
    svnr: headerRow.findIndex(isSvnrHeader),
    eintrittsdatum: findCol('eintritt'),
    abmeldedatum: findCol('abmeld'),
  };
  // Logged before the fallbacks are applied so a -1 shows which headers were not recognised.
  log(`Detected columns: ${JSON.stringify(colIdx)}`);

  // Fallback to positional columns if detection failed.
  if (colIdx.standesbuchNr === -1) colIdx.standesbuchNr = 1;
  if (colIdx.dienstgrad === -1) colIdx.dienstgrad = 2;
  if (colIdx.vorname === -1) colIdx.vorname = 3;
  if (colIdx.zuname === -1) colIdx.zuname = 4;
  if (colIdx.geburtsdatum === -1) colIdx.geburtsdatum = 5;
  if (colIdx.svnr === -1) colIdx.svnr = 6;
  if (colIdx.eintrittsdatum === -1) colIdx.eintrittsdatum = 7;
  if (colIdx.abmeldedatum === -1) colIdx.abmeldedatum = 8;

  const members: FdiskMember[] = [];
  for (const { cells, href } of rows) {
    // Rows built from <th> cells (and empty rows) have too few <td> cells.
    if (cells.length < 5) continue;

    const stnr = cellText(cells[colIdx.standesbuchNr]);
    const vorname = cellText(cells[colIdx.vorname]);
    const zuname = cellText(cells[colIdx.zuname]);
    if (!stnr || !vorname || !zuname) continue;

    // A header row rendered with <td> cells passes the checks above; recognise
    // it by its labels so it is not stored as a member.
    const zunameLabel = zuname.toLowerCase();
    if (vorname.toLowerCase() === 'vorname' && (zunameLabel === 'zuname' || zunameLabel === 'nachname')) {
      continue;
    }

    const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]);
    members.push({
      standesbuchNr: stnr,
      dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '',
      vorname,
      zuname,
      geburtsdatum: parseDate(cells[colIdx.geburtsdatum]),
      svnr: cellText(cells[colIdx.svnr]),
      eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]),
      abmeldedatum,
      status: abmeldedatum ? 'ausgetreten' : 'aktiv',
      detailUrl: href,
    });
  }
  return members;
}
/**
 * Open a member's detail page and read their Ausbildung (qualification) table.
 *
 * If a visible link containing "Ausbildung" exists it is clicked first. Then
 * the first table whose header row mentions Kurs / Ausbildung / Bezeichnung is
 * parsed; all other tables are ignored.
 *
 * @param page - Page to drive; it is navigated away from its current URL.
 * @param member - Member whose `detailUrl` is opened and whose `standesbuchNr`
 *                 is stamped on every returned record.
 * @returns One record per table row that has a course name; empty when the
 *          member has no detail URL or no matching table is found.
 */
async function scrapeMemberAusbildung(page: Page, member: FdiskMember): Promise<FdiskAusbildung[]> {
  if (!member.detailUrl) return [];
  await page.goto(member.detailUrl, { waitUntil: 'networkidle' });

  // The previous "is the section already visible?" probe used the selector
  // 'text=Ausbildung, text=Ausbildungsliste'. The text engine does not support
  // comma-separated alternatives, so that was a search for the whole literal
  // string and never matched. The probe is dropped; the behaviour it actually
  // produced — always try the Ausbildung tab/link when one is visible — is kept.
  const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
  const hasLink = await ausbildungLink.isVisible().catch(() => false);
  if (hasLink) {
    await Promise.all([
      // The click may not trigger a navigation; a timeout here is not an error.
      page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => {}),
      ausbildungLink.click(),
    ]);
  }

  const isCourseHeader = (h: string): boolean =>
    h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung');
  const isExpiryHeader = (h: string): boolean => h.includes('ablauf') || h.includes('gültig');

  // Expected columns: Kursname, Datum, Ablaufdatum, Ort, Bemerkung (may vary).
  const ausbildungen: FdiskAusbildung[] = [];
  for (const table of await page.$$('table')) {
    const rows = await table.$$eval('tr', (trs) =>
      trs.map((tr) => Array.from(tr.querySelectorAll('td, th')).map((c) => c.textContent?.trim() ?? '')),
    );
    const [headerCells, ...dataRows] = rows;
    if (!headerCells || dataRows.length === 0) continue;

    // A table counts as the Ausbildung table when its first row names a course column.
    const header = headerCells.map((c) => c.toLowerCase());
    const kursnameIdx = header.findIndex(isCourseHeader);
    if (kursnameIdx === -1) continue;

    // "Ablaufdatum" also contains "datum"; exclude expiry headers so the course
    // date is never read from the expiry column.
    const datumIdx = header.findIndex(
      (h) => !isExpiryHeader(h) && (h.includes('datum') || h.includes('abschluss')),
    );
    const ablaufIdx = header.findIndex(isExpiryHeader);
    // 'ort' is a substring of many unrelated labels; prefer an exact "Ort"
    // header and never reuse the course-name column.
    const exactOrtIdx = header.indexOf('ort');
    const ortIdx =
      exactOrtIdx >= 0
        ? exactOrtIdx
        : header.findIndex((h, i) => i !== kursnameIdx && h.includes('ort'));
    const bemIdx = header.findIndex((h) => h.includes('bem') || h.includes('info'));

    for (const cells of dataRows) {
      const kursname = cellText(cells[kursnameIdx]);
      if (!kursname) continue;
      const kursDatum = parseDate(datumIdx >= 0 ? cells[datumIdx] : null);
      // Key built from member number, course name and course date (empty when unknown).
      const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;
      ausbildungen.push({
        standesbuchNr: member.standesbuchNr,
        kursname,
        kursDatum,
        ablaufdatum: parseDate(ablaufIdx >= 0 ? cells[ablaufIdx] : null),
        ort: ortIdx >= 0 ? cellText(cells[ortIdx]) : null,
        bemerkung: bemIdx >= 0 ? cellText(cells[bemIdx]) : null,
        syncKey,
      });
    }
    break; // only process the first Ausbildung table found
  }
  return ausbildungen;
}