# Patch summary (commentary only; patch tools skip text before the first "diff --git" header).
#
# Purpose: stop compiling the sync service inside Docker and ship prebuilt JavaScript instead.
#
#   .gitignore             keeps ignoring dist/ in general but re-includes sync/dist/ via a negation pattern.
#   sync/Dockerfile        drops the "builder" stage (global typescript install, tsc, npm prune); the
#                          Playwright image now runs `npm install --omit=dev` itself and copies ./dist
#                          from the build context.
#   sync/dist/db.js        new file: mapDienstgrad() lookup table and syncToDatabase(), which updates
#                          mitglieder_profile rows and upserts ausbildung rows inside one transaction
#                          (BEGIN / COMMIT, ROLLBACK on error, client released in finally).
#   sync/dist/index.js     new file: entry point; runs one sync at startup, then loops forever sleeping
#                          until the next local midnight and syncing again.
#   sync/dist/scraper.js   new file: Playwright scraper (login, member table, per-member Ausbildung table).
#   sync/dist/types.js     new file: empty CommonJS module stub.
#   sync/src/scraper.ts    adds `(c as Element)` casts in three DOM callbacks.
#   sync/tsconfig.json     adds "DOM" to compilerOptions.lib.
#
# NOTE(review): sync/dist/*.js is presumably the tsc output of sync/src (tsconfig has rootDir ./src,
# outDir ./dist, and the Dockerfile comment says dist/ is compiled locally) -- confirm it is rebuilt and
# re-committed whenever sync/src changes; nothing in this patch enforces that.
#
# NOTE(review): this copy was restored from a whitespace-collapsed paste. The line structure matches the
# hunk line counts, but leading indentation, the placement of the two blank context lines in the
# Dockerfile hunk, and the whitespace inside multi-line template literals are assumed -- verify against
# the repository before applying.
diff --git a/.gitignore b/.gitignore
index 38949b2..38e221d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,10 +10,11 @@ yarn-debug.log*
 yarn-error.log*
 pnpm-debug.log*
 
-# Build outputs
+# Build outputs (sync/dist is committed since server can't compile TypeScript)
 dist/
 build/
 out/
+!sync/dist/
 
 # Database
 *.sqlite
diff --git a/sync/Dockerfile b/sync/Dockerfile
index aa47ebb..5196fde 100644
--- a/sync/Dockerfile
+++ b/sync/Dockerfile
@@ -1,19 +1,11 @@
-# Stage 1: build TypeScript
-FROM node:20-slim AS builder
-WORKDIR /app
-RUN npm install -g typescript --registry https://registry.npmjs.org
-COPY package.json package-lock.json ./
-RUN npm install --registry https://registry.npmjs.org
-COPY tsconfig.json ./
-COPY src ./src
-RUN tsc && npm prune --production --registry https://registry.npmjs.org
-
-# Stage 2: runtime with Playwright
+# dist/ is compiled locally and committed to the repo.
+# The server npm proxy blocks devDependency installs inside Docker.
 FROM mcr.microsoft.com/playwright:v1.47.0-jammy
 WORKDIR /app
 RUN npx playwright install chromium --with-deps
 
-COPY --from=builder /app/node_modules ./node_modules
-COPY --from=builder /app/dist ./dist
+COPY package.json package-lock.json ./
+RUN npm install --omit=dev --registry https://registry.npmjs.org
+COPY dist ./dist
 
 CMD ["node", "dist/index.js"]
diff --git a/sync/dist/db.js b/sync/dist/db.js
new file mode 100644
index 0000000..1910234
--- /dev/null
+++ b/sync/dist/db.js
@@ -0,0 +1,135 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.syncToDatabase = syncToDatabase;
+function log(msg) {
+    console.log(`[db] ${new Date().toISOString()} ${msg}`);
+}
+/**
+ * Map FDISK Dienstgrad (abbreviation or full name) to the DB enum value.
+ * Returns null if no match found — the field will be left unchanged.
+ */
+function mapDienstgrad(raw) {
+    const map = {
+        // Abbreviations
+        'fa': 'Feuerwehranwärter',
+        'fm': 'Feuerwehrmann',
+        'ff': 'Feuerwehrfrau',
+        'ofm': 'Oberfeuerwehrmann',
+        'off': 'Oberfeuerwehrfrau',
+        'hfm': 'Hauptfeuerwehrmann',
+        'hff': 'Hauptfeuerwehrfrau',
+        'lm': 'Löschmeister',
+        'olm': 'Oberlöschmeister',
+        'hlm': 'Hauptlöschmeister',
+        'bm': 'Brandmeister',
+        'obm': 'Oberbrandmeister',
+        'hbm': 'Hauptbrandmeister',
+        'bi': 'Brandinspektor',
+        'obi': 'Oberbrandinspektor',
+        'boi': 'Brandoberinspektor',
+        'bam': 'Brandamtmann',
+        // Full names (pass-through if already matching)
+        'feuerwehranwärter': 'Feuerwehranwärter',
+        'feuerwehrmann': 'Feuerwehrmann',
+        'feuerwehrfrau': 'Feuerwehrfrau',
+        'oberfeuerwehrmann': 'Oberfeuerwehrmann',
+        'oberfeuerwehrfrau': 'Oberfeuerwehrfrau',
+        'hauptfeuerwehrmann': 'Hauptfeuerwehrmann',
+        'hauptfeuerwehrfrau': 'Hauptfeuerwehrfrau',
+        'löschmeister': 'Löschmeister',
+        'oberlöschmeister': 'Oberlöschmeister',
+        'hauptlöschmeister': 'Hauptlöschmeister',
+        'brandmeister': 'Brandmeister',
+        'oberbrandmeister': 'Oberbrandmeister',
+        'hauptbrandmeister': 'Hauptbrandmeister',
+        'brandinspektor': 'Brandinspektor',
+        'oberbrandinspektor': 'Oberbrandinspektor',
+        'brandoberinspektor': 'Brandoberinspektor',
+        'brandamtmann': 'Brandamtmann',
+    };
+    return map[raw.trim().toLowerCase()] ?? null;
+}
+async function syncToDatabase(pool, members, ausbildungen) {
+    const client = await pool.connect();
+    try {
+        await client.query('BEGIN');
+        let updated = 0;
+        let skipped = 0;
+        for (const member of members) {
+            // Find the matching mitglieder_profile by fdisk_standesbuch_nr first,
+            // then fall back to matching by name (given_name + family_name)
+            const profileResult = await client.query(`SELECT mp.user_id
+         FROM mitglieder_profile mp
+         WHERE mp.fdisk_standesbuch_nr = $1`, [member.standesbuchNr]);
+            let userId = null;
+            if (profileResult.rows.length > 0) {
+                userId = profileResult.rows[0].user_id;
+            }
+            else {
+                // Fallback: match by name (case-insensitive)
+                const nameResult = await client.query(`SELECT u.id
+           FROM users u
+           JOIN mitglieder_profile mp ON mp.user_id = u.id
+           WHERE LOWER(u.given_name) = LOWER($1)
+           AND LOWER(u.family_name) = LOWER($2)
+           LIMIT 1`, [member.vorname, member.zuname]);
+                if (nameResult.rows.length > 0) {
+                    userId = nameResult.rows[0].id;
+                    // Store the Standesbuch-Nr now that we found a match
+                    await client.query(`UPDATE mitglieder_profile SET fdisk_standesbuch_nr = $1 WHERE user_id = $2`, [member.standesbuchNr, userId]);
+                    log(`Linked ${member.vorname} ${member.zuname} → Standesbuch-Nr ${member.standesbuchNr}`);
+                }
+            }
+            if (!userId) {
+                skipped++;
+                continue;
+            }
+            // Update mitglieder_profile with FDISK data
+            const dienstgrad = mapDienstgrad(member.dienstgrad);
+            await client.query(`UPDATE mitglieder_profile SET
+         fdisk_standesbuch_nr = $1,
+         status = $2,
+         eintrittsdatum = COALESCE($3::date, eintrittsdatum),
+         austrittsdatum = $4::date,
+         geburtsdatum = COALESCE($5::date, geburtsdatum),
+         ${dienstgrad ? 'dienstgrad = $6,' : ''}
+         updated_at = NOW()
+         WHERE user_id = ${dienstgrad ? '$7' : '$6'}`, dienstgrad
+                ? [member.standesbuchNr, member.status, member.eintrittsdatum, member.abmeldedatum, member.geburtsdatum, dienstgrad, userId]
+                : [member.standesbuchNr, member.status, member.eintrittsdatum, member.abmeldedatum, member.geburtsdatum, userId]);
+            updated++;
+        }
+        log(`Members: ${updated} updated, ${skipped} skipped (no dashboard account)`);
+        // Upsert Ausbildungen
+        let ausbildungUpserted = 0;
+        let ausbildungSkipped = 0;
+        for (const ausb of ausbildungen) {
+            // Find user_id by standesbuch_nr
+            const result = await client.query(`SELECT user_id FROM mitglieder_profile WHERE fdisk_standesbuch_nr = $1`, [ausb.standesbuchNr]);
+            if (result.rows.length === 0) {
+                ausbildungSkipped++;
+                continue;
+            }
+            const userId = result.rows[0].user_id;
+            await client.query(`INSERT INTO ausbildung (user_id, kursname, kurs_datum, ablaufdatum, ort, bemerkung, fdisk_sync_key)
+         VALUES ($1, $2, $3::date, $4::date, $5, $6, $7)
+         ON CONFLICT (user_id, fdisk_sync_key) DO UPDATE SET
+         kursname = EXCLUDED.kursname,
+         kurs_datum = EXCLUDED.kurs_datum,
+         ablaufdatum = EXCLUDED.ablaufdatum,
+         ort = EXCLUDED.ort,
+         bemerkung = EXCLUDED.bemerkung,
+         updated_at = NOW()`, [userId, ausb.kursname, ausb.kursDatum, ausb.ablaufdatum, ausb.ort, ausb.bemerkung, ausb.syncKey]);
+            ausbildungUpserted++;
+        }
+        await client.query('COMMIT');
+        log(`Ausbildungen: ${ausbildungUpserted} upserted, ${ausbildungSkipped} skipped`);
+    }
+    catch (err) {
+        await client.query('ROLLBACK');
+        throw err;
+    }
+    finally {
+        client.release();
+    }
+}
diff --git a/sync/dist/index.js b/sync/dist/index.js
new file mode 100644
index 0000000..484f0a6
--- /dev/null
+++ b/sync/dist/index.js
@@ -0,0 +1,60 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+require("dotenv/config");
+const pg_1 = require("pg");
+const scraper_1 = require("./scraper");
+const db_1 = require("./db");
+function log(msg) {
+    console.log(`[sync] ${new Date().toISOString()} ${msg}`);
+}
+function requireEnv(name) {
+    const val = process.env[name];
+    if (!val)
+        throw new Error(`Missing required environment variable: ${name}`);
+    return val;
+}
+/** Returns milliseconds until the next midnight (00:00:00) in local time. */
+function msUntilMidnight() {
+    const now = new Date();
+    const midnight = new Date(now);
+    midnight.setDate(now.getDate() + 1);
+    midnight.setHours(0, 0, 0, 0);
+    return midnight.getTime() - now.getTime();
+}
+async function runSync() {
+    const username = requireEnv('FDISK_USERNAME');
+    const password = requireEnv('FDISK_PASSWORD');
+    const pool = new pg_1.Pool({
+        host: requireEnv('DB_HOST'),
+        port: parseInt(process.env.DB_PORT ?? '5432'),
+        database: requireEnv('DB_NAME'),
+        user: requireEnv('DB_USER'),
+        password: requireEnv('DB_PASSWORD'),
+    });
+    try {
+        log('Starting FDISK sync');
+        const { members, ausbildungen } = await (0, scraper_1.scrapeAll)(username, password);
+        await (0, db_1.syncToDatabase)(pool, members, ausbildungen);
+        log(`Sync complete — ${members.length} members, ${ausbildungen.length} Ausbildungen`);
+    }
+    finally {
+        await pool.end();
+    }
+}
+async function main() {
+    log('FDISK sync service started');
+    // Run once immediately on startup so the first sync doesn't wait until midnight
+    await runSync().catch(err => log(`ERROR during initial sync: ${err.message}`));
+    // Then schedule at midnight every day
+    while (true) {
+        const delay = msUntilMidnight();
+        const nextRun = new Date(Date.now() + delay);
+        log(`Next sync scheduled at ${nextRun.toLocaleString()} (in ${Math.round(delay / 60000)} min)`);
+        await new Promise(r => setTimeout(r, delay));
+        await runSync().catch(err => log(`ERROR during scheduled sync: ${err.message}`));
+    }
+}
+main().catch(err => {
+    console.error(`[sync] Fatal error: ${err.message}`);
+    process.exit(1);
+});
diff --git a/sync/dist/scraper.js b/sync/dist/scraper.js
new file mode 100644
index 0000000..4cf2ef6
--- /dev/null
+++ b/sync/dist/scraper.js
@@ -0,0 +1,229 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.scrapeAll = scrapeAll;
+const test_1 = require("@playwright/test");
+const BASE_URL = process.env.FDISK_BASE_URL ?? 'https://app.fdisk.at';
+const LOGIN_URL = `${BASE_URL}/fdisk/`;
+const MEMBERS_URL = `${BASE_URL}/fdisk/module/vws/Start.aspx`;
+function log(msg) {
+    console.log(`[scraper] ${new Date().toISOString()} ${msg}`);
+}
+/**
+ * Parse a date string from FDISK (DD.MM.YYYY) to ISO format (YYYY-MM-DD).
+ * Returns null if empty or unparseable.
+ */
+function parseDate(raw) {
+    if (!raw)
+        return null;
+    const trimmed = raw.trim();
+    if (!trimmed)
+        return null;
+    const match = trimmed.match(/^(\d{2})\.(\d{2})\.(\d{4})$/);
+    if (!match)
+        return null;
+    return `${match[3]}-${match[2]}-${match[1]}`;
+}
+/**
+ * Extract text content from a cell, trimmed, or null if empty.
+ */
+function cellText(text) {
+    const t = (text ?? '').trim();
+    return t || null;
+}
+async function scrapeAll(username, password) {
+    const browser = await test_1.chromium.launch({ headless: true });
+    const context = await browser.newContext({
+        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    });
+    const page = await context.newPage();
+    try {
+        await login(page, username, password);
+        const members = await scrapeMembers(page);
+        log(`Found ${members.length} members`);
+        const ausbildungen = [];
+        for (const member of members) {
+            if (!member.detailUrl)
+                continue;
+            try {
+                const quals = await scrapeMemberAusbildung(page, member);
+                ausbildungen.push(...quals);
+                log(` ${member.vorname} ${member.zuname}: ${quals.length} Ausbildungen`);
+                // polite delay between requests
+                await page.waitForTimeout(500);
+            }
+            catch (err) {
+                log(` WARN: could not scrape Ausbildung for ${member.vorname} ${member.zuname}: ${err}`);
+            }
+        }
+        return { members, ausbildungen };
+    }
+    finally {
+        await browser.close();
+    }
+}
+async function login(page, username, password) {
+    log(`Navigating to ${LOGIN_URL}`);
+    await page.goto(LOGIN_URL, { waitUntil: 'networkidle' });
+    // ASP.NET WebForms login — try common selector patterns
+    // Adjust these selectors if login fails
+    const usernameField = page.locator('input[type="text"], input[name*="user"], input[name*="User"], input[id*="user"], input[id*="User"]').first();
+    const passwordField = page.locator('input[type="password"]').first();
+    await usernameField.fill(username);
+    await passwordField.fill(password);
+    // Submit — look for a login/submit button
+    const submitButton = page.locator('input[type="submit"], button[type="submit"]').first();
+    await Promise.all([
+        page.waitForNavigation({ waitUntil: 'networkidle' }),
+        submitButton.click(),
+    ]);
+    // Verify we're logged in by checking we're not still on the login page
+    const currentUrl = page.url();
+    if (currentUrl.includes('login') || currentUrl.includes('Login') || currentUrl === LOGIN_URL) {
+        throw new Error(`Login failed — still on login page: ${currentUrl}`);
+    }
+    log(`Logged in successfully, redirected to: ${currentUrl}`);
+}
+async function scrapeMembers(page) {
+    log(`Navigating to members list: ${MEMBERS_URL}`);
+    await page.goto(MEMBERS_URL, { waitUntil: 'networkidle' });
+    // Wait for the member table to appear
+    // ASP.NET GridView renders as an HTML table — find the data table
+    await page.waitForSelector('table', { timeout: 15000 });
+    // Find the main data table (likely the one with the most rows)
+    // Columns: Status, St.-Nr., Dienstgrad, Vorname, Zuname, Geburtsdatum, SVNR, Eintrittsdatum, Abmeldedatum
+    const rows = await page.$$eval('table tr', (rows) => {
+        return rows.map(row => {
+            const cells = Array.from(row.querySelectorAll('td'));
+            const link = row.querySelector('a');
+            return {
+                cells: cells.map(c => c.textContent?.trim() ?? ''),
+                href: link?.href ?? null,
+                onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null,
+            };
+        });
+    });
+    // Find the header row to determine column positions
+    const headerRow = await page.$eval('table tr:first-child', (row) => {
+        const cells = Array.from(row.querySelectorAll('th, td'));
+        return cells.map(c => c.textContent?.trim().toLowerCase() ?? '');
+    });
+    // Detect column indices from headers
+    const colIdx = {
+        status: headerRow.findIndex(h => h.includes('status')),
+        standesbuchNr: headerRow.findIndex(h => h.includes('st.-nr') || h.includes('stnr') || h.includes('nr')),
+        dienstgrad: headerRow.findIndex(h => h.includes('dienstgrad')),
+        vorname: headerRow.findIndex(h => h.includes('vorname')),
+        zuname: headerRow.findIndex(h => h.includes('zuname') || h.includes('nachname')),
+        geburtsdatum: headerRow.findIndex(h => h.includes('geburt')),
+        svnr: headerRow.findIndex(h => h.includes('svnr') || h.includes('sv-nr')),
+        eintrittsdatum: headerRow.findIndex(h => h.includes('eintritt')),
+        abmeldedatum: headerRow.findIndex(h => h.includes('abmeld')),
+    };
+    log(`Detected columns: ${JSON.stringify(colIdx)}`);
+    // Fallback to positional columns if detection failed
+    // Based on screenshot: Status(0), St.-Nr.(1), Dienstgrad(2), Vorname(3), Zuname(4),
+    // Geburtsdatum(5), SVNR(6), Eintrittsdatum(7), Abmeldedatum(8)
+    if (colIdx.standesbuchNr === -1)
+        colIdx.standesbuchNr = 1;
+    if (colIdx.dienstgrad === -1)
+        colIdx.dienstgrad = 2;
+    if (colIdx.vorname === -1)
+        colIdx.vorname = 3;
+    if (colIdx.zuname === -1)
+        colIdx.zuname = 4;
+    if (colIdx.geburtsdatum === -1)
+        colIdx.geburtsdatum = 5;
+    if (colIdx.svnr === -1)
+        colIdx.svnr = 6;
+    if (colIdx.eintrittsdatum === -1)
+        colIdx.eintrittsdatum = 7;
+    if (colIdx.abmeldedatum === -1)
+        colIdx.abmeldedatum = 8;
+    const members = [];
+    for (const row of rows) {
+        const { cells, href, onclick } = row;
+        // Skip header rows and empty rows
+        if (cells.length < 5)
+            continue;
+        const stnr = cellText(cells[colIdx.standesbuchNr]);
+        const vorname = cellText(cells[colIdx.vorname]);
+        const zuname = cellText(cells[colIdx.zuname]);
+        if (!stnr || !vorname || !zuname)
+            continue;
+        const abmeldedatum = parseDate(cells[colIdx.abmeldedatum]);
+        members.push({
+            standesbuchNr: stnr,
+            dienstgrad: cellText(cells[colIdx.dienstgrad]) ?? '',
+            vorname,
+            zuname,
+            geburtsdatum: parseDate(cells[colIdx.geburtsdatum]),
+            svnr: cellText(cells[colIdx.svnr]),
+            eintrittsdatum: parseDate(cells[colIdx.eintrittsdatum]),
+            abmeldedatum,
+            status: abmeldedatum ? 'ausgetreten' : 'aktiv',
+            detailUrl: href,
+        });
+    }
+    return members;
+}
+async function scrapeMemberAusbildung(page, member) {
+    if (!member.detailUrl)
+        return [];
+    await page.goto(member.detailUrl, { waitUntil: 'networkidle' });
+    // Look for Ausbildungsliste section — it's likely a table or list
+    // Try to find it by heading text
+    const ausbildungSection = page.locator('text=Ausbildung, text=Ausbildungsliste').first();
+    const hasSec = await ausbildungSection.isVisible().catch(() => false);
+    if (!hasSec) {
+        // Try navigating to an Ausbildung tab/link if present
+        const ausbildungLink = page.locator('a:has-text("Ausbildung")').first();
+        const hasLink = await ausbildungLink.isVisible().catch(() => false);
+        if (hasLink) {
+            await Promise.all([
+                page.waitForNavigation({ waitUntil: 'networkidle' }).catch(() => { }),
+                ausbildungLink.click(),
+            ]);
+        }
+    }
+    // Parse the qualification table
+    // Expected columns: Kursname, Datum, Ablaufdatum, Ort, Bemerkung (may vary)
+    const tables = await page.$$('table');
+    const ausbildungen = [];
+    for (const table of tables) {
+        const rows = await table.$$eval('tr', (rows) => {
+            return rows.map(row => ({
+                cells: Array.from(row.querySelectorAll('td, th')).map(c => c.textContent?.trim() ?? ''),
+            }));
+        });
+        if (rows.length < 2)
+            continue;
+        // Detect if this looks like an Ausbildung table
+        const header = rows[0].cells.map(c => c.toLowerCase());
+        const isAusbildungTable = header.some(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
+        if (!isAusbildungTable)
+            continue;
+        const kursnameIdx = header.findIndex(h => h.includes('kurs') || h.includes('ausbildung') || h.includes('bezeichnung'));
+        const datumIdx = header.findIndex(h => h.includes('datum') || h.includes('abschluss'));
+        const ablaufIdx = header.findIndex(h => h.includes('ablauf') || h.includes('gültig'));
+        const ortIdx = header.findIndex(h => h.includes('ort'));
+        const bemIdx = header.findIndex(h => h.includes('bem') || h.includes('info'));
+        for (const row of rows.slice(1)) {
+            const kursname = cellText(row.cells[kursnameIdx >= 0 ? kursnameIdx : 0]);
+            if (!kursname)
+                continue;
+            const kursDatum = parseDate(datumIdx >= 0 ? row.cells[datumIdx] : null);
+            const syncKey = `${member.standesbuchNr}::${kursname}::${kursDatum ?? ''}`;
+            ausbildungen.push({
+                standesbuchNr: member.standesbuchNr,
+                kursname,
+                kursDatum,
+                ablaufdatum: parseDate(ablaufIdx >= 0 ? row.cells[ablaufIdx] : null),
+                ort: ortIdx >= 0 ? cellText(row.cells[ortIdx]) : null,
+                bemerkung: bemIdx >= 0 ? cellText(row.cells[bemIdx]) : null,
+                syncKey,
+            });
+        }
+        break; // only process the first Ausbildung table found
+    }
+    return ausbildungen;
+}
diff --git a/sync/dist/types.js b/sync/dist/types.js
new file mode 100644
index 0000000..c8ad2e5
--- /dev/null
+++ b/sync/dist/types.js
@@ -0,0 +1,2 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
diff --git a/sync/src/scraper.ts b/sync/src/scraper.ts
index e6466af..097d584 100644
--- a/sync/src/scraper.ts
+++ b/sync/src/scraper.ts
@@ -107,7 +107,7 @@ async function scrapeMembers(page: Page): Promise {
       const cells = Array.from(row.querySelectorAll('td'));
       const link = row.querySelector('a');
       return {
-        cells: cells.map(c => c.textContent?.trim() ?? ''),
+        cells: cells.map(c => (c as Element).textContent?.trim() ?? ''),
         href: link?.href ?? null,
         onclick: link?.getAttribute('onclick') ?? row.getAttribute('onclick') ?? null,
       };
@@ -117,7 +117,7 @@ async function scrapeMembers(page: Page): Promise {
   // Find the header row to determine column positions
   const headerRow = await page.$eval('table tr:first-child', (row) => {
     const cells = Array.from(row.querySelectorAll('th, td'));
-    return cells.map(c => c.textContent?.trim().toLowerCase() ?? '');
+    return cells.map(c => (c as Element).textContent?.trim().toLowerCase() ?? '');
   });
 
   // Detect column indices from headers
@@ -207,7 +207,7 @@ async function scrapeMemberAusbildung(page: Page, member: FdiskMember): Promise<
   for (const table of tables) {
     const rows = await table.$$eval('tr', (rows) => {
       return rows.map(row => ({
-        cells: Array.from(row.querySelectorAll('td, th')).map(c => c.textContent?.trim() ?? ''),
+        cells: Array.from(row.querySelectorAll('td, th')).map(c => (c as Element).textContent?.trim() ?? ''),
       }));
     });
 
diff --git a/sync/tsconfig.json b/sync/tsconfig.json
index df3ba65..65ebe6c 100644
--- a/sync/tsconfig.json
+++ b/sync/tsconfig.json
@@ -2,7 +2,7 @@
   "compilerOptions": {
     "target": "ES2020",
     "module": "commonjs",
-    "lib": ["ES2020"],
+    "lib": ["ES2020", "DOM"],
     "outDir": "./dist",
     "rootDir": "./src",
     "strict": true,