138 lines
4.3 KiB
Bash
138 lines
4.3 KiB
Bash
#!/bin/bash
|
||
# Lexware auto-restore via shadow-database swap
# Procedure per database:
# 1. Restore into {db}_shadow (the live DB stays readable)
# 2. Terminate connections to the live DB
# 3. Rename: {db} → {db}_old (< 100 ms downtime)
# 4. Rename: {db}_shadow → {db}
# 5. Drop {db}_old
|
||
|
||
# Directory that is searched for dump files matching "<db>_*.dump".
DUMP_DIR="/opt/lexware-dumps"
# Directory with one stamp file per database; each stamp stores the path of
# the dump that was restored last.
STAMP_DIR="${DUMP_DIR}/.last_restore"
# Log file that receives the script's log lines and tool error output.
LOG_FILE="/var/log/lexware-restore.log"
# Space-separated list of databases to process; RESTORE_DB overrides the
# default list when it is set and non-empty.
DATABASES="${RESTORE_DB:-f1 f2 lexkonto lexkk rk lxoffice lx lxcatalog}"
# Number of dumps per database that the retention step keeps.
KEEP_DUMPS=5
|
||
|
||
#######################################
# Write a timestamped message to stdout and append it to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $@ - message words; joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOG_FILE
#######################################
log() {
  # printf '%s\n' emits the message verbatim regardless of its content.
  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}
|
||
|
||
#######################################
# Run a single SQL command through psql as the "postgres" OS user.
# Globals:   LOG_FILE (read) - psql's stderr is appended to this file
# Arguments: $1 - SQL text passed to psql -c
# Outputs:   psql's stdout is passed through unchanged
# Returns:   exit status of the sudo/psql invocation
#######################################
psql_c() {
  # -X skips any .psqlrc so the call behaves the same for every invoker.
  # ON_ERROR_STOP=1 makes psql stop and return non-zero when the SQL fails.
  # The stderr redirection here takes precedence over a caller's own
  # "2>/dev/null" for psql's messages; they always end up in LOG_FILE.
  sudo -u postgres psql -X -v ON_ERROR_STOP=1 -c "$1" 2>>"$LOG_FILE"
}
|
||
|
||
# Exclusive lock: prevents restore runs from executing in parallel.
# File descriptor 9 stays open for the rest of the script, so the lock is
# held until the script (and anything that inherited the descriptor) ends.
# In bash's default mode a failed redirection on "exec" only returns
# non-zero; handle it explicitly so it is not mistaken for a held lock.
exec 9>/var/lock/lexware-restore.lock || {
  log "ERROR Lock-Datei /var/lock/lexware-restore.lock kann nicht geöffnet werden"
  exit 1
}
if ! flock -n 9; then
  # Another run holds the lock; this is not an error, so exit successfully.
  log "SKIP Restore läuft bereits (Lock belegt)"
  exit 0
fi
|
||
|
||
#######################################
# Restore one dump into "<db>_shadow" and swap it in for the live database.
# Steps: recreate the shadow DB, pg_restore into it, terminate connections
# to the live DB, rename live -> "<db>_old", rename shadow -> live, then
# drop "<db>_old" in the background.
# Globals:   LOG_FILE (read) - receives pg_restore's stderr
# Arguments: $1 - name of the live database
#            $2 - path of the dump file handed to pg_restore
# Returns:   0 when the swap completed, 1 on any failure
#######################################
restore_one() {
  local db="$1"
  local dump="$2"
  local shadow="${db}_shadow"
  local old="${db}_old"
  local tmplog rc real_errors

  # Remove a shadow database left over from an earlier failed run.
  psql_c "DROP DATABASE IF EXISTS \"${shadow}\";" 2>/dev/null

  # Create a fresh, empty shadow database from template0 with fixed
  # encoding/locale (presumably matching the live DB -- verify on the server).
  if ! psql_c "CREATE DATABASE \"${shadow}\" ENCODING 'UTF8' LC_COLLATE 'de_DE.UTF-8' LC_CTYPE 'de_DE.UTF-8' TEMPLATE template0;"; then
    log "ERROR $db – Shadow-DB anlegen fehlgeschlagen"
    return 1
  fi

  # pg_restore's stderr is captured separately so it can be classified below.
  if ! tmplog=$(mktemp); then
    log "ERROR $db – mktemp fehlgeschlagen, Shadow wird verworfen"
    psql_c "DROP DATABASE IF EXISTS \"${shadow}\";" 2>/dev/null
    return 1
  fi

  # Restore into the shadow DB; --clean is not needed because it is empty.
  sudo -u postgres pg_restore -d "$shadow" "$dump" 2>"$tmplog"
  rc=$?
  cat "$tmplog" >> "$LOG_FILE"

  # Exit status 1 may consist solely of tolerated errors (encoding problems,
  # failed COPYs, missing roles). Count the error lines that are NOT on the
  # tolerated list; any such line makes the restore a failure.
  real_errors=0
  if (( rc == 1 )); then
    real_errors=$(grep '^pg_restore: error:' "$tmplog" \
      | grep -vc 'encoding\|COPY failed\|does not exist.*role\|lxuser\|lxgarinnrole\|altertillattbruker')
  fi
  rm -f -- "$tmplog"

  if (( rc > 1 )); then
    log "ERROR $db – pg_restore exit $rc, Shadow wird verworfen"
    psql_c "DROP DATABASE IF EXISTS \"${shadow}\";" 2>/dev/null
    return 1
  elif (( rc == 1 )); then
    if (( real_errors > 0 )); then
      log "ERROR $db – Restore fehlgeschlagen ($real_errors kritische Fehler), Shadow wird verworfen"
      psql_c "DROP DATABASE IF EXISTS \"${shadow}\";" 2>/dev/null
      return 1
    fi
    log "WARN $db – Restore mit harmlosen Warnungen (Encoding/Rollen in Quelldaten)"
  fi

  # Swap: terminate connections, then perform two quick renames.
  log "SWAP $db – Verbindungen trennen und umbenennen"

  psql_c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${db}' AND pid <> pg_backend_pid();" 2>/dev/null

  # Give the terminated backends a moment to actually disappear.
  sleep 0.2

  if ! psql_c "ALTER DATABASE \"${db}\" RENAME TO \"${old}\";"; then
    log "ERROR $db – Rename live→old fehlgeschlagen, Shadow bleibt erhalten"
    return 1
  fi

  if ! psql_c "ALTER DATABASE \"${shadow}\" RENAME TO \"${db}\";"; then
    # At this point no database carries the live name. Try to put the old
    # one back before falling back to manual instructions.
    if psql_c "ALTER DATABASE \"${old}\" RENAME TO \"${db}\";"; then
      log "ERROR $db – Rename shadow→live fehlgeschlagen, Rollback auf alte DB erfolgreich"
    else
      log "ERROR $db – Rename shadow→live fehlgeschlagen! Manuell zurückbauen:"
      log "ERROR $db – ALTER DATABASE \"${old}\" RENAME TO \"${db}\";"
    fi
    return 1
  fi

  # Drop the old database in the background; the live name is already
  # served by the freshly restored database at this point.
  ( psql_c "DROP DATABASE IF EXISTS \"${old}\";" 2>/dev/null ) &

  return 0
}
|
||
|
||
#######################################
# Print the dump path for a database that sorts last by file name.
# Globals:   DUMP_DIR (read)
# Arguments: $1 - database name; files "<DUMP_DIR>/<db>_*.dump" are considered
# Outputs:   the selected path on stdout, or nothing when no dump exists
#######################################
latest_dump() {
  local db="$1"
  local -a dumps=("${DUMP_DIR}/${db}_"*.dump)
  # An unmatched glob stays literal (or is empty under nullglob); the
  # existence test filters out both cases.
  [[ -e "${dumps[0]}" ]] || return 0
  printf '%s\n' "${dumps[@]}" | sort | tail -n 1
}

# Restore every configured database whose newest dump was not restored yet.
# DATABASES is a space-separated list, so word splitting is intended here.
# shellcheck disable=SC2086
for db in $DATABASES; do
  latest=$(latest_dump "$db")

  if [[ -z "$latest" ]]; then
    log "SKIP $db – kein Dump gefunden"
    continue
  fi

  # The stamp file holds the path of the dump that was restored last.
  stamp_file="${STAMP_DIR}/${db}"
  last_restore=""
  [[ -f "$stamp_file" ]] && last_restore=$(<"$stamp_file")

  if [[ "$latest" == "$last_restore" ]]; then
    log "SKIP $db – $latest bereits eingespielt"
    continue
  fi

  # --- Logical replication guard ---
  # Skip the dump restore when subscription "lx_sub_<db>" reports a
  # received LSN. A failing query yields empty output and is treated as
  # "no replication".
  repl_active=$(sudo -u postgres psql -d "$db" -tAc \
    "SELECT COUNT(*) FROM pg_stat_subscription WHERE subname = 'lx_sub_'||'$db' AND received_lsn IS NOT NULL;" 2>/dev/null | tr -d ' \t\n')
  if [[ "$repl_active" =~ ^[0-9]+$ ]] && (( 10#$repl_active > 0 )); then
    log "SKIP $db -- Logical Replication aktiv (kein Dump-Restore noetig)"
    continue
  fi
  # --- End of guard ---

  log "START $db – $latest"

  if restore_one "$db" "$latest"; then
    # Make sure the stamp directory exists; without the stamp the same dump
    # would be restored again on every run.
    if ! { mkdir -p -- "$STAMP_DIR" && printf '%s\n' "$latest" > "$stamp_file"; }; then
      log "WARN $db – Stamp-Datei $stamp_file konnte nicht geschrieben werden"
    fi
    log "OK $db – swap abgeschlossen"
  fi
done
|
||
|
||
#######################################
# Delete all but the most recently modified dumps of one database.
# Globals:   DUMP_DIR (read), KEEP_DUMPS (read, default for $2)
# Arguments: $1 - database name; files "<DUMP_DIR>/<db>_*.dump" are considered
#            $2 - number of dumps to keep (optional, defaults to KEEP_DUMPS)
# Returns:   1 without deleting anything when the keep count is not a
#            non-negative integer; otherwise the pipeline's status
#######################################
prune_dumps() {
  local db="$1"
  local keep="${2:-${KEEP_DUMPS:-}}"

  # An empty or non-numeric count would make the arithmetic below evaluate
  # to 1 and delete every dump, so refuse to run in that case.
  [[ "$keep" =~ ^[0-9]+$ ]] || return 1

  # ls -t lists newest first; tail skips the first $keep entries. xargs
  # reads one file name per line so names with spaces or quotes survive.
  ls -t -- "${DUMP_DIR}/${db}_"*.dump 2>/dev/null \
    | tail -n "+$((10#$keep + 1))" \
    | xargs -r -d '\n' rm -f --
}

# Retention: keep only the newest KEEP_DUMPS dumps per database.
# DATABASES is a space-separated list, so word splitting is intended here.
# shellcheck disable=SC2086
for db in $DATABASES; do
  prune_dumps "$db"
done