commit ff2754e440d35e2eace2190c9726f0afa58d5479
Author: kitsunecafe
Date:   Thu Dec 19 00:16:41 2024 -0600

    initial

diff --git a/Containerfile b/Containerfile
new file mode 100644
index 0000000..6c060cf
--- /dev/null
+++ b/Containerfile
@@ -0,0 +1,7 @@
+FROM docker.io/library/python:3.13.1-alpine3.21
+# chain with && so a failed apk install fails the build instead of being masked by pip
+RUN apk --update add postgresql xz && pip install b2
+WORKDIR /app
+COPY dump-db.sh archive.sh sync-b2.sh entrypoint.sh ./
+ENTRYPOINT ["./entrypoint.sh"]
+
diff --git a/archive.sh b/archive.sh
new file mode 100755
index 0000000..2ec4a1e
--- /dev/null
+++ b/archive.sh
@@ -0,0 +1,189 @@
+#!/usr/bin/env sh
+
+# printc(args, description)
+# Print one help row: a space, the option text left-aligned in a 20 column
+# field, then the description. printf does the padding, so option text
+# longer than the field is simply printed unpadded.
+printc() {
+  printf ' %-20s%s\n' "$1" "$2"
+}
+
+# usage()
+# Display usage information
+usage() {
+  echo "usage: tartar [-0..9] [--level N] [--format F] DIR"
+  echo "Creates a compressed tape archive of every subdirectory in a given directory"
+  echo
+  echo "positional arguments:"
+  printc "DIR" "the directory to archive"
+  echo
+  echo "parameters:"
+  printc "-f, --format" "the compression format to use. default: xz"
+  printf '%44s\n' "options: gzip, xz"
+  printc "-0..-9, --level" "the level of compression to use: default 6"
+  echo
+  echo "environment variables:"
+  printc "ARCHIVE_DIR" "same as positional argument DIR"
+  printc "COMPRESSION_FORMAT" "same as --format"
+  printc "COMPRESSION_LEVEL" "same as --level"
+}
+
+# includes(needle, ...candidates)
+# Return 0 when needle equals one of the candidates, 1 otherwise
+includes() {
+  needle="$1"
+  shift
+  for candidate in "$@"; do
+    if test "$needle" = "$candidate"; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+FORMATS="gzip xz"
+LEVELS="0 1 2 3 4 5 6 7 8 9"
+
+# remember the documented environment overrides before the defaults replace
+# them; they are validated and applied just before parse_args runs
+env_format="${COMPRESSION_FORMAT:-}"
+env_level="${COMPRESSION_LEVEL:-}"
+
+COMPRESSION_FORMAT="xz"
+COMPRESSION_LEVEL="-6"
+
+# set_format(name)
+# Select the compressor when name is listed in FORMATS; otherwise warn on
+# stderr and keep the current one
+set_format() {
+  # FORMATS is unquoted on purpose: each word is a separate candidate
+  # shellcheck disable=SC2086
+  if includes "$1" $FORMATS; then
+    COMPRESSION_FORMAT="$1"
+  else
+    >&2 echo "ignoring unsupported format '$1'; using $COMPRESSION_FORMAT"
+  fi
+}
+
+# set_level(n)
+# Select compression level -n when n is listed in LEVELS; otherwise warn on
+# stderr and keep the current one
+set_level() {
+  # LEVELS is unquoted on purpose: each word is a separate candidate
+  # shellcheck disable=SC2086
+  if includes "$1" $LEVELS; then
+    COMPRESSION_LEVEL="-$1"
+  else
+    >&2 echo "ignoring unsupported level '$1'; using $COMPRESSION_LEVEL"
+  fi
+}
+
+# archive_ext()
+# Print the file extension that matches COMPRESSION_FORMAT
+archive_ext() {
+  case $COMPRESSION_FORMAT in
+    gzip) echo "gz" ;;
+    *) echo "$COMPRESSION_FORMAT" ;;
+  esac
+}
+
+# parse_args(...args)
+# Apply command line options and keep the first positional argument,
+# verbatim, in POSITIONAL_ARGS
+parse_args() {
+  POSITIONAL_ARGS=""
+
+  # parse args
+  while [ $# -gt 0 ]; do
+    case $1 in
+      -f|--format)
+        set_format "$2"
+        shift
+        shift
+        ;;
+      --level)
+        set_level "$2"
+        shift
+        shift
+        ;;
+      -0|-1|-2|-3|-4|-5|-6|-7|-8|-9)
+        COMPRESSION_LEVEL="$1"
+        shift
+        ;;
+      -h|--help)
+        usage
+        exit 0
+        ;;
+      *)
+        # DIR is the only positional argument; keeping the first one as-is
+        # (rather than joining everything and re-splitting on spaces) lets
+        # paths that contain spaces through
+        if test -z "$POSITIONAL_ARGS"; then
+          POSITIONAL_ARGS="$1"
+        fi
+        shift # past argument
+        ;;
+    esac
+  done
+}
+
+# can_archive(dir)
+# Return 0 when dir holds a finished dump (backup.done) that still needs
+# compressing: no checksum recorded, no archive next to it, or the archive
+# no longer matches the recorded checksum. Return 1 otherwise.
+can_archive() {
+  if test ! -f "${1}/backup.done"; then
+    echo "skipping $1; it is either incomplete or corrupted"
+    return 1
+  fi
+
+  # -c with its output discarded works with busybox and GNU sha1sum alike;
+  # the short -s flag is not accepted by GNU sha1sum
+  if test ! -f "${1}/compression.done" || \
+    test ! -f "${1}.tar.$(archive_ext)" || \
+    ! sha1sum -c "${1}/compression.done" >/dev/null 2>&1; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# environment overrides go first so command line options can replace them
+if test -n "$env_format"; then
+  set_format "$env_format"
+fi
+if test -n "$env_level"; then
+  set_level "${env_level#-}"
+fi
+
+parse_args "$@"
+
+# the positional DIR takes precedence over ARCHIVE_DIR
+dir=${POSITIONAL_ARGS:-"$ARCHIVE_DIR"}
+
+if test -z "$dir"; then
+  usage >&2
+  exit 1
+fi
+
+cd "$dir" || exit 1
+
+ext=$(archive_ext)
+
+for dir in ./*/; do
+  # an unmatched glob is left as the literal pattern; skip it
+  test -d "$dir" || continue
+  dir="${dir%/}"
+  if can_archive "$dir"; then
+    fname="$(basename "$dir").tar.${ext}"
+    # record the checksum only when the compressor succeeded, so a failed
+    # run is retried next time instead of being marked as done
+    if tar cv --exclude='*.done' "$dir" | "$COMPRESSION_FORMAT" "$COMPRESSION_LEVEL" > "$fname"; then
+      sha1sum "$fname" > "${dir}/compression.done"
+    else
+      >&2 echo "failed to compress $dir"
+      rm -f -- "$fname"
+    fi
+  fi
+done
+
diff --git a/dump-db.sh b/dump-db.sh
new file mode 100755
index 0000000..aa73d46
--- /dev/null
+++ b/dump-db.sh
@@ -0,0 +1,310 @@
+#!/usr/bin/env sh
+
+# printc(args, description)
+# Print one help row: a space, the option text left-aligned in a 20 column
+# field, then the description. printf does the padding, so option text
+# longer than the field is simply printed unpadded.
+printc() {
+  printf ' %-20s%s\n' "$1" "$2"
+}
+
+# usage()
+# Display usage information
+usage() {
+  echo "usage: pg_archive"
+  echo "Archives all databases and globals from a Postgres database"
+  echo
+  echo "The postgresql client must be installed for this to function"
+  echo "Either PGPASSFILE, PGPASSWORD, --passfile, or --password is"
+  echo "required, unless if the database is unsecured. Which is bad."
+  echo
+  echo "From least to most, the order of precedence for variables are"
+  echo " * environment variable"
+  echo " * PGPASSFILE"
+  echo " * DB_USER, DB_HOST, PGPASSWORD"
+  echo " * parameter"
+  echo " * --passfile"
+  echo " * --user, --host, --password"
+  echo
+  echo "parameters:"
+  printc "-u, --user" "the postgres user for login. default: postgres"
+  printc "-H, --host" "the postgres hostname. overrides --passfile host"
+  printc "-p, --passfile" "the .pgpass to use"
+  #printc "-f, --format" "the output format to use. default: d"
+  #printf '%20s' 'options: [p]lain, [c]ustom, [d]irectory, [t]ar'
+  #printc "-f, --force" "override existing archives of the same date"
+  printc "-c, --concurrency" "number of concurrent processes to use. default: 2"
+  printc "-w, --workers" "number of workers to use. default: 5"
+  printc "-d, --date" "date format when making directories. default %Y-%m-%d"
+  printc "-a, --dir" "the archive directory path. default: ./archive"
+  printc "-h, --help" "display this message and exit"
+  echo
+  echo "environment variables:"
+  printc "DB_USER" "same as --user"
+  printc "DB_HOST" "same as --host"
+  printc "PGPASSWORD" "same as --password"
+  printc "PGPASSFILE" "same as --passfile"
+  printc "CONCURRENCY" "same as --concurrency"
+  printc "WORKERS" "same as --workers"
+  printc "DATE" "same as --date"
+  printc "ARCHIVE_DIR" "same as --dir"
+}
+
+# set_defaults()
+# Set sane defaults to as many options as possible
+set_defaults() {
+  DB_USER=${DB_USER:-"postgres"}
+
+  #RETENTION_AMOUNT=3
+  CONCURRENCY=${CONCURRENCY:-2}
+  WORKERS=${WORKERS:-5}
+  DATE=${DATE:-'%Y-%m-%d'}
+  FORCE=${FORCE:-0}
+  # :- rather than - so an empty ARCHIVE_DIR also falls back to the default
+  ARCHIVE_DIR=${ARCHIVE_DIR:-"./archive"}
+}
+
+# verify_env(var_name)
+# Verify whether a variable is set and non-empty based on its string name
+#
+# Usage:
+#   verify_env "KBITY" # 1
+#   export KBITY=":3"
+#   verify_env "KBITY" # 0
+verify_env() {
+  eval var="\$$1"
+
+  if test -z "$var"; then
+    >&2 echo "$1 not set."
+    return 1
+  fi
+}
+
+# verify_file(var_name)
+# Verify that <var_name>_FILE is set and names an existing regular file
+# Usage:
+#   export KBITY_FILE="test/file"
+#   verify_file "KBITY" # 0 if test/file exists, 1 otherwise
+verify_file() {
+  eval file="\$${1}_FILE"
+  if verify_env "${1}_FILE" && test -f "$file"; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# verify_cmd(...args)
+# Verify the existence of a command or any aliases
+# Usage:
+#   verify_cmd ls not-ls # 0
+#   verify_cmd not-ls # 1
+verify_cmd() {
+  for cmd in "$@"; do
+    # stdout is redirected first so that 2>&1 sends stderr to /dev/null too
+    if command -v "$cmd" >/dev/null 2>&1; then
+      return 0
+    fi
+  done
+  >&2 echo "$1 not found"
+  return 1
+}
+
+# parse_pgpass_file(path)
+# Fill DB_HOST and DB_USER, when they are still empty, from the first entry
+# of a .pgpass file. A "*" wildcard field is treated as "no value".
+# Escaped colons (\:) inside fields are not handled.
+# Returns 1 when path is not a regular file.
+# Usage:
+#   echo 'db.example:5432:*:postgres:secret' > .pgpass
+#   parse_pgpass_file .pgpass
+#   echo $DB_HOST $DB_USER # "db.example postgres"
+parse_pgpass_file() {
+  if ! test -f "$1"; then
+    return 1
+  fi
+
+  # only the first entry is used; comment and blank lines are skipped so a
+  # file with several lines does not yield multi-line values
+  entry=$( grep -v -e '^[[:space:]]*#' -e '^[[:space:]]*$' "$1" | head -n 1 )
+  host=$( printf '%s\n' "$entry" | cut -d':' -f1 )
+  user=$( printf '%s\n' "$entry" | cut -d':' -f4 )
+
+  if test "$host" = "*"; then
+    host=""
+  fi
+  if test "$user" = "*"; then
+    user=""
+  fi
+
+  DB_HOST=${DB_HOST:-$host}
+  DB_USER=${DB_USER:-$user}
+}
+
+# set_env(var_name, value)
+# Set a variable based on its string name
+# Usage:
+#   set_env "THE_FORP" "1514"
+set_env() {
+  # \$2 is expanded by eval itself, as the right-hand side of an assignment,
+  # so the value is never word-split or executed
+  eval "$1=\$2"
+}
+
+# get_env(...var_names)
+# Get the value of an environment variable, or
+# if it is appended with _FILE, get the value
+# of that file (this is a Docker/Podman
+# convention)
+# Returns 1 when any of the names could not be resolved
+# Usage:
+#   export PUPY="wraf!"
+#   get_env "PUPY" # 0
+#   get_env "KBITY" # 1
+#   echo "mrrp :3" > kbity.txt
+#   export KBITY_FILE="kbity.txt"
+#   get_env "KBITY" # 0
+get_env() {
+  missing=0
+  for v in "$@"; do
+    if verify_env "$v" 1>/dev/null; then
+      continue
+    elif verify_file "$v" 1>/dev/null; then
+      eval file="\$${v}_FILE"
+      # name and value are two separate arguments
+      set_env "$v" "$(cat "$file")"
+      continue
+    fi
+
+    missing=1
+    >&2 echo "${v} not found"
+  done
+
+  return "$missing"
+}
+
+# parse_pg_connection()
+# Resolve DB_HOST and DB_USER from the environment, *_FILE variables or the
+# .pgpass file, exiting when they cannot be determined
+parse_pg_connection() {
+  PGPASSFILE=${PGPASSFILE:-"$HOME/.pgpass"}
+
+  # first pass is silent: it only loads *_FILE values before .pgpass is read
+  get_env "DB_HOST" "DB_USER" 2>/dev/null
+  if ! parse_pgpass_file "$PGPASSFILE" && test -z "$PGPASSWORD"; then
+    >&2 echo "$PGPASSFILE not found and PGPASSWORD not set."
+    exit 1
+  fi
+
+  if ! get_env "DB_HOST" "DB_USER"; then
+    exit 1
+  fi
+}
+
+POSITIONAL_ARGS=""
+
+# parse args
+while [ $# -gt 0 ]; do
+  case $1 in
+    -u|--user)
+      DB_USER="$2"
+      shift
+      shift
+      ;;
+    -H|--host)
+      DB_HOST="$2"
+      shift
+      shift
+      ;;
+    #-P|--password)
+    #  PGPASSWORD="$2"
+    #  shift
+    #  ;;
+    -p|--passfile)
+      PGPASSFILE="$2"
+      shift
+      shift
+      ;;
+    -c|--concurrency)
+      CONCURRENCY="$2"
+      shift
+      shift
+      ;;
+    -w|--workers)
+      WORKERS="$2"
+      shift
+      shift
+      ;;
+    -d|--date)
+      DATE="$2"
+      shift
+      shift
+      ;;
+    -a|--dir)
+      ARCHIVE_DIR="$2"
+      shift
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONAL_ARGS="$POSITIONAL_ARGS $1"
+      shift # past argument
+      ;;
+  esac
+done
+
+# all three clients are used below; stop early when one is missing
+verify_cmd psql || exit 1
+verify_cmd pg_dump || exit 1
+verify_cmd pg_dumpall || exit 1
+
+parse_pg_connection
+
+set_defaults
+
+mkdir -p "$ARCHIVE_DIR"
+cd "$ARCHIVE_DIR" || exit 1
+
+dir=$( date "+${DATE}" )
+
+if test -d "$dir" && test -f "$dir/backup.done"; then
+  echo "archive of $dir already exists. exiting"
+  echo "to correct this error, increase the date precision or clear"
+  echo "the existing files before archival."
+  exit 1
+elif test -d "$dir"; then
+  echo "files exist in $dir. possible incomplete archive. exiting"
+  exit 1
+fi
+
+mkdir "./${dir}"
+cd "$dir" || exit 1
+
+echo "starting archive of $DB_HOST at $dir"
+# every step below exits on failure, before backup.done is written, so a
+# partial dump is never marked as complete
+pg_dumpall -h "$DB_HOST" -U "$DB_USER" -r -f roles.dump || exit 1
+# -t: tablespaces only
+pg_dumpall -h "$DB_HOST" -U "$DB_USER" -t -f tablespaces.dump || exit 1
+
+# the database list is fetched first so a failed query is not mistaken for
+# an empty list
+databases=$( psql -h "$DB_HOST" -U "$DB_USER" -qAtX -c "select datname from pg_database where datallowconn order by pg_database_size(oid) desc" ) || exit 1
+
+if test -z "$databases"; then
+  >&2 echo "no databases found on $DB_HOST"
+  exit 1
+fi
+
+printf '%s\n' "$databases" | \
+  tr '\n' '\0' | \
+  xargs -0 -P "${CONCURRENCY}" -I % pg_dump -h "$DB_HOST" -U "$DB_USER" -F d -C -j "${WORKERS}" -f pg-%.dump % || exit 1
+
+touch backup.done
+echo "finished archiving"
+
+exit 0
+
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100755
index 0000000..b595e70
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env sh
+# Run one backup cycle: dump the databases, compress the dumps, upload them.
+# errexit is left off so the later stages still run after an earlier one
+# fails; the failure is reported through the exit status instead.
+set +e
+
+# one shared default so all three stages work on the same directory
+ARCHIVE_DIR=${ARCHIVE_DIR:-"/archive"}
+export ARCHIVE_DIR
+
+status=0
+./dump-db.sh || status=$?
+./archive.sh || status=$?
+./sync-b2.sh "$ARCHIVE_DIR" || status=$?
+
+exit "$status"
+
diff --git a/sync-b2.sh b/sync-b2.sh
new file mode 100755
index 0000000..e31b7b2
--- /dev/null
+++ b/sync-b2.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env sh
+
+# Upload the compressed archives found in ARCHIVE_DIR to the B2 bucket BUCKET.
+# ARCHIVE_DIR falls back to the first argument, then to /archive.
+# Set DRY_RUN=1 to only report what would be transferred.
+
+ARCHIVE_DIR=${ARCHIVE_DIR:-"$1"}
+ARCHIVE_DIR=${ARCHIVE_DIR:-"/archive"}
+
+# an unset BUCKET would silently turn the destination into "b2://"
+: "${BUCKET:?BUCKET must be set}"
+
+# exclude everything, then re-include the archives archive.sh can produce
+set -- sync --skip-newer --exclude-regex '.*' --include-regex '.*\.tar\.(xz|gz)'
+if test "${DRY_RUN:-0}" = "1"; then
+  set -- "$@" --dry-run
+fi
+
+b2v4 "$@" "$ARCHIVE_DIR" "b2://${BUCKET}"
+