This commit is contained in:
Rowan 2024-12-19 00:16:41 -06:00
commit ff2754e440
5 changed files with 428 additions and 0 deletions

6
Containerfile Normal file
View file

@ -0,0 +1,6 @@
# Backup image: runs entrypoint.sh, which calls dump-db.sh, archive.sh and
# sync-b2.sh in turn.
FROM docker.io/library/python:3.13.1-alpine3.21
# The scripts call psql/pg_dump/pg_dumpall, xz and the b2 CLI.
# The two installs are chained with && so a failed `apk add` fails the build
# instead of being hidden behind pip's exit status.
RUN apk add --no-cache postgresql xz && pip install --no-cache-dir b2
# Absolute path; resolves to the same directory the relative "app" did.
WORKDIR /app
# COPY is sufficient for plain local files (no archive extraction or URLs).
COPY dump-db.sh archive.sh sync-b2.sh entrypoint.sh ./
ENTRYPOINT ["./entrypoint.sh"]

130
archive.sh Executable file
View file

@ -0,0 +1,130 @@
#!/usr/bin/env sh
# printc(args, description)
# Helper function to properly format text
#
# Prints one help row: a single leading space, `args` left-aligned in a
# 20-character column, then `description` and a newline.
# Usage:
# printc "-f, --format" "the compression format to use"
printc() {
  args="$1"
  cmd="$2"
  # Let printf do the padding. This needs no helper processes and also copes
  # with names longer than the column, where a computed pad count would be
  # negative: the description then simply follows the name directly.
  printf ' %-20s%s\n' "$args" "$cmd"
}
# usage()
# Print the help text for the archiver on stdout: synopsis, the positional
# argument, the command-line parameters and the equivalent environment
# variables. Option rows are rendered through printc.
usage() {
  printf '%s\n' \
    "usage: tartar [-0..9] [--level N] [--format F] DIR" \
    "Creates a compressed tape archive of every subdirectory in a given directory" \
    "" \
    "positional arguments:"
  printc "DIR" "the directory to archive"

  printf '%s\n' "" "parameters:"
  printc "-f, --format" "the compression format to use. default: xz"
  # Continuation row for --format; the second %s has no argument and
  # therefore prints as an empty string.
  printf '%44s %s\n' "options: gzip, xz"
  printc "-0..-9, --level" "the level of compression to use: default 6"

  printf '%s\n' "" "environment variables:"
  printc "ARCHIVE_DIR" "same as positional argument DIR"
  printc "COMPRESSION_FORMAT" "same as --format"
  printc "COMPRESSION_LEVEL" "same as --level"
}
# includes(needle, ...candidates)
# Succeed (0) when `needle` is exactly equal to one of the remaining
# arguments, fail (1) otherwise.
# Usage:
# includes "xz" gzip xz # 0
# includes "zip" gzip xz # 1
includes() {
  needle="$1"
  shift
  # Consume the candidates one at a time until one matches.
  while [ $# -gt 0 ]; do
    if [ "$needle" = "$1" ]; then
      return 0
    fi
    shift
  done
  return 1
}
# Supported compression programs and levels (space-separated lists).
FORMATS="gzip xz"
LEVELS="0 1 2 3 4 5 6 7 8 9"
# init_compression_defaults()
# Seed COMPRESSION_FORMAT and COMPRESSION_LEVEL from the environment, as
# advertised by usage(), falling back to xz at level 6 when a variable is
# unset or holds an unsupported value.
# COMPRESSION_LEVEL is stored with its leading dash (e.g. "-6") because it is
# handed to the compressor as a flag; both "6" and "-6" are accepted as input.
init_compression_defaults() {
  # Keep the accepted names in sync with FORMATS above.
  case "${COMPRESSION_FORMAT:-}" in
    gzip|xz) ;;
    *) COMPRESSION_FORMAT="xz" ;;
  esac
  case "${COMPRESSION_LEVEL:-}" in
    [0-9]) COMPRESSION_LEVEL="-${COMPRESSION_LEVEL}" ;;
    -[0-9]) ;;
    *) COMPRESSION_LEVEL="-6" ;;
  esac
}
init_compression_defaults
# set_format(format)
# Select the compression program when `format` is one of FORMATS.
# An unsupported value leaves COMPRESSION_FORMAT untouched, prints a warning
# on stderr and returns 1.
set_format() {
  # FORMATS is a space-separated list and must be split into separate words
  # for includes; quoting it would compare against the whole list at once.
  # shellcheck disable=SC2086
  if includes "$1" $FORMATS; then
    COMPRESSION_FORMAT="$1"
  else
    >&2 echo "unsupported compression format: $1"
    return 1
  fi
}
# set_level(level)
# Select the compression level when `level` is one of LEVELS (0-9).
# The value is stored with a leading dash because it is passed to the
# compressor as a flag. An unsupported value leaves COMPRESSION_LEVEL
# untouched, prints a warning on stderr and returns 1.
set_level() {
  # LEVELS is a space-separated list and must be split into separate words
  # for includes; quoting it would compare against the whole list at once.
  # shellcheck disable=SC2086
  if includes "$1" $LEVELS; then
    COMPRESSION_LEVEL="-$1"
  else
    >&2 echo "unsupported compression level: $1"
    return 1
  fi
}
# parse_args(...args)
# Parse the command line.
# -f/--format and --level take a value and are applied through set_format /
# set_level; -0..-9 set COMPRESSION_LEVEL directly; -h/--help prints usage
# and exits 0. Everything else is collected, space-separated, into
# POSITIONAL_ARGS (without a leading space).
# Exits 2 when an option that needs a value is the last argument.
parse_args() {
  POSITIONAL_ARGS=""
  while [ $# -gt 0 ]; do
    case $1 in
      -f|--format)
        # Shifting past the end of the argument list is an error (and aborts
        # some shells outright), so check for the value first.
        if [ $# -lt 2 ]; then
          >&2 echo "option $1 requires a value"
          exit 2
        fi
        set_format "$2"
        shift 2
        ;;
      --level)
        if [ $# -lt 2 ]; then
          >&2 echo "option $1 requires a value"
          exit 2
        fi
        set_level "$2"
        shift 2
        ;;
      -[0-9])
        # Already in the dashed form the compressor expects.
        COMPRESSION_LEVEL="$1"
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL_ARGS="$POSITIONAL_ARGS $1"
        shift # past argument
        ;;
    esac
  done
  # drop the first space
  POSITIONAL_ARGS=${POSITIONAL_ARGS# }
}
# can_archive(dir)
# Decide whether `dir` should be (re)compressed.
# Returns 1 (skip) when the dump is unfinished, i.e. `dir/backup.done` is
# missing, or when a verified archive already exists.
# Returns 0 (archive it) when `dir/compression.done` or `dir.tar.xz` is
# missing, or when the recorded checksum no longer matches the archive.
# The checksum file names the archive relative to the current directory, so
# this must be called from the directory that contains `dir`.
can_archive() {
  if test ! -f "${1}/backup.done"; then
    echo "skipping $1; it is either incomplete or corrupted"
    return 1
  fi
  if test ! -f "${1}/compression.done" || test ! -f "${1}.tar.xz"; then
    return 0
  fi
  # Silence the check by redirection rather than a quiet flag: the short
  # quiet flag is not accepted by every sha1sum implementation, and a
  # rejected flag would look like a checksum mismatch.
  if sha1sum -c "${1}/compression.done" >/dev/null 2>&1; then
    return 1
  fi
  return 0
}
# Entry point: compress every finished dump directory below the target
# directory into <name>.tar.xz and record its checksum.
parse_args "$@"
# The target is the first positional argument, falling back to ARCHIVE_DIR.
dir=${POSITIONAL_ARGS%% *}
dir=${dir:-"${ARCHIVE_DIR:-}"}
if [ -z "$dir" ]; then
  # Without this check an empty value reaches `cd ""`.
  >&2 echo "no directory given and ARCHIVE_DIR not set."
  usage >&2
  exit 1
fi
cd "$dir" || exit 1
for dir in ./*/; do
  dir="${dir%/}"
  # An unmatched glob is left as the literal pattern; skip it.
  [ -d "$dir" ] || continue
  if can_archive "$dir"; then
    # NOTE(review): the archive is named .tar.xz whatever COMPRESSION_FORMAT
    # is, and can_archive looks for that name - confirm this is intended for
    # gzip.
    fname="$(basename "$dir").tar.xz"
    # The pipeline status is the compressor's; a tar failure on its own is
    # not detected here.
    if tar cv --exclude='*.done' "$dir" | "$COMPRESSION_FORMAT" "$COMPRESSION_LEVEL" > "$fname"; then
      sha1sum "$fname" > "${dir}/compression.done"
    else
      # Do not leave a truncated archive behind or record it as complete.
      >&2 echo "failed to compress $dir"
      rm -f "$fname"
    fi
  fi
done

277
dump-db.sh Executable file
View file

@ -0,0 +1,277 @@
#!/usr/bin/env sh
# printc(args, description)
# Helper function to properly format text
#
# Prints one help row: a single leading space, `args` left-aligned in a
# 20-character column, then `description` and a newline.
# Usage:
# printc "-u, --user" "the postgres user for login"
printc() {
  args="$1"
  cmd="$2"
  # Let printf do the padding. This needs no helper processes and also copes
  # with names longer than the column, where a computed pad count would be
  # negative: the description then simply follows the name directly.
  printf ' %-20s%s\n' "$args" "$cmd"
}
# usage()
# Print the help text for the dump script on stdout: a description, the
# precedence rules for configuration sources, the command-line parameters
# and the equivalent environment variables. Option rows are rendered
# through printc.
usage() {
  printf '%s\n' \
    "usage: pg_archive" \
    "Archives all databases and globals from a Postgres database" \
    "" \
    "The postgresql client must be installed for this to function" \
    "Either PGPASSFILE, PGPASSWORD, --passfile, or --password is" \
    "required, unless if the database is unsecured. Which is bad." \
    "" \
    "From least to most, the order of precedence for variables are" \
    " * environment variable" \
    " * PGPASSFILE" \
    " * DB_USER, DB_HOST, PGPASSWORD" \
    " * parameter" \
    " * --passfile" \
    " * --user, --host, --password" \
    "" \
    "parameters:"
  printc "-u, --user" "the postgres user for login. default: postgres"
  printc "-H, --host" "the postgres hostname. overrides --passfile host"
  printc "-p, --passfile" "the .pgpass to use"
  # Rows for options that are currently disabled:
  #printc "-f, --format" "the output format to use. default: d"
  #printf '%20s' 'options: [p]lain, [c]ustom, [d]irectory, [t]ar'
  #printc "-f, --force" "override existing archives of the same date"
  printc "-c, --concurrency" "number of concurrent processes to use. default: 2"
  printc "-w, --workers" "number of workers to use. default: 5"
  printc "-d, --date" "date format when making directories. default %Y-%m-%d"
  printc "-a, --dir" "the archive directory path. default: ./archive"
  printc "-h, --help" "display this message and exit"

  printf '%s\n' "" "environment variables:"
  printc "DB_USER" "same as --user"
  printc "DB_HOST" "same as --host"
  printc "PGPASSWORD" "same as --password"
  printc "PGPASSFILE" "same as --passfile"
  printc "CONCURRENCY" "same as --concurrency"
  printc "WORKERS" "same as --workers"
  printc "DATE" "same as --date"
  printc "ARCHIVE_DIR" "same as --dir"
}
# set_defaults()
# Set sane defaults to as many options as possible
#
# Each variable keeps a value that is already set (from the environment or
# the command line) and only receives its default when unset or empty:
# DB_USER=postgres, CONCURRENCY=2, WORKERS=5, DATE=%Y-%m-%d, FORCE=0,
# ARCHIVE_DIR=./archive
set_defaults() {
  DB_USER=${DB_USER:-"postgres"}
  #RETENTION_AMOUNT=3
  CONCURRENCY=${CONCURRENCY:-2}
  # Default each setting from its own variable so that --workers and --date
  # are not overwritten.
  WORKERS=${WORKERS:-5}
  DATE=${DATE:-'%Y-%m-%d'}
  FORCE=${FORCE:-0}
  # ":-" also replaces an empty value, which would otherwise reach mkdir/cd.
  ARCHIVE_DIR=${ARCHIVE_DIR:-"./archive"}
}
# verify_env(var_name)
# Check that the variable whose name is given holds a non-empty value.
# Returns 0 when it does; otherwise prints "<var_name> not set." on stderr
# and returns 1.
#
# Usage:
# verify_env "KBITY" # 1
# export KBITY=":3"
# verify_env "KBITY" # 0
verify_env() {
  # Indirect lookup: copy the value of the variable named by $1 into `var`.
  eval var="\$$1"
  if [ -n "$var" ]; then
    return 0
  fi
  >&2 echo "$1 not set."
  return 1
}
# verify_file(var_name)
# Check the `<var_name>_FILE` companion variable: succeed (0) when it is set
# and names an existing regular file, fail (1) otherwise.
# Usage:
# export KBITY_FILE="kbity.txt" # file exists
# verify_file "KBITY" # 0
# verify_file "PUPY" # 1 (PUPY_FILE unset)
verify_file() {
  # Indirect lookup of the path stored in <var_name>_FILE.
  eval file="\$${1}_FILE"
  verify_env "${1}_FILE" || return 1
  test -f "$file" || return 1
  return 0
}
# verify_cmd(...args)
# Verify the existence of a command or any aliases
# Returns 0 as soon as one of the given names resolves to a command.
# When none does, prints "<first name> not found" on stderr and returns 1.
# Usage:
# verify_cmd ls not-ls # 0
# verify_cmd not-ls # 1
verify_cmd() {
  for cmd in "$@"; do
    # Redirect stdout first and then point stderr at it, so that both
    # streams are discarded.
    if command -v "$cmd" >/dev/null 2>&1; then
      return 0
    fi
  done
  >&2 echo "$1 not found"
  return 1
}
# parse_pgpass_file(path)
# Extract the HOST and USERNAME from a .pgpass file
# Fields are colon-separated; the host is field 1 and the user is field 4.
# Only the first line that is neither blank nor a comment is used, and a
# "*" wildcard is treated as "no value". DB_HOST / DB_USER are only filled
# in when they are not already set.
# Returns 1 when `path` is not a regular file, 0 otherwise.
# NOTE(review): escaped colons (\:) inside a field are not handled.
# Usage:
# echo 'db.example.com:5432:*:postgres:example' > .pgpass
# parse_pgpass_file .pgpass
# echo $DB_HOST $DB_USER # "db.example.com postgres"
parse_pgpass_file() {
  if ! test -f "$1"; then
    return 1
  fi
  host=$( awk -F: '!/^[ \t]*(#|$)/ { print $1; exit }' "$1" )
  user=$( awk -F: '!/^[ \t]*(#|$)/ { print $4; exit }' "$1" )
  # A wildcard matches anything in .pgpass but is not a usable host or user.
  if [ "$host" = "*" ]; then
    host=""
  fi
  if [ "$user" = "*" ]; then
    user=""
  fi
  DB_HOST=${DB_HOST:-$host}
  DB_USER=${DB_USER:-$user}
}
# set_env(var_name, value)
# Set a variable based on its string name
# The value is stored verbatim: spaces and shell metacharacters in it are
# never re-parsed as code.
# Returns 1 (with a message on stderr) when `var_name` is not a valid
# variable name.
# Usage:
# set_env "THE_FORP" "1514"
set_env() {
  case "$1" in
    ''|[!A-Za-z_]*|*[!A-Za-z0-9_]*)
      >&2 echo "invalid variable name: $1"
      return 1
      ;;
  esac
  # Only the name is expanded before eval runs; `$2` stays literal in the
  # evaluated text and is expanded by the assignment itself.
  eval "$1=\$2"
}
# get_env(...var_names)
# For every name given, make sure the variable has a value. A variable that
# is already set is left alone; otherwise, when `<name>_FILE` names an
# existing file, the variable is set to that file's contents (this is a
# Docker/Podman convention).
# Prints "<name> not found" on stderr for every name that could not be
# resolved and returns 1 if there was at least one, 0 otherwise.
# Usage:
# export PUPY="wraf!"
# get_env "PUPY" # 0
# get_env "KBITY" # 1
# echo "mrrp :3" > kbity.txt
# export KBITY_FILE="kbity.txt"
# get_env "KBITY" # 0
get_env() {
  status=0
  for v in "$@"; do
    # The helpers report on stderr; silence both streams because this
    # function prints its own message for names that stay unresolved.
    if verify_env "$v" >/dev/null 2>&1; then
      continue
    fi
    if verify_file "$v" >/dev/null 2>&1; then
      eval file="\$${v}_FILE"
      # Name and value are separate arguments to set_env.
      set_env "$v" "$(cat "$file")"
      continue
    fi
    status=1
    >&2 echo "${v} not found"
  done
  return "$status"
}
# parse_pg_connection()
# Resolve the connection settings from the environment.
# Steps: default PGPASSFILE to ~/.pgpass, load DB_HOST / DB_USER (directly
# or from their _FILE variants), then fill any gaps from the pgpass file.
# Exits 1 when neither a pgpass file nor PGPASSWORD is available, or when
# DB_HOST / DB_USER are still unresolved afterwards.
parse_pg_connection() {
  PGPASSFILE=${PGPASSFILE:-"$HOME/.pgpass"}
  # A value assigned by the option parser is a plain shell variable; export
  # it so the Postgres client programs started later can see it.
  export PGPASSFILE
  # First pass only loads what is available; missing names are expected
  # here, so their messages are discarded.
  get_env "DB_HOST" "DB_USER" 2>/dev/null
  if ! parse_pgpass_file "$PGPASSFILE" && test -z "$PGPASSWORD"; then
    >&2 echo "$PGPASSFILE not found and PGPASSWORD not set."
    exit 1
  fi
  if ! get_env "DB_HOST" "DB_USER"; then
    exit 1
  fi
}
# parse_args(...args)
# Parse the command line into DB_USER, DB_HOST, PGPASSFILE, CONCURRENCY,
# WORKERS, DATE and ARCHIVE_DIR. -h/--help prints usage and exits 0.
# Anything unrecognised is appended to POSITIONAL_ARGS, each entry preceded
# by a space.
# Exits 2 when an option that needs a value is the last argument.
parse_args() {
  POSITIONAL_ARGS=""
  # parse args
  while [ $# -gt 0 ]; do
    case $1 in
      -u|--user|-H|--host|-p|--passfile|-c|--concurrency|-w|--workers|-d|--date|-a|--dir)
        # Shifting past the end of the argument list is an error (and aborts
        # some shells outright), so check for the value first.
        if [ $# -lt 2 ]; then
          >&2 echo "option $1 requires a value"
          exit 2
        fi
        ;;
    esac
    case $1 in
      -u|--user)
        DB_USER="$2"
        shift 2
        ;;
      -H|--host)
        DB_HOST="$2"
        shift 2
        ;;
      # The -P/--password option is disabled:
      #-P|--password)
      # PGPASSWORD="$2"
      # shift
      # ;;
      -p|--passfile)
        PGPASSFILE="$2"
        shift 2
        ;;
      -c|--concurrency)
        CONCURRENCY="$2"
        shift 2
        ;;
      -w|--workers)
        WORKERS="$2"
        shift 2
        ;;
      -d|--date)
        DATE="$2"
        shift 2
        ;;
      -a|--dir)
        ARCHIVE_DIR="$2"
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL_ARGS="$POSITIONAL_ARGS $1"
        shift # past argument
        ;;
    esac
  done
}
parse_args "$@"
# Entry point: dump roles, tablespaces and every connectable database into
# a fresh directory named after the current date, then mark it complete
# with backup.done.
verify_cmd psql || exit 1
verify_cmd pg_dump || exit 1
verify_cmd pg_dumpall || exit 1
parse_pg_connection
set_defaults
mkdir -p "$ARCHIVE_DIR" || exit 1
cd "$ARCHIVE_DIR" || exit 1
dir=$( date "+${DATE}" )
if test -d "$dir" && test -f "$dir/backup.done"; then
  echo "archive of $dir already exists. exiting"
  echo "to correct this error, increase the date precision or clear"
  echo "the existing files before archival."
  exit 1
elif test -d "$dir"; then
  echo "files exist in $dir. possible incomplete archive. exiting"
  exit 1
fi
mkdir "./${dir}" || exit 1
cd "$dir" || exit 1
echo "starting archive of $DB_HOST at $dir"
# Every step below must succeed before backup.done is written; a failed run
# leaves the directory unmarked, which the checks above report on the next
# run.
if ! pg_dumpall -h "$DB_HOST" -U "$DB_USER" -r -f roles.dump; then
  >&2 echo "failed to dump roles"
  exit 1
fi
# -t dumps tablespaces only (-r, used above, dumps roles only).
if ! pg_dumpall -h "$DB_HOST" -U "$DB_USER" -t -f tablespaces.dump; then
  >&2 echo "failed to dump tablespaces"
  exit 1
fi
# Fetch the database list separately so that a failing query is noticed
# instead of producing an empty, "successful" run.
if ! databases=$( psql -h "$DB_HOST" -U "$DB_USER" -qAtX -c "select datname from pg_database where datallowconn order by pg_database_size(oid) desc" ); then
  >&2 echo "failed to list databases"
  exit 1
fi
if test -z "$databases"; then
  >&2 echo "no databases found"
  exit 1
fi
# One pg_dump per database, CONCURRENCY at a time; xargs reports a failure
# if any of them fails.
if ! printf '%s\n' "$databases" | \
  tr '\n' '\0' | \
  xargs -0 -P "${CONCURRENCY}" -I % pg_dump -h "$DB_HOST" -U "$DB_USER" -F d -C -j "${WORKERS}" -f pg-%.dump %; then
  >&2 echo "failed to dump one or more databases"
  exit 1
fi
touch backup.done
echo "finished archiving"
exit 0

7
entrypoint.sh Executable file
View file

@ -0,0 +1,7 @@
#!/usr/bin/env sh
# Run the three backup stages in order. errexit stays off, so a stage still
# runs when the one before it failed.
set +e
# Give every stage the same archive location. Left unset, dump-db.sh falls
# back to ./archive, archive.sh has no fallback at all and sync-b2.sh falls
# back to /archive, so the stages would not work on the same directory.
ARCHIVE_DIR="${ARCHIVE_DIR:-/archive}"
export ARCHIVE_DIR
./dump-db.sh
./archive.sh
./sync-b2.sh "$ARCHIVE_DIR"

8
sync-b2.sh Executable file
View file

@ -0,0 +1,8 @@
#!/usr/bin/env sh
# Sync the .tar.xz archives in the archive directory to the B2 bucket named
# by BUCKET.
# The directory is ARCHIVE_DIR, else the first argument, else /archive.
ARCHIVE_DIR=${ARCHIVE_DIR:-"$1"}
ARCHIVE_DIR=${ARCHIVE_DIR:-"/archive"}
# Without a bucket the destination would be the bare "b2://".
if [ -z "${BUCKET:-}" ]; then
  >&2 echo "BUCKET not set."
  exit 1
fi
# Everything is excluded except names matching the include pattern.
# NOTE(review): --dry-run is still passed, so presumably nothing is actually
# uploaded - confirm whether this is intentional.
b2v4 sync --skip-newer --exclude-regex '.*' --include-regex '.*\.tar\.xz' "$ARCHIVE_DIR" "b2://${BUCKET}" --dry-run