diff options
Diffstat (limited to 'modules.d/slx-dmsetup/scripts/dmsetup-slx-device')
-rwxr-xr-x | modules.d/slx-dmsetup/scripts/dmsetup-slx-device | 446 |
1 files changed, 446 insertions, 0 deletions
#!/usr/bin/env bash
#
# Path in repository: modules.d/slx-dmsetup/scripts/dmsetup-slx-device
#
# Script to back given read-only device using the block device
# specified by SLX_WRITABLE_DEVICE_IDENTIFIER in the SLX config.
# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is specified, it will
# further create device mapper devices accordingly.
#
# Usage: dmsetup-slx-device <read-only-device>
#
# Example partition config:
# <type> <name> <size> <crypt>
# thin-snapshot root 10G 1
# thin-volume tmp 20G 0
# linear data0 5-10G 1
# linear data1 1-50% 1
#
# NOTE: Encrypting thin-snapshot will actually encrypt the
# entire pool data device used for the pool.
# TODO: Support external keys

# emergency_shell is provided by dracut-lib; source it if not yet defined.
type -p emergency_shell || . /lib/dracut-lib.sh

# for debugging purposes: trace everything into the log file
set -x
exec &> /run/openslx/dmsetup.log

# read-only device to prepare for CoW
[ -z "$1" ] && emergency_shell "Read-only device was not given!"
declare -rg read_only_device="$1"
declare -rg read_only_device_size="$(blockdev --getsz "$1")"

# global array variables storing the configuration of the partitions;
# each element has the form "<name> <crypt> <min_sectors> <max_sectors>"
declare -ag linear snapshot thin_snapshot thin_volume

# parse_config <config>
# Parses a multi-line partition config (see file header for the format)
# and appends one entry per valid line to the global array matching the
# line's type. Invalid lines are reported and skipped.
# Globals:   writable_device_size (read, basis for percentages),
#            linear, snapshot, thin_snapshot, thin_volume (appended)
# Returns:   1 if <config> is empty
parse_config() {
	[ -z "$1" ] && return 1
	local line type name range crypt ignore unit span min max entry
	# ranges can be like: 40G, 40-80G, 10-20% (at most one "<min>-" prefix)
	local -r range_regex='^([0-9]+-)?[0-9]+[GgMmKkBb%]$'
	while IFS= read -r line; do
		[ -z "$line" ] && continue
		read -r type name range crypt ignore <<< "$line"
		type=${type//-/_} # config uses dashes, array names use underscores
		if ! [[ "$type" =~ ^(linear|snapshot|thin_snapshot|thin_volume)$ ]]; then
			echo "$0: Ignoring invalid type: $line"
			continue
		fi
		if [[ -z "$name" ]]; then
			echo "$0: Ignoring nameless entry: $line"
			continue
		fi
		if ! [[ $range =~ $range_regex ]]; then
			echo "$0: Ignoring invalid range: $line"
			continue
		fi
		# process ranges: convert percentages (of the writable device!)
		# to actual sizes (in sectors!) before saving them
		unit=${range: -1}
		span=${range%?}
		# a plain "40" yields min == max == 40; force base 10 so that
		# leading zeros are not interpreted as octal
		min=$(( 10#${span%%-*} ))
		max=$(( 10#${span##*-} ))
		if (( min > max )); then
			echo "$0: Ignoring invalid range: $line"
			continue
		fi
		# default for bytes
		local -i potency=0
		case "$unit" in
			[Kk]) potency=1 ;;
			[Mm]) potency=2 ;;
			[Gg]) potency=3 ;;
		esac
		if [ "$unit" = "%" ]; then
			if (( max > 100 )); then
				echo "Ignoring invalid percentages: $min/$max"
				continue
			fi
			min=$(( writable_device_size * min / 100 ))
			max=$(( writable_device_size * max / 100 ))
		else
			# => 1024 ** potency for G, M, K, etc results in bytes
			# => bytes / 512 = sectors
			min=$(( min * ( 1024 ** potency ) / 512 ))
			max=$(( max * ( 1024 ** potency ) / 512 ))
		fi
		if ! [[ "$crypt" =~ ^[01]$ ]]; then
			echo "$0: Disabling encryption due to invalid crypt argument: $line"
			crypt=0
		fi
		# finally save it to the global array for this type; explicit
		# dispatch instead of eval so config content is never executed
		entry="${name} ${crypt} ${min} ${max}"
		case "$type" in
			linear) linear+=("$entry") ;;
			snapshot) snapshot+=("$entry") ;;
			thin_snapshot) thin_snapshot+=("$entry") ;;
			thin_volume) thin_volume+=("$entry") ;;
		esac
	done <<< "$1"
}

# Helper to call 'dmsetup create' without syncing with udev
# and then actively create the devices with the mknodes command.
# On failure the (possibly half-created) device is removed again.
# dmsetup_create_noudevsync <name> <table>
# Returns: exit status of the failing dmsetup call, 0 on success
dmsetup_create_noudevsync() {
	(
		set -o errexit
		dmsetup create "$1" --noudevsync --table "$2"
		dmsetup mknodes --noudevsync "$1"
	)
	local ret=$?
	[ $ret -ne 0 ] && dmsetup remove --noudevsync "$1"
	return $ret
}

# encrypt_device <dev_path> <encrypted_name> [<size>]
# Creates the dm-crypt device /dev/mapper/<encrypted_name> on top of
# <dev_path> using a random, throw-away key. <size> is in sectors and
# defaults to the size of <dev_path>.
# Returns: 0 on success, 1 on invalid arguments or dmsetup failure
encrypt_device() {
	modprobe dm-crypt || echo "$0: dm-crypt loading failed, maybe builtin?"
	[ -b "$1" ] || return 1
	[ -n "$2" ] || return 1
	local size="$3"
	[ -z "$size" ] && size="$(blockdev --getsz "$1")"
	# The script runs with 'set -x' into a log file. Suspend tracing while
	# the key is generated and handed to dmsetup, otherwise the key would
	# be written to the log in clear text.
	local xtrace_was_on=0 ret=0 key
	[[ $- == *x* ]] && xtrace_was_on=1
	set +x
	key="$(head -c32 /dev/random | xxd -p | tr -d '\n')"
	dmsetup_create_noudevsync "$2" \
		"0 ${size} crypt aes-xts-plain64 $key 0 $1 0 1 allow_discards" || ret=1
	key=
	(( xtrace_was_on )) && set -x
	if [ "$ret" -ne 0 ]; then
		echo "$0: Failed to encrypt $1."
		return 1
	fi
	return 0
}

# create_snapshot "<name> <persist>"
# Creates a regular dm-snapshot named <name> of the read-only device,
# storing changes on $writable_device. <persist> is the dm-snapshot
# persistence flag and defaults to N.
# Globals:   read_only_device, read_only_device_size, writable_device (read)
# Returns:   0 on success, 1 on failure
create_snapshot() {
	modprobe dm-snapshot || echo "$0: dm-snapshot loading failed, maybe builtin?"
	local name persist ignore
	read -r name persist ignore <<< "$1"
	if ! dmsetup_create_noudevsync "$name" \
		"0 $read_only_device_size snapshot $read_only_device $writable_device ${persist:-N} 8"; then
		echo "$0: Failed to create snapshot on '$writable_device' for '$read_only_device'."
		return 1
	fi
	return 0
}

# Call this to fallback to a RAMdisk stored under /run/openslx
# This will terminate the whole script by calling finish_setup, if successful;
# on failure it drops to the emergency shell and exits with 1.
# Globals:   SLX_RAMDISK_SIZE_IN_MB (read, optional), writable_device (written)
ramdisk_fallback() {
	echo "$0: Falling back to regular dm-snapshot on a RAMdisk."
	local file="$(mktemp -u -p /run/openslx dnbd_cow.XXX)"
	local size="$SLX_RAMDISK_SIZE_IN_MB"
	# default (also for non-numeric values): half of the total memory
	if ! [[ "$size" =~ ^[0-9]+$ ]]; then
		size="$(awk '/MemTotal/ {printf("%d\n", $2 / 2 / 1024 )}' /proc/meminfo)"
	fi
	# count=0 with seek only extends the (sparse) file to <size> MiB
	dd of="$file" seek="$size" bs=1M count=0 &> /dev/null
	writable_device="$(losetup --show --find "$file")"
	local cow_device_candidate="root"
	while [ -b "/dev/mapper/$cow_device_candidate" ]; do
		cow_device_candidate="root.$RANDOM"
	done
	if [ -z "$writable_device" ] || ! create_snapshot "$cow_device_candidate N"; then
		emergency_shell "CRITICAL: failed to setup RAMdisk fallback."
		exit 1
	fi
	# finish_setup expects sectors: 1 MiB == 2048 sectors of 512 bytes
	finish_setup "$cow_device_candidate" "0" "$(( 10#$size * 2048 ))"
}

# finish_setup <device> <type> [<size>]
# <device> is the device name only, /dev/mapper will be prepended automatically.
# <type> denotes if the created device lies in a RAMdisk (0) or is backed by a disk (1).
# <size> is given in sectors.
# Publishes the CoW device in /etc/openslx, records it in the state file
# and exits the script (0 on success, 1 on invalid arguments).
finish_setup() {
	if [ -z "$1" ] || [ ! -b "/dev/mapper/$1" ]; then
		emergency_shell "'/dev/mapper/$1' not a block device. Failed to setup CoW layer."
		exit 1
	fi
	if ! [[ "$2" =~ ^[0-9]$ ]]; then
		emergency_shell "'$2' not a valid type, 0 or 1 expected."
		exit 1
	fi
	# <size> optional?
	{
		echo "# Generated by '$0'."
		echo "SLX_DNBD3_DEVICE_COW=/dev/mapper/$1"
	} >> /etc/openslx
	save_partition_info "$1" "/" "$2" "$3"
	exit 0
}

# path to save the achieved setup to
declare -rg partitions_config="/run/openslx/dmsetup.state"
cat <<-EOF > "$partitions_config"
# Generated by '$0'.
# Format: <device_mapper_dev> <mount_point> <options>
# Options can be:
# * type -> CoW layer type: 0 is RAMdisk, 1 is disk, 2 is network
# * size -> in 512 byte sectors
EOF

# save_partition_info <dm_dev> <mount_point> <type> [<size>]
# Appends one line describing /dev/mapper/<dm_dev> to $partitions_config.
# <size> is either a plain sector count or
# "<physical_backing_dev_size>-<virtual_size>" (both in sectors).
# Returns: 1 if <dm_dev> is no block device or an argument is invalid
save_partition_info() {
	[ -b "/dev/mapper/$1" ] || return 1
	[ -n "$2" ] || return 1
	[[ "$3" =~ ^[0-9]$ ]] || return 1
	local opts="type=$3"
	# plain size given
	[[ "$4" =~ ^[0-9]+$ ]] && opts="$opts,physical_size=$4"
	# <physical_backing_dev_size>-<virtual_size>
	[[ "$4" =~ ^[0-9]+-[0-9]+$ ]] && opts="$opts,shared_physical_size=${4%-*},virtual_size=${4#*-}"
	echo "/dev/mapper/$1 $2 ${opts}" >> "$partitions_config"
}

###
## MAIN
###

. /etc/openslx
# This is the main variable driving this script
declare -g writable_device=
if [ -n "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then
	# only first one for now TODO create linear devices of all ID44s
	writable_device="$(slx-tools dev_find_partitions "$SLX_WRITABLE_DEVICE_IDENTIFIER" | head -n 1)"
fi
if [ -z "$writable_device" ]; then
	echo "$0: Could not find writable device with id '$SLX_WRITABLE_DEVICE_IDENTIFIER'."
	ramdisk_fallback
fi

# NOTE: from here on out, every value related to size is in 512 bytes sectors!
declare -g writable_device_size="$(blockdev --getsz "$writable_device")"

# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is not set, just do
# regular thin-snapshot for the CoW layer, else parse it.
if [ -n "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" ]; then
	parse_config "$SLX_WRITABLE_DEVICE_PARTITION_TABLE"
fi
# Default to thin-snapshot, if none were configured
if [ -z "$snapshot" ] && [ -z "$thin_snapshot" ]; then
	parse_config "thin-snapshot root 100% 0"
fi

# Sanity checks for weird configurations
if [ "${#snapshot[@]}" -gt 1 ]; then
	echo "Multiple snapshots specified, using first one: ${snapshot[0]}"
	snapshot="${snapshot[0]}"
fi
if [ "${#thin_snapshot[@]}" -gt 1 ]; then
	echo "Multiple thin-snapshots specified, using first one: ${thin_snapshot[0]}"
	thin_snapshot="${thin_snapshot[0]}"
fi
if [ -n "$snapshot" ] && [ -n "$thin_snapshot" ]; then
	echo "$0: Both snapshot and thin-snapshot specified, prefering thin-snapshot."
	snapshot=
fi

###
## LINEAR SLICES
###

# start allocating spaces to the configured devices
declare -g writable_device_allocated=0
# reserve the space for the snapshot (of either type)...
read -r name crypt min max ignore <<< "${thin_snapshot:-${snapshot}}"

declare -g scratch_device_size=0
if (( $min <= $writable_device_size )); then
	# start at the configured maximum and shrink in 1G steps until it fits,
	# but never go below the configured minimum
	scratch_device_size=$max
	while (( $scratch_device_size >= 0 )) && (( $scratch_device_size > $writable_device_size )); do
		(( scratch_device_size -= 2097152 )) # 1G steps => 2097152 sectors
	done
	(( $scratch_device_size < $min )) && scratch_device_size="$min"
else
	# minimum snapshot size is bigger than physical device size
	echo "$0: Minimum snapshot size is too big for the scratch partition."
	echo "$0: You probably need to use a more conservative value."
	echo "$0: Using this client maximum scratch space ($writable_device_size sectors)."
	scratch_device_size="$writable_device_size"
fi

# ... and slice it from the start of the writable device (for performance).
declare -g scratch_device="/dev/mapper/scratch"
if ! dmsetup_create_noudevsync "${scratch_device##*/}" \
	"0 $scratch_device_size linear $writable_device $writable_device_allocated"; then
	echo "$0: Failed to create scratch space for the CoW layer."
	# TODO do not bail directly, but try to create the linear devices at least?
	ramdisk_fallback
fi
save_partition_info "${scratch_device##*/}" "*" "1" "$scratch_device_size"

# encrypt the scratch device, if configured
if [ "$crypt" -ne 0 ] && encrypt_device \
	"$scratch_device" "${scratch_device##*/}-crypt" "$scratch_device_size"; then
	scratch_device="${scratch_device}-crypt"
fi

writable_device_allocated="$scratch_device_size"

# first setup linear slices of the writable device
for i in "${!linear[@]}"; do
	[ -z "${linear[$i]}" ] && continue
	read -r name crypt min max ignore <<< "${linear[$i]}"
	free_space=$(( $writable_device_size - $writable_device_allocated ))
	if [ "$min" -gt "$free_space" ]; then
		echo "$0: Not enough space left for linear devices: ${linear[$i]}"
		break
	fi
	# allocate its max if it fits within the free space, otherwise use the space left.
	to_allocate="$max"
	[ "$to_allocate" -gt "$free_space" ] && to_allocate="$free_space"

	if ! dmsetup_create_noudevsync "$name" "0 $to_allocate linear $writable_device $writable_device_allocated"; then
		echo "$0: Failed to create linear device: ${linear[$i]}"
		continue
	fi
	# TODO sane?
	save_partition_info "$name" "*" "1" "$to_allocate"
	if [ "$crypt" -ne 0 ] && \
		! encrypt_device "/dev/mapper/$name" "${name}-crypt" "$to_allocate"; then
		echo "$0: Failed to encrypt '$name'."
	fi
	writable_device_allocated=$(( $to_allocate + $writable_device_allocated ))
done

# we are done with the physical device, use the scratch space from now on
writable_device="$scratch_device"
writable_device_size="$scratch_device_size"

###
## THIN-PROVISIONING
###
declare -rg pool_metadata_dev="/dev/mapper/pool-metadata"
declare -rg pool_data_dev="/dev/mapper/pool-data"
declare -rg pool_dev="/dev/mapper/pool"

# create_pool
# Creates the thin-pool $pool_dev: the metadata device lives in a sparse
# file under /run/openslx (attached via losetup), the data device is a
# linear mapping of the whole $writable_device.
# Globals:   writable_device, writable_device_size (read)
# Returns:   0 on success, 1 if any of the three devices could not be created
create_pool() {
	# create external snapshot for read-only device
	# create remaining thin volumes
	modprobe dm-thin-pool || echo "$0: dm-thin-pool load failed, maybe builtin?"
	# create temporary metadata device
	local data_block_size=255
	local metadata_dev_size metadata_dev low_water_mark
	# calculate number of sectors needed and check boundaries:
	metadata_dev_size="$(( 48 * $writable_device_size / $data_block_size / 512 ))"
	# Min 2MB -> 4096 sectors, max 16GB -> 33554432 sectors
	[ "$metadata_dev_size" -lt 4096 ] && metadata_dev_size="4096"
	# TODO handle the exotic case of a too large metadata device to fit within RAM.
	[ "$metadata_dev_size" -gt 33554432 ] && metadata_dev_size="33554432"
	# TODO handle persistent metadata device on disk
	# create RAMdisk in /run for metadata device
	metadata_dev="$(mktemp -p /run/openslx .pool-metadata.XXX)"
	# count=0: only extend the sparse file; without it dd would copy from
	# stdin (no if= given) and could block or write unintended data
	dd of="$metadata_dev" bs=512 seek="$metadata_dev_size" count=0 &> /dev/null
	metadata_dev="$(losetup --show --find "$metadata_dev")"
	if ! dmsetup_create_noudevsync "${pool_metadata_dev##*/}" \
		"0 $metadata_dev_size linear $metadata_dev 0"; then
		echo "$0: Failed to create pool metadata device on '$writable_device'."
		return 1
	fi
	# For persistent metadata device we will need to cut that space off first:
	# writable_device_size=$(( $writable_device_size - $metadata_dev_size ))

	if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \
		"0 $writable_device_size linear $writable_device 0"; then
		echo "$0: Failed to create pool data device on '$writable_device'."
		return 1
	fi
	low_water_mark=32
	if ! dmsetup_create_noudevsync "${pool_dev##*/}" \
		"0 $writable_device_size thin-pool $pool_metadata_dev $pool_data_dev $data_block_size $low_water_mark"; then
		echo "$0: Failed to create thin-pool device on '$writable_device'."
		return 1
	fi
	return 0
}

# create_volume "<name> <id> <size> <backing_dev>"
# Creates thin volume <id> in $pool_dev and maps it as /dev/mapper/<name>
# with <size> sectors. If <backing_dev> is given, it is passed to the thin
# target as external origin; it may be omitted for plain thin volumes.
# Returns: 0 on success, 1 on invalid arguments or dmsetup failure
create_volume() {
	if [[ -z "$pool_dev" || ! -b "$pool_dev" ]]; then
		echo "$0: Global pool device not set or present."
		return 1
	fi
	if [[ $# -ne 1 || -z "$1" ]]; then
		echo "$0: create_volume requires one non-empty argument."
		return 1
	fi
	local name id size backing_dev ignore
	read -r name id size backing_dev ignore <<< "$1"

	# best effort: a failure here is only reported, mapping is tried anyway
	if ! dmsetup message "$pool_dev" 0 "create_thin $id"; then
		echo "$0: Failed to create thin volume with id '$id' in pool '$pool_dev'."
		echo "$0: It might already exists, trying anyway..."
	fi
	if ! dmsetup_create_noudevsync "$name" "0 $size thin $pool_dev $id $backing_dev"; then
		echo "$0: Failed to create external snapshot named '$name':"
		echo " Size: $size"
		echo " Backing device: $backing_dev"
		echo " Thin volume id: $id"
		return 1
	fi
	return 0
}

if [ -n "$thin_snapshot" ] || [ -n "$thin_volume" ]; then
	if ! create_pool ; then
		echo "Failed to create thin pool. Will ignore:"
		echo -e "\tThin snapshot: $(declare -p thin_snapshot)"
		echo -e "\tThin volumes: $(declare -p thin_volume)"
		ramdisk_fallback
	fi
	# the order in which pool devices are created does not matter
	# so start with thin volumes starting with id 2 and end with
	# the thin-snapshot with id 1 which needs to call finish_setup.
	volume_id=2
	# go over thin-volumes
	for i in "${!thin_volume[@]}"; do
		[ -z "${thin_volume[$i]}" ] && continue
		read -r name crypt min max ignore <<< "${thin_volume[$i]}"
		# thin-volume can be safely created with max size,
		# since they are overprovisioned anyway.
		if ! create_volume "$name $(( volume_id++ )) $max"; then
			echo "Failed to create thin volume '$name'."
			# nothing to record or encrypt for a volume that does not exist
			continue
		fi
		save_partition_info "$name" "*" "1" "${writable_device_size}-${max}"
		if [ "$crypt" -ne 0 ] && ! encrypt_device \
			"/dev/mapper/$name" "$name-crypt" "$max"; then
			echo "Failed to encrypt thin volume '$name'."
		fi
	done

	if [ -n "$thin_snapshot" ]; then
		# create thin-snapshot, use first one
		read -r name crypt min max ignore <<< "$thin_snapshot"
		# min/max was used for the pool data device, ignore it here!
		# NOTE: the filesystem will most likely malfunction if the size of the
		# thin-snapshot is smaller than what it was upon creation.
		# As such, the size of the thin-snapshot can only be $writable_device_size
		# if it is larger than $read_only_device_size, otherwise we should only
		# use $read_only_device_size. While live-shrinking the filesystem might be
		# an option, it is not supported throughout all filesystems (xfs can't).
		if (( writable_device_size >= read_only_device_size )); then
			thin_snapshot_size="$writable_device_size"
		else
			thin_snapshot_size="$read_only_device_size"
		fi
		if ! create_volume "$name 1 $thin_snapshot_size $read_only_device"; then
			echo "Failed to create external snapshot for '$read_only_device'."
			ramdisk_fallback
		fi
		finish_setup "$name" "1" "$thin_snapshot_size"
	fi
fi

###
## SNAPSHOT (OLD FUNCTIONALITY)
###
if [ -n "$snapshot" ]; then
	read -r name crypt min max ignore <<< "$snapshot"
	# NOTE(review): $persist is not assigned anywhere in this script, so
	# create_snapshot falls back to its default (N) unless the variable is
	# provided by the environment or /etc/openslx - confirm this is intended.
	if ! create_snapshot "$name $persist"; then
		echo "Failed to create regular snapshot for '$read_only_device' on '$writable_device'."
		ramdisk_fallback
	fi
	finish_setup "$name" "1" "$writable_device_size"
fi

# ultimate fallback
ramdisk_fallback
exit 1