diff options
Diffstat (limited to 'modules.d/slx-dmsetup/hooks/dmsetup-slx-device')
-rwxr-xr-x | modules.d/slx-dmsetup/hooks/dmsetup-slx-device | 794 |
1 files changed, 794 insertions, 0 deletions
#!/usr/bin/env bash
#
# Script to back given read-only device using the block device
# specified by SLX_WRITABLE_DEVICE_IDENTIFIER in the SLX config.
# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is specified, it will
# further create device mapper devices accordingly.
#
# Usage: dmsetup-slx-device <read-only-device>
#
# Example partition config:
# <type> <name> <size> <crypt>
# thin-snapshot root 10G 1
# thin-volume tmp 20G 0
# linear data0 5-10G 1
# linear data1 1-50% 1
#
# NOTE: Encrypting thin-snapshot will actually encrypt the
# entire pool data device used for the pool.
# TODO: Support external keys
# TODO: Put table in file in config.tgz

# Pull in the dracut helpers unless emergency_shell is already known
type -p emergency_shell || . /lib/dracut-lib.sh

# for debugging purposes: send the xtrace output to a log file
exec {BASH_XTRACEFD}> /run/openslx/dmsetup.log
set -x

# read-only device to prepare for CoW
if [ -z "$1" ]; then
	emergency_shell "Read-only device was not given!"
	# Without a device there is nothing sensible left to do, so do not
	# carry on should emergency_shell return.
	exit 1
fi
declare -g read_only_device="$1"
declare -g read_only_device_sz="$( blockdev --getsz "$1" )"
# Use _sz suffix for sizes expressed in number of 512b sectors,
# _size for anything else

declare -rg ntfs_list="/run/openslx/.thin-ntfs-candidates"

# handle_unit <value> <unit>
# Supply percentage, or size in [kmgt]bytes,
# prints the appropriate value in number of 512b sectors.
# Percentages are relative to $remaining_device_sz, which is expected
# to be set by the caller (see parse_config).
handle_unit() {
	# default to bytes
	local -i potency=0
	local -i val="$1"
	case "$2" in
		[%]) # These are relative to the writable CoW device
			# Allow > 100% for over-provisioning
			val="$(( remaining_device_sz * val / 100 ))"
			;;
		# ';;&' keeps testing the following patterns, so every size unit
		# ends up in the catch-all branch below with its potency set.
		[Kk]) potency=1 ;;&
		[Mm]) potency=2 ;;&
		[Gg]) potency=3 ;;&
		[Tt]) potency=4 ;;&
		*)
			# => 1024 ** potency for G, M, K, etc results in bytes
			# => bytes / 512 = sectors
			val=$(( val * ( 1024 ** potency) / 512 ))
			;;
	esac
	echo "$val"
}

# parse_config <table>
# Parses the partition table (one entry per line) into the global arrays
# below. Absolute sizes are handled first, so that percentages in the
# second pass refer to the space that is left over.
parse_config() {
	local remaining_device_sz="$writable_device_sz"
	parse_config_int "$1" 0
	parse_config_int "$1" 1
}

# global array variables storing the configuration of the partitions
# Each element has the format "<name> <crypt> <min_sz> <max_sz>"
declare -ag linear snapshot thin_snapshot thin_volume

# parse_config_int <table> <rel_only>
# Single pass over the table: with <rel_only> = 1 only entries using a
# percentage are handled, otherwise only entries with absolute sizes.
# Reads and updates $remaining_device_sz of the calling parse_config.
parse_config_int() {
	[ -z "$1" ] && return 1
	local -i rel_only="$2"
	# Keep the parse state out of the global namespace
	local line type name range crypt ignore
	local min max min_unit max_unit
	while IFS= read -r line || [ -n "$line" ]; do
		[ -z "$line" ] && continue
		read -r type name range crypt ignore <<< "$line"
		type=${type//-/_} # array names cannot contain dashes
		if ! [[ "$type" =~ ^(linear|snapshot|thin_snapshot|thin_volume)$ ]]; then
			echo "$0: Ignoring invalid type: $line"
			continue
		fi
		if [[ -z "$name" ]]; then
			echo "$0: Ignoring nameless entry: $line"
			continue
		fi
		# ranges can be like: 40G, 40-80G, 10G-20%
		# The accepted units match what handle_unit understands (incl. T).
		if ! [[ "$range" =~ ^([0-9]+)([GgMmKkTtBb%]?)(-([0-9]+)([GgMmKkTtBb%]?))?$ ]]; then
			echo "$0: Ignoring invalid range: $line"
			continue
		fi
		# A missing max defaults to min; a missing unit is taken from the
		# other end of the range.
		min="${BASH_REMATCH[1]}"
		max="${BASH_REMATCH[4]:-${BASH_REMATCH[1]}}"
		min_unit="${BASH_REMATCH[2]:-${BASH_REMATCH[5]}}"
		max_unit="${BASH_REMATCH[5]:-${BASH_REMATCH[2]}}"
		# first pass we handle absolute values only, second pass relative ones
		if [[ "$min_unit" = "%" || "$max_unit" = "%" ]]; then
			[ "$rel_only" != 1 ] && continue
		else
			[ "$rel_only" = 1 ] && continue
		fi
		if [ -z "$min_unit" ]; then
			echo "$0: WARNING: No unit given in range, assuming BYTES: $line"
		fi
		min="$( handle_unit "$min" "$min_unit" )"
		max="$( handle_unit "$max" "$max_unit" )"
		if (( min > max )); then
			# So, we might end up with something like 30G-100%, but the writable device
			# is only 20GB. In that case we most likely want to continue, and not consider
			# this an error. Anything involving a mix of percentage and non-percentage
			# is therefore not treated as an error.
			if [[ "$min_unit" = "%" && "$max_unit" != "%" ]] \
					|| [[ "$min_unit" != "%" && "$max_unit" = "%" ]]; then
				# Let's hope for the best
				max="$min"
			else
				echo "$0: Ignoring invalid range (min > max): $line"
				continue
			fi
		fi
		if ! [[ "$crypt" =~ ^[01]$ ]]; then
			echo "$0: Disabling encryption due to invalid crypt argument: $line"
			crypt=0
		fi
		# finally save it to the global array for this type
		case "$type" in
			linear) linear+=("${name} ${crypt} ${min} ${max}") ;;
			snapshot) snapshot+=("${name} ${crypt} ${min} ${max}") ;;
			thin_snapshot) thin_snapshot+=("${name} ${crypt} ${min} ${max}") ;;
			thin_volume) thin_volume+=("${name} ${crypt} ${min} ${max}") ;;
			*) echo "$0: SOMETHING NOT GOOT CHECK SOURCE CODE" ;;
		esac
		# Decrease for upcoming calculations if we used fixed values here
		if [ "$rel_only" != 1 ]; then
			(( remaining_device_sz -= ( min + max ) / 2 ))
		fi
	done <<< "$1"
}

# Helper to call 'dmsetup create' without syncing with udev
# and then actively create the devices with the mknodes command.
# Either pass the table contents as $2, or pipe them into the function
# dmsetup_create_noudevsync <name> [table]
# Returns 0 on success; on failure the (partially created) device is
# removed again and a non-zero status is returned (99 if the device node
# is missing afterwards).
dmsetup_create_noudevsync() {
	(
		set -eo pipefail
		if [ -n "$2" ]; then
			printf "%s\n" "$2" | dmsetup create "$1" --noudevsync
		else
			dmsetup create "$1" --noudevsync
		fi
		dmsetup mknodes --noudevsync "$1"
	)
	local ret=$?
	[ -b "/dev/mapper/$1" ] || ret=99
	[ $ret -ne 0 ] && dmsetup remove --noudevsync "$1"
	return $ret
}

# encrypt_device <dev_path> <encrypted_name> [<size>]
# Creates the dm-crypt device /dev/mapper/<encrypted_name> on top of
# <dev_path>, using a random throw-away key. <size> is in sectors and
# defaults to the size of <dev_path>.
# Returns 0 on success, 1 otherwise.
encrypt_device() {
	# TODO: Send key back to us, demand ransom
	modprobe dm-crypt || echo "$0: dm-crypt loading failed, maybe builtin?"
	[ -b "$1" ] || return 1
	[ -n "$2" ] || return 1
	local size="$3"
	[ -z "$size" ] && size="$( blockdev --getsz "$1" )"
	# The script runs with xtrace enabled; pause it while handling the
	# key, so it does not end up in the debug log.
	local xtrace_was_on=
	[[ "$-" == *x* ]] && xtrace_was_on=1
	set +x
	local ret=0
	local key
	# All variants produce 64 hex digits (32 bytes); later ones are only
	# tried if an earlier one yields nothing.
	key="$( < /dev/urandom xxd -c32 -p -l32 )"
	[ -z "$key" ] && key="$( < /dev/urandom tr -c -d 'a-f0-9' | dd bs=1 count=64 2> /dev/null )"
	[ -z "$key" ] && key="$( < /dev/urandom head -c32 | xxd -c32 -p )"
	[ -z "$key" ] && key="$( < /dev/urandom xxd -c32 -p | head -n 1 )"
	if [ -z "$key" ]; then
		echo "$0: ERROR: Could not generate encryption key"
		ret=1
	# Feed the table via stdin, the helper passes it on to dmsetup
	elif ! printf '%s\n' "0 ${size} crypt aes-xts-plain64 $key 0 $1 0 1 allow_discards" \
			| dmsetup_create_noudevsync "$2"; then
		echo "$0: Failed to encrypt $1."
		ret=1
	fi
	key=
	[ -n "$xtrace_was_on" ] && set -x
	return "$ret"
}

# create_snapshot "<name> <persist>"
# Creates a classic dm-snapshot named <name> of $read_only_device, using
# $writable_device as CoW device. <persist> defaults to N.
# NOTE(review): the CoW device is always the global $writable_device, even
# when a dedicated $scratch_device was set up - confirm this is intended.
create_snapshot() {
	modprobe dm-snapshot || echo "$0: dm-snapshot loading failed, maybe builtin?"
	local name persist ignore
	read -r name persist ignore <<< "$1"
	if ! dmsetup_create_noudevsync "$name" \
			"0 $read_only_device_sz snapshot $read_only_device $writable_device ${persist:-N} 8"; then
		echo "$0: Failed to create snapshot on '$writable_device' for '$read_only_device'."
		return 1
	fi
	return 0
}

# This function is called if no ID44 partition could be found or another kind
# of critical error occurs during the CoW layer setup. It will combine the
# read-only device with a DM zero device to increase its virtual size
# by half the RAM size. A sparse file of that size will then be created and
# placed on a dedicated tmpfs.
# THIS FUNCTION MUST NEVER RETURN
ramdisk_fallback() {
	echo "$0: Falling back to regular dm-snapshot on a RAMdisk."

	# RAM size in kb, note that this is equal to half
	# of the entire RAM when interpreted as 512-bytes sectors.
	local ram_cow_sz="$(awk '/^MemTotal:/ { printf("%d\n", $2 ); exit }' /proc/meminfo)"

	# try to prepare the zero extension device
	local extended_device="/dev/mapper/${read_only_device##*/}-extended"
	# Two table rows: the original device, followed by the zero target
	local extended_table="0 $read_only_device_sz linear $read_only_device 0"
	extended_table+=$'\n'"$read_only_device_sz $ram_cow_sz zero"
	# A failing modprobe is not fatal, the target might be built in;
	# creating the device will tell.
	modprobe dm-zero || echo "$0: dm-zero loading failed, maybe builtin?"
	if dmsetup_create_noudevsync "${extended_device##*/}" "$extended_table"; then
		read_only_device="$extended_device"
		read_only_device_sz="$(( read_only_device_sz + ram_cow_sz ))"
	else
		echo "$0: Failed to setup the fake larger '$read_only_device'."
		echo "$0: Continuing with its original size."
	fi

	# prepare dedicated tmpfs mount point
	local cow_tmpfs="/run/openslx/cow"
	if ! mkdir -p "$cow_tmpfs"; then
		cow_tmpfs="${cow_tmpfs}.$$.$RANDOM"
		mkdir -p "$cow_tmpfs"
	fi
	# sectors / 2 = KiB, plus a bit of slack
	if ! mount -t tmpfs cow-tmpfs -o size="$(( read_only_device_sz / 2 + 100 ))k" "$cow_tmpfs"; then
		echo "$0: Failed to mount tmpfs in '$cow_tmpfs' of size '$(( read_only_device_sz / 2 + 100 ))KiB'."
	fi

	# create sparse file there
	local file="$(mktemp -u -p "$cow_tmpfs" dnbd_cow.XXX)"
	if ! dd if=/dev/null of="$file" seek="$(( read_only_device_sz ))" bs=512 2> /dev/null; then
		emergency_shell "Failed to allocate CoW file $file."
	fi
	declare -rg writable_device="$(losetup --show --find "$file")"
	# Pick a device name that is not taken yet
	local cow_device_candidate="root"
	while [ -b "/dev/mapper/$cow_device_candidate" ]; do
		cow_device_candidate="root.$RANDOM"
	done
	if [ -z "$writable_device" ] || ! create_snapshot "$cow_device_candidate N"; then
		emergency_shell "CRITICAL: failed to setup RAMdisk fallback."
		exit 1
	fi
	finish_setup "$cow_device_candidate" "0" "$read_only_device_sz"
}

# finish_setup <device> <type> [<size>]
# <device> is the device name only, /dev/mapper will be prepended automatically.
# <type> denotes if the created device lies in a RAMdisk (0) or is backed by a disk (1).
# <size> is given in sectors.
# Records the result in /etc/openslx and the partition state file.
# THIS FUNCTION MUST NEVER RETURN
finish_setup() {
	if [ -z "$1" ] || ! [ -b "/dev/mapper/$1" ]; then
		emergency_shell "'/dev/mapper/$1' not a block device. Failed to setup CoW layer."
		exit 1
	fi
	if ! [[ "$2" =~ ^[0-9]$ ]]; then
		emergency_shell "'$2' not a valid type, 0 or 1 expected."
		exit 1
	fi
	# <size> optional?
	{
		echo "# Generated by '$0'."
		echo "SLX_DNBD3_DEVICE_COW=/dev/mapper/$1"
	} >> /etc/openslx
	save_partition_info "$1" "/" "$2" "$3"
	exit 0
}

# path to save the achieved setup to
declare -rg partitions_config="/run/openslx/dmsetup.state"
cat <<-EOF > "$partitions_config"
# Generated by '$0'.
# Format: <device_mapper_dev> <mount_point> <options>
# Options can be:
# * type -> CoW layer type: 0 is RAMdisk, 1 is disk, 2 is network
# * size -> in 512 byte sectors
EOF

# save_partition_info <dm_dev> <mount_point> <type> [<size>]
# Appends one line describing /dev/mapper/<dm_dev> to $partitions_config.
# <size> is either a plain number of sectors, or
# <physical_backing_dev_size>-<virtual_size> for thin volumes.
# Returns 1 if the device does not exist or an argument is invalid.
save_partition_info() {
	[ -b "/dev/mapper/$1" ] || return 1
	[ -n "$2" ] || return 1
	[[ "$3" =~ ^[0-9]$ ]] || return 1
	local opts="type=$3"
	# plain size given
	[[ "$4" =~ ^[0-9]+$ ]] && opts="$opts,physical_size=$4"
	# <physical_backing_dev_size>-<virtual_size>
	[[ "$4" =~ ^[0-9]+-[0-9]+$ ]] && opts="$opts,shared_physical_size=${4%-*},virtual_size=${4#*-}"
	echo "/dev/mapper/$1 $2 ${opts}" >> "$partitions_config"
}

# This will create another dm-linear on top of $scratch_device in case its
# size differs from $scratch_device_sz. This is useful for setups where you
# cannot explicitly configure how much space to use from the underlying device,
# and the partition table says not to use the entire $writable_device for cow
# Returns 1 only if the linear device could not be created.
require_exact_scratch_size() {
	local current_sz="$( blockdev --getsz "$scratch_device" )"
	(( current_sz == scratch_device_sz )) && return 0 # Everything fine
	if (( current_sz < scratch_device_sz )); then
		echo "$0: WARNING: scratch_device_sz is larger than actual device."
		echo "$0: This should never happen."
		scratch_device_sz="$current_sz"
		return 0
	fi
	# We could check if $scratch_device already is a dm target, and just adjust its
	# size, but I think that scenario isn't possible, currently.
	if ! dmsetup_create_noudevsync "scratch" "0 $scratch_device_sz linear $scratch_device 0"; then
		echo "$0: Failed to create scratch space for the CoW layer."
		return 1
	fi
	scratch_device="/dev/mapper/scratch"
	save_partition_info "scratch" "*" "1" "$scratch_device_sz"
	return 0
}

# create_pool
# Sets up the thin-pool $pool_dev with $scratch_device as data device and
# a loop device on a sparse file in /run/openslx as metadata device.
# May adjust $scratch_device_sz and set $root_ntfs_extra.
# Returns 0 on success, 1 otherwise.
create_pool() {
	declare -r data_block_sz=256 # Desired Block size (number of 512byte sectors)
	declare -r wanted_low_mb=100 # Free space below this will trigger a dm event
	local metadata_dev_sz
	# create external snapshot for read-only device
	# create remaining thin volumes
	modprobe dm-thin-pool || echo "$0: dm-thin-pool load failed, maybe builtin?"
	# create temporary metadata device
	# calculate number of sectors needed and check boundaries:
	# XXX Formula from thin-pool.txt calculates size in *bytes*, we want 512b blocks
	metadata_dev_sz="$(( 48 * scratch_device_sz / data_block_sz / 512 ))"
	# If we want NTFS as a backup plan to extend the pool, check if the current size
	# is less than 100GB, and only then consider this feature.
	# Maybe make that threshold configurable one day, but for the desktop client
	# use case this is sensible for now.
	# NOTE: $metadata_persistent is not local yet at this point, so this
	# looks at a global of that name, if there is any.
	if [ "$SLX_NTFSFREE" = "backup" ] && (( scratch_device_sz < 209715200 )) \
			&& [ -z "$metadata_persistent" ]; then
		find_ntfs_partitions
		if [ -s "$ntfs_list" ]; then
			# Look what size we end up if we want at least 50GB
			local sum="$( awk -v sum=0 \
				'{sum+=$1; if (sum >= 104857600) exit}END{printf "%.0f", sum}' \
				"$ntfs_list" )"
			if (( sum > 0 )); then
				(( sum > 209715200 )) && sum=209715200 # Max 100GB
				# Account for this potential growth in the metadata device size for future expansion
				metadata_dev_sz="$(( metadata_dev_sz + 48 * sum / data_block_sz / 512 ))"
				echo "$sum" > "/run/openslx/.thin-ntfs-growsize"
				root_ntfs_extra="$sum"
			fi
		fi
	fi
	# Min 2MB -> 4096 sectors, max 16GB -> 33554432 sectors
	[ "$metadata_dev_sz" -lt 4096 ] && metadata_dev_sz="4096"
	# TODO handle the exotic case of a too large metadata device to fit within RAM.
	[ "$metadata_dev_sz" -gt 33554432 ] && metadata_dev_sz="33554432"
	local scratch_device_offset=0
	local metadata_dev=
	# Always empty, so the persistent branch below is currently never taken
	local metadata_persistent=
	if [ -n "$metadata_persistent" ]; then
		# create persistent slice of the writable device for the pool metadata
		if ! dmsetup_create_noudevsync "pool-metadata" \
				"0 $metadata_dev_sz linear $scratch_device $scratch_device_offset"; then
			echo "$0: Failed to create linear device for pool metadata device."
		else
			# Adjust size for pool-data down accordingly
			scratch_device_offset="$metadata_dev_sz"
			scratch_device_sz=$(( scratch_device_sz - metadata_dev_sz ))
			declare -r metadata_dev="/dev/mapper/pool-metadata"
			# TODO configurable wipe: dd if=/dev/zero of="$metadata_dev" count=1 bs=4096
			# TODO: If we fail later on in this function, we would actually have to destroy
			# this target again, and re-adjust the offset and size back, so that the
			# snapshot fallback would work properly. Or maybe just don't support fallback.
		fi
	fi
	if [ -z "$metadata_dev" ]; then
		# create RAMdisk in /run for metadata device
		metadata_dev="$(mktemp -p /run/openslx .pool-metadata.XXX)"
		# Create sparse file of required size
		dd if=/dev/null of="$metadata_dev" bs=512 seek="$metadata_dev_sz" 2> /dev/null
		declare -r metadata_dev="$( losetup --show --find "$metadata_dev" )"
	fi
	if [ -z "$metadata_dev" ]; then
		echo "$0: Could not set up persistent or tmpfs-loop metadata device. Aborting."
		return 1
	fi

	local pool_data_dev
	if (( root_ntfs_extra == 0 )) && (( scratch_device_offset == 0 )); then
		# No offset, no potential expansion, don't create another linear target
		pool_data_dev="$scratch_device"
	else
		pool_data_dev="/dev/mapper/pool-data"
		# Create linear device of the writable device, in case we have an offset from
		# the on-disk meta data. Also this way we can easily extend it later.
		if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \
				"0 $scratch_device_sz linear $scratch_device $scratch_device_offset"; then
			echo "$0: Failed to create pool data device on '$scratch_device'."
			return 1
		fi
	fi
	local low_water_mark
	# Convert MB to blocks
	low_water_mark=$(( wanted_low_mb * 2048 / data_block_sz ))
	if ! dmsetup_create_noudevsync "${pool_dev##*/}" \
			"0 $scratch_device_sz thin-pool $metadata_dev $pool_data_dev $data_block_sz $low_water_mark 1 skip_block_zeroing"; then
		echo "$0: Failed to create thin-pool device (meta: $metadata_dev, data: $pool_data_dev)"
		return 1
	fi
	return 0
}

# create_volume <name> <id> <size> [backing_dev]
# Creates the thin volume <id> in $pool_dev and maps it as
# /dev/mapper/<name> with <size> sectors. If [backing_dev] is given, it
# is used as external origin of the volume.
# Returns 0 on success, 1 otherwise.
create_volume() {
	if [ -z "$pool_dev" ] || ! [ -b "$pool_dev" ]; then
		echo "$0: Global pool device not set or present."
		return 1
	fi
	if [ $# -lt 3 ] || [ -z "$1" ]; then
		echo "$0: create_volume: not enough arguments."
		return 1
	fi
	local name="$1"
	local id="$2"
	local size="$3"
	local backing_dev="$4" # Optional, internal if empty

	if ! dmsetup message "$pool_dev" 0 "create_thin $id"; then
		echo "$0: Failed to create thin volume with id '$id' in pool '$pool_dev'."
		echo "$0: It might already exists, trying anyway..."
	fi
	if ! dmsetup_create_noudevsync "$name" "0 $size thin $pool_dev $id $backing_dev"; then
		echo "$0: Failed to create external snapshot named '$name':"
		echo " Size: $size"
		echo " Backing device: $backing_dev"
		echo " Thin volume id: $id"
		return 1
	fi
	return 0
}

# Find NTFS partitions with decently sized ranges of
# free space. We can use these as our writable layer
# for our thin-pool, if configured.
# If suitable, this will create the file $ntfs_list with
# one line per suitable partition, format
# total_size_blocks devpath
# Results are sorted by size, descending order.
# The overall sum is stored in $ntfs_extra_space_sz.
find_ntfs_partitions() {
	[ -z "$SLX_NTFSFREE" ] && return
	[ "$SLX_NTFSFREE" = "never" ] && return
	[ -e "$ntfs_list" ] && return
	if ! command -v ntfsfree &> /dev/null; then
		echo "$0: ntfsfree not found, cannot use NTFS partitions as RW layer"
		return
	fi
	local part sum ro dev
	ntfs_extra_space_sz=0
	for part in /dev/disk/by-partuuid/*; do
		# Skip empty/ro devices
		dev="$( readlink -f "$part" )"
		dev="${dev##*/}"
		ro="$( cat "/sys/class/block/${dev}/ro" )"
		[ "$ro" = 1 ] && continue
		# Only count ranges >= 256MB, sum will be in number of 512b blocks
		sum="$( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$part" 2> /dev/null \
			| awk -v sum=0 '{if ($1 == "Range") sum += $4}END{printf "%.0f", sum}' )"
		# Only consider volume if sum of these ranges > 1GB (this is BLOCKS, not bytes)
		(( "$sum" > 2 * 1024 * 1024 )) || continue
		echo "$sum $part" # only thing in loop going to stdout
	done | sort -nr > "$ntfs_list"
	# The loop above is part of a pipeline and thus runs in a subshell,
	# so the total has to be determined from the resulting list.
	if [ -s "$ntfs_list" ]; then
		ntfs_extra_space_sz="$( awk -v sum=0 '{sum += $1}END{printf "%.0f", sum}' "$ntfs_list" )"
	fi
	return 0
}
ntfs_extra_space_sz=0

###
## MAIN
###

. /etc/openslx

. slx-tools
# "Preload" functions by executing them NOT in a subshell
dev_find_partitions &> /dev/null
dev_swap_version &> /dev/null

# This is the main variable driving this script
declare -g id44_crypted=
declare -g writable_device=
if [ -z "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then
	SLX_WRITABLE_DEVICE_IDENTIFIER=("44" "87f86132-ff94-4987-b250-444444444444")
	# TODO make scripts reading this variable compatible with list of IDs
	echo "SLX_WRITABLE_DEVICE_IDENTIFIER='${SLX_WRITABLE_DEVICE_IDENTIFIER[0]}'" >> /etc/openslx
	echo "SLX_WRITABLE_DEVICE_IDENTIFIERS='${SLX_WRITABLE_DEVICE_IDENTIFIER[*]}'" >> /etc/openslx
fi
# XXX SLX_WRITABLE_DEVICE_IDENTIFIER is an array if the default above was
# applied, otherwise whatever /etc/openslx defined. This is fragile.
# Locate the writable device(s). Several partitions and/or free ranges
# within NTFS volumes are joined into one linear device "id44-group".
if [ -n "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then
	declare -a writable_devices
	# Word splitting of the output is intended here
	writable_devices=( $( dev_find_partitions "${SLX_WRITABLE_DEVICE_IDENTIFIER[@]}" ) )
	if [[ "${#writable_devices[@]}" -eq 0 && "$SLX_NTFSFREE" != "never" ]] || [ "$SLX_NTFSFREE" = "always" ]; then
		find_ntfs_partitions
	fi
	if [ -s "$ntfs_list" ] || [[ "${#writable_devices[@]}" -gt 1 ]]; then
		# More than one device, and/or NTFS space, need linear
		tbl="/run/openslx/dmsetup-linear-id44"
		pos=0 # next free sector in the linear table
		grow_max_sz=9999999999 # upper limit for the combined size
		for dev in "${writable_devices[@]}"; do
			max="$(( grow_max_sz - pos ))"
			(( max <= 0 )) && break
			sz="$( blockdev --getsz "$dev" )"
			(( sz > 0 )) || continue
			(( sz > max )) && sz="$max"
			echo "$pos $sz linear $dev 0"
			(( pos += sz ))
		done > "$tbl"
		if [ -s "$ntfs_list" ]; then
			sum=
			while read -r sum dev _ || [ -n "$sum" ]; do # each dev
				word=
				while read -r word range_start_b _ range_sz _ || [ -n "$word" ]; do # each slice of dev
					[ "$word" = "Range" ] || continue
					(( range_sz > 0 )) || continue
					slice_sz="$(( grow_max_sz - pos ))"
					(( slice_sz <= 0 )) && break
					(( slice_sz > range_sz )) && slice_sz="$range_sz"
					# Append line
					if echo "$pos $slice_sz linear $dev $range_start_b" >> "$tbl"; then
						# Update counter
						(( pos += slice_sz ))
					else
						echo "$0: Could not write new table row into $tbl"
					fi
				done < <( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$dev" )
			done < "$ntfs_list"
			# Don't try to add NTFS space again later
			SLX_NTFSFREE="never"
			sed -i "s/^SLX_NTFSFREE.*$/# & # disabled in stage3\nSLX_NTFSFREE='never'/" "/etc/openslx"
			rm -f -- "$ntfs_list"
		fi
		# See if we need a linear target at all
		if ! [ -s "$tbl" ]; then
			echo "$0: Empty tmp/id44 table, fallback to RAM"
		elif [ "$( wc -l < "$tbl" )" -eq 1 ] && [[ "${#writable_devices[@]}" -ge 1 ]]; then
			# Only one line, have writable device -> use directly
			writable_device="${writable_devices[0]}"
		else
			# set up linear device
			if ! dmsetup_create_noudevsync "id44-group" < "$tbl"; then
				echo "$0: Error creating group of id44 devices. Fallback to RAM :-("
			else
				writable_device="/dev/mapper/id44-group"
			fi
		fi
	else
		# Single device
		writable_device="${writable_devices[0]}"
	fi
fi
if [ -z "$writable_device" ]; then
	echo "$0: Could not find writable device with id '$SLX_WRITABLE_DEVICE_IDENTIFIER'."
	ramdisk_fallback
elif is_on "$SLX_ID44_CRYPT"; then
	# Config option crypts the entire ID44 device(s), before any slices are taken from it.
	if encrypt_device "$writable_device" "id44-crypt"; then
		echo "$0: ID44 encrypted"
		writable_device="/dev/mapper/id44-crypt"
		# Remember the whole device is already encrypted, and ignore the crypt flag for the partition table later
		id44_crypted=1
	else
		echo "$0: Error encrypting ID44 partition"
	fi
fi

# NOTE: from here on out, every value related to size is in 512 bytes sectors!
declare -rg writable_device_sz="$( blockdev --getsz "$writable_device" )"

# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is not set, just do
# regular thin-snapshot for the CoW layer, else parse it.
if [ -z "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" ]; then
	SLX_WRITABLE_DEVICE_PARTITION_TABLE="thin-snapshot root 100% 0"
fi

# extra swap?
if grep -qFw 'slx.swap' "/proc/cmdline"; then
	# Only if our basic writable_device is large enough, or we have ntfs backup
	do_swap_sz=0
	if (( writable_device_sz > 80078125 )); then
		# more than ~40GB, go ahead
		do_swap_sz="$(( ( writable_device_sz - 70312500 ) / 2 ))"
		# cap to 6GB
		(( do_swap_sz > 11718750 )) && do_swap_sz=11718750
	elif [ "$SLX_NTFSFREE" = "backup" ] \
			&& (( ntfs_extra_space_sz > 70312500 )) && (( writable_device_sz > 11718750 )); then
		# more than 40GB NTFS backup space, more than 6GB ID44, make 4GB swap
		do_swap_sz=7812500
	fi
	# Check how many we have and if they're regular, unencrypted ones.
	# If it's plenty, don't cut out swap from our backing device
	swap_sz=0
	for part in $( dev_find_partitions "82" "0657fd6d-a4ab-43c4-84e5-0933c84b4f4f" ); do
		dev_swap_version "$part" &> /dev/null || continue
		this_sz="$( blockdev --getsz "$part" )"
		(( this_sz > 0 )) && (( swap_sz += this_sz ))
	done
	echo "Have existing swap of $swap_sz blocks"
	# Go ahead with swap? Only if existing swap < 4GB. If so, add line to table.
	if (( do_swap_sz > 0 )) && (( swap_sz < 7812500 )); then
		echo "Adding $do_swap_sz blocks of additional swap on backing dev"
		skb="$(( do_swap_sz / 2 ))" # sectors -> KiB
		SLX_WRITABLE_DEVICE_PARTITION_TABLE="$( printf "%s\n%s" "linear slx-swap ${skb}K 0" \
			"$SLX_WRITABLE_DEVICE_PARTITION_TABLE" )"
	fi
fi

parse_config "$SLX_WRITABLE_DEVICE_PARTITION_TABLE"

# Default to thin-snapshot, if none were configured
if [ "${#snapshot[@]}" = 0 ] && [ "${#thin_snapshot[@]}" = 0 ]; then
	# Parse the whole table again with the default entry appended, instead
	# of parsing the default on its own: Every parse_config call starts
	# its accounting with the full device size, so a lone "100%" would
	# claim the entire device and leave nothing for fixed size entries.
	linear=()
	thin_volume=()
	parse_config "$( printf '%s\n%s' "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" \
		"thin-snapshot root 100% 0" )"
fi

# Sanity checks for weird configurations
# XXX These were declared array and now turn into strings...
if [ "${#snapshot[@]}" -gt 1 ]; then
	echo "Multiple snapshots specified, using first one: ${snapshot[0]}"
fi
snapshot="${snapshot[0]}"
if [ "${#thin_snapshot[@]}" -gt 1 ]; then
	echo "Multiple thin-snapshots specified, using first one: ${thin_snapshot[0]}"
fi
thin_snapshot="${thin_snapshot[0]}"
if [ -n "$snapshot" ] && [ -n "$thin_snapshot" ]; then
	echo "$0: Both snapshot and thin-snapshot specified, prefering thin-snapshot."
	snapshot=
fi

###
## LINEAR SLICES
###

# start allocating spaces to the configured devices
declare -g writable_device_used_sz=0

# first, reserve the space for the rootfs cow snapshot (of either type)...
# Entry format: "<name> <crypt> <min_sz> <max_sz>"
read -r name crypt min max ignore <<< "${thin_snapshot:-${snapshot}}"

declare -g scratch_device="/dev/mapper/scratch"
declare -gi scratch_device_sz=0
if (( min <= writable_device_sz )); then
	# Use the configured maximum, capped to what the device offers
	scratch_device_sz="$max"
	(( scratch_device_sz > writable_device_sz )) && scratch_device_sz="$writable_device_sz"
else
	# minimum snapshot size is bigger than physical device size
	echo "$0: Minimum snapshot size is too big for the scratch partition."
	echo "$0: You probably need to use a more conservative value."
	echo "$0: Using this client maximum scratch space ($writable_device_sz sectors)."
	scratch_device_sz="$writable_device_sz"
fi

# Create a linear target for the scratch device. This might seem superfluous,
# but it works around problems when using NVMe as pool data device directly.
if ! dmsetup_create_noudevsync "${scratch_device##*/}" \
		"0 $scratch_device_sz linear $writable_device $writable_device_used_sz"; then
	echo "$0: Failed to create scratch space for the CoW layer."
	# this should never fail, but if it does, we would likely not be able to use
	# $writable_device for any dmsetup stuff, so just fallback to ramdisk
	# until we have a better idea on what to do :)
	ramdisk_fallback
fi
save_partition_info "${scratch_device##*/}" "*" "1" "$scratch_device_sz"

# encrypt the scratch device, if configured
# (not needed if the whole writable device is encrypted already)
if [ -z "$id44_crypted" ]; then
	if [ "$crypt" -ne 0 ] && encrypt_device \
			"$scratch_device" "${scratch_device##*/}-crypt" "$scratch_device_sz"; then
		scratch_device="/dev/mapper/${scratch_device##*/}-crypt"
	else
		echo "$0: Continuing with unencrypted scratch"
	fi
fi

writable_device_used_sz="$scratch_device_sz"

# setup linear slices of the writable device
for line in "${linear[@]}"; do
	[ -z "$line" ] && continue
	read -r name crypt min max ignore <<< "$line"
	[ -n "$id44_crypted" ] && crypt=0
	free_space="$(( writable_device_sz - writable_device_used_sz ))"
	if [ "$min" -gt "$free_space" ]; then
		echo "$0: Not enough space left for linear devices: '$line'"
		break
	fi
	# allocate its max if it fits within the free space, otherwise use the space left.
	to_allocate="$max"
	[ "$to_allocate" -gt "$free_space" ] && to_allocate="$free_space"

	if ! dmsetup_create_noudevsync "$name" "0 $to_allocate linear $writable_device $writable_device_used_sz"; then
		echo "$0: Failed to create linear device: $line"
		continue
	fi
	# TODO sane?
	save_partition_info "$name" "*" "1" "$to_allocate"
	if [ "$crypt" -ne 0 ] && \
			! encrypt_device "/dev/mapper/$name" "${name}-crypt" "$to_allocate"; then
		echo "$0: Failed to encrypt '$name'."
	fi
	writable_device_used_sz=$(( to_allocate + writable_device_used_sz ))
done

###
## THIN-PROVISIONING
###
declare -rg pool_dev="/dev/mapper/pool"
declare -gi root_ntfs_extra=0 # Extra blocks to provision to root fs for later expansion
# Now decide what to do for the writable layer

if [ -n "$thin_snapshot" ] || [ -n "$thin_volume" ]; then
	if ! create_pool ; then
		echo "Failed to create thin pool. Will ignore:"
		echo -e "\tThin snapshot: $(declare -p thin_snapshot)"
		echo -e "\tThin volumes: $(declare -p thin_volume)"
		echo "Trying snapshot fallback..."
		snapshot="$thin_snapshot"
	else
		# the order in which pool devices are created does not matter
		# so start with thin volumes starting with id 2 and end with
		# the thin-snapshot with id 1 which needs to call finish_setup.
		volume_id=2
		# go over thin-volumes
		for line in "${thin_volume[@]}"; do
			[ -z "$line" ] && continue
			read -r name crypt min max ignore <<< "$line"
			[ -n "$id44_crypted" ] && crypt=0
			# thin-volume can be created with max size,
			# since they are overprovisioned anyway.
			if ! create_volume "$name" "$(( volume_id++ ))" "$max"; then
				echo "Failed to create thin volume '$name'."
				# Nothing to record or encrypt without the volume
				continue
			fi
			save_partition_info "$name" "*" "1" "${scratch_device_sz}-${max}"
			if [ "$crypt" -ne 0 ] && ! encrypt_device \
					"/dev/mapper/$name" "$name-crypt" "$max"; then
				echo "Failed to encrypt thin volume '$name'."
			fi
		done

		if [ -n "$thin_snapshot" ]; then
			# create thin-snapshot, use first one
			read -r name crypt min max ignore <<< "$thin_snapshot"
			[ -n "$id44_crypted" ] && crypt=0
			# min/max was used for the pool data device, ignore it here!
			# Calculate how much of the CoW space we reserve for changes in the base
			# system. Usually all the files in the base system should be static, but
			# if someone decided to run apt dist-upgrade, this would change a lot of
			# existing blocks, which is bad.
			# Use MIN( readonly_size / 2, scratch_size / 10 )
			# until we come up with anything better.
			# Given an RO image of 10GB, this gives us:
			# 40GB scratch -> 46GB, so initially 36GB free space
			# 5GB scratch -> 14.5GB, initially 4.5GB free space
			declare -r max_reserved_sz="$(( scratch_device_sz / 10 ))"
			reserved_sz="$(( read_only_device_sz / 2 ))"
			(( reserved_sz > max_reserved_sz )) && reserved_sz="$max_reserved_sz"
			thin_snapshot_sz="$(( scratch_device_sz + read_only_device_sz - reserved_sz ))"
			# For later on-demand growing, overprovision by free space we found on
			# clean NTFS volumes. This requires a user-space helper to listen for
			# dm events in stage4, which should in turn add that free space to the pool-data
			if (( root_ntfs_extra > 0 )); then
				thin_snapshot_sz="$(( thin_snapshot_sz + root_ntfs_extra ))"
			fi
			if ! create_volume "$name" 1 "$thin_snapshot_sz" "$read_only_device"; then
				echo "Failed to create external snapshot for '$read_only_device'."
				ramdisk_fallback
			fi
			# Does not return
			finish_setup "$name" "1" "$thin_snapshot_sz"
		fi
		echo "$0: Thin volumes defined, but no snapshot. Using tmpfs."
		ramdisk_fallback
	fi
fi

###
## SNAPSHOT (OLD FUNCTIONALITY)
###
if [ -n "$snapshot" ] && require_exact_scratch_size; then
	read -r name crypt min max ignore <<< "$snapshot"
	[ -n "$id44_crypted" ] && crypt=0
	# NOTE(review): $persist is not set anywhere in this part of the script,
	# and the CoW device is not passed along, although the message below
	# names $scratch_device - confirm create_snapshot uses the intended one.
	if ! create_snapshot "$name $persist"; then
		echo "Failed to create regular snapshot for '$read_only_device' on '$scratch_device'."
	else
		finish_setup "$name" "1" "$scratch_device_sz"
	fi
fi

# ultimate fallback
ramdisk_fallback
exit 1