From 8e2da8d365b4014d14859f6738eac47b8aba5e96 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 1 Oct 2020 12:19:22 +0200 Subject: [slx-dmsetup] Avoid creating linear targets that are 1:1 mappings Also a bit more cleanup and comments. --- modules.d/slx-dmsetup/scripts/dmsetup-slx-device | 207 +++++++++++++---------- 1 file changed, 120 insertions(+), 87 deletions(-) diff --git a/modules.d/slx-dmsetup/scripts/dmsetup-slx-device b/modules.d/slx-dmsetup/scripts/dmsetup-slx-device index f9bbdd9d..9ef4f22d 100755 --- a/modules.d/slx-dmsetup/scripts/dmsetup-slx-device +++ b/modules.d/slx-dmsetup/scripts/dmsetup-slx-device @@ -143,6 +143,7 @@ create_snapshot() { # the read-only device with a DM zero device to increase its virtual size # by half the RAM size. A sparse file of that size will then be created and # placed on a dedicated tmpfs. +# THIS FUNCTION MUST NEVER RETURN ramdisk_fallback() { echo "$0: Falling back to regular dm-snapshot on a RAMdisk." @@ -158,7 +159,7 @@ ramdisk_fallback() { dmsetup_create_noudevsync zero "0 $ram_size_in_kb zero" dmsetup_create_noudevsync "${extended_device##*/}" \ "0 $read_only_device_size linear $read_only_device 0 - $read_only_device_size $ram_size_in_kb linear /dev/mapper/zero 0" + $read_only_device_size $ram_size_in_kb linear /dev/mapper/zero 0" ) if [ "$?" -eq 0 ]; then read_only_device="$extended_device" @@ -193,6 +194,7 @@ ramdisk_fallback() { done if [ -z "$writable_device" ] || ! create_snapshot "$cow_device_candidate N"; then emergency_shell "CRITICAL: failed to setup RAMdisk fallback." + exit 1 fi finish_setup "$cow_device_candidate" "0" "$read_only_device_size" } @@ -201,8 +203,9 @@ ramdisk_fallback() { # is the device name only, /dev/mapper will be prepended automatically. # denotes if the created device lies in a RAMdisk (0) or is backed by a disk (1). # is given in sectors. +# THIS FUNCTION MUST NEVER RETURN finish_setup() { - if [ -z "$1" ] || [ ! -b "/dev/mapper/$1" ]; then + if [ -z "$1" ] || ! 
[ -b "/dev/mapper/$1" ]; then emergency_shell "'/dev/mapper/$1' not a block device. Failed to setup CoW layer." exit 1 fi @@ -265,7 +268,7 @@ if [ -z "$writable_device" ]; then fi # NOTE: from here on out, every value related to size is in 512 bytes sectors! -declare -g writable_device_size="$( blockdev --getsz "$writable_device" )" +declare -rg writable_device_size="$( blockdev --getsz "$writable_device" )" # If SLX_WRITABLE_DEVICE_PARTITION_TABLE is not set, just do # regular thin-snapshot for the CoW layer, else parse it. @@ -298,16 +301,16 @@ fi # start allocating spaces to the configured devices declare -g writable_device_allocated=0 -# reserve the space for the snapshot (of either type)... + +# first, reserve the space for the rootfs cow snapshot (of either type)... read -r name crypt min max ignore <<< "${thin_snapshot:-${snapshot}}" -declare -g scratch_device_size=0 +declare -g scratch_device="$writable_device" +declare -gi scratch_device_size=0 if (( min <= writable_device_size )); then - scratch_device_size=$max - while (( scratch_device_size >= 0 )) && (( scratch_device_size > writable_device_size )); do - (( scratch_device_size -= 2097152 )) # 1G steps => 2097152 sectors - done + scratch_device_size="$max" (( scratch_device_size < min )) && scratch_device_size="$min" + (( scratch_device_size > writable_device_size )) && scratch_device_size="$writable_device_size" else # minimum snapshot size is bigger than physical device size echo "$0: Minimum snapshot size is too big for the scratch partition." @@ -316,20 +319,12 @@ else scratch_device_size="$writable_device_size" fi -# ... and slice it from the start of the writable device (for performance). -declare -g scratch_device="/dev/mapper/scratch" -if ! dmsetup_create_noudevsync "${scratch_device##*/}" \ - "0 $scratch_device_size linear $writable_device $writable_device_allocated"; then - echo "$0: Failed to create scratch space for the CoW layer." 
- # TODO do not bail directly, but try to to create the linear devices at least? - ramdisk_fallback -fi -save_partition_info "${scratch_device##*/}" "*" "1" "$scratch_device_size" - # encrypt the scratch device, if configured if [ "$crypt" -ne 0 ] && encrypt_device \ "$scratch_device" "${scratch_device##*/}-crypt" "$scratch_device_size"; then - scratch_device="${scratch_device}-crypt" + scratch_device="/dev/mapper/${scratch_device##*/}-crypt" +else + echo "$0: Continuing with unencrypted scratch" fi writable_device_allocated="$scratch_device_size" @@ -360,14 +355,33 @@ for line in "${linear[@]}"; do writable_device_allocated=$(( to_allocate + writable_device_allocated )) done -# we are done with the physical device, use the scratch space from now on -writable_device="$scratch_device" -writable_device_size="$scratch_device_size" +# This will create another dm-linear on top of $scratch_device in case its +# size differs from $scratch_device_size. This is useful for setups where you +# cannot explicitly configure how much space to use from the underlying device, +# and the partition table says not to use the entire $writable_device for cow +require_exact_scratch_size() { + local current_size="$( blockdev --getsz "$scratch_device" )" + (( current_size == scratch_device_size )) && return 0 # Everything fine + if (( current_size < scratch_device_size )); then + echo "$0: WARNING: scratch_device_size is larger than actual device." + echo "$0: This should never happen." + scratch_device_size="$current_size" + return 0 + fi + # We could check if $scratch_device already is a dm target, and just adjust its + # size, but I think that scenario isn't possible, currently. + if ! dmsetup_create_noudevsync "scratch" "0 $scratch_device_size linear $scratch_device 0"; then + echo "$0: Failed to create scratch space for the CoW layer." 
+ return 1 + fi + scratch_device="/dev/mapper/scratch" + save_partition_info "scratch" "*" "1" "$scratch_device_size" + return 0 +} ### ## THIN-PROVISIONING ### -declare -rg pool_data_dev="/dev/mapper/pool-data" declare -rg pool_dev="/dev/mapper/pool" declare -gi root_ntfs_extra=0 # Extra blocks to provision to root fs for later expansion create_pool() { @@ -379,12 +393,12 @@ create_pool() { # create temporary metadata device # calculate number of sectors needed and check boundaries: # XXX Formula from thin-pool.txt calculates size in *bytes*, we want 512b blocks - metadata_dev_size="$(( 48 * writable_device_size / data_block_size / 512 ))" + metadata_dev_size="$(( 48 * scratch_device_size / data_block_size / 512 ))" # If we want NTFS as a backup plan to extend the pool, check if the current size # is less than 100GB, and only then consider this feature. # Maybe make that thresold configurable one day, but the the desktop client # use case this is sensible for now. - if [ "$SLX_NTFSFREE" = "backup" ] && (( writable_device_size < 209715200 )) \ + if [ "$SLX_NTFSFREE" = "backup" ] && (( scratch_device_size < 209715200 )) \ && [ -z "$metadata_persistent" ]; then find_ntfs_partitions if [ -s "$ntfs_list" ]; then @@ -405,19 +419,23 @@ create_pool() { [ "$metadata_dev_size" -lt 4096 ] && metadata_dev_size="4096" # TODO handle the exotic case of a too large metadata device to fit within RAM. [ "$metadata_dev_size" -gt 33554432 ] && metadata_dev_size="33554432" - local writable_device_offset=0 + local scratch_device_offset=0 local metadata_dev= local metadata_persistent= if [ -n "$metadata_persistent" ]; then # create persistent slice of the writable device for the pool metadata if ! dmsetup_create_noudevsync "pool-metadata" \ - "0 $metadata_dev_size linear $writable_device $writable_device_offset"; then + "0 $metadata_dev_size linear $scratch_device $scratch_device_offset"; then echo "$0: Failed to create linear device for pool metadata device." 
else - writable_device_offset="$metadata_dev_size" - writable_device_size=$(( writable_device_size - metadata_dev_size )) + # Adjust size for pool-data down accordingly + scratch_device_offset="$metadata_dev_size" + scratch_device_size=$(( scratch_device_size - metadata_dev_size )) declare -r metadata_dev="/dev/mapper/pool-metadata" # TODO configurable wipe: dd if=/dev/zero of="$metadata_dev" count=1 bs=4096 + # TODO: If we fail later on in this function, we would actually have to destroy + # this target again, and re-adjust the offset and size back, so that the + # snapshot fallback would work properly. Or maybe just don't support fallback. fi fi if [ -z "$metadata_dev" ]; then @@ -427,20 +445,31 @@ create_pool() { dd if=/dev/null of="$metadata_dev" bs=512 seek="$metadata_dev_size" 2> /dev/null declare -r metadata_dev="$( losetup --show --find "$metadata_dev" )" fi - - # Create linear device of the writable device, in case we have an offset from - # the on-disk meta data. Also this way we can easily extend it later. - if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \ - "0 $writable_device_size linear $writable_device $writable_device_offset"; then - echo "$0: Failed to create pool data device on '$writable_device'." + if [ -z "$metadata_dev" ]; then + echo "$0: Could not set up persistent or tmpfs-loop metadata device. Aborting." return 1 fi + + local pool_data_dev + if (( root_ntfs_extra == 0 )) && (( scratch_device_offset == 0 )); then + # No offset, no potential expansion, don't create another linear target + pool_data_dev="$scratch_device" + else + pool_data_dev="/dev/mapper/pool-data" + # Create linear device of the writable device, in case we have an offset from + # the on-disk meta data. Also this way we can easily extend it later. + if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \ + "0 $scratch_device_size linear $scratch_device $scratch_device_offset"; then + echo "$0: Failed to create pool data device on '$scratch_device'." 
+ return 1 + fi + fi local low_water_mark # Convert MB to blocks low_water_mark=$(( wanted_low_mb * 2048 / data_block_size )) if ! dmsetup_create_noudevsync "${pool_dev##*/}" \ - "0 $writable_device_size thin-pool $metadata_dev $pool_data_dev $data_block_size $low_water_mark 1 skip_block_zeroing"; then - echo "$0: Failed to create thin-pool device on '$writable_device'." + "0 $scratch_device_size thin-pool $metadata_dev $pool_data_dev $data_block_size $low_water_mark 1 skip_block_zeroing"; then + echo "$0: Failed to create thin-pool device (meta: $metadata_dev, data: $pool_data_dev)" return 1 fi return 0 @@ -508,65 +537,69 @@ if [ -n "$thin_snapshot" ] || [ -n "$thin_volume" ]; then echo "Failed to create thin pool. Will ignore:" echo -e "\tThin snapshot: $(declare -p thin_snapshot)" echo -e "\tThin volumes: $(declare -p thin_volume)" - ramdisk_fallback - fi - # the order in which pool devices are created does not matter - # so start with thin volumes starting with id 2 and end with - # the thin-snapshot with id 1 which needs to call finish_setup. - volume_id=2 - # go over thin-volumes - for line in "${thin_volume[@]}"; do - [ -z "$line" ] && continue - read -r name crypt min max ignore <<< "$line" - # thin-volume can be created with max size, - # since they are overprovisioned anyway. - if ! create_volume "$name" "$(( volume_id++ ))" "$max"; then - echo "Failed to create thin volume '$name'." - fi - save_partition_info "$name" "*" "1" "${writable_device_size}-${max}" - if [ "$crypt" -ne 0 ] && ! encrypt_device \ - "/dev/mapper/$name" "$name-crypt" "$max"; then - echo "Failed to encrypt thin volume '$name'." - fi - done - - if [ -n "$thin_snapshot" ]; then - # create thin-snapshot, use first one - read -r name crypt min max ignore <<< "$thin_snapshot" - # min/max was used for the pool data device, ignore it here! - # NOTE: the filesystem will most likely malfunction if the size of the - # thin-snapshot is smaller than what it was upon creation. 
- # As such, the size of the thin-snapshot can only be $writable_device_size - # if it is larger than $read_only_device_size, otherwise we should only - # use $read_only_device_size. While live-shrinking the filesystem might be - # an option, it is not supported throughout all fileystems (xfs can't). - if (( writable_device_size >= read_only_device_size )); then - thin_snapshot_size="$writable_device_size" - else - thin_snapshot_size="$read_only_device_size" - fi - # For later on-demand growing - if (( root_ntfs_extra > 0 )); then - thin_snapshot_size="$(( thin_snapshot_size + root_ntfs_extra ))" - fi - if ! create_volume "$name" 1 "$thin_snapshot_size" "$read_only_device"; then - echo "Failed to create external snapshot for '$read_only_device'." - ramdisk_fallback + echo "Trying snapshot fallback..." + snapshot="$thin_snapshot" + else + # the order in which pool devices are created does not matter + # so start with thin volumes starting with id 2 and end with + # the thin-snapshot with id 1 which needs to call finish_setup. + volume_id=2 + # go over thin-volumes + for line in "${thin_volume[@]}"; do + [ -z "$line" ] && continue + read -r name crypt min max ignore <<< "$line" + # thin-volume can be created with max size, + # since they are overprovisioned anyway. + if ! create_volume "$name" "$(( volume_id++ ))" "$max"; then + echo "Failed to create thin volume '$name'." + fi + save_partition_info "$name" "*" "1" "${scratch_device_size}-${max}" + if [ "$crypt" -ne 0 ] && ! encrypt_device \ + "/dev/mapper/$name" "$name-crypt" "$max"; then + echo "Failed to encrypt thin volume '$name'." + fi + done + + if [ -n "$thin_snapshot" ]; then + # create thin-snapshot, use first one + read -r name crypt min max ignore <<< "$thin_snapshot" + # min/max was used for the pool data device, ignore it here! + # NOTE: the filesystem will most likely malfunction if the size of the + # thin-snapshot is smaller than what it was upon creation. 
+ # As such, the size of the thin-snapshot can only be $scratch_device_size + # if it is larger than $read_only_device_size, otherwise we should only + # use $read_only_device_size. While live-shrinking the filesystem might be + # an option, it is not supported throughout all filesystems (xfs can't). + if (( scratch_device_size >= read_only_device_size )); then + thin_snapshot_size="$scratch_device_size" + else + thin_snapshot_size="$read_only_device_size" + fi + # For later on-demand growing + if (( root_ntfs_extra > 0 )); then + thin_snapshot_size="$(( thin_snapshot_size + root_ntfs_extra ))" + fi + if ! create_volume "$name" 1 "$thin_snapshot_size" "$read_only_device"; then + echo "Failed to create external snapshot for '$read_only_device'." + ramdisk_fallback + fi + finish_setup "$name" "1" "$thin_snapshot_size" fi - finish_setup "$name" "1" "$thin_snapshot_size" + echo "$0: Thin volumes defined, but no snapshot. Using tmpfs." + ramdisk_fallback fi fi ### ## SNAPSHOT (OLD FUNCTIONALITY) ### -if [ -n "$snapshot" ]; then +if [ -n "$snapshot" ] && require_exact_scratch_size; then read -r name crypt min max ignore <<< "$snapshot" if ! create_snapshot "$name $persist"; then - echo "Failed to create regular snapshot for '$read_only_device' on '$writable_device'." - ramdisk_fallback + echo "Failed to create regular snapshot for '$read_only_device' on '$scratch_device'." + else + finish_setup "$name" "1" "$scratch_device_size" fi - finish_setup "$name" "1" "$writable_device_size" fi # ultimate fallback -- cgit v1.2.3-55-g7522