From 7eca62ec51d030e959d52d79c370d789ace5334a Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 9 Apr 2025 13:59:49 +0200 Subject: [slx-dmsetup] Split dmsetup-slx-device into smaller services --- modules.d/dnbd3-rootfs/hooks/s3-connect-image.sh | 214 +++++ modules.d/dnbd3-rootfs/hooks/s3-dnbd3root.sh | 215 ----- modules.d/dnbd3-rootfs/module-setup.sh | 2 +- .../slx-dmsetup/bin/dmsetup_create_noudevsync | 25 + modules.d/slx-dmsetup/hooks/dmsetup-slx-device | 903 --------------------- modules.d/slx-dmsetup/hooks/s3-cow-setup.sh | 742 +++++++++++++++++ modules.d/slx-dmsetup/hooks/s3-mount-swap.sh | 10 + modules.d/slx-dmsetup/hooks/s3-prepare-rw-layer.sh | 80 ++ modules.d/slx-dmsetup/hooks/s3-scan-id44.sh | 23 + modules.d/slx-dmsetup/module-setup.sh | 21 +- modules.d/slx-extra-script/module-setup.sh | 3 +- .../services/s3-extra-post-dmsetup.service | 2 +- modules.d/slx-ntfsfree/hooks/s3-ntfsfree.sh | 30 + modules.d/slx-ntfsfree/module-setup.sh | 21 + 14 files changed, 1166 insertions(+), 1125 deletions(-) create mode 100755 modules.d/dnbd3-rootfs/hooks/s3-connect-image.sh delete mode 100755 modules.d/dnbd3-rootfs/hooks/s3-dnbd3root.sh create mode 100755 modules.d/slx-dmsetup/bin/dmsetup_create_noudevsync delete mode 100755 modules.d/slx-dmsetup/hooks/dmsetup-slx-device create mode 100755 modules.d/slx-dmsetup/hooks/s3-cow-setup.sh create mode 100755 modules.d/slx-dmsetup/hooks/s3-mount-swap.sh create mode 100755 modules.d/slx-dmsetup/hooks/s3-prepare-rw-layer.sh create mode 100755 modules.d/slx-dmsetup/hooks/s3-scan-id44.sh create mode 100755 modules.d/slx-ntfsfree/hooks/s3-ntfsfree.sh create mode 100755 modules.d/slx-ntfsfree/module-setup.sh (limited to 'modules.d') diff --git a/modules.d/dnbd3-rootfs/hooks/s3-connect-image.sh b/modules.d/dnbd3-rootfs/hooks/s3-connect-image.sh new file mode 100755 index 00000000..835a5613 --- /dev/null +++ b/modules.d/dnbd3-rootfs/hooks/s3-connect-image.sh @@ -0,0 +1,214 @@ +#!/usr/bin/env bash +type emergency_shell > /dev/null 
2>&1 || source /lib/dracut-lib.sh +source /etc/openslx + +export PATH="/usr/local/bin:$PATH" +export LD_LIBRARY_PATH="/usr/local/lib" + + +# hardcode dnbd device path +declare -rg _dnbd3_dev="/dev/dnbd0" + +settle() { + echo "settle: Waiting for udev" + if command -v timeout &> /dev/null; then + timeout -s 9 15 udevadm settle -t 10 + else + local pid rem + udevadm settle -t 10 & + pid=$! + rem=15 + while (( --rem > 0 )) && kill -0 "$pid"; do + sleep 1 + done + fi + echo "settle: udev done" +} + +# all outputs are redirected to stderr, since this functions should +# only echo the path to the unpacked container to stdout. +container_unpack_raw() { + local in_device="$1" + local out_device="$(losetup -f)" + echo "raw: Preparing to access image at '$in_device' via '$out_device'" + if ! modprobe "loop"; then + echo "raw: Failed to load kernel module: loop" + fi + if ! losetup "$out_device" "$in_device" --partscan; then + echo "raw: Failed to attach '$in_device' to '$out_device'." + return 1 + fi + settle + read_only_device="$out_device" + return 0 +} + +container_unpack_xloop() { + local in_device="$1" + local out_device="$(xlosetup -f)" + echo "xloop: Preparing to access image at '$in_device' via '$out_device'" + for kmod in xloop xloop_file_fmt_qcow xloop_file_fmt_raw; do + if ! modprobe "${kmod}"; then + echo "xloop: Failed to load kernel module: $kmod" + fi + done + if ! xlosetup -r -t QCOW "$out_device" "$in_device" --partscan; then + echo "xloop: Failed to attach '$in_device' to '$out_device'." 
+ return 1 + fi + settle + read_only_device="$out_device" + return 0 +} +# endregion + +# region connect dnbd3 image +# Determine path to dnbd3 image: either on the kcl or via config file +declare -r KCL_DNBD3_IMAGE="$(getarg slx.stage4.path=)" +if [ -n "$KCL_DNBD3_IMAGE" ]; then + SLX_DNBD3_IMAGE="$KCL_DNBD3_IMAGE" + echo "SLX_DNBD3_IMAGE='$SLX_DNBD3_IMAGE'" >> /etc/openslx +fi +if [ -z "$SLX_DNBD3_IMAGE" ]; then + emergency_shell "Failed to determine which DNBD3 image to use." \ + "It was neither specified on kernel command line nor in the" \ + "configuration file." +fi +declare -r KCL_DNBD3_RID="$(getarg slx.stage4.rid=)" +if [ -n "$KCL_DNBD3_RID" ]; then + # specified on the KCL? + SLX_DNBD3_RID="$KCL_DNBD3_RID" + echo "SLX_DNBD3_RID='$SLX_DNBD3_RID'" >> /etc/openslx +fi +unset _dnbd3_client_additional_args +declare -a _dnbd3_client_additional_args +if [ -n "$SLX_DNBD3_RID" ]; then + _dnbd3_client_additional_args=("--rid" "$SLX_DNBD3_RID") +fi + +if ! modprobe dnbd3; then + echo "Failed to load kernel module: dnbd3" +fi + +for try in {1..5} FINAL; do + if [ "$try" = "FINAL" ]; then + emergency_shell "Failed to connect '${SLX_DNBD3_IMAGE}'" \ + "(revision: ${SLX_DNBD3_RID:-0})" \ + "from one of '$SLX_DNBD3_SERVERS' to '$_dnbd3_dev'." \ + "Check if the image exists on one of the servers" \ + "and if any is reachable from this client." + fi + echo "dnbd3-connect: Try ($try/5), trying hosts '$SLX_DNBD3_SERVERS'" + if dnbd3-client \ + --host "$SLX_DNBD3_SERVERS" \ + --image "$SLX_DNBD3_IMAGE" \ + --device "$_dnbd3_dev" \ + "${_dnbd3_client_additional_args[@]}"; then + echo "dnbd3-connect: Connected" + break + fi + sleep 1 +done + +# endregion +# region unpack dnbd3 image +if [[ $SLX_QCOW_HANDLER = xmount ]]; then + emergency_shell "xmount support is unmaintained, broken, and has been removed. Consider using xloop" + exit 1 +fi +if ! 
[[ $SLX_QCOW_HANDLER =~ ^(kernel|xloop|raw)?$ ]]; then + emergency_shell "Unsupported image handler: $SLX_QCOW_HANDLER" \ + "Use either 'raw' or 'xloop'." +fi +if [ -z "$SLX_QCOW_HANDLER" ]; then + SLX_QCOW_HANDLER="xloop" + echo "SLX_QCOW_HANDLER='$SLX_QCOW_HANDLER'" >> /etc/openslx +fi +if [[ $SLX_QCOW_HANDLER =~ ^(kernel|xloop)$ ]]; then + container_unpack_xloop "$_dnbd3_dev" +fi +if [ -z "$read_only_device" ]; then + container_unpack_raw "$_dnbd3_dev" +fi + +# Fail fast if unpacking dnbd3 image failed. +if [ -z "$read_only_device" ]; then + emergency_shell "Failed to unpack the qcow2 image!" +fi + +# endregion +# region find system partition within image +if [[ "$SLX_SYSTEM_PARTITION_IDENTIFIER" =~ ^\+[0-9]+$ ]]; then + # Partition number, e.g. +2 for second partition + # + num="${SLX_SYSTEM_PARTITION_IDENTIFIER#+}" + echo "Got partition number $num from configuration, using that" + if [ -b "${read_only_device}p${num}" ]; then + read_only_partition="${read_only_device}p${num}" + elif [ -b "${read_only_device}${num}" ]; then + read_only_partition="${read_only_device}${num}" + else + emergency_shell "Failed to find partition $num from '$read_only_device'" + fi +else + # Find requested root partition by MBRID, GPT label, or GPT type - default to SLX_SYS label + # + identifier="${SLX_SYSTEM_PARTITION_IDENTIFIER:-SLX_SYS}" + echo "Trying to find partition with ID or label '$identifier' from '$read_only_device'" + declare -a parts + parts=( "${read_only_device}"?* ) + if [ -b "${parts[0]}" ]; then + # There is at least one partition on the device, scan + mapfile -t parts < <( slx-tools dev_find_partitions \ + "$read_only_device" "${identifier}" ) + if [ "${#parts[@]}" = 1 ]; then + # One match, perfect + read_only_partition="${parts[0]}" + echo "Found read-only partition by identifier: $read_only_partition" + elif [ "${#parts[@]}" = 0 ]; then + # Nothing found, scan all partitions. This will include the device itself. 
+ parts=( "$read_only_device"* ) + fi + # If we found 2 or more matching partitions, they'll be probed below + else + # No partitions on device - check if it's a file-system + parts=( "$read_only_device" ) + fi +fi + +if [ -z "$read_only_partition" ]; then + # Do a scan + echo "Warning: Don't know which partition to use, trying all candidates (${parts[*]})" >&2 + p="/tmp/mounttest" + mkdir -p "$p" + for part in "${parts[@]}"; do + if mount -v -o ro "$part" "$p"; then + # See if it looks like a system partition + if [ -x "$p/sbin/init" ] || [ -x "$p/lib/systemd/systemd" ]; then + umount -lf "$p" + echo "Found init on $part, will try to boot off of it." + read_only_partition="$part" + break + fi + umount -lf "$p" + fi + done + echo "Warning: ***** Please fix this by either specifying SLX_SYSTEM_PARTITION_IDENTIFIER or labeling your root partition SLX_SYS *****" >&2 +fi +# endregion + +if [ -z "$read_only_partition" ]; then + echo "Error: Failed to find bootable partition" + exit 1 +fi +if ! [ -b "$read_only_partition" ]; then + echo "Error: Bootable partition $read_only_partition does not exist, or is not a block device" + exit 1 +fi + +echo "Using read-only partition: $read_only_partition" +echo "$read_only_partition" > "/.read_only_device" + +settle + +exit 0 diff --git a/modules.d/dnbd3-rootfs/hooks/s3-dnbd3root.sh b/modules.d/dnbd3-rootfs/hooks/s3-dnbd3root.sh deleted file mode 100755 index e78a007a..00000000 --- a/modules.d/dnbd3-rootfs/hooks/s3-dnbd3root.sh +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env bash -type emergency_shell > /dev/null 2>&1 || source /lib/dracut-lib.sh -source /etc/openslx - -export PATH="/usr/local/bin:$PATH" -export LD_LIBRARY_PATH="/usr/local/lib" - - -# hardcode dnbd device path -declare -rg _dnbd3_dev="/dev/dnbd0" - -settle() { - echo "settle: Waiting for udev" - if command -v timeout &> /dev/null; then - timeout -s 9 15 udevadm settle -t 10 - else - local pid rem - udevadm settle -t 10 & - pid=$! 
- rem=15 - while (( --rem > 0 )) && kill -0 "$pid"; do - sleep 1 - done - fi - echo "settle: udev done" -} - -# all outputs are redirected to stderr, since this functions should -# only echo the path to the unpacked container to stdout. -container_unpack_raw() { - local in_device="$1" - local out_device="$(losetup -f)" - echo "raw: Preparing to access image at '$in_device' via '$out_device'" - if ! modprobe "loop"; then - echo "raw: Failed to load kernel module: loop" - fi - if ! losetup "$out_device" "$in_device" --partscan; then - echo "raw: Failed to attach '$in_device' to '$out_device'." - return 1 - fi - settle - read_only_device="$out_device" - return 0 -} - -container_unpack_xloop() { - local in_device="$1" - local out_device="$(xlosetup -f)" - echo "xloop: Preparing to access image at '$in_device' via '$out_device'" - for kmod in xloop xloop_file_fmt_qcow xloop_file_fmt_raw; do - if ! modprobe "${kmod}"; then - echo "xloop: Failed to load kernel module: $kmod" - fi - done - if ! xlosetup -r -t QCOW "$out_device" "$in_device" --partscan; then - echo "xloop: Failed to attach '$in_device' to '$out_device'." - return 1 - fi - settle - read_only_device="$out_device" - return 0 -} -# endregion - -# region connect dnbd3 image -# Determine path to dnbd3 image: either on the kcl or via config file -declare -r KCL_DNBD3_IMAGE="$(getarg slx.stage4.path=)" -if [ -n "$KCL_DNBD3_IMAGE" ]; then - SLX_DNBD3_IMAGE="$KCL_DNBD3_IMAGE" - echo "SLX_DNBD3_IMAGE='$SLX_DNBD3_IMAGE'" >> /etc/openslx -fi -if [ -z "$SLX_DNBD3_IMAGE" ]; then - emergency_shell "Failed to determine which DNBD3 image to use." \ - "It was neither specified on kernel command line nor in the" \ - "configuration file." -fi -declare -r KCL_DNBD3_RID="$(getarg slx.stage4.rid=)" -if [ -n "$KCL_DNBD3_RID" ]; then - # specified on the KCL? 
- SLX_DNBD3_RID="$KCL_DNBD3_RID" - echo "SLX_DNBD3_RID='$SLX_DNBD3_RID'" >> /etc/openslx -fi -unset _dnbd3_client_additional_args -declare -a _dnbd3_client_additional_args -if [ -n "$SLX_DNBD3_RID" ]; then - _dnbd3_client_additional_args=("--rid" "$SLX_DNBD3_RID") -fi - -if ! modprobe dnbd3; then - echo "Failed to load kernel module: dnbd3" -fi - -for try in {1..5} FINAL; do - if [ "$try" = "FINAL" ]; then - emergency_shell "Failed to connect '${SLX_DNBD3_IMAGE}'" \ - "(revision: ${SLX_DNBD3_RID:-0})" \ - "from one of '$SLX_DNBD3_SERVERS' to '$_dnbd3_dev'." \ - "Check if the image exists on one of the servers" \ - "and if any is reachable from this client." - fi - echo "dnbd3-connect: Try ($try/5), trying hosts '$SLX_DNBD3_SERVERS'" - if dnbd3-client \ - --host "$SLX_DNBD3_SERVERS" \ - --image "$SLX_DNBD3_IMAGE" \ - --device "$_dnbd3_dev" \ - "${_dnbd3_client_additional_args[@]}"; then - echo "dnbd3-connect: Connected" - break - fi - sleep 1 -done - -# endregion -# region unpack dnbd3 image -if [[ $SLX_QCOW_HANDLER = xmount ]]; then - emergency_shell "xmount support is unmaintained, broken, and has been removed. Consider using xloop" - exit 1 -fi -if ! [[ $SLX_QCOW_HANDLER =~ ^(kernel|xloop|raw)?$ ]]; then - emergency_shell "Unsupported image handler: $SLX_QCOW_HANDLER" \ - "Use either 'raw' or 'xloop'." -fi -if [ -z "$SLX_QCOW_HANDLER" ]; then - SLX_QCOW_HANDLER="xloop" - echo "SLX_QCOW_HANDLER='$SLX_QCOW_HANDLER'" >> /etc/openslx -fi -if [[ $SLX_QCOW_HANDLER =~ ^(kernel|xloop)$ ]]; then - container_unpack_xloop "$_dnbd3_dev" -fi -if [ -z "$read_only_device" ]; then - container_unpack_raw "$_dnbd3_dev" -fi - -# Fail fast if unpacking dnbd3 image failed. -if [ -z "$read_only_device" ]; then - emergency_shell "Failed to unpack the qcow2 image!" -fi - -# endregion -# region find system partition within image -if [[ "$SLX_SYSTEM_PARTITION_IDENTIFIER" =~ ^\+[0-9]+$ ]]; then - # Partition number, e.g. 
+2 for second partition - # - num="${SLX_SYSTEM_PARTITION_IDENTIFIER#+}" - echo "Got partition number $num from configuration, using that" - if [ -b "${read_only_device}p${num}" ]; then - read_only_partition="${read_only_device}p${num}" - elif [ -b "${read_only_device}${num}" ]; then - read_only_partition="${read_only_device}${num}" - else - emergency_shell "Failed to find partition $num from '$read_only_device'" - fi -else - # Find requested root partition by MBRID, GPT label, or GPT type - default to SLX_SYS label - # - identifier="${SLX_SYSTEM_PARTITION_IDENTIFIER:-SLX_SYS}" - echo "Trying to find partition with ID or label '$identifier' from '$read_only_device'" - declare -a parts - parts=( "${read_only_device}"?* ) - if [ -b "${parts[0]}" ]; then - # There is at least one partition on the device, scan - mapfile -t parts < <( slx-tools dev_find_partitions \ - "$read_only_device" "${identifier}" ) - if [ "${#parts[@]}" = 1 ]; then - # One match, perfect - read_only_partition="${parts[0]}" - echo "Found read-only partition by identifier: $read_only_partition" - elif [ "${#parts[@]}" = 0 ]; then - # Nothing found, scan all partitions. This will include the device itself. - parts=( "$read_only_device"* ) - fi - # If we found 2 or more matching partitions, they'll be probed below - else - # No partitions on device - check if it's a file-system - parts=( "$read_only_device" ) - fi -fi - -if [ -z "$read_only_partition" ]; then - # Do a scan - echo "Warning: Don't know which partition to use, trying all candidates (${parts[*]})" >&2 - p="/tmp/mounttest" - mkdir -p "$p" - for part in "${parts[@]}"; do - if mount -v -o ro "$part" "$p"; then - # See if it looks like a system partition - if [ -x "$p/sbin/init" ] || [ -x "$p/lib/systemd/systemd" ]; then - umount -lf "$p" - echo "Found init on $part, will try to boot off of it." 
- read_only_partition="$part" - break - fi - umount -lf "$p" - fi - done - echo "Warning: ***** Please fix this by either specifying SLX_SYSTEM_PARTITION_IDENTIFIER or labeling your root partition SLX_SYS *****" >&2 -fi -# endregion - -if [ -z "$read_only_partition" ]; then - echo "Error: Failed to find bootable partition" - exit 1 -fi -if ! [ -b "$read_only_partition" ]; then - echo "Error: Bootable partition $read_only_partition does not exist, or is not a block device" - exit 1 -fi - -echo "Using read-only partition: $read_only_partition" - -# region add rw layer to dnbd3 image -# don't be fooled to think we are done, the next part is crucial -dmsetup-slx-device "$read_only_partition" -settle -# endregion diff --git a/modules.d/dnbd3-rootfs/module-setup.sh b/modules.d/dnbd3-rootfs/module-setup.sh index b8cc10af..67907d10 100755 --- a/modules.d/dnbd3-rootfs/module-setup.sh +++ b/modules.d/dnbd3-rootfs/module-setup.sh @@ -149,7 +149,7 @@ install() { inst_hook cmdline 90 "$moddir/hooks/set-dracut-environment-variables.sh" # make the final blockdevice for the root system (dnbd3 -> xmount -> # device-mapper) - slx_service "s3-dnbd3root" "Setup dnbd3-based block device of rootfs" \ + slx_service "s3-connect-image" "Setup dnbd3-based block device of rootfs" \ --after "s3-fetch-config.service" \ --after "dracut-pre-mount.service" \ --wbefore "dracut-mount.service" \ diff --git a/modules.d/slx-dmsetup/bin/dmsetup_create_noudevsync b/modules.d/slx-dmsetup/bin/dmsetup_create_noudevsync new file mode 100755 index 00000000..ebcf5ed6 --- /dev/null +++ b/modules.d/slx-dmsetup/bin/dmsetup_create_noudevsync @@ -0,0 +1,25 @@ +#!/bin/bash + +# Helper to call 'dmsetup setup' without syncing with udev +# and then actively create the devices with the mknodes command. 
+# Either pass the table contents as $2, or pipe them into the function +# dmsetup_create_noudevsync [table] +( + set -eo pipefail + if [ -n "$2" ]; then + printf "%s\n" "$2" | dmsetup create "$1" --noudevsync + else + dmsetup create "$1" --noudevsync + fi + dmsetup mknodes --noudevsync "$1" + echo "dm: Created $1" +) +ret=$? + +[ -b "/dev/mapper/$1" ] || ret=99 +if [ $ret -ne 0 ]; then + echo "dm: Error creating $1, removing..." + dmsetup remove --noudevsync "$1" +fi + +exit $ret diff --git a/modules.d/slx-dmsetup/hooks/dmsetup-slx-device b/modules.d/slx-dmsetup/hooks/dmsetup-slx-device deleted file mode 100755 index 05a1b823..00000000 --- a/modules.d/slx-dmsetup/hooks/dmsetup-slx-device +++ /dev/null @@ -1,903 +0,0 @@ -#!/usr/bin/env bash -# -# Script to back given read-only device using the block device -# specified by SLX_WRITABLE_DEVICE_IDENTIFIER in the SLX config. -# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is sepcified, it will -# further create device mapper devices accordingly. -# -# Example partition config: -# -# thin-snapshot root 10G 1 -# thin-volume tmp 20G 0 -# linear data0 5-10G 1 -# linear data1 1-50% 1 -# -# NOTE: Encrypting thin-snapshot will actually encrypt the -# entire pool data device used for the pool. -# TODO: Support external keys -# TODO: Put table in file in config.tgz - -drop_shell() { - . /lib/dracut-lib.sh - emergency_shell "$@" - exit 1 -} - -# for debugging purposes -exec {BASH_XTRACEFD}> /run/openslx/dmsetup.log -set -x - -# read-only device to prepare for CoW -[ -n "$1" ] || drop_shell "Read-only device was not given!" 
-[ -b "$1" ] || drop_shell "Given device '$1' does not exist or is not block device" - -declare -g read_only_device="$1" -declare -g read_only_device_sz="$( blockdev --getsz "$1" )" -# Use _sz suffix for sizes expressed in number of 512b sectors, -# _size for random other crap - -declare -rg ntfs_list="/run/openslx/.thin-ntfs-candidates" - -# handle_unit -# Supply percentage, or size in [kmgt]bytes, -# returns appropriate value in number of 512b sectors -handle_unit() { - # default to bytes - local -i potency=0 - local -i val="$1" - case "$2" in - [%]) # These are relative to the writable CoW device - # Allow > 100% for over-provisioning - val="$(( remaining_device_sz * val / 100 ))" - ;; - [Kk]) potency=1 ;;& - [Mm]) potency=2 ;;& - [Gg]) potency=3 ;;& - [Tt]) potency=4 ;;& - *) - # => 1024 ** potency for G, M, K, etc results in bytes - # => bytes / 512 = sectors - val=$(( val * ( 1024 ** potency) / 512 )) - ;; - esac - echo "$val" -} - -parse_config() { - local remaining_device_sz="$writable_device_sz" - # First, handle absolute definitions - parse_config_int "$1" 0 - # Then, distribute relative values to remaining space - parse_config_int "$1" 1 -} - -# global array variables storing the configuration of the partitions -declare -ag linear thin_volume -snapshot= -thin_snapshot= -parse_config_int() { - [ -z "$1" ] && return 1 - local -i rel_only="$2" - while IFS= read -r line || [ -n "$line" ]; do - [ -z "$line" ] && continue - read -r type name range crypt _ <<< "$line" - type=${type//-/_} # to use the type as variable in eval - if ! [[ "$type" =~ \ - ^(linear|snapshot|thin_snapshot|thin_volume)$ ]]; then - echo "$0: Ignoring invalid type: $line" - continue - fi - if [[ -z "$name" ]]; then - echo "$0: Ignoring nameless entry: $line" - continue - fi - unset min_unit max_unit min max - # ranges can be like: 40G, 40-80G, 10G-20% - if ! 
[[ "$range" =~ ^([0-9]+)([GgMmKkBb%]?)(-([0-9]+)([GgMmKkBb%]?))?$ ]]; then - echo "$0: Ignoring invalid range: $line" - continue - fi - local min="${BASH_REMATCH[1]}" - local max="${BASH_REMATCH[4]:-${BASH_REMATCH[1]}}" - local min_unit="${BASH_REMATCH[2]:-${BASH_REMATCH[5]}}" - local max_unit="${BASH_REMATCH[5]:-${BASH_REMATCH[2]}}" - # first pass we handle absolute values unly, second pass relative ones - if [[ "$min_unit" = "%" || "$max_unit" = "%" ]]; then - [ "$rel_only" != 1 ] && continue - else - [ "$rel_only" = 1 ] && continue - fi - if [ -z "$min_unit" ]; then - echo "$0: WARNING: No unit given in range, assuming BYTES: $line" - fi - min="$( handle_unit "$min" "$min_unit" )" - max="$( handle_unit "$max" "$max_unit" )" - if (( min > max )); then - # So, we might end up with something like 30G-100%, but the writable device - # is only 20GB. In that case we most likely want to contine, and not consider - # this an error. So let's try to come up with some logic on what is an error - # and what isn't. Probably anything involving a mix of percentage and - # non-percentage should not be an error. - if [[ "$min_unit" = "%" && "$max_unit" != "%" ]] \ - || [[ "$min_unit" != "%" && "$max_unit" = "%" ]]; then - # Let's hope for the best - max="$min" - else - echo "$0: Ignoring invalid range (min > max): $line" - continue - fi - fi - if ! [[ "$crypt" =~ ^[01]$ ]]; then - echo "$0: Disabling encryption due to invalid crypt argument: $line" - crypt=0 - fi - # finally save it to the global array for this type - case "$type" in - linear) linear+=("${name} ${crypt} ${min} ${max}") ;; - thin_volume) thin_volume+=("${name} ${crypt} ${min} ${max}") ;; - # Special - rootfs, only one makes sense - snapshot) - [ -n "$snapshot" ] && echo "Warning: More than one snapshot declared!" >&2 - snapshot="${name} ${crypt} ${min} ${max}" - ;; - thin_snapshot) - [ -n "$thin_snapshot" ] && echo "Warning: More than one thin_snapshot declared!" 
>&2 - thin_snapshot="${name} ${crypt} ${min} ${max}" - ;; - *) echo "$0: SOMETHING NOT GOOT CHECK SOURCE CODE" ;; - esac - # Decrease for upcoming calculations if we used fixed values here - if [ "$rel_only" != 1 ]; then - (( remaining_device_sz -= ( min + max ) / 2 )) - fi - done <<< "$1" -} - -# Helper to call 'dmsetup setup' without syncing with udev -# and then actively create the devices with the mknodes command. -# Either pass the table contents as $2, or pipe them into the function -# dmsetup_create_noudevsync [table] -dmsetup_create_noudevsync() { - ( - set -eo pipefail - if [ -n "$2" ]; then - printf "%s\n" "$2" | dmsetup create "$1" --noudevsync - else - dmsetup create "$1" --noudevsync - fi - dmsetup mknodes --noudevsync "$1" - echo "dm: Created $1" - ) - local ret=$? - [ -b "/dev/mapper/$1" ] || ret=99 - if [ $ret -ne 0 ]; then - echo "dm: Error creating $1, removing..." - dmsetup remove --noudevsync "$1" - fi - return $ret -} - -# encrypt_device [ ] -encrypt_device() { - # TODO: Send key back to us, demand ransom - modprobe dm-crypt || echo "encrypt: dm-crypt loading failed, maybe builtin?" - if ! [ -b "$1" ]; then - echo "encrypt: Not block device: '$1'" - return 1 - fi - if [ -z "$2" ]; then - echo "encrypt: No name given" - return 1 - fi - local dev_size="$( blockdev --getsz "$1" )" - if [ -z "$dev_size" ]; then - echo "encrypt: Cannot get size of $1" - return 1 - fi - echo "encrypt: Encrypting device $1 as $2" - local size="${4:-0}" - local start="${3:-0}" - # Sanitize (negated check to catch non-numeric values) - if ! [ "$start" -ge 0 ] 2> /dev/null; then - echo "encrypt: Invalid start offset '$start', using 0" - start=0 - fi - if ! 
[ "$size" -gt 0 ] 2> /dev/null; then - echo "encrypt: Invalid end offset '$size', using entire device" - size="$dev_size" - fi - # Put in bounds - if (( start >= dev_size )); then - echo "encrypt: Start offset $start past end of device ($dev_size)" - return 1 - fi - if (( start + size > dev_size )); then - echo "encrypt: End offset ($start + $size) past end of device ($dev_size), truncating" - size="$(( dev_size - start ))" - fi - local key - key="$( < /dev/urandom xxd -c32 -p -l32 )" - [ -z "$key" ] && key="$( < /dev/urandom tr -c -d 'a-f0-9' | dd count=1 bs=32 )" - [ -z "$key" ] && key="$( < /dev/urandom head -c32 | xxd -c32 -p )" - [ -z "$key" ] && key="$( < /dev/urandom xxd -c32 -p | head -n 1 )" - if [ -z "$key" ]; then - echo "encrypt: ERROR: Could not generate encryption key" - return 1 - fi - if ! dmsetup_create_noudevsync "$2" \ - "0 ${size} crypt aes-xts-plain64 $key 0 $1 ${start} 1 allow_discards"; then - echo "encrypt: Failed to encrypt $1." - return 1 - fi - echo "encrypt: Setup successful" - return 0 -} -# create_snapshot " " "cow_device" -create_snapshot() { - modprobe dm-snapshot || echo "snapshot: dm-snapshot loading failed, maybe builtin?" - read -r name persist _ <<< "$1" - echo "snapshot: Creating $name for $read_only_device (using $2)" - if ! dmsetup_create_noudevsync "$name" \ - "0 $read_only_device_sz snapshot $read_only_device $2 ${persist:-N} 8"; then - return 1 - fi - return 0 -} - -# This function is called if no ID44 partition could be found or anoother kind -# of critical error occurs during the CoW layer setup. It will combine the -# the read-only device with a DM zero device to increase its virtual size -# by half the RAM size. A sparse file of that size will then be created and -# placed on a dedicated tmpfs. -# THIS FUNCTION MUST NEVER RETURN -ramdisk_fallback() { - echo "ramdisk: Falling back to regular dm-snapshot on a RAMdisk." 
- - # RAM size in kb, note that this is equal to half - # of the entire RAM when interpreted as 512-bytes sectors. - local ram_cow_sz="$(awk '/^MemTotal:/ { printf("%d\n", $2 ); exit }' /proc/meminfo)" - - # try to prepare the zero extension device - local extended_device="/dev/mapper/${read_only_device##*/}-extended" - modprobe dm-zero - dmsetup_create_noudevsync "${extended_device##*/}" <<-EOF - 0 $read_only_device_sz linear $read_only_device 0 - $read_only_device_sz $ram_cow_sz zero - EOF - local ret="$?" - if [ "$ret" -eq 0 ]; then - read_only_device="$extended_device" - read_only_device_sz="$(( read_only_device_sz + ram_cow_sz ))" - else - echo "ramdisk: Failed to setup the virtual, larger '$read_only_device'." - echo "ramdisk: Continuing with its original size." - fi - - # prepare dedicated tmpfs mount point - echo "ramdisk: Preparing dedicated tmpfs" - local cow_tmpfs="/run/openslx/cow" - if ! mkdir -p "$cow_tmpfs"; then - cow_tmpfs="${cow_tmpfs}.$$.$RANDOM" - mkdir -p "$cow_tmpfs" - fi - if ! mount -t tmpfs cow-tmpfs -o size="$(( read_only_device_sz / 2 + 100 ))k" "$cow_tmpfs"; then - echo "ramdisk: Failed to mount tmpfs in '$cow_tmpfs' of size '$(( read_only_device_sz / 2 + 100 ))KiB', trying to use regular /run tmpfs." - cow_tmpfs="/run" - fi - - # create sparse file there - local file="$cow_tmpfs/tmpfs-snapshot" - if ! truncate -s "$(( read_only_device_sz * 512 ))" "$file" \ - && ! dd if=/dev/null of="$file" seek="$read_only_device_sz" bs=512; then - drop_shell "Failed to allocate RAMdisk CoW file $file." - fi - declare -rg writable_device="$( losetup --show --find "$file" )" - local cow_device_candidate="root" - while [ -b "/dev/mapper/$cow_device_candidate" ]; do - cow_device_candidate="root.$RANDOM" - done - if [ -z "$writable_device" ] || ! create_snapshot "$cow_device_candidate N" "$writable_device"; then - drop_shell "CRITICAL: failed to setup RAMdisk fallback." 
- fi - # [noreturn] - finish_setup "$cow_device_candidate" "0" "$read_only_device_sz" -} - -# finish_setup [] -# is the device name only, /dev/mapper will be prepended automatically. -# denotes if the created device lies in a RAMdisk (0) or is backed by a disk (1). -# is given in sectors. -# THIS FUNCTION MUST NEVER RETURN -finish_setup() { - if [ -z "$1" ] || ! [ -b "/dev/mapper/$1" ]; then - drop_shell "'/dev/mapper/$1' not a block device. Failed to setup CoW layer." - fi - if ! [[ "$2" =~ ^[0-9]$ ]]; then - drop_shell "'$2' not a valid type, 0 or 1 expected." - fi - # optional? - { - echo "# Generated by '$0'." - echo "SLX_DNBD3_DEVICE_COW=/dev/mapper/$1" - } >> /etc/openslx - save_partition_info "$1" "/" "$2" "$3" - exit 0 -} - -# path to save the achieved setup to -declare -rg partitions_config="/run/openslx/dmsetup.state" -cat <<-EOF > "$partitions_config" -# Generated by '$0'. -# Format: -# Options can be: -# * type -> CoW layer type: 0 is RAMdisk, 1 is disk, 2 is network -# * size -> in 512 byte sectors -EOF - -# save_partition_info [] -save_partition_info() { - [ -b "/dev/mapper/$1" ] || return 1 - [ -n "$2" ] || return 1 - [[ "$3" =~ ^[0-9]$ ]] || return 1 - local opts="type=$3" - # plain size given - [[ "$4" =~ ^[0-9]+$ ]] && opts="$opts,physical_size=$4" - # - - [[ "$4" =~ ^[0-9]+-[0-9]+$ ]] && opts="$opts,shared_physical_size=${4%-*},virtual_size=${4#*-}" - echo "/dev/mapper/$1 $2 ${opts}" >> "$partitions_config" -} - -# This will create another dm-linear on top of $scratch_device in case its -# size differs from $scratch_device_sz. 
This is useful for setups where you -# cannot explicitly configure how much space to use from the underlying device, -# and the partition table says not to use the entire $writable_device for cow -require_exact_scratch_size() { - local current_sz="$( blockdev --getsz "$scratch_device" )" - (( current_sz == scratch_device_sz )) && return 0 # Everything fine - echo "exact_scratch: Adding another layer; want: $scratch_device_sz, is: $current_sz" - if (( current_sz < scratch_device_sz )); then - echo "exact_scratch: WARNING: scratch_device_sz is larger than actual device." - echo "exact_scratch: This should never happen." - scratch_device_sz="$current_sz" - return 0 - fi - # We could check if $scratch_device already is a dm target, and just adjust its - # size, but I think that scenario isn't possible, currently. - if ! dmsetup_create_noudevsync "scratch" "0 $scratch_device_sz linear $scratch_device 0"; then - echo "exact_scratch: Failed to create scratch space for the CoW layer." - return 1 - fi - scratch_device="/dev/mapper/scratch" - save_partition_info "scratch" "*" "1" "$scratch_device_sz" - return 0 -} - -create_pool() { - declare -r data_block_sz=256 # Desired Block size (number of 512byte sectors) - declare -r wanted_low_mb=100 # Free space below this will trigger a dm event - # create external snapshot for read-only device - # create remaining thin volumes - echo "pool: Creating thinpool for cow" - modprobe dm-thin-pool || echo "pool: dm-thin-pool load failed, maybe builtin?" - # create temporary metadata device - # calculate number of sectors needed and check boundaries: - # XXX Formula from thin-pool.txt calculates size in *bytes*, we want 512b blocks - metadata_dev_sz="$(( 48 * scratch_device_sz / data_block_sz / 512 ))" - # If we want NTFS as a backup plan to extend the pool, check if the current size - # is less than 100GB, and only then consider this feature. 
- # Maybe make that thresold configurable one day, but for the desktop client - # use case this is sensible for now. - if [ "$SLX_NTFSFREE" = "backup" ] && (( scratch_device_sz < 209715200 )) \ - && [ -z "$metadata_persistent" ]; then - echo "pool: Considering NTFS partitions as backup since pool is small" - find_ntfs_partitions - if [ -s "$ntfs_list" ]; then - # Look what size we end up if we want at least 50GB - local sum="$( awk -v sum=0 \ - '{sum+=$1; if (sum >= 104857600) exit}END{printf "%.0f", sum}' \ - "$ntfs_list" )" - if (( sum > 0 )); then - (( sum > 209715200 )) && sum=209715200 # Max 100GB - # Account for this potential growth in the metadata device size for future expansion - metadata_dev_sz="$(( metadata_dev_sz + 48 * sum / data_block_sz / 512 ))" - echo "$sum" > "/run/openslx/.thin-ntfs-growsize" - root_ntfs_extra="$sum" - fi - fi - fi - # Min 2MB -> 4096 sectors, max 16GB -> 33554432 sectors - [ "$metadata_dev_sz" -lt 4096 ] && metadata_dev_sz="4096" - # TODO handle the exotic case of a too large metadata device to fit within RAM. - [ "$metadata_dev_sz" -gt 33554432 ] && metadata_dev_sz="33554432" - local scratch_device_offset=0 - local metadata_dev= - local metadata_persistent= - if [ -n "$metadata_persistent" ]; then - # create persistent slice of the writable device for the pool metadata - # Currently unused! Needs more work to reliably resume the pool on reboot, - # but only if booting exactly the same image - if ! dmsetup_create_noudevsync "pool-metadata" \ - "0 $metadata_dev_sz linear $scratch_device $scratch_device_offset"; then - echo "pool: Failed to create linear device for pool metadata device." 
- else - # Adjust size for pool-data down accordingly - scratch_device_offset="$metadata_dev_sz" - scratch_device_sz=$(( scratch_device_sz - metadata_dev_sz )) - declare -r metadata_dev="/dev/mapper/pool-metadata" - # TODO configurable wipe: dd if=/dev/zero of="$metadata_dev" count=1 bs=4096 - # TODO: If we fail later on in this function, we would actually have to destroy - # this target again, and re-adjust the offset and size back, so that the - # snapshot fallback would work properly. Or maybe just don't support fallback. - fi - fi - if [ -z "$metadata_dev" ]; then - # create RAMdisk in /run for metadata device - echo "pool: Creating loopdev in tmpfs for metadata" - mkdir -p /run/openslx - metadata_dev="$( mktemp /run/openslx/.pool-metadata.XXXXXX )" - # Create sparse file of required size - truncate -s "$(( metadata_dev_sz * 512 ))" "$metadata_dev" \ - || dd if=/dev/null of="$metadata_dev" bs=512 seek="$metadata_dev_sz" - declare -r metadata_dev="$( losetup --show --find "$metadata_dev" )" - fi - if [ -z "$metadata_dev" ]; then - echo "pool: Could not set up persistent or tmpfs-loop metadata device. Aborting." - return 1 - fi - - local pool_data_dev - if (( root_ntfs_extra == 0 )) && (( scratch_device_offset == 0 )); then - # No offset, no potential expansion, don't create another linear target - pool_data_dev="$scratch_device" - else - echo "pool: Creating additional linear target for data device" - pool_data_dev="/dev/mapper/pool-data" - # Create linear device of the writable device, in case we have an offset from - # the on-disk meta data. Also this way we can easily extend it later. - if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \ - "0 $scratch_device_sz linear $scratch_device $scratch_device_offset"; then - echo "pool: Failed to create pool data device on '$scratch_device'." 
- return 1 - fi - fi - local low_water_mark - # Convert MB to blocks - low_water_mark=$(( wanted_low_mb * 2048 / data_block_sz )) - echo "pool: Creating thinpool device" - if ! dmsetup_create_noudevsync "${pool_dev##*/}" \ - "0 $scratch_device_sz thin-pool $metadata_dev $pool_data_dev $data_block_sz $low_water_mark 1 skip_block_zeroing"; then - echo "pool: Failed to create thin-pool device (meta: $metadata_dev, data: $pool_data_dev)" - return 1 - fi - return 0 -} - -# create_volume [backing_dev] -create_volume() { - if [ -z "$pool_dev" ] || ! [ -b "$pool_dev" ]; then - echo "volume: Global pool device not set or present." - return 1 - fi - if [ $# -lt 3 ] || [ -z "$1" ]; then - echo "volume: not enough arguments." - return 1 - fi - local name="$1" - local id="$2" - local size="$3" - local backing_dev="$4" # Optional, internal if empty - - echo "volume: Creating $id/$name on $pool_dev" - if ! dmsetup message "$pool_dev" 0 "create_thin $id"; then - echo "volume: Failed to create thin volume with id '$id' in pool '$pool_dev'." - echo "volume: It might already exist, trying anyway..." - fi - if ! dmsetup_create_noudevsync "$name" "0 $size thin $pool_dev $id $backing_dev"; then - echo "volume: Failed to create external snapshot named '$name':" - echo " Size: $size" - echo " Backing device: $backing_dev" - echo " Thin volume id: $id" - return 1 - fi - return 0 -} - -# Find NTFS partitions with decently sized ranges of -# free space. We can use these as our writable layer -# for our thin-pool, if configured. -# If suitable, this will create the file $ntfs_list with -# one line per suitable partition, format -# total_size_blocks devpath -# Results are sorted by size, descending order -find_ntfs_partitions() { - [ -z "$SLX_NTFSFREE" ] && return - [ "$SLX_NTFSFREE" = "never" ] && return - [ -e "$ntfs_list" ] && return - echo "ntfs: Scanning for suitable NTFS partitions to use as writable device" - if ! 
command -v ntfsfree &> /dev/null; then - echo "ntfs: ntfsfree not found, cannot use NTFS partitions as RW layer" - return - fi - local part sum ro dev - ntfs_extra_space_sz=0 - for part in /dev/disk/by-partuuid/*; do - # Skip empty/ro devices - dev="$( readlink -f "$part" )" - dev="${dev##*/}" - ro="$( cat "/sys/class/block/${dev}/ro" )" - [ "$ro" = 1 ] && continue - # Only count ranges >= 256MB, sum will be in number of 512b blocks - sum="$( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$part" 2> /dev/null \ - | awk -v sum=0 '{if ($1 == "Range") sum += $4}END{printf "%.0f", sum}' )" - # Only consider volume if sum of these ranges > 1GB (this is BLOCKS, not bytes) - (( sum > 2 * 1024 * 1024 )) || continue - echo "$sum $part" # only thing in loop going to stdout - (( ntfs_extra_space_sz += sum )) - done | sort -nr > "$ntfs_list" - echo "ntfs: Found $( wc -l < "$ntfs_list" ) suitable partitions" -} -ntfs_extra_space_sz=0 - -### -## MAIN -### - -. /etc/openslx - -. slx-tools -# "Preload" functions by executing them NOT in a subshell -dev_find_partitions &> /dev/null -dev_swap_version &> /dev/null - -# This is the main variable driving this script -declare -g id44_crypted= -declare -g writable_device= -if [ -z "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then - SLX_WRITABLE_DEVICE_IDENTIFIER="44 87f86132-ff94-4987-b250-444444444444" - echo "SLX_WRITABLE_DEVICE_IDENTIFIER='${SLX_WRITABLE_DEVICE_IDENTIFIER}'" >> /etc/openslx -fi - -if [ -n "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then - unset writable_devices - declare -a writable_devices - read -r -a list <<<"$SLX_WRITABLE_DEVICE_IDENTIFIER" - echo "Scanning for partitions with type/label ${list[*]}..." 
- while read -r -a list; do - writable_devices+=( "${list[@]}" ) - done < <( dev_find_partitions "${list[@]}" ) - echo "Found ${#writable_devices[@]} matching partitions" - if [[ "${#writable_devices[@]}" -eq 0 && "$SLX_NTFSFREE" != "never" ]] || [ "$SLX_NTFSFREE" = "always" ]; then - find_ntfs_partitions - fi - if [ -s "$ntfs_list" ] || [[ "${#writable_devices[@]}" -gt 1 ]]; then - # More than one device, and/or NTFS space, need linear - echo "Have more than one writable device, creating linear target" - tbl="/run/openslx/dmsetup-linear-id44" - pos=0 - grow_max_sz=9999999999 - for dev in "${writable_devices[@]}"; do - max="$(( grow_max_sz - pos ))" - (( max <= 0 )) && break - sz="$( blockdev --getsz "$dev" )" - (( sz > 0 )) || continue - (( sz > max )) && sz="$max" - echo "$pos $sz linear $dev 0" - (( pos += sz )) - done > "$tbl" - if [ -s "$ntfs_list" ]; then - sum= - while read -r sum dev _ || [ -n "$sum" ]; do # each dev - echo "Appending NTFS partition $dev..." - word= - while read -r word range_start_b _ range_sz _ || [ -n "$word" ]; do # each slice of dev - [ "$word" = "Range" ] || continue - (( range_sz > 0 )) || continue - slice_sz="$(( grow_max_sz - pos ))" - (( slice_sz <= 0 )) && break - (( slice_sz > range_sz )) && slice_sz="$range_sz" - # Append line - if echo "$pos $slice_sz linear $dev $range_start_b" >> "$tbl"; then - # Update counter - (( pos += slice_sz )) - else - echo "Could not write new table row into $tbl" - fi - done < <( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$dev" ) - done < "$ntfs_list" - # Don't try to add NTFS space again later - SLX_NTFSFREE="never" - sed -i "s/^SLX_NTFSFREE.*$/# & # disabled in stage3\nSLX_NTFSFREE='never'/" "/etc/openslx" - rm -f -- "$ntfs_list" - fi - # See if we need a linear target at all - if ! 
[ -s "$tbl" ]; then - echo "Empty tmp/id44 table, fallback to RAM" - elif [ "$( wc -l < "$tbl" )" -eq 1 ] && [[ "${#writable_devices[@]}" -ge 1 ]]; then - # Only one line, have writable device -> use directly - echo "Table somehow ended up with one entry, discarding" - writable_device="${writable_devices[0]}" - else - # set up linera device - echo "Setting up linear id44 device with $( wc -l < "$tbl" ) slices" - if ! dmsetup_create_noudevsync "id44-group" < "$tbl"; then - echo "$0: Error creating group of writable devices. Fallback to RAM :-(" - else - writable_device="/dev/mapper/id44-group" - fi - fi - else - # Single device - echo "Have a single writable device, using it directly" - writable_device="${writable_devices[0]}" - fi -fi -if [ -z "$writable_device" ]; then - echo "Could not find writable device with id(s) '$SLX_WRITABLE_DEVICE_IDENTIFIER'." - ramdisk_fallback -elif is_on "$SLX_ID44_CRYPT"; then - # Config option crypts the entire ID44 device(s), before any slices are taken from it. - if encrypt_device "$writable_device" "id44-crypt"; then - writable_device="/dev/mapper/id44-crypt" - # Remember the whole device is already encrypted, and ignore the crypt flag for the partition table later - id44_crypted=1 - fi -fi - -# NOTE: from here on out, every value related to size is in 512 bytes sectors! -declare -rg writable_device_sz="$( blockdev --getsz "$writable_device" )" - -# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is not set, just do -# regular thin-snapshot for the CoW layer, else parse it. -if [ -z "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" ]; then - SLX_WRITABLE_DEVICE_PARTITION_TABLE="thin-snapshot root 100% 0" -fi - -# extra swap? 
-if grep -qFw 'slx.swap' "/proc/cmdline"; then - # Only if our basic writable_device is large enough, or we have ntfs backup - echo "Additional swap on ID44 requested if existing is too small" - do_swap_sz=0 - if (( writable_device_sz > 80078125 )); then - # more than ~40GB, go ahead - do_swap_sz="$(( ( writable_device_sz - 70312500 ) / 2 ))" - # cap to 6GB - (( do_swap_sz > 11718750 )) && do_swap_sz=11718750 - elif [ "$SLX_NTFSFREE" = "backup" ] \ - && (( ntfs_extra_space_sz > 70312500 )) && (( writable_device_sz > 11718750 )); then - # more than 40GB NTFS backup space, more than 6GB ID44, make 4GB swap - do_swap_sz=7812500 - fi - # Check how many we have and if they're regular, unencrypted ones. - # If it's plenty, don't cut out swap from our backing device - swap_sz=0 - if (( do_swap_sz == 0 )); then - echo "Not enough ID44 space for swap..." - else - for part in $( dev_find_partitions "82" "0657fd6d-a4ab-43c4-84e5-0933c84b4f4f" ); do - dev_swap_version "$part" &> /dev/null || continue - this_sz="$( blockdev --getsz "$part" )" - (( this_sz > 0 )) && (( swap_sz += this_sz )) - done - echo "Have existing swap of $(( swap_sz / 2 / 1024 ))MiB" - (( do_swap_sz -= swap_sz )) - # Go ahead with swap? Only if we miss a reasonable amount... ( > 100MiB) - if (( do_swap_sz > 204800 )); then - echo "Adding $(( do_swap_sz / 2 / 1024 ))MiB of additional swap on backing dev" - skb="$(( do_swap_sz / 2 ))" - SLX_WRITABLE_DEVICE_PARTITION_TABLE="$( printf "%s\n%s" "linear slx-swap ${skb}K 0" \ - "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" )" - fi - fi -fi - -parse_config "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" - -# Default to thin-snapshot, if none were configured -if [ -z "${snapshot}" ] && [ -z "${thin_snapshot}" ]; then - parse_config "thin-snapshot root 100% 0" -fi - -# Sanity checks for weird configurations -if [ -n "$snapshot" ] && [ -n "$thin_snapshot" ]; then - echo "Warning: Both snapshot and thin-snapshot specified, prefering thin-snapshot." 
>&2 -fi - -### -## LINEAR SLICES -### - -# start allocating spaces to the configured devices -declare -g writable_device_used_sz=0 -declare -g pool_crypted= - -# first, reserve the space for the rootfs cow snapshot (of either type)... -# (this is the first line of our custom partition table) -read -r name crypt min max _ <<< "${thin_snapshot:-${snapshot}}" - -declare -g scratch_device="/dev/mapper/scratch" -declare -gi scratch_device_sz=0 -if (( min <= writable_device_sz )); then - scratch_device_sz="$max" - (( scratch_device_sz > writable_device_sz )) && scratch_device_sz="$writable_device_sz" -else - # minimum snapshot size is bigger than physical device size - echo "Warning: Minimum snapshot size is too big for the scratch partition." >&2 - echo "Warning: You probably need to use a more conservative value." >&2 - echo "Warning: Using this client maximum scratch space ($writable_device_sz sectors)." >&2 - scratch_device_sz="$writable_device_sz" -fi - -if (( scratch_device_sz == writable_device_sz )); then - # Only one, use directly, maybe crypt - if [ -z "$id44_crypted" ] && [ "$crypt" -ne 0 ]; then - if ! encrypt_device "$writable_device" "${scratch_device##*/}" 0 "$scratch_device_sz"; then - echo "Warning: Continuing with unencrypted scratch" >&2 - scratch_device="$writable_device" - fi - else - # Noop - scratch_device="$writable_device" - fi -else - # Smaller slice requested, device mapper to the rescure - # Round down to 4k border, so next slice won't be misaligned if we're on a 4k sector disk - scratch_device_sz="$(( (scratch_device_sz / 8) * 8 ))" - - # encrypt the scratch device, if configured - if [ -z "$id44_crypted" ] && (( crypt != 0 )); then - if encrypt_device "$writable_device" "${scratch_device##*/}" 0 "$scratch_device_sz"; then - pool_crypted=1 - else - echo "Warning: Continuing with unencrypted scratch" >&2 - crypt=0 # So we do the linear thing below - fi - fi - if (( crypt == 0 )) && ! 
dmsetup_create_noudevsync "${scratch_device##*/}" \ - "0 $scratch_device_sz linear $writable_device $writable_device_used_sz"; then - echo "Error: Failed to create scratch space for the CoW layer." >&2 - # this should never fail, but if it does, we would likely not be able to use - # $writable_device for any dmsetup stuff, so just fallback to ramdisk - # until we have a better idea on what to do :) - ramdisk_fallback - fi -fi -save_partition_info "${scratch_device##*/}" "*" "1" "$scratch_device_sz" - -writable_device_used_sz="$scratch_device_sz" - -# setup linear slices of the writable device -for line in "${linear[@]}"; do - [ -z "$line" ] && continue - read -r name crypt min max _ <<< "$line" - [ -n "$id44_crypted" ] && crypt=0 - echo "Creating linear slice '$name'..." - free_space="$(( writable_device_sz - writable_device_used_sz ))" - if [ "$min" -gt "$free_space" ]; then - echo "Error: Not enough space left for linear device $name - have $free_space sectors, need $min" >&2 - continue - fi - # allocate its max if it fits within the free space, otherwise use the space left. - to_allocate="$max" - (( to_allocate > free_space )) && to_allocate="$free_space" - # as above, round down to align on 4k sector devices - to_allocate="$(( (to_allocate / 8) * 8 ))" - - if (( crypt != 0 )) \ - && ! encrypt_device "$writable_device" "${name}" "$writable_device_used_sz" "$to_allocate"; then - echo "Warning: Failed to encrypt '$name', continuing without encryption." >&2 - crypt=0 - fi - if (( crypt == 0 )) && ! dmsetup_create_noudevsync \ - "$name" "0 $to_allocate linear $writable_device $writable_device_used_sz"; then - echo "Warning: Failed to create linear device: $line" >&2 - continue - fi - # TODO sane? 
- save_partition_info "$name" "*" "1" "$to_allocate" - writable_device_used_sz=$(( to_allocate + writable_device_used_sz )) -done - -### -## THIN-PROVISIONING -### -declare -rg pool_dev="/dev/mapper/pool" -declare -gi root_ntfs_extra=0 # Extra blocks to provision to root fs for later expansion -# Now decide what to do for the writable layer - -if [ -n "$thin_snapshot" ] || [ -n "${thin_volume[*]}" ]; then - if ! create_pool ; then - { - echo "Error: Failed to create thin pool. Will ignore:" - echo " Thin snapshot: $(declare -p thin_snapshot)" - echo " Thin volumes: $(declare -p thin_volume)" - echo "Trying snapshot fallback..." - } >&2 - [ -z "$snapshot" ] && snapshot="$thin_snapshot" - else - # Once we have created the pool, there is no point in snapshot fallback, - # as the space is already reserved by the pool - snapshot= - # the order in which pool devices are created does not matter - # so start with thin volumes starting with id 2 and end with - # the thin-snapshot with id 1 which needs to call finish_setup. - volume_id=2 - # go over thin-volumes - for line in "${thin_volume[@]}"; do - [ -z "$line" ] && continue - read -r name crypt min max _ <<< "$line" - if [ -n "$id44_crypted" ] || [ -n "$pool_crypted" ]; then - crypt=0 - fi - echo "Adding thin volume '$name'..." - # thin-volume can be created with max size, - # since they are overprovisioned anyway. - suffix= - (( crypt != 0 )) && suffix="-plain" - if ! create_volume "$name$suffix" "$(( volume_id++ ))" "$max"; then - echo "Error: Failed to create thin volume $name" >&2 - continue - fi - if (( crypt != 0 )) && ! encrypt_device \ - "/dev/mapper/$name$suffix" "$name" 0 "$max"; then - echo "Warning: Failed to encrypt thin volume '$name', continuing without encryption." 
>&2 - name="$name$suffix" - fi - save_partition_info "$name" "*" "1" "${scratch_device_sz}-${max}" - done - - if [ -n "$thin_snapshot" ]; then - # create thin-snapshot, use first one - read -r name _ <<< "$thin_snapshot" - echo "Adding base system snapshot '$name'..." - # min/max and crypt was used for the pool data device, ignore it here! - # Calculate how much of the CoW space we reserve for changes in the base - # system. Usually all the files in the base system should be static, but - # if someone decided to run apt dist-upgrade, this would change a lot of - # existing blocks, which is bad. - # Use MIN( readonly_size / 2, scratch_size / 10 ) - # until we come up with anything better. - # Given an RO image of 10GB, this gives us: - # 40GB scratch -> 46GB, so initially 36GB free space - # 5GB scratch -> 14.5GB, initially 4.5GB free space - declare -r max_reserved_sz="$(( scratch_device_sz / 10 ))" - reserved_sz="$(( read_only_device_sz / 2 ))" - (( reserved_sz > max_reserved_sz )) && reserved_sz="$max_reserved_sz" - thin_snapshot_sz="$(( scratch_device_sz + read_only_device_sz - reserved_sz ))" - # For later on-demand growing, overprovision by free space we found on - # clean NTFS volumes. This requires a user-space helper to listen for - # dm events in stage4, which should in turn add that free space to the pool-data - if (( root_ntfs_extra > 0 )); then - thin_snapshot_sz="$(( thin_snapshot_sz + root_ntfs_extra ))" - fi - if ! create_volume "$name" 1 "$thin_snapshot_sz" "$read_only_device"; then - echo "Error: Failed to create external snapshot for '$read_only_device'." >&2 - ramdisk_fallback # does not return - fi - finish_setup "$name" "1" "$thin_snapshot_sz" - fi - echo "Warning: Thin volumes defined, but no snapshot. Using tmpfs...." 
>&2 - ramdisk_fallback - fi -fi - -### -## SNAPSHOT (OLD FUNCTIONALITY) -### -if [ -n "$snapshot" ] && require_exact_scratch_size; then - read -r name crypt min max _ <<< "$snapshot" - [ -n "$id44_crypted" ] && crypt=0 - suffix= - (( crypt != 0 )) && suffix="-plain" - if ! create_snapshot "$name$suffix N" "$scratch_device"; then - ramdisk_fallback # no return - fi - if (( crypt != 0 )) && ! encrypt_device "/dev/mapper/$name$suffix" "$name" 0 "$max"; then - echo "Warning: Failed to encrypt snapshot $name, continuing without encryption." >&2 - name="$name$suffix" - fi - finish_setup "$name" "1" "$scratch_device_sz" -fi - -# ultimate fallback -ramdisk_fallback -exit 1 diff --git a/modules.d/slx-dmsetup/hooks/s3-cow-setup.sh b/modules.d/slx-dmsetup/hooks/s3-cow-setup.sh new file mode 100755 index 00000000..75f0c0c4 --- /dev/null +++ b/modules.d/slx-dmsetup/hooks/s3-cow-setup.sh @@ -0,0 +1,742 @@ +#!/usr/bin/env bash +# +# Script to back given read-only device using the block device +# specified by SLX_WRITABLE_DEVICE_IDENTIFIER in the SLX config. +# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is sepcified, it will +# further create device mapper devices accordingly. +# +# Example partition config: +# +# thin-snapshot root 10G 1 +# thin-volume tmp 20G 0 +# linear data0 5-10G 1 +# linear data1 1-50% 1 +# +# NOTE: Encrypting thin-snapshot will actually encrypt the +# entire pool data device used for the pool. +# TODO: Support external keys +# TODO: Put table in file in config.tgz + +drop_shell() { + . 
/lib/dracut-lib.sh + emergency_shell "$@" + exit 1 +} + +# for debugging purposes +exec {BASH_XTRACEFD}> /run/openslx/dmsetup.log +set -x + +# read-only device to prepare for CoW +[ -s "/.read_only_device" ] && read_only_device="$( cat "/.read_only_device" )" +[ -n "$read_only_device" ] || read_only_device="$1" +[ -n "$read_only_device" ] || drop_shell 'Read only device neither given via /.read_only_device nor $1' + +[ -b "$read_only_device" ] || drop_shell "Given device '$1' does not exist or is not block device" + +declare -g read_only_device_sz="$( blockdev --getsz "$read_only_device" )" +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# !! Use _sz suffix for sizes expressed in number of 512b sectors, !! +# !! _size for random other crap !! +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +if ! (( read_only_device_sz > 0 )); then + drop_shell "Could not determine size of read only device '$read_only_device'" +fi + +# handle_unit +# Supply percentage, or size in [kmgt]bytes, +# returns appropriate value in number of 512b sectors +handle_unit() { + # default to bytes + local -i potency=0 + local -i val="$1" + case "$2" in + [%]) # These are relative to the writable CoW device + # Allow > 100% for over-provisioning + val="$(( remaining_device_sz * val / 100 ))" + ;; + [Kk]) potency=1 ;;& + [Mm]) potency=2 ;;& + [Gg]) potency=3 ;;& + [Tt]) potency=4 ;;& + *) + # => 1024 ** potency for G, M, K, etc results in bytes + # => bytes / 512 = sectors + val=$(( val * ( 1024 ** potency) / 512 )) + ;; + esac + echo "$val" +} + +parse_config() { + local remaining_device_sz="$writable_device_sz" + # First, handle absolute definitions + parse_config_int "$1" 0 + # Then, distribute relative values to remaining space + parse_config_int "$1" 1 +} + +# global array variables storing the configuration of the partitions +declare -ag linear thin_volume +snapshot= +thin_snapshot= +parse_config_int() { + [ -z "$1" ] && return 1 + local -i 
rel_only="$2" + while IFS= read -r line || [ -n "$line" ]; do + [ -z "$line" ] && continue + read -r type name range crypt _ <<< "$line" + type=${type//-/_} # to use the type as variable in eval + if ! [[ "$type" =~ \ + ^(linear|snapshot|thin_snapshot|thin_volume)$ ]]; then + echo "$0: Ignoring invalid type: $line" + continue + fi + if [[ -z "$name" ]]; then + echo "$0: Ignoring nameless entry: $line" + continue + fi + unset min_unit max_unit min max + # ranges can be like: 40G, 40-80G, 10G-20% + if ! [[ "$range" =~ ^([0-9]+)([GgMmKkBb%]?)(-([0-9]+)([GgMmKkBb%]?))?$ ]]; then + echo "$0: Ignoring invalid range: $line" + continue + fi + local min="${BASH_REMATCH[1]}" + local max="${BASH_REMATCH[4]:-${BASH_REMATCH[1]}}" + local min_unit="${BASH_REMATCH[2]:-${BASH_REMATCH[5]}}" + local max_unit="${BASH_REMATCH[5]:-${BASH_REMATCH[2]}}" + # first pass we handle absolute values unly, second pass relative ones + if [[ "$min_unit" = "%" || "$max_unit" = "%" ]]; then + [ "$rel_only" != 1 ] && continue + else + [ "$rel_only" = 1 ] && continue + fi + if [ -z "$min_unit" ]; then + echo "$0: WARNING: No unit given in range, assuming BYTES: $line" + fi + min="$( handle_unit "$min" "$min_unit" )" + max="$( handle_unit "$max" "$max_unit" )" + if (( min > max )); then + # So, we might end up with something like 30G-100%, but the writable device + # is only 20GB. In that case we most likely want to contine, and not consider + # this an error. So let's try to come up with some logic on what is an error + # and what isn't. Probably anything involving a mix of percentage and + # non-percentage should not be an error. + if [[ "$min_unit" = "%" && "$max_unit" != "%" ]] \ + || [[ "$min_unit" != "%" && "$max_unit" = "%" ]]; then + # Let's hope for the best + max="$min" + else + echo "$0: Ignoring invalid range (min > max): $line" + continue + fi + fi + if ! 
[[ "$crypt" =~ ^[01]$ ]]; then + echo "$0: Disabling encryption due to invalid crypt argument: $line" + crypt=0 + fi + # finally save it to the global array for this type + case "$type" in + linear) linear+=("${name} ${crypt} ${min} ${max}") ;; + thin_volume) thin_volume+=("${name} ${crypt} ${min} ${max}") ;; + # Special - rootfs, only one makes sense + snapshot) + [ -n "$snapshot" ] && echo "Warning: More than one snapshot declared!" >&2 + snapshot="${name} ${crypt} ${min} ${max}" + ;; + thin_snapshot) + [ -n "$thin_snapshot" ] && echo "Warning: More than one thin_snapshot declared!" >&2 + thin_snapshot="${name} ${crypt} ${min} ${max}" + ;; + *) echo "$0: SOMETHING NOT GOOT CHECK SOURCE CODE" ;; + esac + # Decrease for upcoming calculations if we used fixed values here + if [ "$rel_only" != 1 ]; then + (( remaining_device_sz -= ( min + max ) / 2 )) + fi + done <<< "$1" +} + +# encrypt_device [ ] +encrypt_device() { + # TODO: Send key back to us, demand ransom + modprobe dm-crypt || echo "encrypt: dm-crypt loading failed, maybe builtin?" + if ! [ -b "$1" ]; then + echo "encrypt: Not block device: '$1'" + return 1 + fi + if [ -z "$2" ]; then + echo "encrypt: No name given" + return 1 + fi + local dev_size="$( blockdev --getsz "$1" )" + if [ -z "$dev_size" ]; then + echo "encrypt: Cannot get size of $1" + return 1 + fi + echo "encrypt: Encrypting device $1 as $2" + local size="${4:-0}" + local start="${3:-0}" + # Sanitize (negated check to catch non-numeric values) + if ! [ "$start" -ge 0 ] 2> /dev/null; then + echo "encrypt: Invalid start offset '$start', using 0" + start=0 + fi + if ! 
[ "$size" -gt 0 ] 2> /dev/null; then + echo "encrypt: Invalid end offset '$size', using entire device" + size="$dev_size" + fi + # Put in bounds + if (( start >= dev_size )); then + echo "encrypt: Start offset $start past end of device ($dev_size)" + return 1 + fi + if (( start + size > dev_size )); then + echo "encrypt: End offset ($start + $size) past end of device ($dev_size), truncating" + size="$(( dev_size - start ))" + fi + local key + key="$( < /dev/urandom xxd -c32 -p -l32 )" + [ -z "$key" ] && key="$( < /dev/urandom tr -c -d 'a-f0-9' | dd count=1 bs=32 )" + [ -z "$key" ] && key="$( < /dev/urandom head -c32 | xxd -c32 -p )" + [ -z "$key" ] && key="$( < /dev/urandom xxd -c32 -p | head -n 1 )" + if [ -z "$key" ]; then + echo "encrypt: ERROR: Could not generate encryption key" + return 1 + fi + if ! dmsetup_create_noudevsync "$2" \ + "0 ${size} crypt aes-xts-plain64 $key 0 $1 ${start} 1 allow_discards"; then + echo "encrypt: Failed to encrypt $1." + return 1 + fi + echo "encrypt: Setup successful" + return 0 +} +# create_snapshot " " "cow_device" +create_snapshot() { + modprobe dm-snapshot || echo "snapshot: dm-snapshot loading failed, maybe builtin?" + read -r name persist _ <<< "$1" + echo "snapshot: Creating $name for $read_only_device (using $2)" + if ! dmsetup_create_noudevsync "$name" \ + "0 $read_only_device_sz snapshot $read_only_device $2 ${persist:-N} 8"; then + return 1 + fi + return 0 +} + +# This function is called if no ID44 partition could be found or anoother kind +# of critical error occurs during the CoW layer setup. It will combine the +# the read-only device with a DM zero device to increase its virtual size +# by half the RAM size. A sparse file of that size will then be created and +# placed on a dedicated tmpfs. +# THIS FUNCTION MUST NEVER RETURN +ramdisk_fallback() { + echo "ramdisk: Falling back to regular dm-snapshot on a RAMdisk." 
+ + # RAM size in kb, note that this is equal to half + # of the entire RAM when interpreted as 512-bytes sectors. + local ram_cow_sz="$(awk '/^MemTotal:/ { printf("%d\n", $2 ); exit }' /proc/meminfo)" + + # try to prepare the zero extension device + local extended_device="/dev/mapper/${read_only_device##*/}-extended" + modprobe dm-zero + dmsetup_create_noudevsync "${extended_device##*/}" <<-EOF + 0 $read_only_device_sz linear $read_only_device 0 + $read_only_device_sz $ram_cow_sz zero + EOF + local ret="$?" + if [ "$ret" -eq 0 ]; then + read_only_device="$extended_device" + read_only_device_sz="$(( read_only_device_sz + ram_cow_sz ))" + else + echo "ramdisk: Failed to setup the virtual, larger '$read_only_device'." + echo "ramdisk: Continuing with its original size." + fi + + # prepare dedicated tmpfs mount point + echo "ramdisk: Preparing dedicated tmpfs" + local cow_tmpfs="/run/openslx/cow" + if ! mkdir -p "$cow_tmpfs"; then + cow_tmpfs="${cow_tmpfs}.$$.$RANDOM" + mkdir -p "$cow_tmpfs" + fi + if ! mount -t tmpfs cow-tmpfs -o size="$(( read_only_device_sz / 2 + 100 ))k" "$cow_tmpfs"; then + echo "ramdisk: Failed to mount tmpfs in '$cow_tmpfs' of size '$(( read_only_device_sz / 2 + 100 ))KiB', trying to use regular /run tmpfs." + cow_tmpfs="/run" + fi + + # create sparse file there + local file="$cow_tmpfs/tmpfs-snapshot" + if ! truncate -s "$(( read_only_device_sz * 512 ))" "$file" \ + && ! dd if=/dev/null of="$file" seek="$read_only_device_sz" bs=512; then + drop_shell "Failed to allocate RAMdisk CoW file $file." + fi + declare -rg writable_device="$( losetup --show --find "$file" )" + local cow_device_candidate="root" + while [ -b "/dev/mapper/$cow_device_candidate" ]; do + cow_device_candidate="root.$RANDOM" + done + if [ -z "$writable_device" ] || ! create_snapshot "$cow_device_candidate N" "$writable_device"; then + drop_shell "CRITICAL: failed to setup RAMdisk fallback." 
+ fi + # [noreturn] + finish_setup "$cow_device_candidate" "0" "$read_only_device_sz" +} + +# finish_setup [] +# is the device name only, /dev/mapper will be prepended automatically. +# denotes if the created device lies in a RAMdisk (0) or is backed by a disk (1). +# is given in sectors. +# THIS FUNCTION MUST NEVER RETURN +finish_setup() { + if [ -z "$1" ] || ! [ -b "/dev/mapper/$1" ]; then + drop_shell "'/dev/mapper/$1' not a block device. Failed to setup CoW layer." + fi + if ! [[ "$2" =~ ^[0-9]$ ]]; then + drop_shell "'$2' not a valid type, 0 or 1 expected." + fi + # optional? + { + echo "# Generated by '$0'." + echo "SLX_DNBD3_DEVICE_COW=/dev/mapper/$1" + } >> /etc/openslx + save_partition_info "$1" "/" "$2" "$3" + exit 0 +} + +# path to save the achieved setup to +declare -rg partitions_config="/run/openslx/dmsetup.state" +cat <<-EOF > "$partitions_config" +# Generated by '$0'. +# Format: +# Options can be: +# * type -> CoW layer type: 0 is RAMdisk, 1 is disk, 2 is network +# * size -> in 512 byte sectors +EOF + +# save_partition_info [] +save_partition_info() { + [ -b "/dev/mapper/$1" ] || return 1 + [ -n "$2" ] || return 1 + [[ "$3" =~ ^[0-9]$ ]] || return 1 + local opts="type=$3" + # plain size given + [[ "$4" =~ ^[0-9]+$ ]] && opts="$opts,physical_size=$4" + # - + [[ "$4" =~ ^[0-9]+-[0-9]+$ ]] && opts="$opts,shared_physical_size=${4%-*},virtual_size=${4#*-}" + echo "/dev/mapper/$1 $2 ${opts}" >> "$partitions_config" +} + +# This will create another dm-linear on top of $scratch_device in case its +# size differs from $scratch_device_sz. 
+# This is useful for setups where you
+# cannot explicitly configure how much space to use from the underlying device,
+# and the partition table says not to use the entire $writable_device for cow
+# Globals: scratch_device and scratch_device_sz are read and may be rewritten.
+# Returns 0 if the sizes match afterwards (or the device is smaller than
+# requested and scratch_device_sz was lowered), 1 if the dm target failed.
+require_exact_scratch_size() {
+	local current_sz="$( blockdev --getsz "$scratch_device" )"
+	(( current_sz == scratch_device_sz )) && return 0 # Everything fine
+	echo "exact_scratch: Adding another layer; want: $scratch_device_sz, is: $current_sz"
+	if (( current_sz < scratch_device_sz )); then
+		echo "exact_scratch: WARNING: scratch_device_sz is larger than actual device."
+		echo "exact_scratch: This should never happen."
+		scratch_device_sz="$current_sz"
+		return 0
+	fi
+	# We could check if $scratch_device already is a dm target, and just adjust its
+	# size, but I think that scenario isn't possible, currently.
+	if ! dmsetup_create_noudevsync "scratch" "0 $scratch_device_sz linear $scratch_device 0"; then
+		echo "exact_scratch: Failed to create scratch space for the CoW layer."
+		return 1
+	fi
+	scratch_device="/dev/mapper/scratch"
+	save_partition_info "scratch" "*" "1" "$scratch_device_sz"
+	return 0
+}
+
+# create_pool
+# Creates the dm thin-pool named after $pool_dev on top of $scratch_device.
+# The pool metadata lives on a loop device backed by a sparse file in
+# /run/openslx; the persistent on-disk variant is present but disabled
+# ($metadata_persistent is always empty).
+# Globals read: scratch_device, scratch_device_sz, pool_dev
+# Globals written: metadata_dev_sz
+# Returns 0 on success, 1 if the metadata, data or pool device could not be set up.
+create_pool() {
+	declare -r data_block_sz=256 # Desired Block size (number of 512byte sectors)
+	declare -r wanted_low_mb=100 # Free space below this will trigger a dm event
+	# create external snapshot for read-only device
+	# create remaining thin volumes
+	echo "pool: Creating thinpool for cow"
+	modprobe dm-thin-pool || echo "pool: dm-thin-pool load failed, maybe builtin?"
+	# create temporary metadata device
+	# calculate number of sectors needed and check boundaries:
+	# XXX Formula from thin-pool.txt calculates size in *bytes*, we want 512b blocks
+	metadata_dev_sz="$(( 48 * scratch_device_sz / data_block_sz / 512 ))"
+	# Min 2MB -> 4096 sectors, max 16GB -> 33554432 sectors
+	[ "$metadata_dev_sz" -lt 4096 ] && metadata_dev_sz="4096"
+	# TODO handle the exotic case of a too large metadata device to fit within RAM.
+	[ "$metadata_dev_sz" -gt 33554432 ] && metadata_dev_sz="33554432"
+	local scratch_device_offset=0
+	local metadata_dev=
+	local metadata_persistent=
+	if [ -n "$metadata_persistent" ]; then
+		# create persistent slice of the writable device for the pool metadata
+		# Currently unused! Needs more work to reliably resume the pool on reboot,
+		# but only if booting exactly the same image
+		if ! dmsetup_create_noudevsync "pool-metadata" \
+			"0 $metadata_dev_sz linear $scratch_device $scratch_device_offset"; then
+			echo "pool: Failed to create linear device for pool metadata device."
+		else
+			# Adjust size for pool-data down accordingly
+			scratch_device_offset="$metadata_dev_sz"
+			scratch_device_sz=$(( scratch_device_sz - metadata_dev_sz ))
+			declare -r metadata_dev="/dev/mapper/pool-metadata"
+			# TODO configurable wipe: dd if=/dev/zero of="$metadata_dev" count=1 bs=4096
+			# TODO: If we fail later on in this function, we would actually have to destroy
+			# this target again, and re-adjust the offset and size back, so that the
+			# snapshot fallback would work properly. Or maybe just don't support fallback.
+		fi
+	fi
+	if [ -z "$metadata_dev" ]; then
+		# create RAMdisk in /run for metadata device
+		echo "pool: Creating loopdev in tmpfs for metadata"
+		mkdir -p /run/openslx
+		metadata_dev="$( mktemp /run/openslx/.pool-metadata.XXXXXX )"
+		# Create sparse file of required size
+		truncate -s "$(( metadata_dev_sz * 512 ))" "$metadata_dev" \
+			|| dd if=/dev/null of="$metadata_dev" bs=512 seek="$metadata_dev_sz"
+		# From here on metadata_dev holds the loop device, not the backing file
+		declare -r metadata_dev="$( losetup --show --find "$metadata_dev" )"
+	fi
+	if [ -z "$metadata_dev" ]; then
+		echo "pool: Could not set up persistent or tmpfs-loop metadata device. Aborting."
+		return 1
+	fi
+
+	local pool_data_dev
+	if (( scratch_device_offset == 0 )); then
+		# No offset, no potential expansion, don't create another linear target
+		pool_data_dev="$scratch_device"
+	else
+		echo "pool: Creating additional linear target for data device"
+		pool_data_dev="/dev/mapper/pool-data"
+		# Create linear device of the writable device, in case we have an offset from
+		# the on-disk meta data. Also this way we can easily extend it later.
+		if ! dmsetup_create_noudevsync "${pool_data_dev##*/}" \
+			"0 $scratch_device_sz linear $scratch_device $scratch_device_offset"; then
+			echo "pool: Failed to create pool data device on '$scratch_device'."
+			return 1
+		fi
+	fi
+	local low_water_mark
+	# Convert MB to blocks
+	low_water_mark=$(( wanted_low_mb * 2048 / data_block_sz ))
+	echo "pool: Creating thinpool device"
+	if ! dmsetup_create_noudevsync "${pool_dev##*/}" \
+		"0 $scratch_device_sz thin-pool $metadata_dev $pool_data_dev $data_block_sz $low_water_mark 1 skip_block_zeroing"; then
+		echo "pool: Failed to create thin-pool device (meta: $metadata_dev, data: $pool_data_dev)"
+		return 1
+	fi
+	return 0
+}
+
+# create_volume <name> <id> <size> [backing_dev]
+# Creates thin volume <id> in $pool_dev and maps it as /dev/mapper/<name> with
+# <size> sectors. [backing_dev], if given, is appended to the thin table line
+# (the read-only device the volume is an external snapshot of).
+# A failing "create_thin" message is only reported, not treated as fatal.
+# Returns 1 if $pool_dev is missing, arguments are missing, or the mapping fails.
+create_volume() {
+	if [ -z "$pool_dev" ] || ! [ -b "$pool_dev" ]; then
+		echo "volume: Global pool device not set or present."
+		return 1
+	fi
+	if [ $# -lt 3 ] || [ -z "$1" ]; then
+		echo "volume: not enough arguments."
+		return 1
+	fi
+	local name="$1"
+	local id="$2"
+	local size="$3"
+	local backing_dev="$4" # Optional, internal if empty
+
+	echo "volume: Creating $id/$name on $pool_dev"
+	if ! dmsetup message "$pool_dev" 0 "create_thin $id"; then
+		echo "volume: Failed to create thin volume with id '$id' in pool '$pool_dev'."
+		echo "volume: It might already exist, trying anyway..."
+	fi
+	if ! dmsetup_create_noudevsync "$name" "0 $size thin $pool_dev $id $backing_dev"; then
+		echo "volume: Failed to create external snapshot named '$name':"
+		echo " Size: $size"
+		echo " Backing device: $backing_dev"
+		echo " Thin volume id: $id"
+		return 1
+	fi
+	return 0
+}
+
+###
+## MAIN
+###
+
+. /etc/openslx
+. slx-tools
+
+# This is the main variable driving this script
+declare -g writable_device="$( cat "/.writable_device" )"
+declare -g id44_crypted=
+
+if [ -z "$writable_device" ]; then
+	echo "No writable device"
+	ramdisk_fallback
+	exit 0
+fi
+
+if ! [ -b "$writable_device" ]; then
+	drop_shell "Writable device '$writable_device' not a block device"
+fi
+
+# "Preload" functions by executing them NOT in a subshell
+dev_find_partitions &> /dev/null
+
+# Handle global encryption first
+if is_on "$SLX_ID44_CRYPT"; then
+	if encrypt_device "$writable_device" "id44-crypt"; then
+		writable_device="/dev/mapper/id44-crypt"
+		# Remember the whole device is already encrypted, and ignore the crypt flag for the partition table later
+		id44_crypted=1
+	fi
+fi
+
+# NOTE: from here on out, every value related to size is in 512 bytes sectors!
+declare -rg writable_device_sz="$( blockdev --getsz "$writable_device" )"
+# Test the size, not the path: a path is not a valid arithmetic expression,
+# so checking $writable_device here would fail for every device.
+if ! (( writable_device_sz > 0 )); then
+	drop_shell "Could not determine size of writable device '$writable_device'"
+fi
+
+# If SLX_WRITABLE_DEVICE_PARTITION_TABLE is not set, just do
+# regular thin-snapshot for the CoW layer, else parse it.
+if [ -z "$SLX_WRITABLE_DEVICE_PARTITION_TABLE" ]; then
+	SLX_WRITABLE_DEVICE_PARTITION_TABLE="thin-snapshot root 100% 0"
+fi
+
+# extra swap if no existing?
+# Only act if 'slx.swap' is present as a word on the kernel command line.
+# All *_sz values below are in 512 byte sectors.
+if grep -qFw 'slx.swap' "/proc/cmdline"; then
+	# Only if our basic writable_device is large enough, or we have ntfs backup
+	echo "Additional swap on ID44 requested if existing is too small"
+	do_swap_sz=0
+	if (( writable_device_sz > 80078125 )); then
+		# more than ~40GB, go ahead
+		# Candidate size: half of what exceeds 70312500 sectors (~36GB)
+		do_swap_sz="$(( ( writable_device_sz - 70312500 ) / 2 ))"
+		# cap to 6GB
+		(( do_swap_sz > 11718750 )) && do_swap_sz=11718750
+	fi
+	# Check how many we have and if they're regular, unencrypted ones.
+	# If it's plenty, don't cut out swap from our backing device
+	if (( do_swap_sz == 0 )); then
+		echo "Not enough ID44 space for swap..."
+	else
+		# SwapTotal is reported in kB, times two gives 512 byte sectors
+		swap_sz="$( awk '$1 == "SwapTotal:" {print $2 * 2}' /proc/meminfo )"
+		echo "Have existing swap of $(( swap_sz / 2 / 1024 ))MiB"
+		# Only the part not covered by already active swap is still wanted
+		(( do_swap_sz -= swap_sz ))
+		# Go ahead with swap? Only if we miss a reasonable amount... ( > 100MiB)
+		if (( do_swap_sz > 204800 )); then
+			echo "Adding $(( do_swap_sz / 2 / 1024 ))MiB of additional swap on backing dev"
+			# Sectors -> KiB, as the table entry below uses a 'K' suffix
+			skb="$(( do_swap_sz / 2 ))"
+			# Prepend a linear 'slx-swap' entry to the configured partition table
+			SLX_WRITABLE_DEVICE_PARTITION_TABLE="$( printf "%s\n%s" "linear slx-swap ${skb}K 0" \
+				"$SLX_WRITABLE_DEVICE_PARTITION_TABLE" )"
+		fi
+	fi
+fi
+
+# parse_config is defined earlier in this script; the code below relies on it
+# filling $snapshot, $thin_snapshot, ${linear[@]} and ${thin_volume[@]}.
+parse_config "$SLX_WRITABLE_DEVICE_PARTITION_TABLE"
+
+# Default to thin-snapshot, if none were configured
+if [ -z "${snapshot}" ] && [ -z "${thin_snapshot}" ]; then
+	parse_config "thin-snapshot root 100% 0"
+fi
+
+# Sanity checks for weird configurations
+if [ -n "$snapshot" ] && [ -n "$thin_snapshot" ]; then
+	echo "Warning: Both snapshot and thin-snapshot specified, prefering thin-snapshot." >&2
+fi
+
+###
+## LINEAR SLICES
+###
+
+# start allocating spaces to the configured devices
+# writable_device_used_sz: sectors of $writable_device already handed out
+# pool_crypted: non-empty once the scratch/pool device itself is encrypted
+declare -g writable_device_used_sz=0
+declare -g pool_crypted=
+
+# first, reserve the space for the rootfs cow snapshot (of either type)...
+# (this is the first line of our custom partition table)
+read -r name crypt min max _ <<< "${thin_snapshot:-${snapshot}}"
+# The whole device is already encrypted, never encrypt the scratch space again.
+# Without this reset, the smaller-slice branch below would neither encrypt
+# nor create the plain linear target when the crypt flag is set.
+[ -n "$id44_crypted" ] && crypt=0
+
+declare -g scratch_device="/dev/mapper/scratch"
+declare -gi scratch_device_sz=0
+if (( min <= writable_device_sz )); then
+	scratch_device_sz="$max"
+	(( scratch_device_sz > writable_device_sz )) && scratch_device_sz="$writable_device_sz"
+else
+	# minimum snapshot size is bigger than physical device size
+	echo "Warning: Minimum snapshot size is too big for the scratch partition." >&2
+	echo "Warning: You probably need to use a more conservative value." >&2
+	echo "Warning: Using this client maximum scratch space ($writable_device_sz sectors)." >&2
+	scratch_device_sz="$writable_device_sz"
+fi
+
+if (( scratch_device_sz == writable_device_sz )); then
+	# Only one, use directly, maybe crypt
+	if [ -z "$id44_crypted" ] && [ "$crypt" -ne 0 ]; then
+		if ! encrypt_device "$writable_device" "${scratch_device##*/}" 0 "$scratch_device_sz"; then
+			echo "Warning: Continuing with unencrypted scratch" >&2
+			scratch_device="$writable_device"
+		else
+			# Same as for the smaller-slice case below: everything inside the
+			# pool is encrypted now, so thin volumes must not be encrypted again
+			pool_crypted=1
+		fi
+	else
+		# Noop
+		scratch_device="$writable_device"
+	fi
+else
+	# Smaller slice requested, device mapper to the rescue
+	# Round down to 4k border, so next slice won't be misaligned if we're on a 4k sector disk
+	scratch_device_sz="$(( (scratch_device_sz / 8) * 8 ))"
+
+	# encrypt the scratch device, if configured
+	if [ -z "$id44_crypted" ] && (( crypt != 0 )); then
+		if encrypt_device "$writable_device" "${scratch_device##*/}" 0 "$scratch_device_sz"; then
+			pool_crypted=1
+		else
+			echo "Warning: Continuing with unencrypted scratch" >&2
+			crypt=0 # So we do the linear thing below
+		fi
+	fi
+	if (( crypt == 0 )) && ! dmsetup_create_noudevsync "${scratch_device##*/}" \
+		"0 $scratch_device_sz linear $writable_device $writable_device_used_sz"; then
+		echo "Error: Failed to create scratch space for the CoW layer." >&2
+		# this should never fail, but if it does, we would likely not be able to use
+		# $writable_device for any dmsetup stuff, so just fallback to ramdisk
+		# until we have a better idea on what to do :)
+		ramdisk_fallback
+	fi
+fi
+# Note: save_partition_info only records devices below /dev/mapper, so nothing
+# is written if the writable device is used directly.
+save_partition_info "${scratch_device##*/}" "*" "1" "$scratch_device_sz"
+
+writable_device_used_sz="$scratch_device_sz"
+
+# setup linear slices of the writable device
+# Each entry of ${linear[@]} is "<name> <crypt> <min> <max>"; slices are placed
+# one after another behind the scratch space.
+for line in "${linear[@]}"; do
+	[ -z "$line" ] && continue
+	read -r name crypt min max _ <<< "$line"
+	[ -n "$id44_crypted" ] && crypt=0
+	echo "Creating linear slice '$name'..."
+	free_space="$(( writable_device_sz - writable_device_used_sz ))"
+	if [ "$min" -gt "$free_space" ]; then
+		echo "Error: Not enough space left for linear device $name - have $free_space sectors, need $min" >&2
+		continue
+	fi
+	# allocate its max if it fits within the free space, otherwise use the space left.
+	to_allocate="$max"
+	(( to_allocate > free_space )) && to_allocate="$free_space"
+	# as above, round down to align on 4k sector devices
+	to_allocate="$(( (to_allocate / 8) * 8 ))"
+
+	if (( crypt != 0 )) \
+		&& ! encrypt_device "$writable_device" "${name}" "$writable_device_used_sz" "$to_allocate"; then
+		echo "Warning: Failed to encrypt '$name', continuing without encryption." >&2
+		crypt=0
+	fi
+	if (( crypt == 0 )) && ! dmsetup_create_noudevsync \
+		"$name" "0 $to_allocate linear $writable_device $writable_device_used_sz"; then
+		echo "Warning: Failed to create linear device: $line" >&2
+		continue
+	fi
+	# TODO sane?
+	save_partition_info "$name" "*" "1" "$to_allocate"
+	writable_device_used_sz=$(( to_allocate + writable_device_used_sz ))
+done
+
+###
+## THIN-PROVISIONING
+###
+declare -rg pool_dev="/dev/mapper/pool"
+# Now decide what to do for the writable layer
+
+if [ -n "$thin_snapshot" ] || [ -n "${thin_volume[*]}" ]; then
+	if ! create_pool ; then
+		{
+			echo "Error: Failed to create thin pool. Will ignore:"
+			echo " Thin snapshot: $(declare -p thin_snapshot)"
+			echo " Thin volumes: $(declare -p thin_volume)"
+			echo "Trying snapshot fallback..."
+		} >&2
+		# Reuse the thin-snapshot line for the old-style snapshot, unless one was configured
+		[ -z "$snapshot" ] && snapshot="$thin_snapshot"
+	else
+		# Once we have created the pool, there is no point in snapshot fallback,
+		# as the space is already reserved by the pool
+		snapshot=
+		# the order in which pool devices are created does not matter
+		# so start with thin volumes starting with id 2 and end with
+		# the thin-snapshot with id 1 which needs to call finish_setup.
+		volume_id=2
+		# go over thin-volumes
+		for line in "${thin_volume[@]}"; do
+			[ -z "$line" ] && continue
+			read -r name crypt min max _ <<< "$line"
+			if [ -n "$id44_crypted" ] || [ -n "$pool_crypted" ]; then
+				crypt=0
+			fi
+			echo "Adding thin volume '$name'..."
+			# thin-volume can be created with max size,
+			# since they are overprovisioned anyway.
+			# An encrypted volume is created as "<name>-plain" first, the
+			# encrypted mapping on top of it then gets the real name.
+			suffix=
+			(( crypt != 0 )) && suffix="-plain"
+			if ! create_volume "$name$suffix" "$(( volume_id++ ))" "$max"; then
+				echo "Error: Failed to create thin volume $name" >&2
+				continue
+			fi
+			if (( crypt != 0 )) && ! encrypt_device \
+				"/dev/mapper/$name$suffix" "$name" 0 "$max"; then
+				echo "Warning: Failed to encrypt thin volume '$name', continuing without encryption." >&2
+				name="$name$suffix"
+			fi
+			save_partition_info "$name" "*" "1" "${scratch_device_sz}-${max}"
+		done
+
+		if [ -n "$thin_snapshot" ]; then
+			# create thin-snapshot, use first one
+			read -r name _ <<< "$thin_snapshot"
+			echo "Adding base system snapshot '$name'..."
+			# min/max and crypt was used for the pool data device, ignore it here!
+			# Calculate how much of the CoW space we reserve for changes in the base
+			# system. Usually all the files in the base system should be static, but
+			# if someone decided to run apt dist-upgrade, this would change a lot of
+			# existing blocks, which is bad.
+			# Use MIN( readonly_size / 2, scratch_size / 10 )
+			# until we come up with anything better.
+			# Given an RO image of 10GB, this gives us:
+			# 40GB scratch -> 46GB, so initially 36GB free space
+			# 5GB scratch -> 14.5GB, initially 4.5GB free space
+			declare -r max_reserved_sz="$(( scratch_device_sz / 10 ))"
+			reserved_sz="$(( read_only_device_sz / 2 ))"
+			(( reserved_sz > max_reserved_sz )) && reserved_sz="$max_reserved_sz"
+			# Virtual size of the root volume: RO image plus scratch, minus the reserve
+			thin_snapshot_sz="$(( scratch_device_sz + read_only_device_sz - reserved_sz ))"
+			if ! create_volume "$name" 1 "$thin_snapshot_sz" "$read_only_device"; then
+				echo "Error: Failed to create external snapshot for '$read_only_device'." >&2
+				ramdisk_fallback # does not return
+			fi
+			finish_setup "$name" "1" "$thin_snapshot_sz"
+		fi
+		# Only reached without a thin-snapshot line, finish_setup above exits
+		echo "Warning: Thin volumes defined, but no snapshot. Using tmpfs...." >&2
+		ramdisk_fallback
+	fi
+fi
+
+###
+## SNAPSHOT (OLD FUNCTIONALITY)
+###
+# Used if an old-style snapshot was configured, or as fallback if the thin
+# pool could not be created.
+if [ -n "$snapshot" ] && require_exact_scratch_size; then
+	read -r name crypt min max _ <<< "$snapshot"
+	[ -n "$id44_crypted" ] && crypt=0
+	# As for thin volumes: the plain device gets a "-plain" suffix if it is
+	# going to be wrapped by an encrypted mapping
+	suffix=
+	(( crypt != 0 )) && suffix="-plain"
+	if ! create_snapshot "$name$suffix N" "$scratch_device"; then
+		ramdisk_fallback # no return
+	fi
+	if (( crypt != 0 )) && ! encrypt_device "/dev/mapper/$name$suffix" "$name" 0 "$max"; then
+		echo "Warning: Failed to encrypt snapshot $name, continuing without encryption." >&2
+		name="$name$suffix"
+	fi
+	finish_setup "$name" "1" "$scratch_device_sz"
+fi
+
+# ultimate fallback
+ramdisk_fallback
+exit 1
diff --git a/modules.d/slx-dmsetup/hooks/s3-mount-swap.sh b/modules.d/slx-dmsetup/hooks/s3-mount-swap.sh
new file mode 100755
index 00000000..0384bcf4
--- /dev/null
+++ b/modules.d/slx-dmsetup/hooks/s3-mount-swap.sh
@@ -0,0 +1,13 @@
+#!/bin/ash
+# Runs swapon on every partition returned by dev_find_partitions for MBR type
+# 82 / the Linux swap GPT type GUID, if dev_swap_version succeeds for it.
+
+. slx-tools
+
+for part in $( dev_find_partitions "82" "0657fd6d-a4ab-43c4-84e5-0933c84b4f4f" ); do
+	# POSIX redirection; '&>' is a bashism that a plain ash parses as '&' plus '>'
+	dev_swap_version "$part" > /dev/null 2>&1 || continue
+	swapon "$part"
+done
+
+exit 0
diff --git a/modules.d/slx-dmsetup/hooks/s3-prepare-rw-layer.sh b/modules.d/slx-dmsetup/hooks/s3-prepare-rw-layer.sh
new file mode 100755
index 00000000..69fc75d2
--- /dev/null
+++ b/modules.d/slx-dmsetup/hooks/s3-prepare-rw-layer.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Picks the single block device that s3-cow-setup uses as writable layer.
+# Input:  /.writable_devices (one device per line), optional NTFS candidate list
+# Output: /.writable_device (one device path, empty if nothing usable was found)
+
+if ! [ -e "/.writable_devices" ]; then
+	echo "/.writable_devices not found"
+	exit 1
+fi
+mapfile -t writable_devices < /.writable_devices
+
+declare -rg ntfs_list="/run/openslx/.thin-ntfs-candidates"
+
+. slx-tools
+
+if [ -s "$ntfs_list" ] || [[ "${#writable_devices[@]}" -gt 1 ]]; then
+	# More than one device, and/or NTFS space, need linear
+	echo "Have more than one writable device, creating linear target"
+	tbl="/run/openslx/dmsetup-linear-id44"
+	pos=0
+	grow_max_sz=9999999999
+	for dev in "${writable_devices[@]}"; do
+		max="$(( grow_max_sz - pos ))"
+		(( max <= 0 )) && break
+		sz="$( blockdev --getsz "$dev" )"
+		(( sz > 0 )) || continue
+		(( sz > max )) && sz="$max"
+		echo "$pos $sz linear $dev 0"
+		(( pos += sz ))
+	done > "$tbl"
+	if [ -s "$ntfs_list" ]; then
+		sum=
+		while read -r sum dev _ || [ -n "$sum" ]; do # each dev
+			echo "Appending NTFS partition $dev..."
+			word=
+			while read -r word range_start_b _ range_sz _ || [ -n "$word" ]; do # each slice of dev
+				[ "$word" = "Range" ] || continue
+				(( range_sz > 0 )) || continue
+				slice_sz="$(( grow_max_sz - pos ))"
+				(( slice_sz <= 0 )) && break
+				(( slice_sz > range_sz )) && slice_sz="$range_sz"
+				# Append line: map slice_sz sectors at table offset pos to dev, starting at range_start_b
+				if echo "$pos $slice_sz linear $dev $range_start_b" >> "$tbl"; then
+					# Update counter
+					(( pos += slice_sz ))
+				else
+					echo "Could not write new table row into $tbl"
+				fi
+			done < <( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$dev" )
+		done < "$ntfs_list"
+		# Don't try to add NTFS space again later: comment out the old setting, force 'never'
+		sed -i "s/^SLX_NTFSFREE.*$/# & # disabled in stage3\nSLX_NTFSFREE='never'/" "/etc/openslx"
+		rm -f -- "$ntfs_list"
+	fi
+	# See if we need a linear target at all
+	if ! [ -s "$tbl" ]; then
+		echo "Empty tmp/id44 table, fallback to RAM"
+	elif [ "$( wc -l < "$tbl" )" -eq 1 ] && [[ "${#writable_devices[@]}" -ge 1 ]]; then
+		# Only one line, have writable device -> use directly (NOTE(review): assumes the line stems from writable_devices[0])
+		echo "Table somehow ended up with one entry, discarding"
+		writable_device="${writable_devices[0]}"
+	else
+		# set up linear device
+		echo "Setting up linear id44 device with $( wc -l < "$tbl" ) slices"
+		if ! dmsetup_create_noudevsync "id44-group" < "$tbl"; then
+			echo "Error creating group of writable devices. Fallback to RAM :-("
+		else
+			writable_device="/dev/mapper/id44-group"
+		fi
+	fi
+else
+	# Single device
+	echo "Have a single writable device, using it directly"
+	writable_device="${writable_devices[0]}"
+fi
+
+if [ -z "$writable_device" ]; then
+	echo "Could not find any suitable writable devices."
+fi
+
+echo "$writable_device" > "/.writable_device"
+
+exit 0
diff --git a/modules.d/slx-dmsetup/hooks/s3-scan-id44.sh b/modules.d/slx-dmsetup/hooks/s3-scan-id44.sh
new file mode 100755
index 00000000..60ba133d
--- /dev/null
+++ b/modules.d/slx-dmsetup/hooks/s3-scan-id44.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Writes all partitions that dev_find_partitions returns for
+# SLX_WRITABLE_DEVICE_IDENTIFIER to /.writable_devices, one per line.
+
+. /etc/openslx
+. slx-tools
+
+if [ -z "$SLX_WRITABLE_DEVICE_IDENTIFIER" ]; then
+	SLX_WRITABLE_DEVICE_IDENTIFIER="44 87f86132-ff94-4987-b250-444444444444"
+	echo "SLX_WRITABLE_DEVICE_IDENTIFIER='${SLX_WRITABLE_DEVICE_IDENTIFIER}'" >> /etc/openslx
+fi
+
+declare -a writable_devices
+read -r -a list <<<"$SLX_WRITABLE_DEVICE_IDENTIFIER"
+echo "Scanning for partitions with type/label ${list[*]}..."
+for dev in $( dev_find_partitions "${list[@]}" ); do
+	writable_devices+=( "$dev" )
+done
+echo "Found ${#writable_devices[@]} matching partitions"
+
+for dev in "${writable_devices[@]}"; do
+	echo "$dev"
+done > "/.writable_devices"
+
+exit 0
diff --git a/modules.d/slx-dmsetup/module-setup.sh b/modules.d/slx-dmsetup/module-setup.sh
index 99c1adf2..e513ca6f 100755
--- a/modules.d/slx-dmsetup/module-setup.sh
+++ b/modules.d/slx-dmsetup/module-setup.sh
@@ -6,9 +6,10 @@ depends() {
 	echo "haveged slx-tools"
 }
 install() {
-	inst "$moddir/hooks/dmsetup-slx-device" "/usr/local/bin/dmsetup-slx-device"
+	inst "$moddir/bin/dmsetup_create_noudevsync" "/usr/local/bin/dmsetup_create_noudevsync"
 
 	# Grows the rootfs to match the underlying blockdev
+	# Not using slx install helper because of additional condition
 	_name="s3-grow-rootfs"
 	inst "$moddir/hooks/${_name}.sh" \
 		"/usr/local/bin/${_name}.sh"
@@ -19,12 +20,24 @@ install() {
 	ln_r "${systemdsystemunitdir}/${_name}.service" \
 		"${systemdsystemunitdir}/initrd.target.wants/${_name}.service"
 
+	slx_service "s3-cow-setup" "Set up the CoW layer for rootfs" \
+		--wafter "s3-connect-image.service" \
+		--before "initrd-root-device.target" \
+		--after "s3-fetch-config.service"
+
+	slx_service "s3-mount-swap" "Mount existing swap partitions" \
+		--before "s3-cow-setup"
+
+	slx_service "s3-prepare-rw-layer" "Prepare suitable partitions for use as write layer" \
+		--before "s3-cow-setup"
+
+	slx_service "s3-scan-id44" "Scan for partitions marked as scratch space (ID44)" \
+		--before "s3-prepare-rw-layer.service" \
+		--after "s3-fetch-config.service"
+
 	inst_multiple blockdev xxd \
 		mkfs.ext4 resize2fs \
 		mkfs.xfs xfs_repair xfs_growfs
-
-	# TODO properly find binary in PATH + /opt/openslx/{s,}bin etc
-	inst /opt/openslx/sbin/ntfsfree /usr/local/bin/ntfsfree
 }
 installkernel() {
 	# install those modules in case the used kernel does not have them builtin
diff --git a/modules.d/slx-extra-script/module-setup.sh b/modules.d/slx-extra-script/module-setup.sh
index 9cc9bdbe..0c3600e2 100755
--- a/modules.d/slx-extra-script/module-setup.sh
+++ b/modules.d/slx-extra-script/module-setup.sh
@@ -9,7 +9,8 @@ depends() {
 install() {
 	slx_service "s3-extra-script" "Execute extra script from URL" \
 		--wafter "s3-fetch-config.service" \
-		--before "s3-dnbd3root.service"
+		--before "s3-connect-image.service" \
+		--before "s3-prepare-rw-layer.service"
 	mkdir --parents "${initdir}/${systemdsystemunitdir}/initrd.target.wants"
 	local i
 	for i in dmsetup mount; do
diff --git a/modules.d/slx-extra-script/services/s3-extra-post-dmsetup.service b/modules.d/slx-extra-script/services/s3-extra-post-dmsetup.service
index fd1f18e6..a7d8a6fe 100644
--- a/modules.d/slx-extra-script/services/s3-extra-post-dmsetup.service
+++ b/modules.d/slx-extra-script/services/s3-extra-post-dmsetup.service
@@ -1,7 +1,7 @@
 [Unit]
 Description=Run extra script after dmsetup
 ConditionFileIsExecutable=/etc/extra-init
-After=s3-dnbd3root.service
+After=s3-cow-setup.service
 Before=s3-write-fstab.service
 Before=initrd-switch-root.target initrd-cleanup.service
 
diff --git a/modules.d/slx-ntfsfree/hooks/s3-ntfsfree.sh b/modules.d/slx-ntfsfree/hooks/s3-ntfsfree.sh
new file mode 100755
index 00000000..0c663ef3
--- /dev/null
+++ b/modules.d/slx-ntfsfree/hooks/s3-ntfsfree.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Builds $ntfs_list with one "<free 512b blocks> <partition>" line for every
+# non-read-only partition on which ntfsfree reports more than 1GB in ranges of
+# at least 256MB, largest first. Exits early if SLX_NTFSFREE is unset or 'never'.
+
+. /etc/openslx
+[ "${SLX_NTFSFREE:-never}" = "never" ] && exit 0
+
+declare -rg ntfs_list="/run/openslx/.thin-ntfs-candidates"
+[ -e "$ntfs_list" ] && exit 0
+
+echo "Scanning for suitable NTFS partitions to use as writable device"
+if ! command -v ntfsfree &> /dev/null; then
+	echo "ntfsfree not found, cannot use NTFS partitions as RW layer"
+	exit 1
+fi
+
+for part in /dev/disk/by-partuuid/*; do
+	# Skip empty/ro devices
+	dev="$( readlink -f "$part" )"
+	dev="${dev##*/}"
+	ro="$( cat "/sys/class/block/${dev}/ro" )"
+	[ "$ro" = 1 ] && continue
+	# Only count ranges >= 256MB, sum will be in number of 512b blocks
+	sum="$( ntfsfree --block-size 512 --min-size "$(( 256 * 1024 * 1024 ))" "$part" 2> /dev/null \
+		| awk -v sum=0 '{if ($1 == "Range") sum += $4}END{printf "%.0f", sum}' )"
+	# Only consider volume if sum of these ranges > 1GB (this is BLOCKS, not bytes)
+	(( sum > 2 * 1024 * 1024 )) || continue
+	echo "$sum $part" # only thing in loop going to stdout
+done | sort -nr > "$ntfs_list"
+echo "Found $( wc -l < "$ntfs_list" ) suitable partitions"
+
+exit 0
diff --git a/modules.d/slx-ntfsfree/module-setup.sh b/modules.d/slx-ntfsfree/module-setup.sh
new file mode 100755
index 00000000..dabe2dda
--- /dev/null
+++ b/modules.d/slx-ntfsfree/module-setup.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+check() {
+	return 255
+}
+depends() {
+	echo "slx-tools"
+}
+install() {
+	slx_service "s3-ntfsfree" "Scan for free space on NTFS partitions" \
+		--wafter "s3-fetch-config.service" \
+		--before "s3-prepare-rw-layer"
+	# TODO properly find binary in PATH + /opt/openslx/{s,}bin etc
+	inst /opt/openslx/sbin/ntfsfree /usr/local/bin/ntfsfree
+}
+installkernel() {
+	# install those modules in case the used kernel does not have them builtin
+	instmods \
+		dm-thin-pool dm-snapshot dm-zero dm-crypt \
+		crc32c xts aes drbg ansi_cprng \
+		xfs ext4
+}
-- cgit v1.2.3-55-g7522