From 25dc8e0ea7af9e6d8d7806d5ecf031e3bfe187c9 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 20 Oct 2021 13:34:10 +0200 Subject: [system-check/hardware-stats] Refactor: split reporting from stats gathering Reporting to the server is now a separate service, because running it via hooks.d slowed down system-check for no reason. Also add a couple of comments and simplifications to the system-check main script. We now redirect to the tags file within the main script instead of letting each hook individually open the tags file and write to it concurrently, which seems like a bad idea in retrospect. --- .../etc/systemd/system/hardware-report.service | 11 +++ .../data/etc/systemd/system/hardware-stats.service | 3 +- .../hardware-report.service | 1 + .../opt/openslx/scripts/systemd-hardware_report | 97 ++++++++++++++++++++++ .../system-check/hooks.d/50-hardware-report | 97 ---------------------- 5 files changed, 110 insertions(+), 99 deletions(-) create mode 100644 core/modules/hardware-stats/data/etc/systemd/system/hardware-report.service create mode 120000 core/modules/hardware-stats/data/etc/systemd/system/multi-user.target.wants/hardware-report.service create mode 100755 core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_report delete mode 100755 core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report (limited to 'core/modules/hardware-stats') diff --git a/core/modules/hardware-stats/data/etc/systemd/system/hardware-report.service b/core/modules/hardware-stats/data/etc/systemd/system/hardware-report.service new file mode 100644 index 00000000..d4649c4a --- /dev/null +++ b/core/modules/hardware-stats/data/etc/systemd/system/hardware-report.service @@ -0,0 +1,11 @@ +[Unit] +Description=Send hardware information to boot server +After=system-check.service hardware-stats.service +Wants=system-check.service hardware-stats.service + +[Service] +Type=oneshot +ExecStart=/opt/openslx/scripts/systemd-hardware_report +RemainAfterExit=yes 
+ExecStop=/opt/openslx/scripts/shutdown-system_usage_update + diff --git a/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service b/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service index f23f3384..8106af74 100644 --- a/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service +++ b/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service @@ -1,5 +1,5 @@ [Unit] -Description=Gather statistics about this machine and send to boot server +Description=Gather hardware information about this machine After=tmp.target mount-vm-store.service network.target run-virt-env.service Wants=tmp.target system-check.service Before=system-check.service @@ -8,5 +8,4 @@ Before=system-check.service Type=oneshot ExecStart=/opt/openslx/scripts/systemd-hardware_stats RemainAfterExit=yes -ExecStop=/opt/openslx/scripts/shutdown-system_usage_update diff --git a/core/modules/hardware-stats/data/etc/systemd/system/multi-user.target.wants/hardware-report.service b/core/modules/hardware-stats/data/etc/systemd/system/multi-user.target.wants/hardware-report.service new file mode 120000 index 00000000..d9610279 --- /dev/null +++ b/core/modules/hardware-stats/data/etc/systemd/system/multi-user.target.wants/hardware-report.service @@ -0,0 +1 @@ +../hardware-report.service \ No newline at end of file diff --git a/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_report b/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_report new file mode 100755 index 00000000..09d8efc9 --- /dev/null +++ b/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_report @@ -0,0 +1,97 @@ +#!/bin/ash + +. 
/opt/openslx/config + +disable_remote_logging() { + echo "Server doesn't seem to support hardware/usage stats - disabling logging" + rm -f -- "/etc/cron.d/usage_stats" +} + +# sends the hardware information of this machine generated by +# systemd-hardware_stats_gather in /run/hwreport and /run/hwinfo +report_hardware_info() { + local uptime hwreport hwinfo uuid model subnet ret runmode jsonfile + if [ -z "$SLX_REMOTE_LOG" ]; then + disable_remote_logging + return 1 + fi + hwreport="/run/hwreport" + if [ ! -s "$hwreport" ]; then + echo "Missing hwreport file: $hwreport" + # TODO send data without? + return 1 + fi + + # Read generated data and current uptime and send it + hwinfo="/run/hwinfo" + if [ ! -s "$hwinfo" ]; then + echo "Missing hwinfo file: $hwinfo" + return 1 + fi + . "$hwinfo" + + # got everything, get the last infos + uptime=$(grep -oE '^[0-9]+' /proc/uptime) + + uuid=$(cat /etc/system-uuid) + if [ -z "$uuid" ] || [ "${#uuid}" -ne "36" ]; then + echo "No/malformed UUID, aborting" >&2 + exit 1 + fi + runmode="$SLX_RUNMODE_MODULE" + if [ -n "$SLX_EXAM" ]; then + # This isn't an actual runmode, but in case exam mode is active on a client you definitely want + # to know about it, more than other runmodes actually + runmode="exams" + fi + + # Combine manufacturer and model name (for displaying purposes) + model="$HW_MODEL" + if [ "$HW_MANUF" != "Unknown" ]; then + model="$model ($HW_MANUF)" + fi + # Get IP/subnet size + local primary="${SLX_BRIDGE:-br0}" + subnet="$( ip -o -f inet addr show "$primary" | awk '/scope global/ {print $4}' )" + # Finally, new json-based reporting + jsonfile="$( mktemp )" + if ! python3 /opt/openslx/system-check/collect_hw_info_json.py -p > "$jsonfile"; then + echo -n "" > "$jsonfile" + fi + # just assume the uuid/mac dumped are valid here (its checked often enough :)) + echo -n "Submitting to '$SLX_REMOTE_LOG' ... 
" + curl --retry 4 --retry-connrefused --max-time 5 --retry-max-time 15 \ + --data-urlencode "type=~poweron" --data-urlencode "uuid=$uuid" --data-urlencode "macaddr=$HW_MAC" \ + --data-urlencode "uptime=$uptime" --data-urlencode "realcores=$HW_CORES" --data-urlencode "vcores=$HW_THREADS" \ + --data-urlencode "sockets=$HW_SOCKETS" --data-urlencode "mbram=$HW_MBRAM" \ + --data-urlencode "kvmstate=$HW_KVM" --data-urlencode "cpumodel=$HW_CPUMODEL" --data-urlencode "id44mb=$HW_ID44" \ + --data-urlencode "badsectors=$HW_BADSECTORS" --data-urlencode "systemmodel=$model" --data-urlencode "subnet=$subnet" \ + --data-urlencode "runmode=$runmode" --data-urlencode "data@$hwreport" --data-urlencode "json@$jsonfile" \ + "$SLX_REMOTE_LOG" | grep -q "RESULT=0" + local ret=$? + if [ "$ret" -ne 0 ]; then + echo "failed." + disable_remote_logging + return 1 + fi + echo "succeeded." + rm -f -- "$hwreport" "$jsonfile" + START=$(( $RANDOM % 5 )) + DELAY=$(( $RANDOM % 20 )) + cat > "/etc/cron.d/usage_stats" <<-EOF + # Update usage statistics on server + SHELL=/bin/sh + PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/openslx/sbin:/opt/openslx/bin + + ${START}-59/5 * * * * root sleep ${DELAY}; /opt/openslx/scripts/cron-system_usage_update --full + EOF + # TODO remove this hack one day: Sometimes, aufs doesn't update the mtime of dirs + # when creating files, so cron would not rescan the cron directory. + touch "/etc/cron.d" + # Trigger right now so resource usage gets updated + /opt/openslx/scripts/cron-system_usage_update --full + return 0 +} + +report_hardware_info + diff --git a/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report b/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report deleted file mode 100755 index 09d8efc9..00000000 --- a/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/ash - -. 
/opt/openslx/config - -disable_remote_logging() { - echo "Server doesn't seem to support hardware/usage stats - disabling logging" - rm -f -- "/etc/cron.d/usage_stats" -} - -# sends the hardware information of this machine generated by -# systemd-hardware_stats_gather in /run/hwreport and /run/hwinfo -report_hardware_info() { - local uptime hwreport hwinfo uuid model subnet ret runmode jsonfile - if [ -z "$SLX_REMOTE_LOG" ]; then - disable_remote_logging - return 1 - fi - hwreport="/run/hwreport" - if [ ! -s "$hwreport" ]; then - echo "Missing hwreport file: $hwreport" - # TODO send data without? - return 1 - fi - - # Read generated data and current uptime and send it - hwinfo="/run/hwinfo" - if [ ! -s "$hwinfo" ]; then - echo "Missing hwinfo file: $hwinfo" - return 1 - fi - . "$hwinfo" - - # got everything, get the last infos - uptime=$(grep -oE '^[0-9]+' /proc/uptime) - - uuid=$(cat /etc/system-uuid) - if [ -z "$uuid" ] || [ "${#uuid}" -ne "36" ]; then - echo "No/malformed UUID, aborting" >&2 - exit 1 - fi - runmode="$SLX_RUNMODE_MODULE" - if [ -n "$SLX_EXAM" ]; then - # This isn't an actual runmode, but in case exam mode is active on a client you definitely want - # to know about it, more than other runmodes actually - runmode="exams" - fi - - # Combine manufacturer and model name (for displaying purposes) - model="$HW_MODEL" - if [ "$HW_MANUF" != "Unknown" ]; then - model="$model ($HW_MANUF)" - fi - # Get IP/subnet size - local primary="${SLX_BRIDGE:-br0}" - subnet="$( ip -o -f inet addr show "$primary" | awk '/scope global/ {print $4}' )" - # Finally, new json-based reporting - jsonfile="$( mktemp )" - if ! python3 /opt/openslx/system-check/collect_hw_info_json.py -p > "$jsonfile"; then - echo -n "" > "$jsonfile" - fi - # just assume the uuid/mac dumped are valid here (its checked often enough :)) - echo -n "Submitting to '$SLX_REMOTE_LOG' ... 
" - curl --retry 4 --retry-connrefused --max-time 5 --retry-max-time 15 \ - --data-urlencode "type=~poweron" --data-urlencode "uuid=$uuid" --data-urlencode "macaddr=$HW_MAC" \ - --data-urlencode "uptime=$uptime" --data-urlencode "realcores=$HW_CORES" --data-urlencode "vcores=$HW_THREADS" \ - --data-urlencode "sockets=$HW_SOCKETS" --data-urlencode "mbram=$HW_MBRAM" \ - --data-urlencode "kvmstate=$HW_KVM" --data-urlencode "cpumodel=$HW_CPUMODEL" --data-urlencode "id44mb=$HW_ID44" \ - --data-urlencode "badsectors=$HW_BADSECTORS" --data-urlencode "systemmodel=$model" --data-urlencode "subnet=$subnet" \ - --data-urlencode "runmode=$runmode" --data-urlencode "data@$hwreport" --data-urlencode "json@$jsonfile" \ - "$SLX_REMOTE_LOG" | grep -q "RESULT=0" - local ret=$? - if [ "$ret" -ne 0 ]; then - echo "failed." - disable_remote_logging - return 1 - fi - echo "succeeded." - rm -f -- "$hwreport" "$jsonfile" - START=$(( $RANDOM % 5 )) - DELAY=$(( $RANDOM % 20 )) - cat > "/etc/cron.d/usage_stats" <<-EOF - # Update usage statistics on server - SHELL=/bin/sh - PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/openslx/sbin:/opt/openslx/bin - - ${START}-59/5 * * * * root sleep ${DELAY}; /opt/openslx/scripts/cron-system_usage_update --full - EOF - # TODO remove this hack one day: Sometimes, aufs doesn't update the mtime of dirs - # when creating files, so cron would not rescan the cron directory. - touch "/etc/cron.d" - # Trigger right now so resource usage gets updated - /opt/openslx/scripts/cron-system_usage_update --full - return 0 -} - -report_hardware_info - -- cgit v1.2.3-55-g7522