diff options
author | Jonathan Bauer | 2019-10-31 17:00:23 +0100 |
---|---|---|
committer | Jonathan Bauer | 2019-10-31 17:00:23 +0100 |
commit | ba6753cfabc149cc312f33a3c507b1cc0dbcee3b (patch) | |
tree | bf12c077ce4c1722a20319fc82b3eb2143b65a2c /core/modules/hardware-stats | |
parent | [vmware-common] Fix launching usbarbitrator with 15.5.x (diff) | |
download | mltk-ba6753cfabc149cc312f33a3c507b1cc0dbcee3b.tar.gz mltk-ba6753cfabc149cc312f33a3c507b1cc0dbcee3b.tar.xz mltk-ba6753cfabc149cc312f33a3c507b1cc0dbcee3b.zip |
[*] introduce system-check
* hardware-stats now only gathers the hardware info and writes it to
/run/hwinfo and /run/hwreport
* system-check hooks will generate lightdm warnings and curl the report
to the satellite
* run-virt now handles the ID44 warnings to cope with network shares on
/tmp/virt
WARNING PROFI111!
Diffstat (limited to 'core/modules/hardware-stats')
6 files changed, 169 insertions, 134 deletions
diff --git a/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service b/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service index 36d47986..7309d0bb 100644 --- a/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service +++ b/core/modules/hardware-stats/data/etc/systemd/system/hardware-stats.service @@ -1,7 +1,8 @@ [Unit] Description=Gather statistics about this machine and send to boot server After=tmp.target mount-vm-store.service network.target -Wants=tmp.target +Wants=tmp.target system-check.service +Before=system-check.service [Service] Type=oneshot diff --git a/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_stats b/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_stats index 694c0707..6b81d7be 100755 --- a/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_stats +++ b/core/modules/hardware-stats/data/opt/openslx/scripts/systemd-hardware_stats @@ -63,36 +63,33 @@ slxfdisk() { return 127 } -if [ -z "$SLX_REMOTE_LOG" ]; then - echo "No remote log url given, will not report" - exit 1 -fi - +################################################################################ # 1) Get MAC Address used for booting -eval $(grep -Eo BOOTIF=\\S+ /proc/cmdline) -if [ "${#BOOTIF}" -ne "20" ]; then +# +MAC="${SLX_PXE_MAC}" +if [ -z "$MAC" ]; then + # get MAC from sysfs + MAC="$(cat /sys/class/net/${SLX_PXE_NETIF:-br0}/address)" +fi +if [ -z "$MAC" ]; then + BOOTIF="$(grep -Po '(?<=BOOTIF=)[0-9a-f\-:]+' /proc/cmdline)" + [ "${#BOOTIF}" -eq "20" ] && MAC="${BOOTIF:3}" +fi +if [ -z "$MAC" ]; then echo "Getting MAC from /proc/cmdline failed, using 'ip a'..." - BOOTIF=01-$(ip a | grep -A 1 ': br0' | grep -o 'ether ..:..:..:..:..:..' | cut -d' ' -f2 | sed s/:/-/g) - if [ "${#BOOTIF}" -ne "20" ]; then - echo "FAIL FAIL FAIL" - BOOTIF="99-88-77-66-55-44-33" - fi + _mac="$(ip a | grep -A 1 ': br0' | grep -o 'ether ..:..:..:..:..:..' 
| cut -d' ' -f2)" + [ "${#_mac}" -eq 17 ] && MAC="$_mac" fi -MAC=${BOOTIF:3} -echo "Determined MAC=$MAC" - -# 2) Get machine UUID written in stage3.1 -UUID=$(cat /etc/system-uuid) -if [ -z "$UUID" ] || [ "${#UUID}" -ne "36" ]; then - echo "No/malformed UUID, aborting" >&2 - exit 1 +if [ -z "$MAC" ]; then + MAC="88-77-66-55-44-33" fi -echo "UUID=$UUID" - -# 3) Uptime in seconds -UPTIME=$(grep -o -E '^[0-9]+' /proc/uptime) +# always uppercase and dash-separated +MAC="${MAC^^}" +MAC="${MAC//:/-}" +echo "Determined MAC=$MAC" -# 4) Number of real and virtual CPU cores +################################################################################ +# 2) Number of real and virtual CPU cores # Virtual, cheap way VCORES=$(grep '^processor\s' /proc/cpuinfo | sort -u | wc -l) # Real cores @@ -109,11 +106,15 @@ if [ -z "$CPUCORES" ] || [ "$CPUCORES" = "0" ]; then fi echo "$CPUCORES real cores, $VCORES with HT" -# 5) CPU model name +################################################################################ +# 3) CPU model name +# CPUMODEL=$(grep -m1 '^model name\s*:' /proc/cpuinfo | sed 's/^model name\s*:\s*//;s/\s\s*/ /g;s/^ //;s/ $//') echo "$CPUMODEL" -# 6) RAM +################################################################################ +# 4) RAM +# RAM=$(grep -m1 '^MemTotal:' /proc/meminfo | awk '{print $2}') RAM=$(( $RAM / 1024 )) if [ -z "$RAM" ] || [ "$RAM" -lt 500 ]; then @@ -125,7 +126,9 @@ if [ -z "$RAM" ] || [ "$RAM" -lt 500 ]; then fi echo "$RAM MB RAM" -# 7) 64bit virtualization support +################################################################################ +# 5) 64bit virtualization support +# VT="UNSUPPORTED" VIRTTYPE=$(grep -m1 '^flags\s*:' /proc/cpuinfo | grep -wo -e svm -e vmx) [ -n "$VIRTTYPE" ] && modprobe msr @@ -153,7 +156,9 @@ elif [ "$VIRTTYPE" = "svm" ]; then # amd fi echo "$VIRTTYPE is $VT" -# 8) ID44 partition size +################################################################################ +# 6) ID44 partition size +# ID44=0
if ! slx-tools fs_path_isvolatile "/tmp/virt" ; then ID44_SPACE=($(slx-tools fs_path_space "/tmp/virt")) @@ -181,7 +186,9 @@ if ! slx-tools fs_path_isvolatile "/tmp/virt" ; then fi echo "Scratch space: $ID44 MB" -# 9) check smart values +################################################################################ +# 7) check smart values +# FDISK=$(mktemp) declare -a DISKS shopt -s extglob @@ -192,6 +199,7 @@ for disk in /dev/disk/by-path/!(*-part*|*-usb-*); do slxfdisk -l "$disk" done > "$FDISK" shopt -u extglob +[ -z "$SLX_SMARTCTL_MIN_REALLOC" ] && SLX_SMARTCTL_MIN_REALLOC=0 BADSECTORS=0 if which smartctl; then ALLSMART=$(mktemp) @@ -226,7 +234,9 @@ if which smartctl; then fi echo "SMART: $OVERALL - $REALLOC reallocated, $PENDING pending" -# A) Read system model and manufacturer +################################################################################ +# 8) Read system model and manufacturer +# dmidec() { local RETVAL=$(dmidecode "$@" 2>/dev/null | grep -v '^#' | grep -v '^Invalid' | sed 's/\s\s*/ /g;s/^ //;s/ $//') case "$RETVAL" in @@ -237,6 +247,10 @@ dmidec() { echo "$RETVAL" } +bashesc () { + sed s/\'/\'\"\'\"\'/g <<< $* +} + HW_MODEL=$(dmidec -q -s system-product-name) HW_MANUF=$(dmidec -q -s system-manufacturer) # Try fallback to baseboard @@ -245,138 +259,59 @@ if [ "$HW_MODEL" = "Unknown" ]; then HW_MANUF=$(dmidec -q -s baseboard-manufacturer) fi -MODEL="$HW_MODEL" -if [ "$HW_MANUF" != "Unknown" ]; then - MODEL="$MODEL ($HW_MANUF)" -fi -echo "System model: $MODEL" +HW_MANUF=$(bashesc "$HW_MANUF") +HW_MODEL=$(bashesc "$HW_MODEL") -# n) Dump raw data to a file -DATAFILE=$(mktemp) -cat > "$DATAFILE" <<-EOF +################################################################################ +# Save raw data to report file +# +REPORTFILE="/run/hwreport" +cat > "$REPORTFILE" <<-EOF ############################### CPU ##################################### Sockets: $(grep '^physical id' /proc/cpuinfo | sort -u | wc -l) Real cores: $CPUCORES Virtual 
cores: $VCORES ######################## Partition tables ############################### EOF -cat "$FDISK" >> "$DATAFILE" -cat >> "$DATAFILE" <<-EOF +cat "$FDISK" >> "$REPORTFILE" +cat >> "$REPORTFILE" <<-EOF ############################ PCI ID ##################################### EOF -lspci -n -m >> "$DATAFILE" -cat >> "$DATAFILE" <<-EOF +lspci -n -m >> "$REPORTFILE" +cat >> "$REPORTFILE" <<-EOF ########################## dmidecode #################################### EOF -dmidecode >> "$DATAFILE" +dmidecode >> "$REPORTFILE" if [ -n "$ALLSMART" ] && [ -s "$ALLSMART" ]; then - cat >> "$DATAFILE" <<-EOF + cat >> "$REPORTFILE" <<-EOF ########################### smartctl #################################### EOF - cat "$ALLSMART" >> "$DATAFILE" + cat "$ALLSMART" >> "$REPORTFILE" fi -cat >> "$DATAFILE" <<-EOF +cat >> "$REPORTFILE" <<-EOF ######################### EOF echo "Created report file" [ -n "$ALLSMART" ] && rm -f -- "$ALLSMART" -# Put some info in local file for later use -HDDCOUNT="${#DISKS[@]}" - -bashesc () { - sed s/\'/\'\"\'\"\'/g <<<$* -} -HW_MANUF=$(bashesc "$HW_MANUF") -HW_MODEL=$(bashesc "$HW_MODEL") +################################################################################ +# Save information to local file for later use +# cat > "/run/hwinfo" <<HORST HW_KVM='${VT}' HW_ID44='${ID44}' +HW_MAC='${MAC}' HW_MBRAM='${RAM}' -HW_HDDCOUNT='${HDDCOUNT}' +HW_HDDCOUNT='${#DISKS[@]}' +HW_BADSECTORS='${BADSECTORS}' HW_MANUF='${HW_MANUF}' HW_MODEL='${HW_MODEL}' +HW_CPUMODEL='${CPUMODEL}' HW_CORES='${CPUCORES}' HW_THREADS='${VCORES}' HORST -# Build warning logfile (for lightdm) -buildlogfile() { - . /run/hwinfo - exec 4> /run/hw-warnings.log - CONTACT_RZ= - if [ "$HW_KVM" = "DISABLED" ]; then - echo "ff0000" "* 64Bit-Gast-Support (VT-x oder AMD-V) ist im BIOS deaktiviert. 64Bit VMs können nicht gestartet werden." >&4 - CONTACT_RZ=jau - elif [ "$HW_KVM" = "UNSUPPORTED" ]; then - echo "000000" "* CPU hat keinen 64Bit-Gast-Support (VT-x oder AMD-V). 
64Bit VMs können nicht gestartet werden." >&4 - fi - if [ -n "$HW_MBRAM" ] && [ "$HW_MBRAM" -lt 3400 ]; then - local GB=$(( ( HW_MBRAM + 300 ) / 1024 )) - echo "000000" "* Dieser PC hat wenig RAM (${GB}GB). Die Leistung von VM-Sitzungen wird nicht optimal sein." >&4 - fi - if [ "$HW_ID44" = "0" ]; then - echo "000000" "* Keine ID44-Partition gefunden. VMs bekommen wenig RAM zugewiesen." >&4 - if [ "$HW_HDDCOUNT" = "0" ]; then - echo "000000" " Keine Festplatte erkannt; eine Festplatte wird empfohlen, wenn Sie VMs nutzen wollen." >&4 - elif [ -n "$HW_HDDCOUNT" ]; then - CONTACT_RZ=klar - fi - if [ -n "$HW_MBRAM" ] && [ "$HW_MBRAM" -lt 4500 ]; then - echo "ff0000" " Da der PC wenig RAM hat, ist die Einrichtung einer ID44-Partition dringend zu empfehlen." >&4 - fi - elif [ -n "$HW_ID44" ] && [ "$HW_ID44" -lt 10000 ]; then - echo "000000" "* Die ID44-Partition ist sehr klein. VM-Sitzungen könnten nach einiger Zeit aus Speichermangel abstürzen." >&4 - CONTACT_RZ=fjeden - fi - if [ -n "$SLX_VM_NFS" ] && ! systemctl status mount-vm-store >/dev/null; then - echo "ff0000" "* Der VM-Store konnte nicht eingehängt werden. VMs können nicht gestartet werden." >&4 - echo "ff0000" " Versuchen Sie das Problem zu lösen, indem Sie den Computer neu starten." >&4 - fi - if grep -q '^nouveau ' "/proc/modules"; then - echo "ff5500" "* Die nVidia-Karte in diesem Rechner wird nur von den quelloffenen Treibern (nouveau) unterstützt, und daher mit verminderter Leistung laufen." >&4 - fi - if [ -n "$CONTACT_RZ" ]; then - echo "000000" " -- " >&4 - echo "000000" " -- Wenden Sie sich ggf. 
an den bwLehrpool-Support Ihres Rechenzentrums -- " >&4 - fi -} - -buildlogfile & - -# Fire away -echo "Submitting to $SLX_REMOTE_LOG" -if curl --retry 4 --retry-connrefused --max-time 5 --retry-max-time 15 \ - --data-urlencode "type=~poweron" --data-urlencode "uuid=$UUID" --data-urlencode "macaddr=$MAC" \ - --data-urlencode "uptime=$UPTIME" --data-urlencode "realcores=$CPUCORES" --data-urlencode "mbram=$RAM" \ - --data-urlencode "kvmstate=$VT" --data-urlencode "cpumodel=$CPUMODEL" --data-urlencode "id44mb=$ID44" \ - --data-urlencode "badsectors=$BADSECTORS" --data-urlencode "systemmodel=$MODEL" \ - --data-urlencode "data@$DATAFILE" "$SLX_REMOTE_LOG" | grep -q "RESULT=0"; then - echo "Success" - rm -f -- "$DATAFILE" - START=$(( $RANDOM % 5 )) - DELAY=$(( $RANDOM % 20 )) - cat > "/etc/cron.d/usage_stats" <<-EOF - # Update usage statistics on server - - SHELL=/bin/sh - PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/openslx/sbin:/opt/openslx/bin - - ${START}-59/5 * * * * root sleep ${DELAY}; /opt/openslx/scripts/cron-system_usage_update - EOF - touch "/etc/cron.d" # Sometimes, aufs doesn't update the mtime of dirs when creating files, - # so cron would not rescan the cron directory - cleanup - # Trigger right now so resource usage gets updated - /opt/openslx/scripts/cron-system_usage_update - exit 0 -else - echo "Failed..." -fi - -echo "Server doesn't seem to support hardware/usage stats - disabling logging" -rm -f -- "/etc/cron.d/usage_stats" cleanup -exit 1 +exit 0 diff --git a/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report b/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report new file mode 100755 index 00000000..4510bfe4 --- /dev/null +++ b/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-report @@ -0,0 +1,82 @@ +#!/bin/ash + +export PATH=$PATH:/opt/openslx/sbin:/opt/openslx/bin + +. 
/opt/openslx/config + +disable_remote_logging() { + echo "Server doesn't seem to support hardware/usage stats - disabling logging" + rm -f -- "/etc/cron.d/usage_stats" +} + +# sends the hardware information of this machine generated by +# systemd-hardware_stats_gather in /run/hwreport and /run/hwinfo +report_hardware_info() { + if [ -z "$SLX_REMOTE_LOG" ]; then + disable_remote_logging + return 1 + fi + local hwreport="/run/hwreport" + if [ ! -s "$hwreport" ]; then + echo "Missing hwreport file: $hwreport" + # TODO send data without? + return 1 + fi + + # Read generated data and current uptime and send it + local hwinfo="/run/hwinfo" + if [ ! -s "$hwinfo" ]; then + echo "Missing hwinfo file: $hwinfo" + return 1 + fi + . "$hwinfo" + + # got everything, get the last infos + local uptime=$(grep -oE '^[0-9]+' /proc/uptime) + + uuid=$(cat /etc/system-uuid) + if [ -z "$uuid" ] || [ "${#uuid}" -ne "36" ]; then + echo "No/malformed UUID, aborting" >&2 + exit 1 + fi + + # Combine manufacturer and model name (for displaying purposes) + local model="$HW_MODEL" + if [ "$HW_MANUF" != "Unknown" ]; then + model="$model ($HW_MANUF)" + fi + # just assume the uuid/mac dumped are valid here (its checked often enough :)) + echo -n "Submitting to '$SLX_REMOTE_LOG' ... " + curl --retry 4 --retry-connrefused --max-time 5 --retry-max-time 15 \ + --data-urlencode "type=~poweron" --data-urlencode "uuid=$uuid" --data-urlencode "macaddr=$HW_MAC" \ + --data-urlencode "uptime=$uptime" --data-urlencode "realcores=$HW_CORES" --data-urlencode "mbram=$HW_MBRAM" \ + --data-urlencode "kvmstate=$HW_KVM" --data-urlencode "cpumodel=$HW_CPUMODEL" --data-urlencode "id44mb=$HW_ID44" \ + --data-urlencode "badsectors=$HW_BADSECTORS" --data-urlencode "systemmodel=$model" \ + --data-urlencode "data@$hwreport" "$SLX_REMOTE_LOG" | grep -q "RESULT=0" + local ret=$? + if [ "$ret" -ne 0 ]; then + echo "failed." + disable_remote_logging + return 1 + fi + echo "succeeded." 
+ rm -f -- "$hwreport" + START=$(( $RANDOM % 5 )) + DELAY=$(( $RANDOM % 20 )) + cat > "/etc/cron.d/usage_stats" <<-EOF + # Update usage statistics on server + SHELL=/bin/sh + PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/openslx/sbin:/opt/openslx/bin + + ${START}-59/5 * * * * root sleep ${DELAY}; /opt/openslx/scripts/cron-system_usage_update + EOF + # TODO remove this hack one day: Sometimes, aufs doesn't update the mtime of dirs + # when creating files, so cron would not rescan the cron directory. + touch "/etc/cron.d" + # Trigger right now so resource usage gets updated + /opt/openslx/scripts/cron-system_usage_update + return 0 +} + +report_hardware_info + diff --git a/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-warnings b/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-warnings new file mode 100755 index 00000000..54894119 --- /dev/null +++ b/core/modules/hardware-stats/data/opt/openslx/system-check/hooks.d/50-hardware-warnings @@ -0,0 +1,14 @@ +#!/bin/ash +# This file is executed in /opt/openslx/scripts/systemd-generate_warnings +# If a first parameter is given, we write to that file instead of stdout. + +. /opt/openslx/config +. /run/hwinfo + +if [ -n "$1" ]; then + exec >> "$1" +fi + +if grep -q '^nouveau ' "/proc/modules"; then + echo 'slx-gfx-nouveau' +fi diff --git a/core/modules/hardware-stats/data/opt/openslx/system-check/lang/de/slx-gfx-nouveau b/core/modules/hardware-stats/data/opt/openslx/system-check/lang/de/slx-gfx-nouveau new file mode 100644 index 00000000..a85f6c36 --- /dev/null +++ b/core/modules/hardware-stats/data/opt/openslx/system-check/lang/de/slx-gfx-nouveau @@ -0,0 +1 @@ +* Die nVidia-Karte in diesem Rechner wird nur von den quelloffenen Treibern (nouveau) unterstützt, und daher mit verminderter Leistung laufen. 
diff --git a/core/modules/hardware-stats/data/opt/openslx/system-check/tags/slx-gfx-nouveau b/core/modules/hardware-stats/data/opt/openslx/system-check/tags/slx-gfx-nouveau new file mode 100644 index 00000000..3459a3a4 --- /dev/null +++ b/core/modules/hardware-stats/data/opt/openslx/system-check/tags/slx-gfx-nouveau @@ -0,0 +1,2 @@ +color="ff5500" +contact= |