From c124192e63bd8a48ed1e7caf9542dc52505dfd22 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 24 Jan 2019 12:17:07 +0100 Subject: [statistics] Handling of standby state in statistics, log crashes Or rather, not really crashes, but log whenever a client reports a poweron event without reporting a proper shutdown first. This isn't neccessarily a crash but could also be due to power loss, hard poweroff, or network failures. --- modules-available/statistics/api.inc.php | 102 +++++++++------------ modules-available/statistics/hooks/cron.inc.php | 20 +++- modules-available/statistics/inc/parser.inc.php | 10 +- .../statistics/inc/statistics.inc.php | 17 ++++ modules-available/statistics/page.inc.php | 29 +++++- 5 files changed, 107 insertions(+), 71 deletions(-) diff --git a/modules-available/statistics/api.inc.php b/modules-available/statistics/api.inc.php index 674cc48d..813e7d54 100644 --- a/modules-available/statistics/api.inc.php +++ b/modules-available/statistics/api.inc.php @@ -123,32 +123,20 @@ if ($type{0} === '~') { } } // Maybe log old crashed session - if ($uptime < 120) { + if ($uptime < 150 && $old !== false) { // See if we have a lingering session, create statistic entry if so - if ($old !== false && $old['logintime'] !== 0) { + if ($old['state'] === 'OCCUPIED' && $old['logintime'] !== 0) { $sessionLength = $old['lastseen'] - $old['logintime']; if ($sessionLength > 30 && $sessionLength < 86400*2) { - Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' - . " VALUES (:start, '~session-length', :uuid, :clientip, '', :length)", array( - 'start' => $old['logintime'], - 'uuid' => $uuid, - 'clientip' => $ip, - 'length' => $sessionLength - )); + Statistics::logMachineState($uuid, $ip, Statistics::SESSION_LENGTH, $old['logintime'], $sessionLength); } } // Write poweroff period length to statistic table - if ($old !== false && $old['lastseen'] !== 0) { + if ($old['lastseen'] !== 0) { $lastSeen = $old['lastseen']; $offtime = ($NOW - $uptime) - $lastSeen; - if ($offtime > 300 && $offtime < 86400 * 90) { - Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' - . " VALUES (:shutdown, '~offline-length', :uuid, :clientip, '', :length)", array( - 'shutdown' => $lastSeen, - 'uuid' => $uuid, - 'clientip' => $ip, - 'length' => $offtime - )); + if ($offtime > 90 && $offtime < 86400 * 30) { + Statistics::logMachineState($uuid, $ip, $old['state'] === 'STANDBY' ? Statistics::SUSPEND_LENGTH : Statistics::OFFLINE_LENGTH, $lastSeen, $offtime); } } } @@ -173,36 +161,41 @@ if ($type{0} === '~') { } else if ($type === '~runstate') { // Usage (occupied/free) $sessionLength = 0; + $strUpdateBoottime = ''; if ($old === false) die("Unknown machine.\n"); if ($old['clientip'] !== $ip) { EventLog::warning("[runstate] IP address of client $uuid seems to have changed ({$old['clientip']} -> $ip)"); die("Address changed.\n"); } $used = Request::post('used', 0, 'integer'); - if ($old['state'] === 'OFFLINE' && $NOW - $old['lastseen'] > 600) { - $strUpdateBoottime = ' lastboot = UNIX_TIMESTAMP(), '; - } else { - $strUpdateBoottime = ''; - } - // 1) Log last session length if we didn't see the machine for a while - if ($NOW - $old['lastseen'] > 610 && $old['lastseen'] !== 0) { - // Old session timed out - might be caused by hard reboot - if ($old['logintime'] !== 0) { - if ($old['lastseen'] > $old['logintime']) { - $sessionLength = $old['lastseen'] - $old['logintime']; - } - $old['logintime'] = 0; - } - } - // Figure out what's happening - state changes $params = array( 'uuid' => $uuid, 'oldlastseen' => $old['lastseen'], 'oldstate' => $old['state'], ); + if ($old['state'] === 'OFFLINE') { + // This should never happen -- we expect a poweron event before runstate, which would set the state to IDLE + // So it might be that the poweron event got lost, or that a couple of runstate events got lost, which + // caused our cron.inc.php to time out the client and reset it to OFFLINE + if ($NOW - $old['lastseen'] > 900) { + $strUpdateBoottime = ' lastboot = UNIX_TIMESTAMP(), '; + } + // 1) Log last session length if we didn't see the machine for a while + if ($NOW - $old['lastseen'] > 900 && $old['lastseen'] !== 0) { + // Old session timed out - might be caused by hard reboot + if ($old['logintime'] !== 0) { + if ($old['lastseen'] > $old['logintime']) { + $sessionLength = $old['lastseen'] - $old['logintime']; + } + } + } + } + // Figure out what's happening - state changes if ($used === 0 && $old['state'] !== 'IDLE') { - // Is not in use, was in use before - $sessionLength = $NOW - $old['logintime']; + if ($old['state'] === 'OCCUPIED' && $sessionLength === 0) { + // Is not in use, was in use before + $sessionLength = $NOW - $old['logintime']; + } $res = Database::exec('UPDATE machine SET lastseen = UNIX_TIMESTAMP(),' . $strUpdateBoottime . " logintime = 0, currentuser = NULL, state = 'IDLE' " @@ -232,13 +225,7 @@ if ($type{0} === '~') { } // 9) Log last session length if applicable if ($mode === false && $sessionLength > 0 && $sessionLength < 86400*2 && $old['logintime'] !== 0) { - Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' - . " VALUES (:start, '~session-length', :uuid, :clientip, '', :length)", array( - 'start' => $old['logintime'], - 'uuid' => $uuid, - 'clientip' => $ip, - 'length' => $sessionLength - )); + Statistics::logMachineState($uuid, $ip, Statistics::SESSION_LENGTH, $old['logintime'], $sessionLength); } } elseif ($type === '~poweroff') { if ($old === false) die("Unknown machine.\n"); @@ -246,16 +233,10 @@ if ($type{0} === '~') { EventLog::warning("[poweroff] IP address of client $uuid seems to have changed ({$old['clientip']} -> $ip)"); die("Address changed.\n"); } - if ($mode === false && $old['logintime'] !== 0) { + if ($mode === false && $old['state'] === 'OCCUPIED' && $old['logintime'] !== 0) { $sessionLength = $old['lastseen'] - $old['logintime']; if ($sessionLength > 0 && $sessionLength < 86400*2) { - Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' - . " VALUES (:start, '~session-length', :uuid, :clientip, '', :length)", array( - 'start' => $old['logintime'], - 'uuid' => $uuid, - 'clientip' => $ip, - 'length' => $sessionLength - )); + Statistics::logMachineState($uuid, $ip, Statistics::SESSION_LENGTH, $old['logintime'], $sessionLength); } } Database::exec("UPDATE machine SET logintime = 0, lastseen = UNIX_TIMESTAMP(), state = 'OFFLINE' @@ -368,13 +349,7 @@ if ($type{0} === '~') { $lastSeen = $old['lastseen']; $duration = $NOW - $lastSeen; if ($duration > 500 && $duration < 86400 * 14) { - Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' - . " VALUES (:suspend, '~suspend-length', :uuid, :clientip, '', :length)", array( - 'suspend' => $lastSeen, - 'uuid' => $uuid, - 'clientip' => $ip, - 'length' => $duration - )); + Statistics::logMachineState($uuid, $ip, Statistics::SUSPEND_LENGTH, $lastSeen, $duration); } } } else { @@ -449,9 +424,14 @@ if ($type{0} === '.') { function checkHardwareChange($old, $new) { if ($new['mbram'] !== 0) { - if ($new['mbram'] + 1000 < $old['mbram']) { - $ram1 = round($old['mbram'] / 512) / 2; - $ram2 = round($new['mbram'] / 512) / 2; + if ($new['mbram'] < 6200) { + $ram1 = ceil($old['mbram'] / 512) / 2; + $ram2 = ceil($new['mbram'] / 512) / 2; + } else { + $ram1 = ceil($old['mbram'] / 1024); + $ram2 = ceil($new['mbram'] / 1024); + } + if ($ram1 !== $ram2) { EventLog::warning('[poweron] Client ' . $new['uuid'] . ' (' . $new['clientip'] . "): RAM decreased from {$ram1}GB to {$ram2}GB"); } if (!empty($old['cpumodel']) && !empty($new['cpumodel']) && $new['cpumodel'] !== $old['cpumodel']) { diff --git a/modules-available/statistics/hooks/cron.inc.php b/modules-available/statistics/hooks/cron.inc.php index f05762bc..0b8e740e 100644 --- a/modules-available/statistics/hooks/cron.inc.php +++ b/modules-available/statistics/hooks/cron.inc.php @@ -21,10 +21,11 @@ function logstats() function state_cleanup() { // Fix online state of machines that crashed - $standby = time() - 86400 * 2; // Reset standby machines after two days + $standby = time() - 86400 * 4; // Reset standby machines after four days $on = time() - 610; // Reset others after ~10 minutes // Query for logging - $res = Database::simpleQuery("SELECT machineuuid, clientip, state FROM machine WHERE lastseen < If(state = 'STANDBY', $standby, $on) AND state <> 'OFFLINE'"); + $res = Database::simpleQuery("SELECT machineuuid, clientip, state, logintime, lastseen FROM machine + WHERE lastseen < If(state = 'STANDBY', $standby, $on) AND state <> 'OFFLINE'"); while ($row = $res->fetch(PDO::FETCH_ASSOC)) { Database::exec('INSERT INTO clientlog (dateline, logtypeid, clientip, machineuuid, description, extra) VALUES (UNIX_TIMESTAMP(), :type, :client, :uuid, :description, :longdesc)', array( @@ -34,9 +35,20 @@ function state_cleanup() 'longdesc' => '', 'uuid' => $row['machineuuid'], )); + if ($row['state'] === 'OCCUPIED') { + $length = $row['lastseen'] - $row['logintime']; + if ($length > 0 && $length < 86400 * 7) { + Statistics::logMachineState($row['machineuuid'], $row['clientip'], Statistics::SESSION_LENGTH, $row['logintime'], $length); + } + } elseif ($row['state'] === 'STANDBY') { + $length = time() - $row['lastseen']; + if ($length > 0 && $length < 86400 * 7) { + Statistics::logMachineState($row['machineuuid'], $row['clientip'], Statistics::SUSPEND_LENGTH, $row['lastseen'], $length); + } + } } - // Update -- yes this is not atomic. Should be sufficient for simple warnings though. - Database::exec("UPDATE machine SET state = 'OFFLINE' WHERE lastseen < If(state = 'STANDBY', $standby, $on) AND state <> 'OFFLINE'"); + // Update -- yes this is not atomic. Should be sufficient for simple warnings and bookkeeping though. + Database::exec("UPDATE machine SET logintime = 0, state = 'OFFLINE' WHERE lastseen < If(state = 'STANDBY', $standby, $on) AND state <> 'OFFLINE'"); } state_cleanup(); diff --git a/modules-available/statistics/inc/parser.inc.php b/modules-available/statistics/inc/parser.inc.php index b179b4a3..0d39079d 100644 --- a/modules-available/statistics/inc/parser.inc.php +++ b/modules-available/statistics/inc/parser.inc.php @@ -104,10 +104,12 @@ class Parser { foreach ($lines as $line) { if (preg_match('/^Disk (\S+):.* (\d+) bytes/i', $line, $out)) { // --- Beginning of MBR disk --- + unset($hdd); if ($out[2] < 10000) // sometimes vmware reports lots of 512byte disks continue; + if (substr($out[1], 0, 8) === '/dev/dm-') // Ignore device mapper + continue; // disk total size and name - unset($hdd); $mbrToMbFactor = 0; // This is != 0 for mbr $sectorToMbFactor = 0; // This is != for gpt $hdd = array( @@ -122,10 +124,12 @@ class Parser { $hdds[] = &$hdd; } elseif (preg_match('/^Disk (\S+):\s+(\d+)\s+sectors,/i', $line, $out)) { // --- Beginning of GPT disk --- + unset($hdd); if ($out[2] < 1000) // sometimes vmware reports lots of 512byte disks continue; + if (substr($out[1], 0, 8) === '/dev/dm-') // Ignore device mapper + continue; // disk total size and name - unset($hdd); $mbrToMbFactor = 0; // This is != 0 for mbr $sectorToMbFactor = 0; // This is != for gpt $hdd = array( @@ -213,7 +217,7 @@ class Parser { $hdd['size'] = round(($hdd['sectors'] * $sectorToMbFactor) / 1024); } $free = $hdd['size'] - $hdd['used']; - if ($free > 5 || ($free / $hdd['size']) > 0.1) { + if ($hdd['size'] > 0 && ($free > 5 || ($free / $hdd['size']) > 0.1)) { $hdd['partitions'][] = array( 'id' => 'free-id-' . $i, 'name' => Dictionary::translate('unused'), diff --git a/modules-available/statistics/inc/statistics.inc.php b/modules-available/statistics/inc/statistics.inc.php index 2500f16f..1f8a081a 100644 --- a/modules-available/statistics/inc/statistics.inc.php +++ b/modules-available/statistics/inc/statistics.inc.php @@ -70,4 +70,21 @@ class Statistics return $list; } + const SESSION_LENGTH = '~session-length'; + const OFFLINE_LENGTH = '~offline-length'; + const SUSPEND_LENGTH = '~suspend-length'; + + public static function logMachineState($uuid, $ip, $type, $start, $length, $username = '') + { + return Database::exec('INSERT INTO statistic (dateline, typeid, machineuuid, clientip, username, data)' + . " VALUES (:start, :type, :uuid, :clientip, :username, :length)", array( + 'start' => $start, + 'type' => $type, + 'uuid' => $uuid, + 'clientip' => $ip, + 'username' => $username, + 'length' => $length, + )); + } + } diff --git a/modules-available/statistics/page.inc.php b/modules-available/statistics/page.inc.php index abacc8d2..2d86615e 100644 --- a/modules-available/statistics/page.inc.php +++ b/modules-available/statistics/page.inc.php @@ -910,16 +910,18 @@ class Page_Statistics extends Page //if ($cutoff < $client['firstseen']) $cutoff = $client['firstseen']; $scale = 100 / ($NOW - $cutoff); $res = Database::simpleQuery('SELECT dateline, typeid, data FROM statistic' - . " WHERE dateline > :cutoff AND typeid IN ('~session-length', '~offline-length') AND machineuuid = :uuid ORDER BY dateline ASC", array( + . " WHERE dateline > :cutoff AND typeid IN (:sessionLength, :offlineLength) AND machineuuid = :uuid ORDER BY dateline ASC", array( 'cutoff' => $cutoff - 86400 * 14, 'uuid' => $uuid, + 'sessionLength' => Statistics::SESSION_LENGTH, + 'offlineLength' => Statistics::OFFLINE_LENGTH, )); $spans['rows'] = array(); $spans['graph'] = ''; $last = false; $first = true; while ($row = $res->fetch(PDO::FETCH_ASSOC)) { - if (!$client['isclient'] && $row['typeid'] === '~session-length') + if (!$client['isclient'] && $row['typeid'] === Statistics::SESSION_LENGTH) continue; // Don't differentiate between session and idle for non-clients if ($first && $row['dateline'] > $cutoff && $client['lastboot'] > $cutoff) { // Special case: offline before @@ -945,9 +947,12 @@ class Page_Statistics extends Page } $row['from'] = Util::prettyTime($row['dateline']); $row['duration'] = floor($row['data'] / 86400) . 'd ' . gmdate('H:i', $row['data']); - if ($row['typeid'] === '~offline-length') { + if ($row['typeid'] === Statistics::OFFLINE_LENGTH) { $row['glyph'] = 'off'; $color = '#444'; + } elseif ($row['typeid'] === Statistics::SUSPEND_LENGTH) { + $row['glyph'] = 'pause'; + $color = '#686'; } else { $row['glyph'] = 'user'; $color = '#e77'; @@ -967,8 +972,26 @@ class Page_Statistics extends Page } if ($client['state'] === 'OCCUPIED') { $spans['graph'] .= '
 
'; + $spans['rows'][] = [ + 'from' => Util::prettyTime($client['logintime']), + 'duration' => '-', + 'glyph' => 'user', + ]; + $row['duration'] = floor($row['data'] / 86400) . 'd ' . gmdate('H:i', $row['data']); } elseif ($client['state'] === 'OFFLINE') { $spans['graph'] .= '
 
'; + $spans['rows'][] = [ + 'from' => Util::prettyTime($client['lastseen']), + 'duration' => '-', + 'glyph' => 'off', + ]; + } elseif ($client['state'] === 'STANDBY') { + $spans['graph'] .= '
 
'; + $spans['rows'][] = [ + 'from' => Util::prettyTime($client['lastseen']), + 'duration' => '-', + 'glyph' => 'pause', + ]; } $t = explode('-', date('Y-n-j-G', $cutoff)); if ($t[3] >= 8 && $t[3] <= 22) { -- cgit v1.2.3-55-g7522