From f736d67c0c1acae1b33190758e4160b898e8a6d4 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 3 Jul 2024 17:16:50 +0200 Subject: [statistics_reporting] Fix infinite loop If there are more than 1000 events with the same timestamp, we'd end up in an infinite loop. Take care of this unusual case by increasing a rowskip counter in this case. --- .../statistics_reporting/inc/queries.inc.php | 38 +++++++++++++--------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'modules-available') diff --git a/modules-available/statistics_reporting/inc/queries.inc.php b/modules-available/statistics_reporting/inc/queries.inc.php index bafe80bc..410f9517 100644 --- a/modules-available/statistics_reporting/inc/queries.inc.php +++ b/modules-available/statistics_reporting/inc/queries.inc.php @@ -115,33 +115,37 @@ class Queries $machines[$row['machineuuid']] = $row; } // Don't filter by typeid in the query, still faster by being able to use the machineuuid/dateline index and filtering later - $last = $from - 86400; // Start 24h early to catch sessions already in progress + $startTimestamp = $from - 86400; // Start 24h early to catch sessions already in progress $dups = []; + $skipCount = 0; + $BATCH_SIZE = 1000; // Fetch in batches of 1000 rows (for current 50 machines) do { $res = Database::simpleQuery("SELECT logid, dateline, typeid, machineuuid, data FROM statistic WHERE dateline >= :last AND dateline <= :to AND machineuuid IS NOT NULL - ORDER BY dateline ASC LIMIT 1000", compact('last', 'to')); - $last = false; + ORDER BY dateline ASC, logid ASC LIMIT $BATCH_SIZE OFFSET $skipCount", + ['last' => $startTimestamp, 'to' => $to]); + $lastStartTimestamp = $startTimestamp; + $startTimestamp = false; $count = 0; + $lastDups = $dups; + $dups = []; foreach ($res as $row) { $count += 1; // Update count first, as we use it as a condition in outer loop. No continue before this settype($row['logid'], 'int'); // Update for next query - $last = $row['dateline']; + $startTimestamp = $row['dateline']; // Ignore dups, we query >= last dateline as we can have multiple events at the same second, but // only some of them got returned because of LIMIT. Skip here because excluding in query directly // would make the query itself rather inefficient. We also cannot use logid > X because the logid - // is not strictly ascending with time, as dateline gets backdated to event start on insert - if ($count === 150) { - $dups = []; - } elseif ($count > 900) { - $dups[] = $row['logid']; - } elseif ($count < 150 && array_key_exists($row['logid'], $dups)) { - continue; - } + // is not strictly ascending with time, as dateline gets backdated to event start on insert. + // In case we have 1000 rows with identical timestamp, we need to start skipping lines in the result, + // otherwise we'd loop infinitely. + $dups[$row['logid']] = 1; + if (isset($lastDups[$row['logid']])) + continue; // Already seen in last batch if (!isset($machines[$row['machineuuid']])) - continue; + continue; // Not interested in this machine if ($row['typeid'] !== '~offline-length' && $row['typeid'] !== '~suspend-length' && $row['typeid'] !== '~session-length') continue; settype($row['dateline'], 'int'); @@ -174,8 +178,12 @@ class Queries } self::addTime($machine, $row, $bounds); } - $dups = array_flip($dups); - } while ($last !== false && $count === 1000); // Check if we need to fetch more rows for current batch + if ($startTimestamp === $lastStartTimestamp) { + $skipCount += $BATCH_SIZE; + } else { + $skipCount = 0; + } + } while ($count === $BATCH_SIZE); // Check if we need to fetch more rows for current batch foreach ($machines as &$machine) { if (!$machine['active']) { $machine['totalOffTime'] = $machine['totalTime']; -- cgit v1.2.3-55-g7522