diff options
author | Simon Rettberg | 2024-07-03 17:16:50 +0200 |
---|---|---|
committer | Simon Rettberg | 2024-07-03 17:16:50 +0200 |
commit | f736d67c0c1acae1b33190758e4160b898e8a6d4 (patch) | |
tree | 96ffb6acb26c4eff92860ecd18c3a69ce52794c6 | |
parent | [sysconfig] Use auto-generated taskId (diff) | |
download | slx-admin-f736d67c0c1acae1b33190758e4160b898e8a6d4.tar.gz slx-admin-f736d67c0c1acae1b33190758e4160b898e8a6d4.tar.xz slx-admin-f736d67c0c1acae1b33190758e4160b898e8a6d4.zip |
[statistics_reporting] Fix infinite loop
If there are more than 1000 events with the same timestamp, we'd end up
in an infinite loop. Take care of this unusual case by increasing a
row-skip counter whenever a full batch shares one timestamp.
-rw-r--r-- | modules-available/statistics_reporting/inc/queries.inc.php | 38 |
1 file changed, 23 insertions, 15 deletions
diff --git a/modules-available/statistics_reporting/inc/queries.inc.php b/modules-available/statistics_reporting/inc/queries.inc.php index bafe80bc..410f9517 100644 --- a/modules-available/statistics_reporting/inc/queries.inc.php +++ b/modules-available/statistics_reporting/inc/queries.inc.php @@ -115,33 +115,37 @@ class Queries $machines[$row['machineuuid']] = $row; } // Don't filter by typeid in the query, still faster by being able to use the machineuuid/dateline index and filtering later - $last = $from - 86400; // Start 24h early to catch sessions already in progress + $startTimestamp = $from - 86400; // Start 24h early to catch sessions already in progress $dups = []; + $skipCount = 0; + $BATCH_SIZE = 1000; // Fetch in batches of 1000 rows (for current 50 machines) do { $res = Database::simpleQuery("SELECT logid, dateline, typeid, machineuuid, data FROM statistic WHERE dateline >= :last AND dateline <= :to AND machineuuid IS NOT NULL - ORDER BY dateline ASC LIMIT 1000", compact('last', 'to')); - $last = false; + ORDER BY dateline ASC, logid ASC LIMIT $BATCH_SIZE OFFSET $skipCount", + ['last' => $startTimestamp, 'to' => $to]); + $lastStartTimestamp = $startTimestamp; + $startTimestamp = false; $count = 0; + $lastDups = $dups; + $dups = []; foreach ($res as $row) { $count += 1; // Update count first, as we use it as a condition in outer loop. No continue before this settype($row['logid'], 'int'); // Update for next query - $last = $row['dateline']; + $startTimestamp = $row['dateline']; // Ignore dups, we query >= last dateline as we can have multiple events at the same second, but // only some of them got returned because of LIMIT. Skip here because excluding in query directly // would make the query itself rather inefficient. 
We also cannot use logid > X because the logid - // is not strictly ascending with time, as dateline gets backdated to event start on insert - if ($count === 150) { - $dups = []; - } elseif ($count > 900) { - $dups[] = $row['logid']; - } elseif ($count < 150 && array_key_exists($row['logid'], $dups)) { - continue; - } + // is not strictly ascending with time, as dateline gets backdated to event start on insert. + // In case we have 1000 rows with identical timestamp, we need to start skipping lines in the result, + // otherwise we'd loop infinitely. + $dups[$row['logid']] = 1; + if (isset($lastDups[$row['logid']])) + continue; // Already seen in last batch if (!isset($machines[$row['machineuuid']])) - continue; + continue; // Not interested in this machine if ($row['typeid'] !== '~offline-length' && $row['typeid'] !== '~suspend-length' && $row['typeid'] !== '~session-length') continue; settype($row['dateline'], 'int'); @@ -174,8 +178,12 @@ class Queries } self::addTime($machine, $row, $bounds); } - $dups = array_flip($dups); - } while ($last !== false && $count === 1000); // Check if we need to fetch more rows for current batch + if ($startTimestamp === $lastStartTimestamp) { + $skipCount += $BATCH_SIZE; + } else { + $skipCount = 0; + } + } while ($count === $BATCH_SIZE); // Check if we need to fetch more rows for current batch foreach ($machines as &$machine) { if (!$machine['active']) { $machine['totalOffTime'] = $machine['totalTime']; |