From 4a8f28f5067c3a048caf6c3bc6026cf15a709808 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 27 Jan 2022 18:56:52 +0100 Subject: [rebootcontrol] Adapt do new WakeOnLan task --- .../rebootcontrol/inc/rebootcontrol.inc.php | 383 ++++++++++----------- 1 file changed, 185 insertions(+), 198 deletions(-) diff --git a/modules-available/rebootcontrol/inc/rebootcontrol.inc.php b/modules-available/rebootcontrol/inc/rebootcontrol.inc.php index 59d20641..4883ea04 100644 --- a/modules-available/rebootcontrol/inc/rebootcontrol.inc.php +++ b/modules-available/rebootcontrol/inc/rebootcontrol.inc.php @@ -48,7 +48,7 @@ class RebootControl "port" => 9922, // Hard-coded, must match mgmt-sshd module )); if (!Taskmanager::isFailed($task)) { - self::addTask($task['id'], self::TASK_REBOOTCTL, $list, $task['id'], ['action' => $mode]); + self::addTask($task['id'], self::TASK_REBOOTCTL, $list, ['action' => $mode]); foreach ($list as $client) { $client['mode'] = $mode; $client['minutes'] = $minutes; @@ -58,7 +58,7 @@ class RebootControl return $task; } - private static function addTask($id, $type, $clients, $taskIds, $other = false) + private static function addTask(string $taskId, string $type, array $clients, array $other = null) { $lids = ArrayUtil::flattenByKey($clients, 'locationid'); $lids = array_unique($lids); @@ -70,15 +70,12 @@ class RebootControl } $newClients[] = $d; } - if (!is_array($taskIds)) { - $taskIds = [$taskIds]; - } $data = [ - 'id' => $id, + 'id' => $taskId, 'type' => $type, 'locations' => $lids, 'clients' => $newClients, - 'tasks' => $taskIds, + 'tasks' => [$taskId], // This did hold multiple tasks in the past; keep it in case we need this again ]; if (is_array($other)) { $data += $other; @@ -88,9 +85,10 @@ class RebootControl /** * @param int[]|null $locations filter by these locations + * @param ?string $id only with this TaskID * @return array|false list of active tasks for reboots/shutdowns. 
*/ - public static function getActiveTasks($locations = null, $id = null) + public static function getActiveTasks(array $locations = null, $id = null) { if (is_array($locations) && in_array(0, $locations)) { $locations = null; @@ -149,7 +147,7 @@ class RebootControl { $task = self::runScriptInternal($clients, $command, $timeout, $privkey); if (!Taskmanager::isFailed($task)) { - self::addTask($task['id'], self::TASK_EXEC, $clients, $task['id']); + self::addTask($task['id'], self::TASK_EXEC, $clients); } return $task; } @@ -212,13 +210,12 @@ class RebootControl } /** - * @param array $sourceMachines list of source machines. array of [clientip, machineuuid] entries - * @param string $bcast directed broadcast address to send to - * @param string|string[] $macaddr destination mac address(es) - * @param string $passwd optional WOL password, mac address or ipv4 notation - * @return array|false task struct, false on error + * @param string|string[] $macs + * @param $bcast + * @param $passwd + * @return string */ - public static function wakeViaClient($sourceMachines, $macaddr, $bcast = false, $passwd = false) + private static function buildClientWakeCommand($macs, $bcast = false, $passwd = false): string { $command = 'jawol'; if (!empty($bcast)) { @@ -229,22 +226,35 @@ class RebootControl if (!empty($passwd)) { $command .= " -p '$passwd'"; } - if (is_array($macaddr)) { - $macaddr = implode("' '", $macaddr); + if (is_array($macs)) { + $macs = implode(" ", $macs); } - $command .= " '$macaddr'"; - // Yes there is one zero missing from the usleep -- that's the whole point: we prefer 100ms sleeps + $command .= " $macs"; + return $command; + } + + /** + * @param array $sourceMachines list of source machines. 
array of [clientip, machineuuid] entries + * @param string $bcast directed broadcast address to send to + * @param string|string[] $macaddr destination mac address(es) + * @param string $passwd optional WOL password, mac address or ipv4 notation + * @return array|false task struct, false on error + */ + public static function wakeViaClient($sourceMachines, $macaddr, string $bcast = null, string $passwd = null) + { + $command = self::buildClientWakeCommand($macaddr, $bcast, $passwd); + // Yes there is one zero "missing" from the usleep -- that's the whole point: we prefer 100ms sleeps return self::runScriptInternal($sourceMachines, "for i in 1 1 0; do $command; usleep \${i}00000 2> /dev/null || sleep \$i; done"); } /** * @param string|string[] $macaddr destination mac address(es) - * @param string $bcast directed broadcast address to send to - * @param string $passwd optional WOL password; mac address or ipv4 notation + * @param ?string $bcast directed broadcast address to send to + * @param ?string $passwd optional WOL password; mac address or ipv4 notation * @return array|false task struct, false on error */ - public static function wakeDirectly($macaddr, $bcast = false, $passwd = false) + public static function wakeDirectly($macaddr, string $bcast = null, string $passwd = null) { if (!is_array($macaddr)) { $macaddr = [$macaddr]; @@ -261,7 +271,14 @@ class RebootControl ]); } - public static function wakeViaJumpHost($jumphost, $bcast, $clients) + /** + * Explicitly wake given clients via jumphost + * @param array $jumphost the according row from the database, representing the desired jumphost + * @param string $bcast (directed) broadcast address for WOL packet, %IP% in command template + * @param array $clients list of clients, must contain at least key 'macaddr' for every client + * @return array|false task struct on successful submission to TM, false on error + */ + public static function wakeViaJumpHost(array $jumphost, string $bcast, array $clients) { $hostid 
= $jumphost['hostid']; $macs = ArrayUtil::flattenByKey($clients, 'macaddr'); @@ -285,61 +302,43 @@ class RebootControl } /** - * @param array $list list of clients containing each keys 'macaddr' and 'clientip' - * @return string id of this job + * @param array $clientList list of clients containing each keys 'macaddr' and 'clientip' + * @param array $failed list of failed clients from $clientList + * @return ?string taskid of this job */ - public static function wakeMachines($list, &$failed = []) + public static function wakeMachines(array $clientList, array &$failed = []) { - /* TODO: Refactor mom's spaghetti - * Now that I figured out what I want, do something like this: - * 1) Group clients by subnet - * 2) Only after step 1, start to collect possible ways to wake up clients for each subnet that's not empty - * 3) Have some priority list for the methods, extend Taskmanager to have "negative dependency" - * i.e. submit task B with task A as parent task, but only launch task B if task A failed. - * If task A succeeded, mark task B as FINISHED immediately without actually running it. - * (or introduce new statusCode for this?) - */ $errors = ''; - $tasks = []; - $bad = $unknown = []; + $sent = $unknown = $unreachable = []; // For event filtering by rule - $events = []; // Need all subnets... 
+ /* subnetid => [ + * subnetid => 1234, + * start => 1234, (ip2long) + * end => 5678, (ip2long) + * jumphosts => [id1, id2, ...], + */ $subnets = []; $res = Database::simpleQuery('SELECT subnetid, start, end, isdirect FROM reboot_subnet'); foreach ($res as $row) { $row += [ - 'jumphosts' => [], - 'direct' => [], - 'indirect' => [], + 'djumphosts' => [], + 'ijumphosts' => [], ]; $subnets[$row['subnetid']] = $row; } // Get all jump hosts - $jumphosts = []; - $res = Database::simpleQuery('SELECT jh.hostid, host, port, username, sshkey, script, jh.reachable, - Group_Concat(jxs.subnetid) AS subnets1, Group_Concat(sxs.dstid) AS subnets2 - FROM reboot_jumphost jh - LEFT JOIN reboot_jumphost_x_subnet jxs ON (jh.hostid = jxs.hostid) - LEFT JOIN reboot_subnet s ON (INET_ATON(jh.host) BETWEEN s.start AND s.end) - LEFT JOIN reboot_subnet_x_subnet sxs ON (sxs.srcid = s.subnetid AND sxs.reachable <> 0) - GROUP BY jh.hostid'); - foreach ($res as $row) { - if ($row['subnets1'] === null && $row['subnets2'] === null) - continue; - $nets = explode(',', $row['subnets1'] . ',' . 
$row['subnets2']); - foreach ($nets as $net) { - if (empty($net) || !isset($subnets[$net])) - continue; - $subnets[$net]['jumphosts'][$row['hostid']] = $row['hostid']; - } - $row['jobs'] = []; - $jumphosts[$row['hostid']] = $row; - } - // Group by subnet - foreach ($list as $client) { - $ip = sprintf('%u', ip2long($client['clientip'])); - //$client['numip'] = $ip; + self::addJumphostsToSubnets($subnets); + // Determine method for all clients + $taskClients = []; // array of arrays with keys [ip, mac, methods] + $taskSsh = []; // SSH configs for task, array of arrays with keys [username, sshkey, ip, port, command] + foreach ($clientList as $client) { + $c = [ + 'ip' => $client['clientip'], + 'mac' => $client['macaddr'], + 'methods' => [], + ]; + $ip = sprintf('%u', ip2long($client['clientip'])); // 32Bit snprintf unset($subnet); $subnet = false; foreach ($subnets as &$sn) { @@ -352,134 +351,58 @@ class RebootControl $unknown[] = $client; continue; } - $ok = false; - if (!$ok && $subnet['isdirect']) { - // Directly reachable - $subnet['direct'][] = $client; - $ok = true; + self::findMachinesForSubnet($subnet); + // Highest priority - clients in same subnet, no directed broadcast + // required, should be most reliable + self::addSshMethodUsingClient($subnet['dclients'], $c['methods'], $taskSsh); + // Jumphost - usually in same subnet + self::addSshMethodUsingJumphost($subnet['djumphosts'], true, $c['methods'], $taskSsh); + // Use clients in other subnets, known to be able to reach the destination net + self::addSshMethodUsingClient($subnet['iclients'], $c['methods'], $taskSsh); + // Jumphosts in other subnets, determined to be able to reach destination subnet + self::addSshMethodUsingJumphost($subnet['ijumphosts'], true, $c['methods'], $taskSsh); + // If directly reachable from server, prefer this now + if ($subnet['isdirect']) { + $c['methods'][] = 'DIRECT'; } - if (!$ok && !empty($subnet['jumphosts'])) { - foreach ($subnet['jumphosts'] as $hostid) { - if 
($jumphosts[$hostid]['reachable'] != 0) { - $jumphosts[$hostid]['jobs'][$subnet['end']][] = $client; - $ok = true; - break; - } - } - } - if (!$ok) { - // find clients in same subnet, or reachable ones - self::findMachinesForSubnet($subnet); - if (empty($subnet['dclients']) && empty($subnet['iclients'])) { - // Nothing found -- cannot wake this host - $bad[] = $client; - } else { - // Found suitable indirect host - $subnet['indirect'][] = $client; - $ok = true; - } - } - if ($ok && isset($client['machineuuid'])) { + if (empty($c['methods'])) { + $unreachable[] = $client; + } else { // TODO: Remember WOL was attempted } - } - unset($subnet); - // Batch process - // First, via jump host - foreach ($jumphosts as $jh) { - foreach ($jh['jobs'] as $bcast => $clients) { - $errors .= 'Via jumphost ' . $jh['host'] . ': ' . implode(', ', ArrayUtil::flattenByKey($clients, 'clientip')) . "\n"; - $task = self::wakeViaJumpHost($jh, $bcast, $clients); - if (Taskmanager::isFailed($task)) { - // TODO: Figure out $subnet from $bcast and queue as indirect - // (rather, overhaul this whole spaghetti code) - $errors .= ".... 
FAILED TO LAUNCH TASK ON JUMPHOST!\n"; - } else { - self::addEventList($events, $clients, 'jumphost', $jh['host']); - } - } - } - // Server or client - foreach ($subnets as $subnet) { - if (!empty($subnet['direct'])) { - // Can wake directly - if (!self::wakeGroup('From server', $tasks, $errors, null, $subnet['direct'], $subnet['end'])) { - if (!empty($subnet['dclients']) || !empty($subnet['iclients'])) { - $errors .= "Re-queueing clients for indirect wakeup\n"; - $subnet['indirect'] = array_merge($subnet['indirect'], $subnet['direct']); - } - } else { - self::addEventList($events, $subnet['direct'], 'satellite'); - } - } - if (!empty($subnet['indirect'])) { - // Can wake indirectly - $ok = false; - if (!empty($subnet['dclients'])) { - $ok = true; - if (!self::wakeGroup('in same subnet', $tasks, $errors, $subnet['dclients'], $subnet['indirect'])) { - if (!empty($subnet['iclients'])) { - $errors .= "Re-re-queueing clients for indirect wakeup\n"; - $ok = false; - } - } else { - self::addEventList($events, $subnet['indirect'], 'same-subnet', $subnet['dclients'][0]['clientip']); - } - } - if (!$ok && !empty($subnet['iclients'])) { - $ok = self::wakeGroup('across subnets', $tasks, $errors, $subnet['iclients'], $subnet['indirect'], $subnet['end']); - if ($ok) { - self::addEventList($events, $subnet['indirect'], 'other-subnet', $subnet['iclients'][0]['clientip']); - } - } - if (!$ok) { - $errors .= "I'm all out of ideas.\n"; - } + // Only other fallback is jumphosts that were not reachable when last checked, this is really a last resort + self::addSshMethodUsingJumphost($subnet['djumphosts'], false, $c['methods'], $taskSsh); + self::addSshMethodUsingJumphost($subnet['ijumphosts'], false, $c['methods'], $taskSsh); + + if (!empty($c['methods'])) { + $taskClients[] = $c; + $sent[] = $client; } } - if (!empty($bad)) { - $ips = ArrayUtil::flattenByKey($bad, 'clientip'); - $errors .= "**** WARNING ****\nNo way to send WOL packets to the following machines:\n" . 
implode("\n", $ips) . "\n"; - } + unset($subnet); + if (!empty($unknown)) { $ips = ArrayUtil::flattenByKey($unknown, 'clientip'); $errors .= "**** WARNING ****\nThe following clients do not belong to a known subnet (bug?)\n" . implode("\n", $ips) . "\n"; } - $failed = array_merge($bad, $unknown); - $id = Util::randomUuid(); - self::addTask($id, self::TASK_WOL, $list, $tasks, ['log' => $errors]); - foreach ($events as $event) { - EventLog::applyFilterRules('#action-wol', $event); + if (!empty($unreachable)) { + $ips = ArrayUtil::flattenByKey($unreachable, 'clientip'); + $errors .= "**** WARNING ****\nThe following clients are not reachable with any method\n" . implode("\n", $ips) . "\n"; } - return $id; - } - - private static function wakeGroup($type, &$tasks, &$errors, $via, $clients, $bcast = false) - { - $macs = ArrayUtil::flattenByKey($clients, 'macaddr'); - $ips = ArrayUtil::flattenByKey($clients, 'clientip'); - if ($via !== null) { - $srcips = ArrayUtil::flattenByKey($via, 'clientip'); - $errors .= 'Via ' . implode(', ', $srcips) . ' '; - } - $errors .= $type . ': ' . implode(', ', $ips); - if ($bcast !== false) { - $errors .= ' (UDP to ' . long2ip($bcast) . ')'; - } - $errors .= "\n"; - if ($via === null) { - $task = self::wakeDirectly($macs, $bcast); - } else { - $task = self::wakeViaClient($via, $macs, $bcast); - } - if ($task !== false && isset($task['id'])) { - $tasks[] = $task['id']; - } - if (Taskmanager::isFailed($task)) { - $errors .= ".... 
FAILED TO START ACCORDING TASK!\n"; - return false; + $failed = array_unique(array_merge($unknown, $unreachable)); + $task = Taskmanager::submit('WakeOnLan', [ + 'clients' => $taskClients, + 'ssh' => $taskSsh, + ]); + if (isset($task['id'])) { + $id = $task['id']; + self::addTask($id, self::TASK_WOL, $clientList, ['log' => $errors]); + foreach ($sent as $client) { + EventLog::applyFilterRules('#action-wol', $client); + } + return $id; } - return true; + return null; } private static function findMachinesForSubnet(&$subnet) @@ -488,15 +411,12 @@ class RebootControl return; $cutoff = time() - 320; // Get clients from same subnet first - $subnet['dclients'] = Database::queryAll("SELECT machineuuid, clientip FROM machine + $subnet['dclients'] = Database::queryColumnArray("SELECT clientip FROM machine WHERE state IN ('IDLE', 'OCCUPIED') AND INET_ATON(clientip) BETWEEN :start AND :end AND lastseen > :cutoff LIMIT 3", ['start' => $subnet['start'], 'end' => $subnet['end'], 'cutoff' => $cutoff]); - $subnet['iclients'] = []; - if (!empty($subnet['dclients'])) - return; // If none, get clients from other subnets known to be able to reach this one - $subnet['iclients'] = Database::queryAll("SELECT m.machineuuid, m.clientip FROM reboot_subnet_x_subnet sxs + $subnet['iclients'] = Database::queryColumnArray("SELECT m.clientip FROM reboot_subnet_x_subnet sxs INNER JOIN reboot_subnet s ON (s.subnetid = sxs.srcid AND sxs.dstid = :subnetid AND sxs.reachable = 1) INNER JOIN machine m ON (INET_ATON(m.clientip) BETWEEN s.start AND s.end AND state IN ('IDLE', 'OCCUPIED') AND m.lastseen > :cutoff) LIMIT 20", ['subnetid' => $subnet['subnetid'], 'cutoff' => $cutoff]); @@ -517,20 +437,87 @@ class RebootControl } /** - * Add given clients to given event array - * @param array $events - * @param array $clients - * @param string $type - * @param null $via + * Append a "wake via client" WOL method for the given client. Append at least one, but stop + * if there are at least two methods already. 
+ * + * @param array $sshClients [in] list of online clients to use for waking + * @param array $methods [out] The client's methods array + * @param array $taskSsh [out] add according task struct to this array, if not already exists + * @return void */ - private static function addEventList(array &$events, array $clients, string $type, $via = null) + private static function addSshMethodUsingClient(array $sshClients, array &$methods, array &$taskSsh) { - foreach ($clients as $client) { - $client['type'] = $type; - if (!empty($via)) { - $client['via'] = $via; + foreach ($sshClients as $host) { + if (!isset($taskSsh[$host])) { + $taskSsh[$host] = [ + 'username' => 'root', + 'sshkey' => SSHKey::getPrivateKey(), + 'ip' => $host, + 'port' => 9922, + 'command' => self::buildClientWakeCommand('%MACS%', '%IP%'), + ]; + } + $methods[] = $host; + if (count($methods) >= 2) + break; + } + } + + private static function addSshMethodUsingJumphost(array $jumpHosts, bool $reachable, array &$methods, array &$taskSsh) + { + // If it's the fallback to apparently unreachable jump-hosts, ignore if we already have two methods + if (!$reachable && count($methods) >= 2) + return; + // host, port, username, sshkey, script, jh.reachable + foreach ($jumpHosts as $jh) { + if ($reachable !== (bool)$jh['reachable']) + continue; + $key = substr(md5($jh['host'] . ':' . $jh['port'] . ':' . $jh['username']), 0, 10); + if (!isset($taskSsh[$key])) { + $taskSsh[$key] = [ + 'username' => $jh['username'], + 'sshkey' => $jh['sshkey'], + 'ip' => $jh['host'], + 'port' => $jh['port'], + 'command' => $jh['script'], + ]; + } + $methods[] = $key; + if (count($methods) >= 2) + break; + } + } + + /** + * Load all jumphosts from DB, sort into passed $subnets. Also split up + * by directly assigned subnets, and indirectly determined, reachable subnets.
+ * @param array $subnets [in,out] subnets keyed by subnetid; matching jumphost rows are appended to each entry's 'djumphosts' and 'ijumphosts' lists + * @return void + */ + private static function addJumphostsToSubnets(array &$subnets) + { + $res = Database::simpleQuery('SELECT host, port, username, sshkey, script, jh.reachable, + Group_Concat(jxs.subnetid) AS dsubnets, Group_Concat(sxs.dstid) AS isubnets + FROM reboot_jumphost jh + LEFT JOIN reboot_jumphost_x_subnet jxs ON (jh.hostid = jxs.hostid) + LEFT JOIN reboot_subnet s ON (INET_ATON(jh.host) BETWEEN s.start AND s.end) + LEFT JOIN reboot_subnet_x_subnet sxs ON (sxs.srcid = s.subnetid AND sxs.reachable <> 0) + GROUP BY jh.hostid'); + foreach ($res as $row) { + $dnets = empty($row['dsubnets']) ? [] : explode(',', $row['dsubnets']); + $inets = empty($row['isubnets']) ? [] : explode(',', $row['isubnets']); + $inets = array_diff($inets, $dnets); // There might be duplicates if both joins match + foreach ($dnets as $net) { + if (empty($net) || !isset($subnets[$net])) + continue; + $subnets[$net]['djumphosts'][] =& $row; + } + foreach ($inets as $net) { + if (empty($net) || !isset($subnets[$net])) + continue; + $subnets[$net]['ijumphosts'][] =& $row; } - $events[] = $client; + unset($row); } } -- cgit v1.2.3-55-g7522