From 2fa2da86251778ca02318e8af791750cda66575b Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sat, 8 Apr 2017 11:06:06 +0200 Subject: [RemoteReboot] Give up online check after 3 minutes, try more ports --- .../openslx/taskmanager/tasks/RemoteReboot.java | 110 +++++++++++++-------- 1 file changed, 71 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/openslx/taskmanager/tasks/RemoteReboot.java b/src/main/java/org/openslx/taskmanager/tasks/RemoteReboot.java index a166231..230480f 100644 --- a/src/main/java/org/openslx/taskmanager/tasks/RemoteReboot.java +++ b/src/main/java/org/openslx/taskmanager/tasks/RemoteReboot.java @@ -6,6 +6,7 @@ import java.net.Socket; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -50,6 +51,19 @@ public class RemoteReboot extends AbstractTask { this.setStatusObject( this.status ); + if ( minutes < 0 ) { + status.addError( "Delay cannot be negative" ); + } + if ( sshkey == null || sshkey.length() == 0 ) { + status.addError( "No SSH key given" ); + } + if ( port < 1 || port > 65535 ) { + status.addError( "Invalid port number" ); + } + + if ( status.error != null ) + return false; + status.clients = clients; Date shutdownTime = new Date(System.currentTimeMillis()+minutes*60*1000); SimpleDateFormat sdf = new SimpleDateFormat("HH:mm"); @@ -64,27 +78,31 @@ public class RemoteReboot extends AbstractTask { if ( clients.length == 0 ) return true; + final List rebootingClients = new ArrayList<>(); // try to connect to every client and start the reboot/shutdown process ExecutorService tp = Executors.newFixedThreadPool( clients.length > 4 ? 4 : clients.length ); for (final Client client : clients) { - if ( client == null || client.clientip == null ) + if ( client == null || client.clientip == null || client.machineuuid == null ) { + status.addError( "null Client or missing ip/uuid in list, ignoring." ); continue; - status.clientStatus.put(client.clientip, ClientStatus.CONNECTING); + } + status.clientStatus.put(client.machineuuid, ClientStatus.CONNECTING); tp.submit(new Runnable() { public void run() { try { Shell shell = new SSH(client.clientip, port, "root", sshkey); if (shutdown) { new Shell.Empty(shell).exec("/sbin/shutdown +" + minutes); - status.clientStatus.put(client.clientip, minutes == 0 ? ClientStatus.SHUTDOWN : ClientStatus.SHUTDOWN_AT); + status.clientStatus.put(client.machineuuid, minutes == 0 ? ClientStatus.SHUTDOWN : ClientStatus.SHUTDOWN_AT); } else { new Shell.Empty(shell).exec("/sbin/reboot"); - status.clientStatus.put(client.clientip, ClientStatus.REBOOTING); + status.clientStatus.put(client.machineuuid, ClientStatus.REBOOTING); + rebootingClients.add( client ); } } catch (IOException e) { - status.clientStatus.put(client.clientip, ClientStatus.ERROR); + status.clientStatus.put(client.machineuuid, ClientStatus.ERROR); } - } + } }); } tp.shutdown(); @@ -96,39 +114,42 @@ public class RemoteReboot extends AbstractTask return false; } - // wait for rebooting clients to finish rebooting - List rebootingClients = new ArrayList<>(); - for (Entry entry : status.clientStatus.entrySet()) { - if (entry.getValue() == ClientStatus.REBOOTING) { - rebootingClients.add(entry.getKey()); - } - } - if (rebootingClients.size() > 0) { - ExecutorService statusTP = Executors.newFixedThreadPool( rebootingClients.size() > 4 ? 4 : rebootingClients.size() ); - for (final String ip : rebootingClients) { - statusTP.submit(new Runnable() { - public void run() { - while (!isOnline(ip)) { - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - return; - } - } - status.clientStatus.put(ip, ClientStatus.ONLINE); - } - }); + // Give about 3 minutes for reboot, should be plenty + final int[] ports; + if ( this.port == 22 ) { + ports = new int[] { 22, 445 }; + } else { + ports = new int[] { this.port, 22, 445 }; } - statusTP.shutdown(); - try { - statusTP.awaitTermination( 180, TimeUnit.SECONDS ); - } catch ( InterruptedException e ) { + Thread.sleep(30000); + } catch (InterruptedException e) { Thread.currentThread().interrupt(); return false; } + long lastcheck = System.currentTimeMillis(); + long deadline = lastcheck + 120 * 1000; + while ( rebootingClients.size() > 0 && System.currentTimeMillis() < deadline ) { + long delay = 10000 - (System.currentTimeMillis() - lastcheck); + if ( delay > 0 ) { + try { + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return false; + } + } + lastcheck = System.currentTimeMillis(); + Iterator it = rebootingClients.iterator(); + while (it.hasNext()) { + Client client = it.next(); + if (isOnline(client.clientip, ports)) { + it.remove(); + status.clientStatus.put(client.machineuuid, ClientStatus.ONLINE); + } + } + } } // change status of clients that got stuck because of timeouts @@ -143,12 +164,14 @@ public class RemoteReboot extends AbstractTask } - private boolean isOnline(String address) - { - try ( Socket s = new Socket() ) { - s.connect( new InetSocketAddress( address, port ), 3000 ); - return true; - } catch ( Exception ex ) { + private boolean isOnline(String address, int... ports) + { + for ( int port : ports ) { + try ( Socket s = new Socket() ) { + s.connect( new InetSocketAddress( address, port ), 1000 ); + return true; + } catch ( Exception ex ) { + } } return false; } @@ -165,6 +188,15 @@ public class RemoteReboot extends AbstractTask private String time; private String locationId; private String locationName; + private String error; + private void addError( String e ) + { + if ( error == null ) { + error = e + "\n"; + } else { + error += e + "\n"; + } + } } @SuppressWarnings( "unused" ) -- cgit v1.2.3-55-g7522