/hudson-core/src/main/java/hudson/slaves/ConnectionActivityMonitor.java

http://github.com/hudson/hudson · Java · 114 lines · 56 code · 15 blank · 43 comment · 9 complexity · 49694d24d75b55181aea7a8f168c0254 MD5 · raw file

  1. /*
  2. * The MIT License
  3. *
  4. * Copyright (c) 2004-2009, Sun Microsystems, Inc.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. package hudson.slaves;
  25. import hudson.model.AsyncPeriodicWork;
  26. import hudson.model.TaskListener;
  27. import hudson.model.Hudson;
  28. import hudson.model.Computer;
  29. import hudson.util.TimeUnit2;
  30. import hudson.remoting.VirtualChannel;
  31. import hudson.remoting.Channel;
  32. import hudson.remoting.Callable;
  33. import hudson.Extension;
  34. import java.io.IOException;
  35. import java.util.logging.Logger;
  36. /**
  37. * Makes sure that connections to slaves are alive, and if they are not, cut them off.
  38. *
  39. * <p>
  40. * If we only rely on TCP retransmission time out for this, the time it takes to detect a bad connection
  41. * is in the order of 10s of minutes, so we take the matters to our own hands.
  42. *
  43. * @author Kohsuke Kawaguchi
  44. * @since 1.325
  45. */
  46. @Extension
  47. public class ConnectionActivityMonitor extends AsyncPeriodicWork {
  48. public ConnectionActivityMonitor() {
  49. super("Connection Activity monitoring to slaves");
  50. }
  51. protected void execute(TaskListener listener) throws IOException, InterruptedException {
  52. if (!enabled) return;
  53. long now = System.currentTimeMillis();
  54. for (Computer c: Hudson.getInstance().getComputers()) {
  55. VirtualChannel ch = c.getChannel();
  56. if (ch instanceof Channel) {
  57. Channel channel = (Channel) ch;
  58. if (now-channel.getLastHeard() > TIME_TILL_PING) {
  59. // haven't heard from this slave for a while.
  60. Long lastPing = (Long)channel.getProperty(ConnectionActivityMonitor.class);
  61. if (lastPing!=null && now-lastPing > TIMEOUT) {
  62. LOGGER.info("Repeated ping attempts failed on "+c.getName()+". Disconnecting");
  63. c.disconnect(OfflineCause.create(Messages._ConnectionActivityMonitor_OfflineCause()));
  64. } else {
  65. // send a ping. if we receive a reply, it will be reflected in the next getLastHeard() call.
  66. channel.callAsync(PING_COMMAND);
  67. if (lastPing==null)
  68. channel.setProperty(ConnectionActivityMonitor.class,now);
  69. }
  70. } else {
  71. // we are receiving data nicely
  72. channel.setProperty(ConnectionActivityMonitor.class,null);
  73. }
  74. }
  75. }
  76. }
  77. public long getRecurrencePeriod() {
  78. return enabled ? FREQUENCY : TimeUnit2.DAYS.toMillis(30);
  79. }
  80. /**
  81. * Time till initial ping
  82. */
  83. private static final long TIME_TILL_PING = Long.getLong(ConnectionActivityMonitor.class.getName()+".timeToPing",TimeUnit2.MINUTES.toMillis(3));
  84. private static final long FREQUENCY = Long.getLong(ConnectionActivityMonitor.class.getName()+".frequency",TimeUnit2.SECONDS.toMillis(10));
  85. /**
  86. * When do we abandon the effort and cut off?
  87. */
  88. private static final long TIMEOUT = Long.getLong(ConnectionActivityMonitor.class.getName()+".timeToPing",TimeUnit2.MINUTES.toMillis(4));
  89. // disabled by default until proven in the production
  90. public boolean enabled = Boolean.getBoolean(ConnectionActivityMonitor.class.getName()+".enabled");
  91. private static final PingCommand PING_COMMAND = new PingCommand();
  92. private static final class PingCommand implements Callable<Void,RuntimeException> {
  93. public Void call() throws RuntimeException {
  94. return null;
  95. }
  96. private static final long serialVersionUID = 1L;
  97. }
  98. private static final Logger LOGGER = Logger.getLogger(ConnectionActivityMonitor.class.getName());
  99. }