// Source: /interpreter/tags/at2dist041108/src/edu/vub/at/actors/net/comm/CommunicationBus.java
// Java | 519 lines | 209 code | 47 blank | 263 comment | 24 complexity | 2dace885c6fd0b3b2214e714ca71f4c9 MD5 | raw file
/**
 * AmbientTalk/2 Project
 * CommunicationBus.java created on 2-apr-2007 at 19:30:12
 * (c) Programming Technology Lab, 2006 - 2007
 * Authors: Tom Van Cutsem & Stijn Mostinckx
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
package edu.vub.at.actors.net.comm;

import edu.vub.at.actors.natives.ELVirtualMachine;
import edu.vub.at.actors.net.cmd.VMCommand;
import edu.vub.at.util.logging.Logging;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.net.Socket;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;

/**
 * For each AmbientTalk virtual machine, there exists one communication bus instance.
 * The communication bus can be in two conceptual states. It can be:
 * <ul>
 *  <li>Connected: when connected, the communication bus's {@link CommunicationBus#networkAddress_} field
 *  is set.
 *  <li>Disconnected: when disconnected, the {@link CommunicationBus#networkAddress_} field is
 *  set to null.
 * </ul>
 *
 * A communication bus is always started in a disconnected state. Toggling between both
 * states is done using the {@link CommunicationBus#connect()} and
 * {@link CommunicationBus#disconnect()} methods.
 * <p>
 * The communication bus is the central coordinator of the network communication layer.
 * It encapsulates four important threads:
 * <ul>
 *  <li>The {@link MulticastServerThread} is responsible for sending 'heartbeat' messages
 *  in a multicast fashion across the network, to notify other VMs in the network that
 *  this VM is still 'alive'.
 *  <li>The {@link MulticastListenerThread} is responsible for listening for the
 *  'heartbeat' messages of other VMs, and for adding new connections in the
 *  connection table of the communication bus.
 *  <li>The {@link MasterConnectionThread} is responsible for opening a server
 *  socket that accepts incoming connections from so-called <i>slave</i> VMs.
 *  <li>A timer thread which runs a task at a fixed rate to check the connection
 *  table for timed out VMs.
 * </ul>
 *
 * The most important bookkeeping datastructure of the communication bus is the
 * {@link CommunicationBus#addressToConnection_} table, also referred to as the
 * <b>connection table</b>. This table stores all of the currently connected
 * VMs that the host VM knows about. Whenever a heartbeat is received, a VM's
 * connection registration in this table is updated. When a heartbeat is first
 * received, the VM is added. When a heartbeat has not been heard for longer
 * than the timeout interval, the VM is removed from the connection table.
 * <p>
 * Schematically, we have the following situation after establishing a connection
 * between a master and a slave VM (see the description of {@link MulticastListenerThread}):
 *
 * <pre>
 *  Slave                                 Master
 *  cs = new Socket(masterAddress)        ms = socket.accept()
 *  cs.in  <--------------------------    ms.out
 *  cs.out ------------------------->     ms.in
 * </pre>
 *
 * It does not matter which socket is registered in the connection table. Hence,
 * once both VMs have set up a connection, the concept of 'master' and 'slave' is
 * no longer useful and both VMs become peers.
 *
 * It is always the output stream of either socket that is used for sending messages to the other VM.
 * It is always the input stream of either socket that is used for receiving messages from the other VM.
 *
 * @author tvcutsem
 */
public class CommunicationBus {

    /** the AmbientTalk VM that owns this communication bus */
    public final ELVirtualMachine host_;

    /** the name of the overlay network in which to discover AmbientTalk VMs */
    private final String groupName_;

    /** if non-null, the communication bus is connected to the network */
    private volatile Address networkAddress_;

    private MulticastListenerThread mcListener_;
    private MulticastServerThread mcServer_;
    private MasterConnectionThread masterConnectionThread_;

    /**
     * Maps the address of a currently connected VM to connection information
     * such as the last time it was seen, and the socket connection.
     * Also known as the "connection table".
     *
     * This datastructure is modified by different threads:
     * <ul>
     *  <li>Addition of master VMs by {@link MulticastListenerThread}
     *  <li>Addition of slave VMs by {@link MasterConnectionThread}
     *  <li>Removal of disconnected VMs by {@link CommandProcessor}
     *  <li>Checking for timed out VMs by {@link CommunicationBus#timeoutDetector_}
     *  <li>Lookup of connections by AmbientTalk event loops for message transmission
     * </ul>
     * Hence, access to this datastructure must be <b>synchronized</b>!
     */
    private final HashMap addressToConnection_;

    /**
     * The timer used for checking 'stale' (i.e. timed out) connections
     * in the connection table.
     */
    private final Timer timeoutDetectorTimer_;

    /**
     * The timer task used for removing timed out connections
     * from the connections table.
     */
    private TimeoutDetectorTask timeoutDetector_;

    /**
     * Bookkeeping datastructure to store connection-related information
     * of a connected VM. A connection entry is uniquely identified by
     * means of the {@link Address} of its VM.
     */
    private static class Connection {
        public final Socket socket_;
        /** this value is updated as new heartbeats are received */
        public long lastSeenAtTime_;
        public final ObjectOutputStream outgoing_;
        public final ObjectInputStream incoming_;

        /**
         * Wraps the given socket's streams in object streams.
         *
         * @param s the socket connected to the remote VM
         * @param lastSeenAt the initial 'last seen' timestamp, in milliseconds
         * @throws IOException if either object stream cannot be created on the socket
         */
        public Connection(Socket s, long lastSeenAt) throws IOException {
            socket_ = s;
            lastSeenAtTime_ = lastSeenAt;
            // NOTE: apparently it is highly important that the ObjectOutputStream on the socket.getOutputStream() is
            // created BEFORE trying to create an ObjectInputStream on the socket.getInputStream()
            // switching the below two statements causes the master and the slave to deadlock!
            outgoing_ = new ObjectOutputStream(new BufferedOutputStream(s.getOutputStream()));
            // The buffered output stream must be EXPLICITLY flushed, otherwise the buffered input stream
            // that is created below (but by the other VM) will block indefinitely
            outgoing_.flush();
            incoming_ = new ObjectInputStream(new BufferedInputStream(s.getInputStream()));
        }

        /**
         * Writes the given command object to the remote VM and flushes the stream.
         *
         * The write and the flush are performed while holding this connection's
         * monitor, such that command objects sent to the same VM by different
         * threads are never interleaved on the shared object output stream.
         * Note that {@link #close()} deliberately does not take this monitor, so
         * a blocked transmission can still be aborted by closing the socket.
         *
         * @param cmd the command object to transmit
         * @throws IOException if the command could not be written or flushed
         */
        public synchronized void transmit(VMCommand cmd) throws IOException {
            outgoing_.writeObject(cmd);
            outgoing_.flush();
        }

        /**
         * Closes the underlying socket. This will eventually trigger the command processor
         * tied to this connection, which will cause this connection to be removed
         * from the table.
         */
        public void close() {
            try {
                socket_.close();
            } catch (IOException ignored) {
                // best effort: the socket is being discarded anyway, so there is
                // nothing useful the caller could do with a failure to close it
            }
        }
    }

    /**
     * The timer task used for removing timed out connections
     * from the connection table.
     */
    private class TimeoutDetectorTask extends TimerTask {

        /**
         * the maximum amount of time that a remote VM gets to send a new
         * heartbeat before it is considered as being 'offline'
         */
        private static final int MAX_RESPONSE_DELAY = 10000; // in milliseconds

        /**
         * The rate at which to schedule this timer task
         */
        public static final int DETECTION_RATE = 4000; // in milliseconds

        public void run() {
            closeConnectionOfMembersNotSeenIn(MAX_RESPONSE_DELAY);
        }

    }

    /**
     * Creates a new, disconnected communication bus.
     *
     * @param host the AmbientTalk VM that owns this communication bus
     * @param ambientTalkNetworkName the name of the overlay network in which to discover VMs
     */
    public CommunicationBus(ELVirtualMachine host, String ambientTalkNetworkName) {
        host_ = host;
        groupName_ = ambientTalkNetworkName;
        addressToConnection_ = new HashMap();
        timeoutDetectorTimer_ = new Timer(true); // create a daemon timer
    }

    /**
     * Tries to connect the communication bus to the underlying network.
     *
     * @return the network address of this bus; if the bus was already connected,
     *         its current address is returned and nothing else happens
     * @throws NetworkException if no server socket could be created to listen
     * for incoming connections. If this exception is raised, it is
     * guaranteed that the communication bus is left disconnected (i.e.
     * it is not partially connected)
     */
    public Address connect() throws NetworkException {
        if (networkAddress_ != null) {
            return networkAddress_; // if the bus is already connected, there is no need to connect it again
        }

        masterConnectionThread_ = new MasterConnectionThread(this);
        try {
            networkAddress_ = masterConnectionThread_.startServing(groupName_);
        } catch (IOException e) {
            masterConnectionThread_ = null;
            throw new NetworkException("Could not connect to network:", e);
        }
        mcListener_ = new MulticastListenerThread(this, networkAddress_);
        mcServer_ = new MulticastServerThread(networkAddress_);
        mcListener_.start();
        mcServer_.start();

        // start detecting timed out VMs
        timeoutDetector_ = new TimeoutDetectorTask();
        timeoutDetectorTimer_.scheduleAtFixedRate(timeoutDetector_, 0, TimeoutDetectorTask.DETECTION_RATE);

        return networkAddress_;
    }

    /**
     * Called by the VM when it has disconnected from the underlying channel.
     * It gracefully shuts down all network threads, sets the network address
     * to null and closes all current connections, which causes them to be
     * removed from the connection table.
     */
    public void disconnect() {
        if (networkAddress_ == null) {
            return; // if the bus is already disconnected, there is no need to take it offline again
        }

        // once the bus is disconnected, the network address is set to null.
        // this ensures that no further incoming connections are allowed to be
        // registered in the addConnection method!
        networkAddress_ = null;
        masterConnectionThread_.stopServing();
        mcListener_.stopListening();
        mcServer_.stopBroadcasting();
        masterConnectionThread_ = null;
        mcListener_ = null;
        mcServer_ = null;

        // stop detecting timed out VMs
        timeoutDetector_.cancel();
        timeoutDetector_ = null;

        closeConnectionOfAllMembers();
    }

    /**
     * Updates the time that the given virtual machine was last seen online.
     * This method is invoked frequently by the {@link MulticastListenerThread}.
     *
     * @param member the address of the detected virtual machine
     * @return true if the VM was properly registered, false if the VM is not yet registered
     * (i.e. it is not yet considered as 'online')
     */
    protected boolean updateTimeLastSeen(Address member) {
        synchronized (addressToConnection_) {
            Connection conn = (Connection) addressToConnection_.get(member);
            if (conn != null) {
                conn.lastSeenAtTime_ = System.currentTimeMillis();
                return true;
            } else {
                return false;
            }
        }
    }

    /**
     * Registers a new virtual machine connection for the given address.
     * If this VM was a slave in the discovery process, this method is
     * invoked by the {@link MulticastListenerThread}. If this VM was
     * a master in the discovery process, this method is invoked by the
     * {@link MasterConnectionThread}.
     *
     * The socket's output stream will be stored in the connection table and is used
     * for transmitting VM Commands to this VM. The socket's input stream will be
     * coupled to a dedicated {@link CommandProcessor} which is responsible for
     * processing incoming VM command objects.
     *
     * Calling this method implicitly also triggers a memberJoined event on this VM
     *
     * @param newMember the address of the VM to register
     * @param conn the socket connected to that VM
     * @throws IOException if no ObjectOutputStream can be created for the socket's output stream,
     * or if no ObjectInputStream can be created for the socket's input stream.
     * If this exception is raised, it is guaranteed the member is not registered in the connection table.
     */
    protected void addConnection(Address newMember, Socket conn) throws IOException {
        if (networkAddress_ == null) {
            Logging.Network_LOG.debug("ignored connection to " + newMember + ": bus disconnected");
            return; // the bus has been disconnected, do not accept any new connections
        }

        // create a new connection object that can be used to send command objects to this member
        Connection registeredConnection = new Connection(conn, System.currentTimeMillis());

        // spawn a new command processor dedicated for handling the command objects received from this member
        CommandProcessor processor = new CommandProcessor(newMember, conn, registeredConnection.incoming_, this);

        synchronized (addressToConnection_) {
            // Re-check the connected state while holding the table lock: creating the
            // connection above may block, and the bus may have been disconnected in
            // the meantime. disconnect() clears the network address BEFORE it closes
            // all registered connections under this same lock. Hence, if the address
            // is still set here, a later disconnect will see and close this entry; if
            // it is null, registering the entry now could leave it open on a
            // disconnected bus.
            if (networkAddress_ == null) {
                registeredConnection.close();
                Logging.Network_LOG.debug("ignored connection to " + newMember + ": bus disconnected");
                return;
            }

            // first check whether a connection for this member already exists
            // (may happen when an old connection for this member has not yet been deleted)
            // This case IS possible when a master receives a new connection from a slave,
            // but that master has not yet detected the disconnection of the slave's old connection
            Connection oldConnection = (Connection) addressToConnection_.get(newMember);
            if (oldConnection != null) {
                // explicitly close the connection we're about to override
                // Note: in this case, we should *not* signal to the VM that a new member joined,
                // because in the corresponding removeConnection, we will also not signal a member left
                // (i.e. the connection was restored quickly enough such that we can abstract over the disconnection)
                oldConnection.close();
            } else {
                // notify the VM of a new connection
                host_.event_memberJoined(newMember);
            }
            // register the new connection in the table (this may override an old connection)
            addressToConnection_.put(newMember, registeredConnection);
        }

        // only start the processor if the member is properly registered
        processor.start();

        Logging.Network_LOG.debug("successfully registered connection to " + newMember);
    }

    /**
     * It is the responsibility of the {@link CommandProcessor} tied to the given VM
     * to invoke this method when its connection has failed.
     *
     * This is the <b>only</b> method responsible for removing entries from the
     * connection table.
     *
     * Calling this method implicitly also triggers a memberLeft event on this VM
     *
     * @param oldMember the address of the VM whose connection failed
     * @param connSocket the socket of the failed connection
     */
    protected void removeConnection(Address oldMember, Socket connSocket) {
        synchronized (addressToConnection_) {
            Connection conn = (Connection) addressToConnection_.get(oldMember);
            if (conn != null) {
                // ONLY delete the connection if that connection was registered for the given
                // socket. It might be that the connection in the connection table is already
                // a NEWER connection that has OVERWRITTEN the old one.
                if (conn.socket_ == connSocket) {
                    conn.close();
                    addressToConnection_.remove(oldMember);
                    host_.event_memberLeft(oldMember); // notify VM that the member has left
                }
                // Note: if the sockets do not match, then the connection to be
                // removed by this call has already been replaced by a more recent connection to the same address
                // See the code for addConnection for more details
                // Because the connection to be removed was already replaced, it is not necessary
                // to notify the VM that a member has left
            }
        }
    }

    /**
     * The {@link TimeoutDetectorTask} invokes this method at a fixed rate to
     * close connections to VMs which have not responded for longer than the
     * given timeout period.
     *
     * Removal is done implicitly by closing timed out connections. The
     * {@link CommandProcessor} tied to each connection is responsible for
     * actually removing it.
     *
     * Calling this method might also implicitly trigger one or more memberLeft
     * events on this VM.
     *
     * @param period the timeout period, in milliseconds
     */
    protected void closeConnectionOfMembersNotSeenIn(long period) {
        synchronized (addressToConnection_) {
            long now = System.currentTimeMillis();
            Collection connections = addressToConnection_.values();
            for (Iterator iter = connections.iterator(); iter.hasNext();) {
                Connection c = (Connection) iter.next();
                if (now - c.lastSeenAtTime_ > period) {
                    c.close(); // the entry will be deleted by the Command Processor
                    Logging.Network_LOG.debug("Detected timed out VM");
                }
            }
        }
    }

    /**
     * When the communication bus is explicitly disconnected from the
     * network, the VM should be notified that all connected VMs
     * have disconnected.
     *
     * Removal is done implicitly by closing all connections. The
     * {@link CommandProcessor} tied to each connection is responsible for
     * actually removing them.
     */
    private void closeConnectionOfAllMembers() {
        synchronized (addressToConnection_) {
            Collection connections = addressToConnection_.values();
            for (Iterator iter = connections.iterator(); iter.hasNext();) {
                Connection c = (Connection) iter.next();
                c.close(); // ensures the command processor will remove the entry eventually
            }
        }
    }

    /**
     * Sends a VM Command object asynchronously to the recipient VM.
     * There are no delivery guarantees for this message.
     * If the recipient is offline, or the message times out, it is simply discarded
     *
     * @param msg the command object to send
     * @param recipientVM the address of the VM to send it to
     */
    public void sendAsyncUnicast(VMCommand msg, Address recipientVM) {
        Logging.Network_LOG.info("sending async unicast cmd " + msg + " to " + recipientVM);
        Connection conn;
        // only hold the table lock for the lookup, not for the transmission itself
        synchronized (addressToConnection_) {
            conn = (Connection) addressToConnection_.get(recipientVM);
        }
        if (conn != null) {
            sendOneAsyncMessage(conn, msg, recipientVM);
        }
    }

    /**
     * Sends a VM Command object asynchronously to all connected VMs.
     * There are no delivery guarantees for this message, nor is it guaranteed
     * that all currently connected VMs will receive the message
     *
     * @param msg the command object to send
     */
    public void sendAsyncMulticast(VMCommand msg) {
        Logging.Network_LOG.debug("sending async multicast cmd: " + msg);

        // first clone the connection table such that we do not need to acquire the
        // lock for the entire duration of the multicast
        HashMap clonedConnections;
        synchronized (addressToConnection_) {
            clonedConnections = (HashMap) addressToConnection_.clone();
        }

        for (Iterator iter = clonedConnections.entrySet().iterator(); iter.hasNext();) {
            Map.Entry entry = (Map.Entry) iter.next();
            Address recipient = (Address) entry.getKey();
            Connection conn = (Connection) entry.getValue();
            sendOneAsyncMessage(conn, msg, recipient);
        }
    }

    /**
     * Sends the given {@link VMCommand} to the given member. With 'synchronous' transmission,
     * it is meant that if this method returns gracefully, the caller can rest assured that
     * the remote VM has correctly received the command object. It does not give any guarantees
     * that the command object will have been executed remotely.
     *
     * @param msg the command object to send
     * @param recipientVM the address of the VM to send it to
     * @throws NetworkException if either the given address is no longer connected, or
     * an I/O error occurs during the transmission of the command object. If this exception is
     * raised, the caller does not know whether the message was correctly received or not.
     */
    public void sendSynchronousUnicast(VMCommand msg, Address recipientVM) throws NetworkException {
        Logging.Network_LOG.info("sending sync unicast cmd: " + msg + " to " + recipientVM);

        Connection conn;
        synchronized (addressToConnection_) {
            conn = (Connection) addressToConnection_.get(recipientVM);
        }

        if (conn == null) {
            throw new NetworkException("Recipient " + recipientVM + " is offline");
        }

        try {
            conn.transmit(msg);
        } catch (IOException e) {
            // it is the sender's responsibility to close the connection's socket if something goes wrong
            // the corresponding CommandProcessor registered on this socket will remove the member from
            // the connection table
            conn.close();
            Logging.Network_LOG.error("Error while trying to send command " + msg + " to " + recipientVM, e);
            throw new NetworkException("Error while trying to transmit message " + msg, e);
        }
    }

    /**
     * Note that an I/O exception during the transmission of a command object is <b>fatal</b>
     * for the connection. The connection is immediately terminated by closing the socket.
     * It is the responsibility of the {@link CommandProcessor} registered on the socket
     * connection's input stream to remove the recipient VM from the connection table.
     */
    private void sendOneAsyncMessage(Connection conn, VMCommand msg, Address recipientVM) {
        try {
            conn.transmit(msg);
        } catch (IOException e) {
            // it is the sender's responsibility to close the connection's socket if something goes wrong
            // the CommandProcessor will remove the member from the connection table
            conn.close();
            Logging.Network_LOG.error("Could not send command " + msg + " to " + recipientVM + ", dropping.", e);
        }
    }

    public String toString() {
        return "communication bus";
    }

}