/interpreter/tags/at2dist110511/src/edu/vub/at/actors/net/comm/CommunicationBus.java

http://ambienttalk.googlecode.com/ · Java · 532 lines · 219 code · 49 blank · 264 comment · 24 complexity · 4d9909411032ac06e843276fcc2e89ed MD5 · raw file

  1. /**
  2. * AmbientTalk/2 Project
  3. * CommunicationBus.java created on 2-apr-2007 at 19:30:12
  4. * (c) Programming Technology Lab, 2006 - 2007
  5. * Authors: Tom Van Cutsem & Stijn Mostinckx
  6. *
  7. * Permission is hereby granted, free of charge, to any person
  8. * obtaining a copy of this software and associated documentation
  9. * files (the "Software"), to deal in the Software without
  10. * restriction, including without limitation the rights to use,
  11. * copy, modify, merge, publish, distribute, sublicense, and/or
  12. * sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following
  14. * conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be
  17. * included in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  21. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  23. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  24. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  25. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  26. * OTHER DEALINGS IN THE SOFTWARE.
  27. */
  28. package edu.vub.at.actors.net.comm;
  29. import edu.vub.at.actors.natives.ELVirtualMachine;
  30. import edu.vub.at.actors.net.cmd.VMCommand;
  31. import edu.vub.at.util.logging.Logging;
  32. import java.io.BufferedInputStream;
  33. import java.io.BufferedOutputStream;
  34. import java.io.IOException;
  35. import java.io.ObjectInputStream;
  36. import java.io.ObjectOutputStream;
  37. import java.net.Socket;
  38. import java.util.Collection;
  39. import java.util.HashMap;
  40. import java.util.Iterator;
  41. import java.util.Map;
  42. import java.util.Timer;
  43. import java.util.TimerTask;
  44. /**
  45. * For each AmbientTalk virtual machine, there exists one communication bus instance.
  46. * The communication bus can be in two conceptual states. It can be:
  47. * <ul>
  48. * <li>Connected: when connected, the communication bus's {@link CommunicationBus#networkAddress_} field
  49. * is set.
  50. * <li>Disconnected: when disconnected, the {@link CommunicationBus#networkAddress_} field is
  51. * set to null.
  52. * </ul>
  53. *
  54. * A communication bus is always started in a disconnected state. Toggling between both
  55. * states is done using the {@link CommunicationBus#connect()} and
  56. * {@link CommunicationBus#disconnect()} methods.
  57. * <p>
  58. * The communication bus is the central coordinator of the network communication layer.
  59. * It encapsulates four important threads:
  60. * <ul>
  61. * <li>The {@link MulticastServerThread} is responsible for sending 'heartbeat' messages
  62. * in a multicast fashion across the network, to notify other VMs in the network that
  63. * this VM is still 'alive'.
  64. * <li>The {@link MulticastListenerThread} is responsible for listening for the
  65. * 'heartbeat' messages of other VMs, and for adding new connections in the
  66. * connection table of the communication bus.
  67. * <li>The {@link MasterConnectionThread} is responsible for opening a server
  68. * socket that accepts incoming connections from so-called <i>slave</i> VMs.
  69. * <li>A timer thread which runs a task at a fixed rate to check the connection
  70. * table for timed out VMs.
  71. * </ul>
  72. *
  73. * The most important bookkeeping datastructure of the communication bus is the
  74. * {@link CommunicationBus#addressToConnection_} table, also referred to as the
  75. * <b>connection table</b>. This table stores all of the currently connected
  76. * VMs that the host VM knows about. Whenever a heartbeat is received, a VM's
  77. * connection registration in this table is updated. When a heartbeat is first
  78. * received, the VM is added. When a heartbeat has not been heard for longer
  79. * than the timeout interval, the VM is removed from the connection table.
  80. * <p>
  81. * Schematically, we have the following situation after establishing a connection
  82. * between a master and a slave VM (see the description of {@link MulticastListenerThread}):
  83. *
  84. * <pre>
  85. * Slave Master
  86. * cs = new Socket(masterAddress) ms = socket.accept()
  87. * cs.in <-------------------------- ms.out
  88. * cs.out -------------------------> ms.in
  89. * </pre>
  90. *
  91. * It does not matter which socket is registered in the connection table. Hence,
  92. * once both VMs have set up a connection, the concept of 'master' and 'slave' is
  93. * no longer useful and both VMs become peers.
  94. *
  95. * It is always the output stream of either socket that is used for sending messages to the other VM.
  96. * It is always the input stream of either socket that is used for receiving messages from the other VM.
  97. *
  98. * @author tvcutsem
  99. */
  100. public class CommunicationBus {
  101. /** the AmbientTalk VM that owns this communication bus */
  102. public final ELVirtualMachine host_;
  103. /** the name of the overlay network in which to discover AmbientTalk VMs */
  104. private final String groupName_;
  105. /** the IP address to connect to, or ELVirtualMachine._DEFAULT_IP_ADDRESS_ if not specified */
  106. private final String ipAddress_;
  107. /** if non-null, the communication bus is connected to the network */
  108. private volatile Address networkAddress_;
  109. private MulticastListenerThread mcListener_;
  110. private MulticastServerThread mcServer_;
  111. private MasterConnectionThread masterConnectionThread_;
  112. /**
  113. * Maps the address of a currently connected VM to connection information
  114. * such as the last time it was seen, and the socket connection.
  115. * Also known as the "connection table".
  116. *
  117. * This datastructure is modified by different threads:
  118. * <ul>
  119. * <li>Addition of master VMs by {@link MulticastListenerThread}
  120. * <li>Addition of slave VMs by {@link MasterConnectionThread}
  121. * <li>Removal of disconnected VMs by {@link CommandProcessor}
  122. * <li>Checking for timed out VMs by {@link CommunicationBus#timeoutDetector_}
  123. * <li>Lookup of connections by AmbientTalk event loops for message transmission
  124. * </ul>
  125. * Hence, access to this datastructure must be <b>synchronized</b>!
  126. */
  127. private final HashMap addressToConnection_;
  128. /**
  129. * The timer used for checking 'stale' (i.e. timed out) connections
  130. * in the connection table.
  131. */
  132. private final Timer timeoutDetectorTimer_;
  133. /**
  134. * The timer task used for removing timed out connections
  135. * from the connections table.
  136. */
  137. private TimeoutDetectorTask timeoutDetector_;
  138. /**
  139. * Bookkeeping datastructure to store connection-related information
  140. * of a connected VM. A connection entry is uniquely identified by
  141. * means of the {@link Address} of its VM.
  142. */
  143. private static class Connection {
  144. public final Socket socket_;
  145. /** this value is updated as new heartbeats are received */
  146. public long lastSeenAtTime_;
  147. public final ObjectOutputStream outgoing_;
  148. public final ObjectInputStream incoming_;
  149. public Connection(Socket s, long lastSeenAt) throws IOException {
  150. socket_ = s;
  151. lastSeenAtTime_ = lastSeenAt;
  152. // NOTE: apparently it is highly important that the ObjectOutputStream on the socket.getOutputStream() is
  153. // created BEFORE trying to create an ObjectInputStream on the socket.getInputStream()
  154. // switching the below two statements causes the master and the slave to deadlock!
  155. outgoing_ = new ObjectOutputStream(new BufferedOutputStream(s.getOutputStream()));
  156. // The buffered output stream must be EXPLICITLY flushed, otherwise the buffered input stream
  157. // that is created below (but by the other VM) will block indefinitely
  158. outgoing_.flush();
  159. incoming_ = new ObjectInputStream(new BufferedInputStream(s.getInputStream()));
  160. }
  161. /**
  162. * Closes the underlying socket. This will eventually trigger the command processor
  163. * tied to this connection, which will cause this connection to be removed
  164. * from the table.
  165. */
  166. public synchronized void close() {
  167. try {
  168. socket_.close();
  169. } catch (IOException ioe) { }
  170. }
  171. public synchronized void send(VMCommand msg) throws IOException{
  172. outgoing_.writeObject(msg);
  173. outgoing_.flush();
  174. }
  175. }
  176. /**
  177. * The timer task used for removing timed out connections
  178. * from the connection table.
  179. */
  180. private class TimeoutDetectorTask extends TimerTask {
  181. /**
  182. * the maximum amount of time that a remote VM gets to send a new
  183. * heartbeat before it is considered as being 'offline'
  184. */
  185. private static final int MAX_RESPONSE_DELAY = 10000; // in milliseconds
  186. /**
  187. * The rate at which to schedule this timer task
  188. */
  189. public static final int DETECTION_RATE = 4000; // in milliseconds
  190. public void run() {
  191. closeConnectionOfMembersNotSeenIn(MAX_RESPONSE_DELAY);
  192. }
  193. }
  /**
   * Creates a communication bus with an empty connection table and a daemon
   * timer for timeout detection. No network address is assigned here, so the
   * bus starts out disconnected.
   *
   * @param host the AmbientTalk VM that owns this communication bus
   * @param ambientTalkNetworkName the name of the overlay network in which to discover VMs
   * @param ipAddress the IP address stored for later retrieval via {@link #getIpAddress()}
   */
  public CommunicationBus(ELVirtualMachine host, String ambientTalkNetworkName, String ipAddress) {
    this.host_ = host;
    this.groupName_ = ambientTalkNetworkName;
    this.ipAddress_ = ipAddress;
    this.addressToConnection_ = new HashMap();
    // 'true' marks the timer's thread as a daemon thread
    this.timeoutDetectorTimer_ = new Timer(true);
  }
  201. public String getIpAddress() {
  202. return ipAddress_;
  203. }
  204. public String getGroupName(){
  205. return groupName_;
  206. }
  207. /**
  208. * Tries to connect the communication bus to the underlying network.
  209. * @throws IOException if no server socket could be created to listen
  210. * for incoming connections. If this exception is raised, it is
  211. * guaranteed that the communication bus is left disconnected (i.e.
  212. * it is not partially connected)
  213. */
  214. public Address connect() throws NetworkException {
  215. if (networkAddress_ != null) {
  216. return networkAddress_; // if the bus is already connected, there is no need to connect it again
  217. }
  218. masterConnectionThread_ = new MasterConnectionThread(this);
  219. try {
  220. networkAddress_ = masterConnectionThread_.startServing();
  221. } catch (IOException e) {
  222. masterConnectionThread_ = null;
  223. throw new NetworkException("Could not connect to network:", e);
  224. }
  225. mcListener_ = new MulticastListenerThread(this, networkAddress_);
  226. mcServer_ = new MulticastServerThread(networkAddress_);
  227. mcListener_.start();
  228. mcServer_.start();
  229. // start detecting timed out VMs
  230. timeoutDetector_ = new TimeoutDetectorTask();
  231. timeoutDetectorTimer_.scheduleAtFixedRate(timeoutDetector_, 0, TimeoutDetectorTask.DETECTION_RATE);
  232. return networkAddress_;
  233. }
  234. /**
  235. * Called by the VM when it has disconnected from the underlying channel.
  236. * It gracefully shuts down all network threads, sets the network address
  237. * to null and removes all current connections from the connection table.
  238. */
  239. public void disconnect() {
  240. if (networkAddress_ == null) {
  241. return; // if the bus is already disconnected, there is no need to take it offline again
  242. }
  243. // once the bus is disconnected, the network address is set to null.
  244. // this ensures that no further incoming connections are allowed to be
  245. // registered in the addConnection method!
  246. networkAddress_ = null;
  247. masterConnectionThread_.stopServing();
  248. mcListener_.stopListening();
  249. mcServer_.stopBroadcasting();
  250. masterConnectionThread_ = null;
  251. mcListener_ = null;
  252. mcServer_ = null;
  253. // stop detecting timed out VMs
  254. timeoutDetector_.cancel();
  255. timeoutDetector_ = null;
  256. closeConnectionOfAllMembers();
  257. }
  258. /**
  259. * Updates the time that the given virtual machine was last seen online.
  260. * This method is invoked frequently by the {@link MulticastListenerThread}.
  261. *
  262. * @param member the address of the detected virtual machine
  263. * @return true if the VM was properly registered, false if the VM is not yet registered
  264. * (i.e. it is not yet considered as 'online')
  265. */
  266. protected boolean updateTimeLastSeen(Address member) {
  267. synchronized (addressToConnection_) {
  268. Connection conn = (Connection) addressToConnection_.get(member);
  269. if (conn != null) {
  270. conn.lastSeenAtTime_ = System.currentTimeMillis();
  271. return true;
  272. } else {
  273. return false;
  274. }
  275. }
  276. }
  277. /**
  278. * Registers a new virtual machine connection for the given address.
  279. * If this VM was a slave in the discovery process, this method is
  280. * invoked by the {@link MulticastListenerThread}. If this VM was
  281. * a master in the discovery process, this method is invoked by the
  282. * {@link MasterConnectionThread}.
  283. *
  284. * The socket's output stream will be stored in the connection table and is used
  285. * for transmitting VM Commands to this VM. The socket's input stream will be
  286. * coupled to a dedicated {@link CommandProcessor} which is responsible for
  287. * processing incoming VM command objects.
  288. *
  289. * Calling this method implicitly also triggers a memberJoined event on this VM
  290. *
  291. * @throws IOException if no ObjectOutputStream can be created for the socket's output stream,
  292. * or if no ObjectInputStream can be created for the socket's input stream.
  293. * If this exception is raised, it is guaranteed the member is not registered in the connection table.
  294. */
  295. protected void addConnection(Address newMember, Socket conn) throws IOException {
  296. if (networkAddress_ == null) {
  297. Logging.Network_LOG.debug("ignored connection to " + newMember + ": bus disconnected");
  298. return; // the bus has been disconnected, do not accept any new connections
  299. }
  300. // create a new connection object that can be used to send command objects to this member
  301. Connection registeredConnection = new Connection(conn, System.currentTimeMillis());
  302. // spawn a new command processor dedicated for handling the command objects received from this member
  303. CommandProcessor processor = new CommandProcessor(newMember, conn, registeredConnection.incoming_, this);
  304. synchronized (addressToConnection_) {
  305. // first check whether a connection for this member already exists
  306. // (may happen when an old connection for this member has not yet been deleted)
  307. // This case IS possible when a master receives a new connection from a slave,
  308. // but that master has not yet detected the disconnection of the slave's old connection
  309. Connection oldConnection = (Connection) addressToConnection_.get(newMember);
  310. if (oldConnection != null) {
  311. // explicitly close the connection we're about to override
  312. // Note: in this case, we should *not* signal to the VM that a new member joined,
  313. // because in the corresponding removeConnection, we will also not signal a member left
  314. // (i.e. the connection was restored quickly enough such that we can abstract over the disconnection)
  315. oldConnection.close();
  316. } else {
  317. // notify the VM of a new connection
  318. host_.event_memberJoined(newMember);
  319. }
  320. // register the new connection in the table (this may override an old connection)
  321. addressToConnection_.put(newMember, registeredConnection);
  322. }
  323. // only start the processor if the member is properly registered
  324. processor.start();
  325. Logging.Network_LOG.debug("successfully registered connection to " + newMember);
  326. }
  327. /**
  328. * It is the responsibility of the {@link CommandProcessor} tied to the given VM
  329. * to invoke this method when its connection has failed.
  330. *
  331. * This is the <b>only</b> method responsible for removing entries from the
  332. * connection table.
  333. *
  334. * Calling this method implicitly also triggers a memberLeft event on this VM
  335. */
  336. protected void removeConnection(Address oldMember, Socket connSocket) {
  337. synchronized (addressToConnection_) {
  338. Connection conn = (Connection) addressToConnection_.get(oldMember);
  339. if (conn != null) {
  340. // ONLY delete the connection if that connection was registered for the given
  341. // socket. It might be that the connection in the connection table is already
  342. // a NEWER connection that has OVERWRITTEN the old one.
  343. if (conn.socket_ == connSocket) {
  344. conn.close();
  345. addressToConnection_.remove(oldMember);
  346. host_.event_memberLeft(oldMember); // notify VM that the member has left
  347. }
  348. // Note: if the sockets do not match, then the connection to be
  349. // removed by this call has already been replaced by a more recent connection to the same address
  350. // See the code for addConnection for more details
  351. // Because the connection to be removed was already replaced, it is not necessary
  352. // to notify the VM that a member has left
  353. }
  354. }
  355. }
  356. /**
  357. * The {@link MulticastListenerThread} invokes this method regularly to
  358. * remove connections to VMs which have not responded for longer than the
  359. * given timeout period.
  360. *
  361. * Removal is done implicitly by closing timed out connections. The
  362. * {@link CommandProcessor} tied to each connection is responsible for
  363. * actually removing it.
  364. *
  365. * Calling this method might also implicitly trigger one or more memberLeft
  366. * events on this VM.
  367. */
  368. protected void closeConnectionOfMembersNotSeenIn(long period) {
  369. synchronized (addressToConnection_) {
  370. long now = System.currentTimeMillis();
  371. Collection connections = addressToConnection_.values();
  372. for (Iterator iter = connections.iterator(); iter.hasNext();) {
  373. Connection c = (Connection) iter.next();
  374. if (now - c.lastSeenAtTime_ > period) {
  375. c.close(); // the entry will be deleted by the Command Processor
  376. Logging.Network_LOG.debug("Detected timed out VM");
  377. }
  378. }
  379. }
  380. }
  381. /**
  382. * When the communication bus is explicitly disconnected from the
  383. * network, the VM should be notified that all connected VMs
  384. * have disconnected.
  385. *
  386. * Removal is done implicitly by closing all connections. The
  387. * {@link CommandProcessor} tied to each connection is responsible for
  388. * actually removing them.
  389. */
  390. private void closeConnectionOfAllMembers() {
  391. synchronized (addressToConnection_) {
  392. Collection connections = addressToConnection_.values();
  393. for (Iterator iter = connections.iterator(); iter.hasNext();) {
  394. Connection c = (Connection) iter.next();
  395. c.close(); // ensures the command processor will remove the entry eventually
  396. }
  397. }
  398. }
  399. /**
  400. * Sends a VM Command object asynchronously to the recipient VM.
  401. * There are no delivery guarantees for this message.
  402. * If the recipient is offline, or the message times out, it is simply discarded
  403. */
  404. public void sendAsyncUnicast(VMCommand msg, Address recipientVM) {
  405. Logging.Network_LOG.info("sending async unicast cmd " + msg + " to " + recipientVM);
  406. Connection conn;
  407. synchronized (addressToConnection_) {
  408. conn = (Connection) addressToConnection_.get(recipientVM);
  409. }
  410. if (conn != null) {
  411. sendOneAsyncMessage(conn, msg, recipientVM);
  412. }
  413. }
  414. /**
  415. * Sends a VM Command object asynchronously to all connected VMs.
  416. * There are no delivery guarantees for this message, nor is it guaranteed
  417. * that all currently connected VMs will receive the message
  418. */
  419. public void sendAsyncMulticast(VMCommand msg) {
  420. Logging.Network_LOG.debug("sending async multicast cmd: " + msg);
  421. // first clone the connection table such that we do not need to acquire the
  422. // lock for the entire duration of the multicast
  423. HashMap clonedConnections;
  424. synchronized (addressToConnection_) {
  425. clonedConnections = (HashMap) addressToConnection_.clone();
  426. }
  427. for (Iterator iter = clonedConnections.entrySet().iterator(); iter.hasNext();) {
  428. Map.Entry entry = (Map.Entry) iter.next();
  429. Address recipient = (Address) entry.getKey();
  430. Connection conn = (Connection) entry.getValue();
  431. sendOneAsyncMessage(conn, msg, recipient);
  432. }
  433. }
  434. /**
  435. * Sends the given {@link VMCommand} to the given member. With 'synchronous' transmission,
  436. * it is meant that if this method returns gracefully, the caller can rest assured that
  437. * the remote VM has correctly received the command object. It does not given any guarantees
  438. * that the command object will have been executed remotely.
  439. *
  440. * @throws NetworkException if either the given address is no longer connected, or
  441. * an I/O error occurs during the transmission of the command object. If this exception is
  442. * raised, the caller does not know whether the message was correctly received or not.
  443. */
  444. public void sendSynchronousUnicast(VMCommand msg, Address recipientVM) throws NetworkException {
  445. Logging.Network_LOG.info("sending sync unicast cmd: " + msg + " to " + recipientVM);
  446. Connection conn;
  447. synchronized (addressToConnection_) {
  448. conn = (Connection) addressToConnection_.get(recipientVM);
  449. }
  450. if (conn == null) {
  451. throw new NetworkException("Recipient " + recipientVM + " is offline");
  452. }
  453. try {
  454. conn.send(msg);
  455. } catch (IOException e) {
  456. // it is the sender's responsibility to close the connection's socket if something goes wrong
  457. // the corresponding CommandProcessor registered on this socket will remove the member from
  458. // the connection table
  459. conn.close();
  460. Logging.Network_LOG.error("Error while trying to send command " + msg.toString() + " to " + recipientVM, e);
  461. throw new NetworkException("Error while trying to transmit message " + msg, e);
  462. }
  463. }
  464. /**
  465. * Note that an I/O exception during the transmission of a command object is <b>fatal</b>
  466. * for the connection. The connection is immediately terminated by closing the socket.
  467. * It is the responsibility of the {@link CommandProcessor} registered on the socket
  468. * connection's input stream to remove the recipient VM from the connection table.
  469. */
  470. private void sendOneAsyncMessage(Connection conn, VMCommand msg, Address recipientVM) {
  471. try {
  472. conn.send(msg);
  473. } catch (IOException e) {
  474. // it is the sender's responsibility to close the connection's socket if something goes wrong
  475. // the CommandProcessor will remove the member from the connection table
  476. conn.close();
  477. Logging.Network_LOG.error("Could not send command " + msg + " to " + recipientVM + ", dropping.", e);
  478. }
  479. }
  480. public String toString() {
  481. return "communication bus";
  482. }
  483. }