PageRenderTime 86ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/src/contrib/corona/src/java/org/apache/hadoop/corona/SessionDriver.java

https://github.com/Beckham007/hadoop-20
Java | 451 lines | 305 code | 92 blank | 54 comment | 35 complexity | 012669abddc38d3e976561a75b70b0c6 MD5 | raw file
  1. package org.apache.hadoop.corona;
  2. import java.io.IOException;
  3. import java.util.ArrayList;
  4. import java.util.Collections;
  5. import java.util.LinkedList;
  6. import java.util.List;
  7. import java.net.ServerSocket;
  8. import java.net.Socket;
  9. import java.net.InetSocketAddress;
  10. import org.apache.commons.logging.Log;
  11. import org.apache.commons.logging.LogFactory;
  12. import org.apache.hadoop.conf.Configuration;
  13. import org.apache.hadoop.util.Daemon;
  14. import org.apache.hadoop.util.StringUtils;
  15. import org.apache.hadoop.net.NetUtils;
  16. import org.apache.thrift.*;
  17. import org.apache.thrift.protocol.*;
  18. import org.apache.thrift.transport.*;
  19. import org.apache.thrift.server.TServer;
  20. import org.apache.thrift.server.TThreadPoolServer;
  21. /**
  22. * Handles sessions from client to cluster manager.
  23. */
  24. public class SessionDriver {
  25. public static final Log LOG = LogFactory.getLog(SessionDriver.class);
  26. final CoronaConf conf;
  27. final SessionDriverService.Iface iface;
  28. String sessionId = "";
  29. SessionInfo sessionInfo;
  30. ServerSocket serverSocket;
  31. TServer server;
  32. Thread serverThread;
  33. CMNotifierThread cmNotifier;
  34. IOException failException = null;
  /**
   * Creates a session driver from a generic Hadoop configuration by wrapping
   * it in a CoronaConf and delegating to the main constructor.
   *
   * @param conf configuration to wrap
   * @param iface handler for calls arriving at the local Thrift server
   * @throws IOException if the local server cannot be set up or the session
   *         cannot be started with the cluster manager
   */
  public SessionDriver(Configuration conf, SessionDriverService.Iface iface)
      throws IOException {
    this(new CoronaConf(conf), iface);
  }

  /**
   * Binds the local Thrift server, starts serving on a daemon thread, then
   * registers a session with the cluster manager and starts the notifier
   * thread.
   *
   * If registration fails, the server that was already started is stopped and
   * its socket closed before the exception propagates, so a failed
   * construction does not leave a serving thread and a bound port behind.
   *
   * @param conf corona configuration
   * @param iface handler for calls arriving at the local Thrift server
   * @throws IOException if the local server cannot be set up or the session
   *         cannot be started with the cluster manager
   */
  public SessionDriver(CoronaConf conf, SessionDriverService.Iface iface)
      throws IOException {
    this.conf = conf;
    this.iface = iface;

    // initializeServer() also assigns the 'server' field.
    serverSocket = initializeServer(conf);

    org.apache.hadoop.corona.InetAddress myAddress =
        new org.apache.hadoop.corona.InetAddress();
    myAddress.setHost(serverSocket.getInetAddress().getHostAddress());
    myAddress.setPort(serverSocket.getLocalPort());
    LOG.info("My serverSocketPort " + serverSocket.getLocalPort());
    LOG.info("My Address " + myAddress.getHost() + ":" + myAddress.getPort());

    String userName = System.getProperty("user.name");
    String sessionName = userName + "-" + new java.util.Date().toString();
    this.sessionInfo = new SessionInfo();
    this.sessionInfo.setAddress(myAddress);
    this.sessionInfo.setName(sessionName);
    this.sessionInfo.setUserId(userName);
    this.sessionInfo.setPoolId(conf.getPoolName());
    // TODO - set session priority.
    this.sessionInfo.setPriority(SessionPriority.NORMAL);
    this.sessionInfo.setNoPreempt(false);

    this.serverThread = new Daemon(new Thread() {
      @Override
      public void run() {
        server.serve();
      }
    });
    this.serverThread.start();

    // The notifier constructor performs the sessionStart RPC and may throw.
    boolean registered = false;
    try {
      cmNotifier = new CMNotifierThread(conf, sessionInfo, this);
      registered = true;
    } finally {
      if (!registered) {
        // Undo the server start-up; nobody will ever call stop()/abort()
        // on an instance whose constructor threw.
        server.stop();
        try {
          serverSocket.close();
        } catch (IOException ignored) {
          // Best-effort cleanup: the registration failure is the error
          // the caller needs to see.
        }
      }
    }
    cmNotifier.setDaemon(true);
    cmNotifier.start();
    sessionId = cmNotifier.getSessionRegistrationData().handle;
  }
  71. static java.net.InetAddress getLocalAddress() throws IOException {
  72. try {
  73. return java.net.InetAddress.getLocalHost();
  74. } catch (java.net.UnknownHostException e) {
  75. throw new IOException(e);
  76. }
  77. }
  78. private ServerSocket initializeServer(CoronaConf conf) throws IOException {
  79. // Choose any free port.
  80. ServerSocket serverSocket = new ServerSocket(0, 0, getLocalAddress());
  81. TServerSocket tServerSocket = new TServerSocket(serverSocket,
  82. conf.getCMSoTimeout());
  83. TFactoryBasedThreadPoolServer.Args args =
  84. new TFactoryBasedThreadPoolServer.Args(tServerSocket);
  85. args.processor(new SessionDriverService.Processor(iface));
  86. args.transportFactory(new TTransportFactory());
  87. args.protocolFactory(new TBinaryProtocol.Factory(true, true));
  88. args.stopTimeoutVal = 0;
  89. server = new TFactoryBasedThreadPoolServer(
  90. args, new TFactoryBasedThreadPoolServer.DaemonThreadFactory());
  91. return serverSocket;
  92. }
  93. public IOException getFailed() {
  94. return failException;
  95. }
  96. public void setFailed(IOException e) {
  97. failException = e;
  98. }
  99. public String getSessionId() { return sessionId; }
  100. public SessionInfo getSessionInfo() { return sessionInfo; }
  101. public void setName(String name) throws IOException {
  102. if (failException != null) {
  103. throw failException;
  104. }
  105. if (name == null || name.length() == 0) {
  106. return;
  107. }
  108. sessionInfo.name = name;
  109. SessionInfo newInfo = new SessionInfo(sessionInfo);
  110. cmNotifier.addCall(
  111. new ClusterManagerService.sessionUpdateInfo_args(sessionId, newInfo));
  112. }
  113. public void setUrl(String url) throws IOException {
  114. if (failException != null) {
  115. throw failException;
  116. }
  117. sessionInfo.url = url;
  118. SessionInfo newInfo = new SessionInfo(sessionInfo);
  119. cmNotifier.addCall(
  120. new ClusterManagerService.sessionUpdateInfo_args(sessionId, newInfo));
  121. }
  122. /**
  123. * For test purposes. Abort a sessiondriver without sending a sessionEnd()
  124. * call to the CM. This allows testing for abnormal session termination
  125. */
  126. public void abort() {
  127. LOG.info("Aborting session driver");
  128. cmNotifier.clearCalls();
  129. cmNotifier.doShutdown();
  130. server.stop();
  131. }
  132. public void stop(SessionStatus status) {
  133. LOG.info("Stopping session driver");
  134. // clear all calls from the notifier and append a last call
  135. // to send the sessionEnd
  136. cmNotifier.clearCalls();
  137. cmNotifier.addCall(new ClusterManagerService.sessionEnd_args(sessionId, status));
  138. cmNotifier.doShutdown();
  139. server.stop();
  140. }
  141. public void join() throws InterruptedException {
  142. serverThread.join();
  143. cmNotifier.join();
  144. }
  145. public void requestResources(List<ResourceRequest> wanted) throws IOException {
  146. if (failException != null)
  147. throw failException;
  148. cmNotifier.addCall(new ClusterManagerService.requestResource_args(sessionId, wanted));
  149. }
  150. public void releaseResources(List<ResourceRequest> released) throws IOException {
  151. if (failException != null)
  152. throw failException;
  153. List<Integer> releasedIds = new ArrayList<Integer>();
  154. for (ResourceRequest req: released) {
  155. releasedIds.add(req.getId());
  156. }
  157. cmNotifier.addCall(new ClusterManagerService.releaseResource_args(sessionId, releasedIds));
  158. }
  159. public static class CMNotifierThread extends Thread {
  160. final List<TBase> pendingCalls = Collections.synchronizedList(new LinkedList<TBase> ());
  161. /**
  162. * starting retry interval
  163. */
  164. final int retryIntervalStart;
  165. /**
  166. * multiplier between successive retry intervals
  167. */
  168. final int retryIntervalFactor;
  169. /**
  170. * max number of retries
  171. */
  172. final int retryCountMax;
  173. /**
  174. * period between polling for dispatches
  175. */
  176. final int waitInterval;
  177. /**
  178. * intervals between heartbeats
  179. */
  180. final int heartbeatInterval;
  181. final String host;
  182. final int port;
  183. final SessionInfo sinfo;
  184. final SessionRegistrationData sreg;
  185. final SessionDriver sessionDriver;
  186. /**
  187. * Time (in milliseconds) when to make the next RPC call
  188. * -1 means to make the call immediately
  189. */
  190. long nextDispatchTime = -1;
  191. /**
  192. * Number of retries that have been made for the first call
  193. * in the list
  194. */
  195. short numRetries = 0;
  196. /**
  197. * current retry interval
  198. */
  199. int currentRetryInterval;
  200. /**
  201. * last time heartbeat was sent
  202. */
  203. long lastHeartbeatTime = 0;
  204. TTransport transport = null;
  205. ClusterManagerService.Client client;
  206. volatile boolean shutdown = false;
  207. public void doShutdown() {
  208. shutdown = true;
  209. wakeupThread();
  210. }
  211. public CMNotifierThread(CoronaConf conf, SessionInfo sinfo, SessionDriver sdriver)
  212. throws IOException {
  213. waitInterval = conf.getNotifierPollInterval();
  214. retryIntervalFactor = conf.getNotifierRetryIntervalFactor();
  215. retryCountMax = conf.getNotifierRetryMax();
  216. retryIntervalStart = conf.getNotifierRetryIntervalStart();
  217. heartbeatInterval = Math.max(conf.getSessionExpiryInterval()/8, 1);
  218. sessionDriver = sdriver;
  219. String target = conf.getClusterManagerAddress();
  220. InetSocketAddress address = NetUtils.createSocketAddr(target);
  221. host = address.getHostName();
  222. port = address.getPort();
  223. this.sinfo = sinfo;
  224. try {
  225. LOG.info("Connecting to cluster manager at " + host + ":" + port);
  226. init();
  227. sreg = client.sessionStart(sinfo);
  228. close();
  229. LOG.info("Established session " + sreg.handle);
  230. } catch (TException e) {
  231. throw new IOException(e);
  232. }
  233. }
  234. public SessionRegistrationData getSessionRegistrationData() {
  235. return sreg;
  236. }
  237. synchronized private void wakeupThread() {
  238. this.notify();
  239. }
  240. public void addCall(TBase call) {
  241. pendingCalls.add(call);
  242. wakeupThread();
  243. }
  244. public void clearCalls() {
  245. pendingCalls.clear();
  246. }
  247. private void init () throws TException {
  248. if (transport == null) {
  249. transport = new TSocket(host, port);
  250. client = new ClusterManagerService.Client(new TBinaryProtocol(transport));
  251. transport.open();
  252. }
  253. }
  254. public void close() {
  255. if (transport != null) {
  256. transport.close();
  257. transport = null;
  258. client = null;
  259. }
  260. }
  261. private void resetRetryState() {
  262. nextDispatchTime = -1;
  263. numRetries = 0;
  264. currentRetryInterval = retryIntervalStart;
  265. }
  266. /**
  267. * get the first pending call if it exists, else null
  268. */
  269. private TBase getCall() {
  270. try {
  271. TBase ret = pendingCalls.get(0);
  272. return (ret);
  273. } catch (IndexOutOfBoundsException e) {
  274. return null;
  275. }
  276. }
  277. public void run() {
  278. while (!shutdown) {
  279. synchronized (this) {
  280. try {
  281. this.wait(waitInterval);
  282. } catch (InterruptedException e) {
  283. }
  284. }
  285. long now = ClusterManager.clock.getTime();
  286. try {
  287. // send heartbeat if one is due
  288. // if shutdown is ordered, don't send heartbeat
  289. if (!shutdown && ((now - lastHeartbeatTime) > heartbeatInterval)) {
  290. init();
  291. client.sessionHeartbeat(sreg.handle);
  292. resetRetryState();
  293. lastHeartbeatTime = now;
  294. }
  295. // except in the case of shutdown, wait correct time
  296. // before trying calls again (in case of failures)
  297. // in the case of shutdown - we try to send as many pending
  298. // calls as we can before terminating
  299. if (!shutdown && (now < nextDispatchTime))
  300. continue;
  301. // send pending requests/releases
  302. while (!pendingCalls.isEmpty()) {
  303. TBase call = getCall();
  304. init();
  305. dispatchCall(call);
  306. resetRetryState();
  307. // we can only remove the first element if
  308. // it is the call we just dispatched
  309. TBase currentCall = getCall();
  310. if (currentCall == call)
  311. pendingCalls.remove(0);
  312. }
  313. } catch (TException e) {
  314. LOG.error("Call to CM, numRetry: " + numRetries +
  315. " failed with exception: \n" + StringUtils.stringifyException(e));
  316. // close the transport/client on any exception
  317. // will be reopened on next try
  318. close();
  319. if (numRetries > retryCountMax) {
  320. LOG.error("All retries failed - closing CMNotifier");
  321. sessionDriver.setFailed(new IOException(e));
  322. break;
  323. }
  324. numRetries++;
  325. nextDispatchTime = now + currentRetryInterval;
  326. currentRetryInterval *= retryIntervalFactor;
  327. } catch (InvalidSessionHandle e) {
  328. LOG.error("InvalidSession exception - closing CMNotifier");
  329. sessionDriver.setFailed(new IOException(e));
  330. break;
  331. }
  332. } // while (true)
  333. } // run()
  334. private void dispatchCall(TBase call) throws TException, InvalidSessionHandle {
  335. if (LOG.isDebugEnabled())
  336. LOG.debug ("Begin dispatching call: " + call.toString());
  337. if (call instanceof ClusterManagerService.requestResource_args) {
  338. ClusterManagerService.requestResource_args args =
  339. (ClusterManagerService.requestResource_args)call;
  340. client.requestResource(args.handle, args.requestList);
  341. } else if (call instanceof ClusterManagerService.releaseResource_args) {
  342. ClusterManagerService.releaseResource_args args =
  343. (ClusterManagerService.releaseResource_args)call;
  344. client.releaseResource(args.handle, args.idList);
  345. } else if (call instanceof ClusterManagerService.sessionEnd_args) {
  346. ClusterManagerService.sessionEnd_args args =
  347. (ClusterManagerService.sessionEnd_args)call;
  348. client.sessionEnd(args.handle, args.status);
  349. } else if (call instanceof ClusterManagerService.sessionUpdateInfo_args) {
  350. ClusterManagerService.sessionUpdateInfo_args args =
  351. (ClusterManagerService.sessionUpdateInfo_args)call;
  352. client.sessionUpdateInfo(args.handle, args.info);
  353. } else {
  354. throw new RuntimeException("Unknown Class: " + call.getClass().getName());
  355. }
  356. LOG.debug ("End dispatch call");
  357. }
  358. }
  359. }