PageRenderTime 107ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/src/java/org/apache/cassandra/service/StorageLoadBalancer.java

https://github.com/angosso/git-cassandra-angosso-angosso.html
Java | 371 lines | 168 code | 38 blank | 165 comment | 16 complexity | e379917e2d79c91f6c24d74d5699dff2 MD5 | raw file
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.cassandra.service;
  19. import java.util.*;
  20. import java.util.concurrent.ExecutorService;
  21. import java.util.concurrent.atomic.AtomicBoolean;
  22. import org.apache.cassandra.gms.*;
  23. import org.slf4j.Logger;
  24. import org.slf4j.LoggerFactory;
  25. import org.apache.cassandra.concurrent.JMXEnabledThreadPoolExecutor;
  26. import java.net.InetAddress;
  27. import org.apache.cassandra.net.IVerbHandler;
  28. import org.apache.cassandra.net.Message;
  29. import org.apache.cassandra.net.MessagingService;
  30. import org.apache.cassandra.utils.FBUtilities;
  /*
   * The load balancing algorithm here is an implementation of
   * the algorithm described in the paper "Scalable range query
   * processing for large-scale distributed database applications".
   * This class keeps track of load information across the system.
   * It registers itself with the Gossiper for the ApplicationState
   * that carries load information, i.e. the number of requests
   * processed with respect to distinct keys at an endpoint. Load
   * information is monitored for a 5-minute interval, after which
   * load balancing operations are performed if necessary.
   */
  41. public class StorageLoadBalancer implements IEndpointStateChangeSubscriber
  42. {
  43. class LoadBalancer implements Runnable
  44. {
  45. LoadBalancer()
  46. {
  47. /* Copy the entries in loadInfo_ into loadInfo2_ and use it for all calculations */
  48. loadInfo2_.putAll(loadInfo_);
  49. }
  50. /**
  51. * Obtain a node which is a potential target. Start with
  52. * the neighbours i.e either successor or predecessor.
  53. * Send the target a MoveMessage. If the node cannot be
  54. * relocated on the ring then we pick another candidate for
  55. * relocation.
  56. */
  57. public void run()
  58. {
  59. /*
  60. int threshold = (int)(StorageLoadBalancer.TOPHEAVY_RATIO * averageSystemLoad());
  61. int myLoad = localLoad();
  62. InetAddress predecessor = StorageService.instance.getPredecessor(StorageService.getLocalStorageEndpoint());
  63. if (logger_.isDebugEnabled())
  64. logger_.debug("Trying to relocate the predecessor {}", predecessor);
  65. boolean value = tryThisNode(myLoad, threshold, predecessor);
  66. if ( !value )
  67. {
  68. loadInfo2_.remove(predecessor);
  69. InetAddress successor = StorageService.instance.getSuccessor(StorageService.getLocalStorageEndpoint());
  70. if (logger_.isDebugEnabled())
  71. logger_.debug("Trying to relocate the successor {}", successor);
  72. value = tryThisNode(myLoad, threshold, successor);
  73. if ( !value )
  74. {
  75. loadInfo2_.remove(successor);
  76. while ( !loadInfo2_.isEmpty() )
  77. {
  78. InetAddress target = findARandomLightNode();
  79. if ( target != null )
  80. {
  81. if (logger_.isDebugEnabled())
  82. logger_.debug("Trying to relocate the random node {}", target);
  83. value = tryThisNode(myLoad, threshold, target);
  84. if ( !value )
  85. {
  86. loadInfo2_.remove(target);
  87. }
  88. else
  89. {
  90. break;
  91. }
  92. }
  93. else
  94. {
  95. // No light nodes available - this is NOT good.
  96. logger_.warn("Not even a single lightly loaded node is available ...");
  97. break;
  98. }
  99. }
  100. loadInfo2_.clear();
  101. // If we are here and no node was available to
  102. // perform load balance with we need to report and bail.
  103. if ( !value )
  104. {
  105. logger_.warn("Load Balancing operations weren't performed for this node");
  106. }
  107. }
  108. }
  109. */
  110. }
  111. /*
  112. private boolean tryThisNode(int myLoad, int threshold, InetAddress target)
  113. {
  114. boolean value = false;
  115. LoadInfo li = loadInfo2_.get(target);
  116. int pLoad = li.count();
  117. if ( ((myLoad + pLoad) >> 1) <= threshold )
  118. {
  119. //calculate the number of keys to be transferred
  120. int keyCount = ( (myLoad - pLoad) >> 1 );
  121. if (logger_.isDebugEnabled())
  122. logger_.debug("Number of keys we attempt to transfer to " + target + " " + keyCount);
  123. // Determine the token that the target should join at.
  124. BigInteger targetToken = BootstrapAndLbHelper.getTokenBasedOnPrimaryCount(keyCount);
  125. // Send a MoveMessage and see if this node is relocateable
  126. MoveMessage moveMessage = new MoveMessage(targetToken);
  127. Message message = new Message(StorageService.getLocalStorageEndpoint(), StorageLoadBalancer.lbStage_, StorageLoadBalancer.moveMessageVerbHandler_, new Object[]{moveMessage});
  128. if (logger_.isDebugEnabled())
  129. logger_.debug("Sending a move message to {}", target);
  130. IAsyncResult result = MessagingService.getMessagingInstance().sendRR(message, target);
  131. value = (Boolean)result.get()[0];
  132. if (logger_.isDebugEnabled())
  133. logger_.debug("Response for query to relocate " + target + " is " + value);
  134. }
  135. return value;
  136. }
  137. */
  138. }
  139. class MoveMessageVerbHandler implements IVerbHandler
  140. {
  141. public void doVerb(Message message)
  142. {
  143. Message reply = message.getReply(FBUtilities.getLocalAddress(), new byte[] {(byte)(isMoveable_.get() ? 1 : 0)});
  144. MessagingService.instance.sendOneWay(reply, message.getFrom());
  145. if ( isMoveable_.get() )
  146. {
  147. // MoveMessage moveMessage = (MoveMessage)message.getMessageBody()[0];
  148. /* Start the leave operation and join the ring at the position specified */
  149. isMoveable_.set(false);
  150. }
  151. }
  152. }
  /* interval in milliseconds between two load broadcasts; also part of the wait in waitForLoadInfo() */
  private static final int BROADCAST_INTERVAL = 60 * 1000;

  /*
   * The logger is declared before the singleton so that it is already initialized
   * when the constructor runs during static initialization.
   */
  private static final Logger logger_ = LoggerFactory.getLogger(StorageLoadBalancer.class);

  /* time to delay in minutes the actual load balance procedure if heavily loaded (only referenced by disabled code) */
  private static final int delay_ = 5;

  /* If a node's load is this factor more than the average, it is considered Heavy */
  private static final double TOPHEAVY_RATIO = 1.5;

  /* the single instance of this class; creating it registers it with the Gossiper */
  public static final StorageLoadBalancer instance = new StorageLoadBalancer();

  /* this indicates whether this node is already helping someone else */
  private final AtomicBoolean isMoveable_ = new AtomicBoolean(false);

  /* most recent load value received through gossip, per endpoint */
  private final Map<InetAddress, Double> loadInfo_ = new HashMap<InetAddress, Double>();

  /* This map is a clone of the one above and is used for various calculations during LB operation */
  private final Map<InetAddress, Double> loadInfo2_ = new HashMap<InetAddress, Double>();

  /* Timer is used to disseminate load information; created with isDaemon == false */
  private final Timer loadTimer_ = new Timer(false);

  /**
   * Creates the load balancer and subscribes it to the Gossiper so that it
   * receives endpoint state callbacks.
   */
  private StorageLoadBalancer()
  {
      Gossiper.instance.register(this);
  }
  171. public void onChange(InetAddress endpoint, ApplicationState state, VersionedValue value)
  172. {
  173. if (state != ApplicationState.LOAD)
  174. return;
  175. loadInfo_.put(endpoint, Double.parseDouble(value.value));
  176. /*
  177. // clone load information to perform calculations
  178. loadInfo2_.putAll(loadInfo_);
  179. // Perform the analysis for load balance operations
  180. if ( isHeavyNode() )
  181. {
  182. if (logger_.isDebugEnabled())
  183. logger_.debug(StorageService.getLocalStorageEndpoint() + " is a heavy node with load " + localLoad());
  184. // lb_.schedule( new LoadBalancer(), StorageLoadBalancer.delay_, TimeUnit.MINUTES );
  185. }
  186. */
  187. }
  188. public void onJoin(InetAddress endpoint, EndpointState epState)
  189. {
  190. VersionedValue localValue = epState.getApplicationState(ApplicationState.LOAD);
  191. if (localValue != null)
  192. {
  193. onChange(endpoint, ApplicationState.LOAD, localValue);
  194. }
  195. }
  196. public void onAlive(InetAddress endpoint, EndpointState state) {}
  197. public void onDead(InetAddress endpoint, EndpointState state) {}
  198. public void onRemove(InetAddress endpoint) {}
  199. /*
  200. private boolean isMoveable()
  201. {
  202. if ( !isMoveable_.get() )
  203. return false;
  204. int myload = localLoad();
  205. InetAddress successor = StorageService.instance.getSuccessor(StorageService.getLocalStorageEndpoint());
  206. LoadInfo li = loadInfo2_.get(successor);
  207. // "load" is NULL means that the successor node has not
  208. // yet gossiped its load information. We should return
  209. // false in this case since we want to err on the side
  210. // of caution.
  211. if ( li == null )
  212. return false;
  213. else
  214. {
  215. return ( ( myload + li.count() ) <= StorageLoadBalancer.TOPHEAVY_RATIO*averageSystemLoad() );
  216. }
  217. }
  218. */
  219. private double localLoad()
  220. {
  221. Double load = loadInfo2_.get(FBUtilities.getLocalAddress());
  222. return load == null ? 0 : load;
  223. }
  224. private double averageSystemLoad()
  225. {
  226. int nodeCount = loadInfo2_.size();
  227. Set<InetAddress> nodes = loadInfo2_.keySet();
  228. double systemLoad = 0;
  229. for (InetAddress node : nodes)
  230. {
  231. systemLoad += loadInfo2_.get(node);
  232. }
  233. double averageLoad = (nodeCount > 0) ? (systemLoad / nodeCount) : 0;
  234. if (logger_.isDebugEnabled())
  235. logger_.debug("Average system load is {}", averageLoad);
  236. return averageLoad;
  237. }
  238. private boolean isHeavyNode()
  239. {
  240. return ( localLoad() > ( StorageLoadBalancer.TOPHEAVY_RATIO * averageSystemLoad() ) );
  241. }
  242. private boolean isMoveable(InetAddress target)
  243. {
  244. double threshold = StorageLoadBalancer.TOPHEAVY_RATIO * averageSystemLoad();
  245. if (isANeighbour(target))
  246. {
  247. // If the target is a neighbour then it is
  248. // moveable if its
  249. Double load = loadInfo2_.get(target);
  250. if (load == null)
  251. {
  252. return false;
  253. }
  254. else
  255. {
  256. double myload = localLoad();
  257. double avgLoad = (load + myload) / 2;
  258. return avgLoad <= threshold;
  259. }
  260. }
  261. else
  262. {
  263. InetAddress successor = StorageService.instance.getSuccessor(target);
  264. double sLoad = loadInfo2_.get(successor);
  265. double targetLoad = loadInfo2_.get(target);
  266. return (sLoad + targetLoad) <= threshold;
  267. }
  268. }
  269. private boolean isANeighbour(InetAddress neighbour)
  270. {
  271. InetAddress predecessor = StorageService.instance.getPredecessor(FBUtilities.getLocalAddress());
  272. if ( predecessor.equals(neighbour) )
  273. return true;
  274. InetAddress successor = StorageService.instance.getSuccessor(FBUtilities.getLocalAddress());
  275. if ( successor.equals(neighbour) )
  276. return true;
  277. return false;
  278. }
  279. /*
  280. * Determine the nodes that are lightly loaded. Choose at
  281. * random one of the lightly loaded nodes and use them as
  282. * a potential target for load balance.
  283. */
  284. private InetAddress findARandomLightNode()
  285. {
  286. List<InetAddress> potentialCandidates = new ArrayList<InetAddress>();
  287. Set<InetAddress> allTargets = loadInfo2_.keySet();
  288. double avgLoad = averageSystemLoad();
  289. for (InetAddress target : allTargets)
  290. {
  291. double load = loadInfo2_.get(target);
  292. if (load < avgLoad)
  293. {
  294. potentialCandidates.add(target);
  295. }
  296. }
  297. if (potentialCandidates.size() > 0)
  298. {
  299. Random random = new Random();
  300. int index = random.nextInt(potentialCandidates.size());
  301. return potentialCandidates.get(index);
  302. }
  303. return null;
  304. }
  305. public Map<InetAddress, Double> getLoadInfo()
  306. {
  307. return loadInfo_;
  308. }
  309. public void startBroadcasting()
  310. {
  311. // send the first broadcast "right away" (i.e., in 2 gossip heartbeats, when we should have someone to talk to);
  312. // after that send every BROADCAST_INTERVAL.
  313. loadTimer_.schedule(new LoadDisseminator(), 2 * Gossiper.intervalInMillis_, BROADCAST_INTERVAL);
  314. }
  315. /**
  316. * Wait for at least BROADCAST_INTERVAL ms, to give all nodes enough time to
  317. * report in.
  318. */
  319. public void waitForLoadInfo()
  320. {
  321. int duration = BROADCAST_INTERVAL + StorageService.RING_DELAY;
  322. try
  323. {
  324. logger_.info("Sleeping {} ms to wait for load information...", duration);
  325. Thread.sleep(duration);
  326. }
  327. catch (InterruptedException e)
  328. {
  329. throw new AssertionError(e);
  330. }
  331. }
  332. }