PageRenderTime 83ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/src/main/java/org/apache/giraph/graph/AutoBalancer.java

https://github.com/dvryaboy/giraph
Java | 302 lines | 236 code | 21 blank | 45 comment | 39 complexity | 996dc1b73a20e03ec2d4b287a36ef8fb MD5 | raw file
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.giraph.graph;
  19. import java.util.Comparator;
  20. import java.util.ArrayList;
  21. import java.util.List;
  22. import java.util.Map;
  23. import java.util.Map.Entry;
  24. import java.util.NavigableMap;
  25. import java.util.Set;
  26. import java.util.TreeMap;
  27. import java.util.TreeSet;
  28. import org.apache.hadoop.io.LongWritable;
  29. import org.apache.hadoop.io.Writable;
  30. import org.apache.hadoop.io.WritableComparable;
  31. import org.apache.log4j.Logger;
  32. import org.json.JSONArray;
  33. import org.json.JSONException;
  34. /**
  35. * Balancer that automatically balances vertex ranges based on
  36. * number of vertices or edges (configurable).
  37. *
  38. * @param <I> vertex id type
  39. */
  40. @SuppressWarnings("rawtypes")
  41. public final class AutoBalancer<
  42. I extends WritableComparable,
  43. V extends Writable,
  44. E extends Writable,
  45. M extends Writable>
  46. extends VertexRangeBalancer<I, V, E, M> {
  47. /** Class logger */
  48. private static final Logger LOG = Logger.getLogger(AutoBalancer.class);
  49. /** enum for what to balance on */
  50. public enum BalCriterium {
  51. BAL_NONE,
  52. BAL_NUM_VERTICES,
  53. BAL_NUM_EDGES,
  54. BAL_NUM_VERTICES_AND_EDGES,
  55. }
  56. /** boolean: decides whether to balance number of vertices or edges */
  57. private static BalCriterium balanceOn =
  58. BalCriterium.BAL_NUM_VERTICES_AND_EDGES;
  59. public static void setBalCriterium(BalCriterium balanceOn) {
  60. AutoBalancer.balanceOn = balanceOn;
  61. }
  62. private long getVertexRangeEntries(VertexRange<I,V,E,M> v)
  63. {
  64. if (balanceOn == BalCriterium.BAL_NUM_VERTICES_AND_EDGES) {
  65. return v.getVertexCount() + v.getEdgeCount();
  66. }
  67. if (balanceOn == BalCriterium.BAL_NUM_EDGES) {
  68. return v.getEdgeCount();
  69. }
  70. if (balanceOn == BalCriterium.BAL_NUM_VERTICES) {
  71. return v.getVertexCount();
  72. }
  73. return 0;
  74. }
  75. public class VertexRangeComparator
  76. implements Comparator<VertexRange<I,V,E,M>> {
  77. /**
  78. * Compares the number of entities based on balanceOn.
  79. */
  80. @SuppressWarnings("unchecked")
  81. public int compare(VertexRange<I,V,E,M> v1, VertexRange<I,V,E,M> v2) {
  82. long numEntries1 = getVertexRangeEntries(v1);
  83. long numEntries2 = getVertexRangeEntries(v2);
  84. if (numEntries1 == numEntries2) {
  85. return v1.getMaxIndex().compareTo(v2.getMaxIndex());
  86. }
  87. return (int)(numEntries2 - numEntries1);
  88. }
  89. }
  90. @Override
  91. public final NavigableMap<I, VertexRange<I, V, E, M>> rebalance() {
  92. if (balanceOn == BalCriterium.BAL_NONE) {
  93. return getPrevVertexRangeMap();
  94. }
  95. Map<String, JSONArray> workerHostnameIdMap = getWorkerHostnamePortMap();
  96. NavigableMap<I, VertexRange<I, V, E, M>> prevVertexRangeMap =
  97. getPrevVertexRangeMap();
  98. NavigableMap<I, VertexRange<I, V, E, M>> nextVertexRangeMap =
  99. new TreeMap<I, VertexRange<I, V, E, M>>();
  100. NavigableMap<String, Set<VertexRange<I,V,E,M>>> hostToVertexRangeMap =
  101. new TreeMap<String, Set<VertexRange<I, V, E, M>>>();
  102. NavigableMap<String, LongWritable> hostToNumEntriesMap =
  103. new TreeMap<String, LongWritable>();
  104. NavigableMap<VertexRange<I,V,E,M>, String> vRngDistToHostMap =
  105. new TreeMap<VertexRange<I, V, E, M>, String>(
  106. new VertexRangeComparator());
  107. NavigableMap<LongWritable, List<String>> numEntriesToHostMap =
  108. new TreeMap<LongWritable, List<String>>();
  109. LOG.info("rebalance: workerHostnameIdMap size=" +
  110. workerHostnameIdMap.size() +
  111. " prevVertexRangeMap size=" +
  112. prevVertexRangeMap.size());
  113. // First generate total number of entries to balance on
  114. // and generate appropriate maps:
  115. // map from hostnameId to list of vertex ranges
  116. // map from hostnameId to total number of entries
  117. int numVertexRanges = 0;
  118. long numTotalEntries = 0;
  119. // initialize with workerHostnameIdMap to account for
  120. // hostnameId's without any vertices
  121. for (String hostnameId : workerHostnameIdMap.keySet()) {
  122. hostToNumEntriesMap.put(hostnameId, new LongWritable(0));
  123. hostToVertexRangeMap.put(hostnameId,
  124. new TreeSet<VertexRange<I, V, E, M>>(
  125. new VertexRangeComparator()));
  126. }
  127. for (Entry<I, VertexRange<I, V, E, M>> entry :
  128. prevVertexRangeMap.entrySet()) {
  129. numVertexRanges++;
  130. VertexRange<I, V, E, M> vRange = entry.getValue();
  131. long numEntries = getVertexRangeEntries(vRange);
  132. numTotalEntries += numEntries;
  133. String hostnameId = vRange.getHostnameId();
  134. LongWritable entryLong = hostToNumEntriesMap.get(hostnameId);
  135. if (entryLong == null) {
  136. throw new RuntimeException("Unknown hostnameId=" +
  137. hostnameId);
  138. }
  139. entryLong.set(entryLong.get() + numEntries);
  140. Set<VertexRange<I, V, E, M>> vRangeList =
  141. hostToVertexRangeMap.get(hostnameId);
  142. if (vRangeList == null) {
  143. throw new RuntimeException("Unknown hostnameId=" +
  144. hostnameId);
  145. }
  146. if (vRangeList.add(vRange) == false) {
  147. throw new RuntimeException(
  148. "All VertexRanges should be different");
  149. }
  150. }
  151. LOG.info("rebalance: numTotalEntries=" + numTotalEntries +
  152. " numVertexRanges=" + numVertexRanges +
  153. " hostToVertexRangeMap size=" + hostToVertexRangeMap.size() +
  154. " hostToNumEntriesMap size=" + hostToNumEntriesMap.size());
  155. // Next take away the vertex ranges with the smallest number of
  156. // entities till every hostnameId maps to a list of vertex ranges
  157. // containing not more than the average number of entries.
  158. // The vertex ranges taken away are put as keys into a sorted map,
  159. // sorted on the number of entries in descending order.
  160. long aveWorkerEntries = numTotalEntries / hostToNumEntriesMap.size();
  161. long squareDeviation = 0;
  162. numVertexRanges = 0;
  163. numTotalEntries = 0;
  164. for (Entry<String, Set<VertexRange<I,V,E,M>>> entry :
  165. hostToVertexRangeMap.entrySet()) {
  166. long numEntries = hostToNumEntriesMap.get(entry.getKey()).get();
  167. numTotalEntries += numEntries;
  168. numVertexRanges += entry.getValue().size();
  169. squareDeviation += ((numEntries - aveWorkerEntries) *
  170. (numEntries - aveWorkerEntries));
  171. if (numEntries > aveWorkerEntries) {
  172. int sz = entry.getValue().size();
  173. for (int i = 0; i < sz; i++) {
  174. VertexRange<I,V,E,M> vRange =
  175. ((TreeSet<VertexRange<I,V,E,M>>)
  176. entry.getValue()).pollLast();
  177. vRngDistToHostMap.put(vRange, entry.getKey());
  178. numEntries -= getVertexRangeEntries(vRange);
  179. if (numEntries <= aveWorkerEntries) {
  180. hostToNumEntriesMap.get(entry.getKey()).set(numEntries);
  181. break;
  182. }
  183. }
  184. }
  185. }
  186. LOG.info("rebalance: Initial squareDeviation=" + squareDeviation +
  187. " numTotalEntries=" + numTotalEntries +
  188. " numVertexRanges=" + numVertexRanges);
  189. // Next the map from hostnameId to number of entries is reversed
  190. // (value becomes key) such that the number of entries get sorted.
  191. // Then the removed vertex ranges are assigned to hostnameId's, largest
  192. // vertex ranges to hostnameId's with the smallest number of entities.
  193. for (Entry<String, LongWritable> entry :
  194. hostToNumEntriesMap.entrySet()) {
  195. List<String> hostnameIds =
  196. numEntriesToHostMap.get(entry.getValue());
  197. if (hostnameIds == null) {
  198. hostnameIds = new ArrayList<String>();
  199. numEntriesToHostMap.put(entry.getValue(), hostnameIds);
  200. }
  201. hostnameIds.add(entry.getKey());
  202. }
  203. for (Entry<VertexRange<I,V,E,M>, String> entry :
  204. vRngDistToHostMap.entrySet()) {
  205. String hostnameId = entry.getValue();
  206. String newHostnameId = null;
  207. Entry<LongWritable, List<String>> entriesToHost =
  208. numEntriesToHostMap.firstEntry();
  209. for (String id : entriesToHost.getValue()) {
  210. if (hostnameId.equals(id)) {
  211. newHostnameId = id;
  212. break;
  213. }
  214. }
  215. if (newHostnameId == null) {
  216. newHostnameId = entriesToHost.getValue().get(0);
  217. }
  218. entriesToHost.getValue().remove(newHostnameId);
  219. VertexRange<I, V, E, M> replacedVertexRange = null;
  220. if (hostnameId.equals(newHostnameId)) {
  221. replacedVertexRange = entry.getKey();
  222. } else {
  223. try {
  224. replacedVertexRange =
  225. new VertexRange<I, V, E, M>(entry.getKey());
  226. } catch (Exception e) {
  227. throw new RuntimeException(e);
  228. }
  229. JSONArray hostnamePortArray =
  230. workerHostnameIdMap.get(newHostnameId);
  231. try {
  232. replacedVertexRange.setHostnameId(newHostnameId);
  233. replacedVertexRange.setHostname(
  234. hostnamePortArray.getString(0));
  235. replacedVertexRange.setPort(hostnamePortArray.getInt(1));
  236. } catch (JSONException e) {
  237. throw new RuntimeException(e);
  238. }
  239. }
  240. nextVertexRangeMap.put(replacedVertexRange.getMaxIndex(),
  241. replacedVertexRange);
  242. LongWritable numEntries =
  243. new LongWritable(entriesToHost.getKey().get() +
  244. getVertexRangeEntries(entry.getKey()));
  245. if (entriesToHost.getValue().size() == 0) {
  246. numEntriesToHostMap.remove(entriesToHost.getKey());
  247. }
  248. List<String> hostnameIds = numEntriesToHostMap.get(numEntries);
  249. if (hostnameIds == null) {
  250. hostnameIds = new ArrayList<String>();
  251. numEntriesToHostMap.put(numEntries, hostnameIds);
  252. }
  253. hostnameIds.add(newHostnameId);
  254. }
  255. squareDeviation = 0;
  256. numTotalEntries = 0;
  257. numVertexRanges = 0;
  258. for (Entry<LongWritable, List<String>> entriesToHost :
  259. numEntriesToHostMap.entrySet()) {
  260. long numEntries = entriesToHost.getKey().get();
  261. squareDeviation += (entriesToHost.getValue().size() *
  262. (numEntries - aveWorkerEntries) *
  263. (numEntries - aveWorkerEntries));
  264. numTotalEntries += (numEntries * entriesToHost.getValue().size());
  265. numVertexRanges += entriesToHost.getValue().size();
  266. }
  267. for (Set<VertexRange<I,V,E,M>> vRangeSet :
  268. hostToVertexRangeMap.values()) {
  269. for (VertexRange<I,V,E,M> vRange : vRangeSet) {
  270. nextVertexRangeMap.put(vRange.getMaxIndex(), vRange);
  271. }
  272. }
  273. LOG.info("rebalance: Final squareDeviation=" + squareDeviation +
  274. " numTotalEntries=" + numTotalEntries +
  275. " numVertexRangesCalculated=" + numVertexRanges +
  276. " numVertexRangesAssigned=" + nextVertexRangeMap.size());
  277. // only run once, revisit this when we make graph mutations
  278. balanceOn = BalCriterium.BAL_NONE;
  279. return nextVertexRangeMap;
  280. }
  281. }