/src/org/ratankumar/www/cluster.java

https://bitbucket.org/ratan/webrec · Java · 311 lines · 243 code · 45 blank · 23 comment · 28 complexity · 489360431a2a0950dcf03b379332ede0 MD5 · raw file

  1. package org.ratankumar.www;
  2. import java.io.IOException;
  3. import java.util.ArrayList;
  4. import javax.servlet.http.HttpServlet;
  5. import javax.servlet.http.HttpServletRequest;
  6. import javax.servlet.http.HttpServletResponse;
  7. import com.google.appengine.api.datastore.DatastoreService;
  8. import com.google.appengine.api.datastore.DatastoreServiceFactory;
  9. import com.google.appengine.api.datastore.Entity;
  10. import com.google.appengine.api.datastore.PreparedQuery;
  11. import com.google.appengine.api.datastore.Query;
  12. @SuppressWarnings("serial")
  13. public class cluster extends HttpServlet {
  14. private static final int NUM_CLUSTERS = 2; // Total clusters.
  15. private static final int TOTAL_DATA = 7; // Total data points.
  16. public void doGet(HttpServletRequest req, HttpServletResponse resp)
  17. throws IOException {
  18. initialize();
  19. kMeanCluster();
  20. // Print out clustering results.
  21. for(int i = 0; i < NUM_CLUSTERS; i++)
  22. {
  23. System.out.println("Cluster " + i + " includes:");
  24. for(int j = 0; j < TOTAL_DATA; j++)
  25. {
  26. if(dataSet.get(j).cluster() == i){
  27. System.out.println(" (" + dataSet.get(j).X() + ", " + dataSet.get(j).Y() + ")");
  28. }
  29. } // j
  30. System.out.println();
  31. } // i
  32. // Print out centroid results.
  33. System.out.println("Centroids finalized at:");
  34. for(int i = 0; i < NUM_CLUSTERS; i++)
  35. {
  36. System.out.println(" (" + centroids.get(i).X() + ", " + centroids.get(i).Y());
  37. }
  38. System.out.print("\n");
  39. return;
  40. }
  41. // private static final double //SAMPLES1[][] ;//;new double[][] {{1.0, 1.0},
  42. // {1.5, 2.0},
  43. // {3.0, 4.0},
  44. // {5.0, 7.0},
  45. // {3.5, 5.0},
  46. // {4.5, 5.0},
  47. // {3.5, 4.5}
  48. // };
  49. private static ArrayList<Data> dataSet = new ArrayList<Data>();
  50. private static ArrayList<Centroid> centroids = new ArrayList<Centroid>();
  51. private static void initialize()
  52. {
  53. System.out.println("Centroids initialized at:");
  54. centroids.add(new Centroid(1.0, 1.0)); // lowest set.
  55. centroids.add(new Centroid(5.0, 7.0)); // highest set.
  56. System.out.println(" (" + centroids.get(0).X() + ", " + centroids.get(0).Y() + ")");
  57. System.out.println(" (" + centroids.get(1).X() + ", " + centroids.get(1).Y() + ")");
  58. System.out.print("\n");
  59. return;
  60. }
  61. private static void kMeanCluster()
  62. {
  63. final double bigNumber = Math.pow(10, 10); // some big number that's sure to be larger than our data range.
  64. double minimum = bigNumber; // The minimum value to beat.
  65. double distance = 0.0; // The current minimum value.
  66. int sampleNumber = 0;
  67. int cluster = 0;
  68. boolean isStillMoving = true;
  69. Data newData = null;
  70. final int SAMPLES[][] = {{0,0}};
  71. DatastoreService ds = DatastoreServiceFactory.getDatastoreService();
  72. Query q3 = new Query("SFactor");
  73. PreparedQuery pq3 = ds.prepare(q3);
  74. int ino=0;
  75. for (Entity result3 : pq3.asIterable())
  76. {
  77. int sf = (int) Integer.parseInt((String) result3.getProperty("sf"));
  78. int uno = (int) Integer.parseInt((String) result3.getProperty("uno"));
  79. SAMPLES[ino][0]=sf;
  80. SAMPLES[ino][1]=uno;
  81. }
  82. // Add in new data, one at a time, recalculating centroids with each new one.
  83. while(dataSet.size() < ino)
  84. {
  85. newData = new Data(SAMPLES[sampleNumber][0], SAMPLES[sampleNumber][1]);
  86. dataSet.add(newData);
  87. minimum = bigNumber;
  88. for(int i = 0; i < NUM_CLUSTERS; i++)
  89. {
  90. distance = dist(newData, centroids.get(i));
  91. if(distance < minimum){
  92. minimum = distance;
  93. cluster = i;
  94. }
  95. }
  96. newData.cluster(cluster);
  97. // calculate new centroids.
  98. for(int i = 0; i < NUM_CLUSTERS; i++)
  99. {
  100. int totalX = 0;
  101. int totalY = 0;
  102. int totalInCluster = 0;
  103. for(int j = 0; j < dataSet.size(); j++)
  104. {
  105. if(dataSet.get(j).cluster() == i){
  106. totalX += dataSet.get(j).X();
  107. totalY += dataSet.get(j).Y();
  108. totalInCluster++;
  109. }
  110. }
  111. if(totalInCluster > 0){
  112. centroids.get(i).X(totalX / totalInCluster);
  113. centroids.get(i).Y(totalY / totalInCluster);
  114. }
  115. }
  116. sampleNumber++;
  117. }
  118. // Now, keep shifting centroids until equilibrium occurs.
  119. while(isStillMoving)
  120. {
  121. // calculate new centroids.
  122. for(int i = 0; i < NUM_CLUSTERS; i++)
  123. {
  124. int totalX = 0;
  125. int totalY = 0;
  126. int totalInCluster = 0;
  127. for(int j = 0; j < dataSet.size(); j++)
  128. {
  129. if(dataSet.get(j).cluster() == i){
  130. totalX += dataSet.get(j).X();
  131. totalY += dataSet.get(j).Y();
  132. totalInCluster++;
  133. }
  134. }
  135. if(totalInCluster > 0){
  136. centroids.get(i).X(totalX / totalInCluster);
  137. centroids.get(i).Y(totalY / totalInCluster);
  138. }
  139. }
  140. // Assign all data to the new centroids
  141. isStillMoving = false;
  142. for(int i = 0; i < dataSet.size(); i++)
  143. {
  144. Data tempData = dataSet.get(i);
  145. minimum = bigNumber;
  146. for(int j = 0; j < NUM_CLUSTERS; j++)
  147. {
  148. distance = dist(tempData, centroids.get(j));
  149. if(distance < minimum){
  150. minimum = distance;
  151. cluster = j;
  152. }
  153. }
  154. tempData.cluster(cluster);
  155. if(tempData.cluster() != cluster){
  156. tempData.cluster(cluster);
  157. isStillMoving = true;
  158. }
  159. }
  160. }
  161. return;
  162. }
  163. /**
  164. * // Calculate Euclidean distance.
  165. * @param d - Data object.
  166. * @param c - Centroid object.
  167. * @return - double value.
  168. */
  169. private static double dist(Data d, Centroid c)
  170. {
  171. return Math.sqrt(Math.pow((c.Y() - d.Y()), 2) + Math.pow((c.X() - d.X()), 2));
  172. }
  173. private static class Data
  174. {
  175. private double mX = 0;
  176. private double mY = 0;
  177. private int mCluster = 0;
  178. public Data(double x, double y)
  179. {
  180. this.X(x);
  181. this.Y(y);
  182. return;
  183. }
  184. public void X(double x)
  185. {
  186. this.mX = x;
  187. return;
  188. }
  189. public double X()
  190. {
  191. return this.mX;
  192. }
  193. public void Y(double y)
  194. {
  195. this.mY = y;
  196. return;
  197. }
  198. public double Y()
  199. {
  200. return this.mY;
  201. }
  202. public void cluster(int clusterNumber)
  203. {
  204. this.mCluster = clusterNumber;
  205. return;
  206. }
  207. public int cluster()
  208. {
  209. return this.mCluster;
  210. }
  211. }
  212. private static class Centroid
  213. {
  214. private double mX = 0.0;
  215. private double mY = 0.0;
  216. public Centroid(double newX, double newY)
  217. {
  218. this.mX = newX;
  219. this.mY = newY;
  220. return;
  221. }
  222. public void X(double newX)
  223. {
  224. this.mX = newX;
  225. return;
  226. }
  227. public double X()
  228. {
  229. return this.mX;
  230. }
  231. public void Y(double newY)
  232. {
  233. this.mY = newY;
  234. return;
  235. }
  236. public double Y()
  237. {
  238. return this.mY;
  239. }
  240. }
  241. public static void main(String[] args)
  242. {
  243. initialize();
  244. kMeanCluster();
  245. // Print out clustering results.
  246. for(int i = 0; i < NUM_CLUSTERS; i++)
  247. {
  248. System.out.println("Cluster " + i + " includes:");
  249. for(int j = 0; j < TOTAL_DATA; j++)
  250. {
  251. if(dataSet.get(j).cluster() == i){
  252. System.out.println(" (" + dataSet.get(j).X() + ", " + dataSet.get(j).Y() + ")");
  253. }
  254. } // j
  255. System.out.println();
  256. } // i
  257. // Print out centroid results.
  258. System.out.println("Centroids finalized at:");
  259. for(int i = 0; i < NUM_CLUSTERS; i++)
  260. {
  261. System.out.println(" (" + centroids.get(i).X() + ", " + centroids.get(i).Y());
  262. }
  263. System.out.print("\n");
  264. return;
  265. }
  266. }