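// NOTE: the following lines are metadata from the web page this file was extracted from;
// they are not part of the program.
// PageRenderTime 54ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms
//
// Source path: /alaspatial/src/main/java/org/ala/spatial/analysis/method/Aloc.java
//
// Project: http://alageospatialportal.googlecode.com/
// Java | 821 lines | 611 code | 89 blank | 121 comment | 183 complexity | e7f0cc91fc9fc45213fb439ab69f5828 MD5 | raw file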
  /**
   * ************************************************************************
   * Copyright (C) 2010 Atlas of Living Australia All Rights Reserved.
   *
   * The contents of this file are subject to the Mozilla Public License Version
   * 1.1 (the "License"); you may not use this file except in compliance with the
   * License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
   *
   * Software distributed under the License is distributed on an "AS IS" basis,
   * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
   * the specific language governing rights and limitations under the License.
   * *************************************************************************
   */
  package org.ala.spatial.analysis.method;

  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collections;
  import java.util.List;
  import java.util.concurrent.CountDownLatch;
  import java.util.concurrent.LinkedBlockingQueue;

  import org.ala.spatial.util.AnalysisJob;
  import org.ala.spatial.util.Layer;
  import org.ala.spatial.util.SpatialLogger;
  22. /**
  23. * ALOC
  24. *
  25. * @author adam
  26. *
  27. */
  28. public class Aloc {
  29. private static final int MAX_ITERATIONS = 250;
  30. /**
  31. * produces group allocations by ALOC with gower metric measure
  32. *
  33. * - reduces iteration duration dependance on #groups
  34. * - increases memory usage
  35. * from
  36. * #cells*#layers
  37. * to
  38. * #cells*#layers + #cells*#groups
  39. *
  40. * @param data_pieces
  41. * @param nNoOfGroups
  42. * @param nCols
  43. * @param pieces
  44. * @param job
  45. * @return
  46. */
  47. public static int[] runGowerMetricThreadedMemory(ArrayList<Object> data_pieces, int nNoOfGroups, int nCols, int pieces, Layer[] layers, AnalysisJob job, int threadcount, int[] iterationCount) {
  48. if (job != null) {
  49. job.setStage(1); //seeding stage
  50. }
  51. if (job != null && job.isCancelled()) {
  52. return null;
  53. }
  54. int[] rowCounts = new int[pieces];
  55. int nRowsTotal = 0;
  56. for (int i = 0; i < pieces; i++) {
  57. rowCounts[i] = ((float[]) data_pieces.get(i)).length / nCols;
  58. if (i > 0) {
  59. rowCounts[i] += rowCounts[i - 1];
  60. }
  61. }
  62. nRowsTotal += rowCounts[pieces - 1];
  63. int nRows;
  64. int min_movement = -1;
  65. int[] min_groups = new int[nRowsTotal];
  66. double[] min_dists = new double[nRowsTotal];
  67. //range standardize columns 0-1
  68. float[] col_min = new float[nCols];
  69. float[] col_max = new float[nCols];
  70. double[] col_range = new double[nCols];
  71. int seedidxsize = 0;
  72. int i, j, k;
  73. for (i = 0; i < nCols; i++) {
  74. col_min[i] = Float.MAX_VALUE;
  75. col_max[i] = Float.MAX_VALUE * -1;
  76. }
  77. for (k = 0; k < pieces; k++) {
  78. float[] data = (float[]) data_pieces.get(k);
  79. for (i = 0; i < nCols; i++) {
  80. nRows = data.length / nCols;
  81. for (j = 0; j < nRows; j++) {
  82. float f = data[i + j * nCols];
  83. if (Float.isNaN(col_min[i]) || col_min[i] > f) {
  84. col_min[i] = f;
  85. }
  86. if (Float.isNaN(col_max[i]) || col_max[i] < f) {
  87. col_max[i] = f;
  88. }
  89. }
  90. }
  91. }
  92. for (i = 0; i < nCols; i++) {
  93. col_range[i] = col_max[i] - col_min[i];
  94. if (col_range[i] <= 0) {
  95. //error
  96. if (job != null) {
  97. //job.setCurrentState(AnalysisJob.FAILED);
  98. job.log("column '" + i + "' has zero range.");
  99. job.setMessage("Layer '" + layers[i].display_name + "' cannot be included since it has no variation for the area specified.");
  100. } else {
  101. SpatialLogger.log("column '" + i + "' has zero range.");
  102. SpatialLogger.log("Layer '" + layers[i].display_name + "' cannot be included since it has no variation for the area specified.");
  103. }
  104. //return null;
  105. col_range[i] = 1;
  106. }
  107. }
  108. for (k = 0; k < pieces; k++) {
  109. float[] data = (float[]) data_pieces.get(k);
  110. for (i = 0; i < nCols; i++) {
  111. nRows = data.length / nCols;
  112. for (j = 0; j < nRows; j++) {
  113. data[i + j * nCols] = (float) ((data[i + j * nCols] - col_min[i]) / col_range[i]);
  114. }
  115. }
  116. }
  117. //1. determine correct # of groups by varying radius
  118. double start_radius = 1;
  119. double radius = start_radius;
  120. double step = radius / 2.0f;
  121. int count = 0;
  122. int[] seedidx = new int[nNoOfGroups + 1000];
  123. double[] seeds = new double[nCols * (1000 + nNoOfGroups)]; //space for an extra 1000 groups during seeding
  124. double[] oldSeeds = new double[nCols * (1000 + nNoOfGroups)];
  125. int[] oldCount = new int[nCols * (1000 + nNoOfGroups)];
  126. //initial seed as first record
  127. {
  128. seedidx[0] = 0;
  129. float[] data = (float[]) data_pieces.get(0);
  130. for (i = 0; i < nCols; i++) {
  131. seeds[i] = data[i];
  132. }
  133. }
  134. int c;
  135. while (seedidxsize != nNoOfGroups && count < 25) {
  136. seedidxsize = 1;
  137. int rowPos = 0;
  138. for (c = 0; c < pieces; c++) {
  139. float[] data = (float[]) data_pieces.get(c);
  140. nRows = data.length / nCols;
  141. rowPos = rowCounts[c] - nRows + (c == 0 ? 1 : 0);
  142. for (i = (c == 0 ? 1 : 0); i < nRows; i++, rowPos++) {
  143. for (j = 0; j < seedidxsize; j++) {
  144. //calc dist between obj(i) & obj(seedidx(j))
  145. double dist = 0;
  146. int missing = 0;
  147. for (k = 0; k < nCols; k++) {
  148. double v1 = data[i * nCols + k];
  149. double v2 = seeds[j * nCols + k];
  150. if (Double.isNaN(v1) || Double.isNaN(v2) || col_range[k] == 0) {
  151. missing++;
  152. } else {
  153. dist += java.lang.Math.abs(v1 - v2);
  154. }
  155. }
  156. //add to seedidx if distance > radius
  157. if (nCols == missing) {
  158. //error
  159. missing--;
  160. }
  161. dist = dist / (double) (nCols - missing);
  162. if (dist < radius) {
  163. break;
  164. }
  165. }
  166. if (j == seedidxsize) {
  167. seedidx[seedidxsize] = rowPos;
  168. for (k = 0; k < nCols; k++) {
  169. seeds[seedidxsize * nCols + k] = data[i * nCols + k];
  170. }
  171. seedidxsize++;
  172. }
  173. if (seedidxsize > nNoOfGroups) {
  174. break;
  175. }
  176. }
  177. //repeat break if necessary
  178. if (seedidxsize > nNoOfGroups) {
  179. break;
  180. }
  181. }
  182. count++; //force a break
  183. if (seedidxsize == nNoOfGroups) {
  184. continue;
  185. }
  186. //PERFORM RECONCILIATION OF NUMBER OF GROUPS IF count >= 20
  187. if (count < 20) {
  188. if (seedidxsize < nNoOfGroups) {
  189. radius -= step;
  190. } else if (seedidxsize > nNoOfGroups) {
  191. radius += step;
  192. }
  193. step /= 2.0;
  194. } else {
  195. //loop while number of groups is < nNoOfGroups
  196. if (seedidxsize < nNoOfGroups) {
  197. radius -= step;
  198. } else {
  199. break;
  200. }
  201. }
  202. if (job != null) {
  203. job.setProgress(count / 25.0, "seeding (" + count + ") " + seedidxsize + " != " + nNoOfGroups + " radius:" + radius);
  204. } else {
  205. SpatialLogger.log("seeding (" + count + ") " + seedidxsize + " != " + nNoOfGroups + " radius:" + radius);
  206. }
  207. if (job != null && job.isCancelled()) {
  208. return null;
  209. }
  210. }
  211. if (job != null) {
  212. job.setProgress(count / 25.0, "seeding (" + count + ") " + seedidxsize + " != " + nNoOfGroups + " radius:" + radius);
  213. } else {
  214. SpatialLogger.log("seeding (" + count + ") " + seedidxsize + " != " + nNoOfGroups + " radius:" + radius);
  215. }
  216. if (job != null && job.isCancelled()) {
  217. return null;
  218. }
  219. if (job != null) {
  220. job.setStage(2); //iterations
  221. }
  222. //setup piece data
  223. List apdList = java.util.Collections.synchronizedList(new ArrayList());
  224. for (i = 0; i < pieces; i++) {
  225. int rowcount = ((float[]) data_pieces.get(i)).length / nCols;
  226. apdList.add(new AlocPieceData(
  227. (float[]) data_pieces.get(i),
  228. new double[rowcount * seedidxsize],
  229. new short[rowcount],
  230. new double[rowcount]));
  231. }
  232. //setup shared data
  233. seeds = java.util.Arrays.copyOf(seeds, seedidxsize * nCols);
  234. int[] seedgroup_nonmissingvalues = new int[seedidxsize * nCols];
  235. double[] otherGroupMovement = new double[seedidxsize];
  236. double[] groupMovement = new double[seedidxsize];
  237. AlocSharedData[] asdCopies = new AlocSharedData[threadcount];
  238. for (i = 0; i < threadcount; i++) {
  239. asdCopies[i] = new AlocSharedData(
  240. otherGroupMovement.clone(),
  241. groupMovement.clone(),
  242. nCols,
  243. col_range.clone(),
  244. seedidxsize,
  245. seeds.clone(),
  246. seedgroup_nonmissingvalues.clone());
  247. }
  248. //setup thread data
  249. AlocThreadData[] atdArray = new AlocThreadData[threadcount];
  250. for (i = 0; i < threadcount; i++) {
  251. atdArray[i] = new AlocThreadData(
  252. new int[seedidxsize], new int[seedidxsize * nCols],
  253. new double[seedidxsize * nCols]);
  254. }
  255. //2. allocate all objects to a group
  256. int[] groupsize = new int[seedidxsize];
  257. for (i = 0; i < seedidxsize; i++) {
  258. groupsize[i] = 0;
  259. int rowPos = 0;
  260. for (int n = 0; n < pieces; n++) {
  261. float[] data = (float[]) data_pieces.get(n);
  262. nRows = data.length / nCols;
  263. rowPos = rowCounts[n] - nRows;
  264. if (seedidx[i] >= rowPos && seedidx[i] < rowCounts[n]) {
  265. for (j = 0; j < nCols; j++) {
  266. seeds[i * nCols + j] = data[(seedidx[i] - rowPos) * nCols + j];
  267. seedgroup_nonmissingvalues[i * nCols + j] = 0;
  268. }
  269. break;
  270. }
  271. }
  272. }
  273. for (i = 0; i < pieces; i++) {
  274. AlocPieceData apd = (AlocPieceData) apdList.get(i);
  275. for (j = 0; j < apd.groups.length; j++) {
  276. apd.groups[j] = -1;
  277. }
  278. }
  279. LinkedBlockingQueue<AlocPieceData> lbq = new LinkedBlockingQueue(new ArrayList<AlocPieceData>());
  280. AlocInnerLoop3[] ail = new AlocInnerLoop3[threadcount];
  281. for (i = 0; i < threadcount; i++) {
  282. ail[i] = new AlocInnerLoop3(lbq, atdArray[i], asdCopies[i]);
  283. }
  284. if (job != null) {
  285. job.log("Started AlocInnerLoops (" + threadcount + " threads): " + System.currentTimeMillis());
  286. } else {
  287. SpatialLogger.log("Started AlocInnerLoops (" + threadcount + " threads): " + System.currentTimeMillis());
  288. }
  289. int iteration = 0;
  290. int movement = -1;
  291. while (movement != 0 && iteration < MAX_ITERATIONS) {
  292. //preserve first element from each group
  293. //- ok for first iteration since centroid movement occurs at end
  294. int[] preserved_members = new int[seedidxsize];
  295. if (iteration > 0) {
  296. for (i = 0; i < seedidxsize; i++) {
  297. preserved_members[i] = -1;
  298. }
  299. int count_preserved = 0;
  300. int pos = 0;
  301. for (i = 0; i < pieces && count_preserved < seedidxsize; i++) {
  302. short[] grps = ((AlocPieceData) apdList.get(i)).groups;
  303. for (j = 0; j < grps.length; j++, pos++) {
  304. if (preserved_members[grps[j]] == -1) {
  305. preserved_members[grps[j]] = pos;
  306. count_preserved++;
  307. }
  308. }
  309. }
  310. }
  311. //get copies of shared data
  312. if (iteration == 0) {
  313. for (i = 0; i < threadcount; i++) {
  314. asdCopies[i].otherGroupMovement = otherGroupMovement;
  315. asdCopies[i].groupMovement = groupMovement;
  316. asdCopies[i].seeds = seeds;
  317. asdCopies[i].seedgroup_nonmissingvalues = seedgroup_nonmissingvalues;
  318. }
  319. }
  320. //rebuild spans
  321. CountDownLatch cdl = new CountDownLatch(pieces);
  322. for (i = 0; i < threadcount; i++) {
  323. ail[i].next(cdl);
  324. }
  325. for (i = 0; i < pieces; i++) {
  326. lbq.add((AlocPieceData) apdList.get(i));
  327. }
  328. //wait for pieces to be finished
  329. try {
  330. cdl.await();
  331. } catch (Exception e) {
  332. e.printStackTrace();
  333. }
  334. //init (for reducing distance checks)
  335. for (i = 0; i < seedidxsize * nCols; i++) {
  336. oldSeeds[i] = seeds[i];
  337. oldCount[i] = seedgroup_nonmissingvalues[i];
  338. }
  339. //update seeds and nonmissingvalues counts
  340. if (iteration == 0) {
  341. for (i = 0; i < seedidxsize; i++) {
  342. for (j = 0; j < nCols; j++) {
  343. seeds[i * nCols + j] = Float.NaN;
  344. }
  345. }
  346. }
  347. movement = 0;
  348. for (i = 0; i < threadcount; i++) {
  349. movement += atdArray[i].movement;
  350. double[] seed_adj = atdArray[i].seeds_adjustment;
  351. int[] seed_nmv_adj = atdArray[i].seeds_nmv_adjustment;
  352. for (j = 0; j < seedidxsize * nCols; j++) {
  353. if (Double.isNaN(seeds[j])
  354. && seed_adj[j] != 0) {
  355. seeds[j] = seed_adj[j];
  356. } else {
  357. seeds[j] += seed_adj[j];
  358. }
  359. seedgroup_nonmissingvalues[j] += seed_nmv_adj[j];
  360. }
  361. }
  362. //update group sizes
  363. for (i = 0; i < threadcount; i++) {
  364. int[] seed_gs_adj = atdArray[i].groupsize;
  365. for (j = 0; j < seedidxsize; j++) {
  366. groupsize[j] += seed_gs_adj[j];
  367. }
  368. }
  369. //enforce minimum group membership of size 1
  370. if (iteration > 0) {
  371. boolean repeat = true;
  372. while (repeat) {
  373. repeat = false;
  374. //check
  375. for (i = 0; i < seedidxsize; i++) {
  376. if (groupsize[i] == 0) {
  377. repeat = true;
  378. //move original member back here
  379. //old group == cg, new group == i
  380. //row == preserved_members[i]
  381. int row = preserved_members[i];
  382. int pos = 0;
  383. int cg = -1;
  384. for (j = 0; j < pieces; j++) {
  385. short[] grps = ((AlocPieceData) apdList.get(j)).groups;
  386. if (pos + grps.length > row) {
  387. cg = grps[row - pos];
  388. break;
  389. }
  390. pos += grps.length;
  391. }
  392. groupsize[i]++;
  393. groupsize[cg]--;
  394. //update seeds
  395. float v1;
  396. float[] data = null;
  397. for (k = 0; k < pieces; k++) {
  398. if (row < rowCounts[k]) {
  399. data = (float[]) data_pieces.get(k);
  400. row = row - (rowCounts[k] - data.length / nCols);
  401. break;
  402. }
  403. }
  404. for (j = 0; j < nCols; j++) {
  405. v1 = data[row * nCols + j];
  406. if (!Float.isNaN(v1)) {
  407. //old seed
  408. seeds[cg * nCols + j] -= v1;
  409. seedgroup_nonmissingvalues[cg * nCols + j]--;
  410. //new seed
  411. seedgroup_nonmissingvalues[i * nCols + j]++;
  412. } else {
  413. i = i + 1 - 1;
  414. }
  415. //new seed
  416. seeds[i * nCols + j] = v1;
  417. }
  418. }
  419. }
  420. }
  421. }
  422. //calc max movement (for reducing distance checks)
  423. double mov;
  424. double maxmov = -1 * Double.MAX_VALUE;
  425. for (i = 0; i < seedidxsize; i++) {
  426. mov = 0;
  427. int nmissing = 0;
  428. if (iteration == 0) {
  429. for (j = 0; j < nCols; j++) {
  430. k = i * nCols + j;
  431. if (seedgroup_nonmissingvalues[k] > 0) {
  432. mov += Math.abs(seeds[k] / (double) seedgroup_nonmissingvalues[k]);
  433. } else {
  434. nmissing++;
  435. }
  436. }
  437. } else {
  438. for (j = 0; j < nCols; j++) {
  439. k = i * nCols + j;
  440. if (oldCount[k] == 0 || seedgroup_nonmissingvalues[k] == 0) {
  441. nmissing++;
  442. } else {
  443. double v1 = oldSeeds[k] / (double) oldCount[k];
  444. double v2 = seeds[k] / (double) seedgroup_nonmissingvalues[k];
  445. mov += Math.abs(v1 - v2);
  446. }
  447. }
  448. }
  449. mov /= (double) (nCols - nmissing);
  450. if (mov > maxmov) {
  451. maxmov = mov;
  452. }
  453. otherGroupMovement[i] = mov;
  454. groupMovement[i] = mov;
  455. }
  456. for (i = 0; i < seedidxsize; i++) {
  457. otherGroupMovement[i] += maxmov;
  458. }
  459. //backup min_movement
  460. if (min_movement == -1 || min_movement > movement) {
  461. min_movement = movement;
  462. //copy groups to min_groups
  463. k = 0;
  464. for (i = 0; i < pieces; i++) {
  465. short[] grps = ((AlocPieceData) apdList.get(i)).groups;
  466. double[] dist = ((AlocPieceData) apdList.get(i)).rowDist;
  467. for (j = 0; j < grps.length; j++, k++) {
  468. min_groups[k] = grps[j];
  469. min_dists[k] = dist[j];
  470. }
  471. }
  472. }
  473. //test for -1 group allocations here
  474. iteration++;
  475. //job progress is non-linear, use something else so estimates are better
  476. if (job != null) {
  477. job.setProgress(Math.sqrt(iteration / 100.0), "moving (" + iteration + ") > moved " + movement);
  478. }
  479. if (job != null && job.isCancelled()) {
  480. for (i = 0; i < threadcount; i++) {
  481. ail[i].kill();
  482. }
  483. return null;
  484. }
  485. }
  486. if (job != null) {
  487. job.setProgress(1);
  488. }
  489. for (i = 0; i < threadcount; i++) {
  490. ail[i].kill();
  491. }
  492. //reverse column range standardization
  493. for (k = 0; k < pieces; k++) {
  494. float[] data = (float[]) data_pieces.get(k);
  495. for (i = 0; i < nCols; i++) {
  496. nRows = data.length / nCols;
  497. for (j = 0; j < nRows; j++) {
  498. data[i + j * nCols] = (float) ((data[i + j * nCols] * col_range[i]) + col_min[i]);
  499. }
  500. }
  501. }
  502. //reverse row range standardization
  503. double[] extents = (double[]) data_pieces.get(data_pieces.size() - 1);
  504. for (k = 0; k < pieces; k++) {
  505. float[] data = (float[]) data_pieces.get(k);
  506. for (i = 0; i < nCols; i++) {
  507. nRows = data.length / nCols;
  508. for (j = 0; j < nRows; j++) {
  509. data[i + j * nCols] = (float) ((data[i + j * nCols] * (extents[6 + i * 2 + 1] - extents[6 + i * 2])) + extents[6 + i * 2]);
  510. }
  511. }
  512. }
  513. iterationCount[0] = iteration;
  514. //write-back row groups
  515. return min_groups;
  516. }
  517. }
  518. /**
  519. * for data_pieces + larger memory usage (#cells * #groups)
  520. *
  521. * @author Adam
  522. */
  523. class AlocInnerLoop3 extends Thread {
  524. LinkedBlockingQueue<AlocPieceData> lbq;
  525. AlocThreadData alocThreadData;
  526. AlocSharedData alocSharedData;
  527. CountDownLatch countDownLatch;
  528. public AlocInnerLoop3(LinkedBlockingQueue<AlocPieceData> lbq_, AlocThreadData alocThreadData_, AlocSharedData alocSharedData_) {
  529. lbq = lbq_;
  530. alocThreadData = alocThreadData_;
  531. alocSharedData = alocSharedData_;
  532. setPriority(Thread.MIN_PRIORITY);
  533. }
  534. @Override
  535. public void run() {
  536. try {
  537. while (true) {
  538. // run on next batch
  539. AlocPieceData next = lbq.take();
  540. alocInnerLoop(next);
  541. countDownLatch.countDown();
  542. }
  543. } catch (InterruptedException ex) {
  544. } catch (Exception e) {
  545. e.printStackTrace();
  546. }
  547. }
  548. public void next(CountDownLatch newCountDownLatch) {
  549. //reset movement
  550. alocThreadData.movement = 0;
  551. countDownLatch = newCountDownLatch;
  552. if (!isAlive()) {
  553. this.start();
  554. }
  555. }
  556. private void alocInnerLoop(AlocPieceData apd) {
  557. float[] data = apd.data;
  558. double[] distances = apd.distances;
  559. short[] groups = apd.groups;
  560. final double[] groupMovement = alocSharedData.groupMovement;
  561. final int nCols = alocSharedData.nCols;
  562. final double[] col_range = alocSharedData.col_range;
  563. final int seedidxsize = alocSharedData.seedidxsize;
  564. final double[] seeds = alocSharedData.seeds;
  565. final int[] seedgroup_nonmissingvalues = alocSharedData.seedgroup_nonmissingvalues;
  566. int i, j, k;
  567. double min_dist_value = 0.00001f;
  568. int min_idx = 0;
  569. double dist;
  570. int missing;
  571. double v2;
  572. double v1;
  573. double min_dist;
  574. int nRows = data.length / nCols;
  575. int rws;
  576. double gm;
  577. int grp;
  578. //int skips = 0;
  579. for (i = 0; i < nRows; i++) {
  580. rws = i * seedidxsize;
  581. grp = groups[i];
  582. if (grp >= 0) {
  583. distances[rws + grp] += groupMovement[grp] + min_dist_value;
  584. gm = distances[rws + grp];
  585. min_idx = groups[i];
  586. if (Double.isNaN(gm)) {
  587. gm = Float.MAX_VALUE;
  588. }
  589. } else {
  590. gm = 0;
  591. }
  592. min_dist = Float.MAX_VALUE;
  593. for (j = 0; j < seedidxsize; j++) {
  594. distances[rws + j] -= groupMovement[j];
  595. if (j == grp || !(distances[rws + j] > gm)) {
  596. //calc dist between obj(i) & obj(seeds(j))
  597. dist = 0;
  598. missing = 0;
  599. for (k = 0; k < nCols; k++) {
  600. v1 = data[i * nCols + k];
  601. v2 = seeds[j * nCols + k];
  602. if (Double.isNaN(v1) || Double.isNaN(v2) || col_range[k] == 0) {
  603. missing++;
  604. } else {
  605. if (seedgroup_nonmissingvalues[j * nCols + k] > 0) {
  606. v2 = v2 / seedgroup_nonmissingvalues[j * nCols + k];
  607. }
  608. dist += java.lang.Math.abs(v1 - v2);//range == 1 (standardized 0-1); / (float) col_range[k];
  609. }
  610. }
  611. dist = dist / (double) (nCols - missing);
  612. if (min_dist > dist) {
  613. min_dist = dist;
  614. min_idx = j;
  615. }
  616. distances[rws + j] = dist;
  617. }
  618. //else{
  619. // skips++;
  620. //}
  621. }
  622. //loop for checking
  623. /*
  624. for (j = 0; j < seedidxsize; j++) {
  625. if( j != grp && !(distances[rws + j] <= gm)){
  626. //calc dist between obj(i) & obj(seeds(j))
  627. dist = 0;
  628. missing = 0;
  629. for (k = 0; k < nCols; k++) {
  630. v1 = data[i * nCols + k];
  631. v2 = seeds[j * nCols + k];
  632. if (Float.isNaN(v1) || Float.isNaN(v2) || col_range[k] == 0) {
  633. missing++;
  634. } else {
  635. if (seedgroup_nonmissingvalues[j * nCols + k] > 0) {
  636. v2 = v2 / seedgroup_nonmissingvalues[j * nCols + k];
  637. }
  638. dist += java.lang.Math.abs(v1 - v2);//range == 1 (standardized 0-1); / (float) col_range[k];
  639. }
  640. }
  641. dist = dist / (float) (nCols - missing);
  642. if (min_dist >= dist && j != grp) {
  643. //should NEVER get here
  644. min_dist = dist;
  645. min_idx = j;
  646. }
  647. //do not store distance in test; distances[rws + j] = dist;
  648. }
  649. }*/
  650. //add this group to group min_idx;
  651. if (grp != (short) min_idx) {
  652. alocThreadData.movement++;
  653. //remove from previous group
  654. if (grp >= 0) {
  655. alocThreadData.groupsize[grp]--;
  656. for (j = 0; j < nCols; j++) {
  657. if (!Float.isNaN(data[i * nCols + j])) {
  658. alocThreadData.seeds_adjustment[grp * nCols + j] -= data[i * nCols + j];
  659. alocThreadData.seeds_nmv_adjustment[grp * nCols + j]--;
  660. }
  661. }
  662. }
  663. //reassign group
  664. groups[i] = (short) min_idx;
  665. //add to new group
  666. alocThreadData.groupsize[min_idx]++;
  667. for (j = 0; j < nCols; j++) {
  668. if (!Float.isNaN(data[i * nCols + j])) {
  669. alocThreadData.seeds_adjustment[min_idx * nCols + j] += data[i * nCols + j];
  670. alocThreadData.seeds_nmv_adjustment[min_idx * nCols + j]++;
  671. }
  672. }
  673. }
  674. }
  675. }
  676. void kill() {
  677. this.interrupt();
  678. }
  679. }
  /**
   * Working arrays for one piece of the data set.
   *
   * Plain holder: the constructor stores the given array references without
   * copying them, and the fields are accessed directly by the other classes
   * in this file.
   */
  class AlocPieceData {

      /** Row values of the piece. */
      public float[] data;
      /** Cached distances between rows and seeds. */
      public double[] distances;
      /** Group index per row. */
      public short[] groups;
      /** Per-row distance array supplied by the creator of the piece. */
      public double[] rowDist;

      /**
       * @param data_ row values
       * @param distances_ row-to-seed distance cache
       * @param groups_ group index per row
       * @param rowDist_ per-row distance array
       */
      public AlocPieceData(float[] data_,
              double[] distances_,
              short[] groups_,
              double[] rowDist_) {
          data = data_;
          distances = distances_;
          groups = groups_;
          rowDist = rowDist_;
      }
  }
  /**
   * Accumulators owned by a single worker thread.
   *
   * Plain holder: the constructor stores the given array references without
   * copying them and sets {@code movement} to zero.
   */
  class AlocThreadData {

      /** Change in member count per group. */
      public int[] groupsize;
      /** Change in the number of non-missing values per group and column. */
      public int[] seeds_nmv_adjustment;
      /** Change in the sum of values per group and column. */
      public double[] seeds_adjustment;
      /** Number of rows that changed group. */
      public int movement;

      /**
       * @param groupsize_ per-group member count accumulator
       * @param seeds_nmv_adjustment_ per-group, per-column non-missing count accumulator
       * @param seeds_adjustment_ per-group, per-column value sum accumulator
       */
      public AlocThreadData(int[] groupsize_, int[] seeds_nmv_adjustment_,
              double[] seeds_adjustment_) {
          groupsize = groupsize_;
          seeds_nmv_adjustment = seeds_nmv_adjustment_;
          seeds_adjustment = seeds_adjustment_;
          movement = 0;
      }
  }
  /**
   * Seed and movement data handed to a worker thread.
   *
   * Plain holder: the constructor stores the given values and array
   * references without copying them. The fields are left assignable because
   * they are reassigned from outside this class.
   */
  class AlocSharedData {

      /** Per-group movement with the largest group movement added. */
      public double[] otherGroupMovement;
      /** Per-group movement of the seed in the last pass. */
      public double[] groupMovement;
      /** Number of columns per row. */
      public int nCols;
      /** Range of every column. */
      public double[] col_range;
      /** Number of seeds (groups). */
      public int seedidxsize;
      /** Seed values, {@code nCols} entries per seed. */
      public double[] seeds;
      /** Number of non-missing values behind every seed entry. */
      public int[] seedgroup_nonmissingvalues;

      /**
       * @param otherGroupMovement_ per-group movement plus the largest movement
       * @param groupMovement_ per-group movement
       * @param nCols_ number of columns per row
       * @param col_range_ range of every column
       * @param seedidxsize_ number of seeds
       * @param seeds_ seed values
       * @param seedgroup_nonmissingvalues_ non-missing counts per seed entry
       */
      public AlocSharedData(
              double[] otherGroupMovement_,
              double[] groupMovement_,
              int nCols_,
              double[] col_range_,
              int seedidxsize_,
              double[] seeds_,
              int[] seedgroup_nonmissingvalues_) {
          otherGroupMovement = otherGroupMovement_;
          groupMovement = groupMovement_;
          nCols = nCols_;
          col_range = col_range_;
          seedidxsize = seedidxsize_;
          seeds = seeds_;
          seedgroup_nonmissingvalues = seedgroup_nonmissingvalues_;
      }
  }