PageRenderTime 62ms CodeModel.GetById 9ms RepoModel.GetById 1ms app.codeStats 0ms

/src/ifcSoft/model/som/SOM.java

https://code.google.com/p/ifcsoft/
Java | 655 lines | 345 code | 95 blank | 215 comment | 58 complexity | 01731b92c3affb7c55bb9db75145d0c1 MD5 | raw file
Possible License(s): GPL-3.0
  1. /**
  2. * Copyright (C) 2011 Kyle Thayer <kyle.thayer AT gmail.com>
  3. *
  4. * This file is part of the IFCSoft project (http://ifcsoft.com)
  5. *
  6. * IFCSoft is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. package ifcSoft.model.som;
  20. import java.awt.Point;
  21. import java.util.LinkedList;
  22. import java.util.concurrent.BlockingQueue;
  23. import java.util.concurrent.LinkedBlockingQueue;
  24. import java.util.concurrent.atomic.AtomicBoolean;
  25. import ifcSoft.model.som.jobs.ComputeStandardSOMJob;
  26. import ifcSoft.model.som.jobs.FindMembershipsJob;
  27. import ifcSoft.model.thread.ThreadJob;
  28. import ifcSoft.model.dataSet.DataSet;
  29. import ifcSoft.model.DataSetProxy;
  30. import ifcSoft.model.dataSet.dataSetScalar.DataSetScalar;
  31. import ifcSoft.model.som.jobs.ComputeBatchSOMJob;
  32. import java.util.logging.Level;
  33. import java.util.logging.Logger;
  34. import org.puremvc.java.patterns.facade.Facade;
  35. /**
  36. * The Self Organizing Map object
  37. * @author Kyle Thayer <kthayer@emory.edu>
  38. */
  39. public class SOM {
  40. protected Facade facade;
  41. /**
  42. * SOM made of square nodes
  43. */
  44. public static int SQUARESOM = 1;
  45. /**
  46. * SOM made of Hexagonal Nodes
  47. */
  48. public static int HEXSOM = 2;
  49. /**
  50. * Cluster according to Edge Map
  51. */
  52. public static final int ECLUSTER = 1;
  53. /**
  54. * Cluster according to multiple edges on the Edge Map
  55. */
  56. public static final int MECLUSTER = 2;
  57. /**
  58. * Cluster according to the UMap
  59. */
  60. public static final int UCLUSTER = 3; //UMap cluster
  61. /**
  62. * The SOM map (array of nodes)
  63. */
  64. protected SOMNode SOMnodes[][];
  65. /**
  66. * In order to go back to an old SOM map, but I might not even be using this now.
  67. * TODO: Rather than overwrite a SOM internally, it should be replaced by a new SOM. I might already do this
  68. */
  69. protected SOMNode OldSOMnodes[][];
  70. protected DataSetScalar datasetScalar;
  71. protected float[][] UMap;
  72. protected int[][] DenseMap;
  73. protected int denseMapPlaced; //number of points placed in the density map
  74. protected double placedPointsError;
  75. protected int[][][] subsetDenseMaps;
  76. protected int [] subsetDenseMapPlaced;
  77. protected LinkedList<String> rawSetNames;
  78. protected int[][][] dataSetDenseMaps; //this is for other data sets (not subsets)
  79. protected int [] dataSetDenseMapPlaced;
  80. protected LinkedList<DataSetProxy> dataSets; //this is only to be accessed, not written to
  81. protected float[][][] UEmap;
  82. protected short[][] MemberArray; //col 1 = x, col 2 = y, col 3 = setNum
  83. protected int mapType = 0;
  84. /**
  85. * If the SOM used log weights.
  86. */
  87. //public boolean isLog = true;
  88. AtomicBoolean iscanceled = new AtomicBoolean(false);
  89. AtomicBoolean ispaused = new AtomicBoolean(false);
  90. protected LinkedList<FindMembershipsJob> pausedJobs = new LinkedList<FindMembershipsJob>();
  91. protected float SOMprogress = 0;
  92. protected float weighting[]; //weighting of the different types
  93. protected int channelsUsed[] = null; //if only some channels are used, this lists them (for speed boost)
  94. protected boolean allUsedWeightsSame = true; //if all the used weights are the same, then BMU doesn't need to use the weights
  95. //this will probably belong further up the the program and be passed to me
  96. protected BlockingQueue<ThreadJob> jobqueue = new LinkedBlockingQueue<ThreadJob>();
  97. public SOM(SOMSettings somSettings, Facade facade){
  98. int dims = somSettings.datasetscalar.getDimensions();
  99. SOMnodes = new SOMNode[somSettings.width][somSettings.height];
  100. this.mapType = HEXSOM; //for now we default as HEX map
  101. this.weighting = somSettings.weights;
  102. //this.isLog = isLog;
  103. this.datasetScalar = somSettings.datasetscalar;
  104. this.facade = facade;
  105. if(this.weighting == null || this.weighting.length < dims){
  106. this.weighting = new float[dims];
  107. for(int i = 0; i < dims; i++){
  108. this.weighting[i] = 1;
  109. }
  110. }
  111. checkWeighting();
  112. if(somSettings.initType == SOMSettings.LINEARINIT){
  113. SOMInitFns.linearInitialize(somSettings.width, somSettings.height, datasetScalar.getDataSet(), this);
  114. }else if (somSettings.initType == SOMSettings.RANDOMINIT){
  115. SOMInitFns.randomInitialize(somSettings.width, somSettings.height, this);
  116. }else if (somSettings.initType == SOMSettings.FILEINIT){
  117. try {
  118. SOMInitFns.loadSOMfile(dims, this);
  119. } catch (Exception ex) {
  120. Logger.getLogger(SOM.class.getName()).log(Level.SEVERE, null, ex);
  121. }
  122. }
  123. //TODO: should I just go ahead and start the threads to await jobs?
  124. //what happens to threads just sitting out there? I probably should have
  125. //universal threads and jobQueue associated with the program, and pass it to the SOM
  126. //have a listener to get the threads if they exist?
  127. for(int i = 0; i < 4; i++){
  128. Thread newthread = new Thread(new SOMThread(jobqueue, facade));
  129. newthread.setPriority(newthread.getPriority() - 1);
  130. //drop the priority by 1 so that it runs in background and doesn't
  131. //interfere with the gui
  132. newthread.start();
  133. }
  134. }
  135. /**
  136. * This function saves information on the weighting used, allowing findBMU to run faster
  137. */
  138. private void checkWeighting() {
  139. //set the channelsUsed if some aren't
  140. int numChannelsUsed = 0;
  141. for(int i = 0; i <weighting.length; i++){
  142. if(weighting[i] != 0){
  143. numChannelsUsed++;
  144. }
  145. }
  146. if(numChannelsUsed < weighting.length){
  147. channelsUsed = new int[numChannelsUsed];
  148. int channelsUsedI = 0;
  149. for(int i = 0; i < weighting.length; i++){
  150. if(weighting[i] != 0){
  151. channelsUsed[channelsUsedI] = i;
  152. channelsUsedI++;
  153. }
  154. }
  155. }
  156. //find out if all used weights are the same
  157. allUsedWeightsSame = true;
  158. if(channelsUsed == null){
  159. float firstWeight = weighting[0];
  160. for(int i = 1; i < weighting.length; i++){
  161. if(weighting[i] != firstWeight){
  162. allUsedWeightsSame = false;
  163. }
  164. }
  165. }else{
  166. float firstWeight = weighting[channelsUsed[0]];
  167. for(int i = 1; i < channelsUsed.length; i++){
  168. if(weighting[channelsUsed[i]] != firstWeight){
  169. allUsedWeightsSame = false;
  170. }
  171. }
  172. }
  173. }
  174. String[] getDimLabels() {
  175. return datasetScalar.getColLabels();
  176. }
  177. /**
  178. * Calculates the SOM with the given iterations and max neighborhood size.
  179. * @param somSettings
  180. */
  181. public void calculateSOM(SOMSettings somSettings){
  182. SOMprogress = 0;
  183. iscanceled.set(false); //cancel anything else I was doing earlier
  184. //and make a new boolean cancel object for the next job
  185. iscanceled = new AtomicBoolean(false);
  186. ThreadJob newjob;
  187. System.out.println("calculateSOM type:" + somSettings.SOMType);
  188. if(somSettings.SOMType.equals(SOMSettings.CLASSICSOM)){
  189. newjob = new ComputeStandardSOMJob(datasetScalar,
  190. somSettings.classicIterations, somSettings.classicMaxNeighborhood, somSettings.classicMinNeighborhood,
  191. this, iscanceled);
  192. }else{
  193. newjob = new ComputeBatchSOMJob(datasetScalar,
  194. somSettings.batchSteps,somSettings.batchMaxNeighborhood, somSettings.batchMinNeighborhood,
  195. somSettings.batchPointsPerNode, this, iscanceled);
  196. }
  197. try {
  198. jobqueue.put(newjob);
  199. } catch (InterruptedException e) {
  200. e.printStackTrace();
  201. }
  202. }
  203. /**
  204. * Pause the jobs on the job queue.
  205. */
  206. public synchronized void pauseJobs(){
  207. ispaused.set(true);
  208. }
  209. /**
  210. * Put a paused job on a list of paused jobs for the SOM.
  211. * @param fmj
  212. */
  213. public synchronized void insertPausedJob(FindMembershipsJob fmj){
  214. if(ispaused.get()){ //in case it got here after the jobs were unpaused
  215. pausedJobs.addLast(fmj);
  216. }else{ //put it back in the job queue where it belongs
  217. try {
  218. jobqueue.put(fmj);
  219. } catch (InterruptedException e) {
  220. e.printStackTrace();
  221. }
  222. }
  223. }
  224. /**
  225. * Restart the jobs that are on the paused job list for the SOM.
  226. */
  227. public synchronized void restartPausedJobs(){
  228. ispaused.set(false);
  229. try {
  230. while(pausedJobs.size() > 0){
  231. jobqueue.put(pausedJobs.removeFirst());
  232. }
  233. } catch (InterruptedException e) {
  234. e.printStackTrace();
  235. }
  236. }
  237. /**
  238. * Cancel the SOM job.
  239. */
  240. public void cancelSOM() {
  241. iscanceled.set(true);
  242. }
  243. /**
  244. * Cancel the SOM job.
  245. */
  246. public void cancelJobs() {
  247. iscanceled.set(true);
  248. }
  249. protected float getDistance(Point p1, Point p2){
  250. float euclidD = 0;
  251. float pt1W[] = getMapWeights(p1);
  252. float pt2W[] = getMapWeights(p2);
  253. for(int k = 0; k < pt1W.length; k++){
  254. euclidD+= Math.pow(weighting[k]*(pt1W[k] - pt2W[k]), 2);
  255. }
  256. return (float) Math.sqrt(euclidD);
  257. }
  258. /**
  259. * Creates and clears the density map.
  260. * Density map is how many are members of each node.
  261. */
  262. public synchronized void initDenseMap(){
  263. denseMapPlaced = 0;
  264. DenseMap = new int[SOMnodes.length][SOMnodes[0].length];
  265. //I think it starts out clear, but I'll clear it just in case
  266. for(int i =0; i < DenseMap.length; i++){
  267. for(int j=0; j < DenseMap[0].length; j++){
  268. DenseMap[i][j]=0;
  269. }
  270. }
  271. }
  272. /**
  273. * Membership of each data point is saved as a short array (32,767 max val, but also negatives)?
  274. * @param length
  275. */
  276. public synchronized void initMemberArray(int length){
  277. MemberArray = new short[length][3]; //the map pos of each member
  278. //It should be cleared as in all belong to cell (-1,-1)
  279. for(int i =0; i < MemberArray.length; i++){
  280. for(int j=0; j < MemberArray[0].length; j++){
  281. MemberArray[i][j]=-1;
  282. }
  283. }
  284. }
  285. /**
  286. * Initialize the dense maps for the raw data set dense maps.
  287. */
  288. public synchronized void initSetDenseMaps() {
  289. rawSetNames = datasetScalar.getRawSetNames();
  290. subsetDenseMaps = new int[rawSetNames.size()][SOMnodes.length][SOMnodes[0].length];
  291. subsetDenseMapPlaced = new int[rawSetNames.size()];
  292. //I think it starts out clear, but I'll clear it just in case
  293. for(int k = 0; k < subsetDenseMaps.length; k++){
  294. subsetDenseMapPlaced[k] = 0;
  295. for(int i =0; i < subsetDenseMaps[0].length; i++){
  296. for(int j=0; j < subsetDenseMaps[0][0].length; j++){
  297. subsetDenseMaps[k][i][j] = 0;
  298. }
  299. }
  300. }
  301. }
  302. /**
  303. * Set the given data point as a member of the given node.
  304. * @param mem
  305. * @param p
  306. */
  307. public synchronized void setMember(int mem, Point node, double dist){
  308. placedPointsError += dist;
  309. String set = datasetScalar.getPointSetName(mem);
  310. if(set == null){
  311. System.out.println("Error in SOM:setMember - getPointSetName returned null for mem = "+mem);
  312. return;
  313. }
  314. int setNum = rawSetNames.indexOf(set);
  315. MemberArray[mem][0] = (short) node.x;
  316. MemberArray[mem][1] = (short) node.y;
  317. MemberArray[mem][2] = (short) setNum;
  318. //increment the appropriate density maps
  319. (DenseMap[node.x][node.y])++;
  320. denseMapPlaced++;
  321. (subsetDenseMaps[setNum][node.x][node.y])++;
  322. (subsetDenseMapPlaced[setNum])++;
  323. }
  324. public double getPlacedPointsError(){
  325. return placedPointsError;
  326. }
  327. /**
  328. * Set the given data point of the given data set as a member of the given node.
  329. * @param DataSetNum
  330. * @param mem
  331. * @param p
  332. */
  333. public synchronized void setOtherDataSetMember(int DataSetNum, int mem, Point p, double dist){
  334. //increment the appropriate density maps
  335. (this.dataSetDenseMaps[DataSetNum][p.x][p.y])++;
  336. this.dataSetDenseMapPlaced[DataSetNum]++;
  337. }
  338. /**
  339. * Return the number of points placed on the density map.
  340. * @return
  341. */
  342. public synchronized int densityMapPlaced(){
  343. return denseMapPlaced;
  344. }
  345. /**
  346. * Return the number of points that have been placed on the subset density maps.
  347. * @return
  348. */
  349. public synchronized int[] subsetDensityMapsPlaced(){
  350. int [] ret = new int[subsetDenseMapPlaced.length];
  351. System.arraycopy(subsetDenseMapPlaced, 0, ret, 0, subsetDenseMapPlaced.length);
  352. return ret;
  353. }
  354. /**
  355. * Return the density Map of the requested other data set.
  356. * @param dataSet
  357. * @return
  358. */
  359. synchronized int[][] getDataSetDenseMap(int dataSet) {
  360. if(dataSetDenseMaps == null || dataSetDenseMaps.length <= dataSet){
  361. return null;
  362. }
  363. return dataSetDenseMaps[dataSet];
  364. }
  365. /**
  366. * Returns the number of points that have been placed for the given data set.
  367. * @param dataSet
  368. * @return
  369. */
  370. synchronized int dataSetDenseMapPlaced(int dataSet) {
  371. if(dataSetDenseMapPlaced == null || dataSetDenseMapPlaced.length <= dataSet){
  372. return 0;
  373. }
  374. return dataSetDenseMapPlaced[dataSet];
  375. }
  376. /**
  377. * Return the number of points that have been placed on the density map for other data sets.
  378. */
  379. int getOtherDataSetsPlaced(){
  380. int placed = 0;
  381. if(dataSetDenseMapPlaced != null){
  382. for(int i = 0; i < dataSetDenseMapPlaced.length; i++){
  383. placed += dataSetDenseMapPlaced[i];
  384. }
  385. }
  386. return placed;
  387. }
  388. /**
  389. * Returns the weights at a given node.
  390. */
  391. protected float[] getMapWeights(Point pt) {
  392. return SOMnodes[pt.x][pt.y].getWeights();
  393. }
  394. public float[] scalePoint(float[] unweighted){
  395. return datasetScalar.scalePoint(unweighted);
  396. }
  397. /**
  398. * Gets the horizontal number of SOM nodes.
  399. * @return the horizontal number of SOM nodes.
  400. */
  401. public int getWidth() {
  402. return SOMnodes.length;
  403. }
  404. /**
  405. * Gets the vertical number of SOM nodes.
  406. * @return the vertical number of SOM nodes.
  407. */
  408. public int getHeight() {
  409. return SOMnodes[0].length;
  410. }
  411. /**
  412. * Returns the type of SOM map (ie. Square, Hexagonal , ...)
  413. * @return the type of SOM map (ie. Square, Hexagonal, ...)
  414. */
  415. public int getSOMType() {
  416. return mapType;
  417. }
  418. /**
  419. * Returns the weighting used to build the SOM.
  420. * @return
  421. */
  422. public float[] getWeighting(){
  423. return weighting;
  424. }
  425. /**
  426. * progress is fractional between 0 and 1 unless it is actually done, then it is 100
  427. */
  428. int tempProg = 0;
  429. /**
  430. * Set the progress of the SOM calculation
  431. * @param p
  432. */
  433. public synchronized void setProgress(float p){
  434. SOMprogress = p;
  435. if(SOMprogress > (tempProg/ 10.0)+.1){
  436. tempProg++;
  437. }
  438. }
  439. /**
  440. * progress is fractional between 0 and 1 unless it is actually done, then it is 100
  441. * @return
  442. */
  443. public synchronized float getProgress(){
  444. return SOMprogress;
  445. }
  446. /**
  447. * TODO: this is not thread safe yet, I'll have to do something about that.
  448. * This might be outdated as well from when I overwrote SOM maps rather than made new ones.
  449. */
  450. public synchronized void clearUMaps() {
  451. UMap = null;
  452. DenseMap = null;
  453. UEmap = null;
  454. }
  455. /**
  456. * Returns the members of the given SOM node.
  457. * TODO: I have no error checking here, very dangerous
  458. * @param p
  459. * @return
  460. */
  461. public synchronized int[] getCellMembers(Point p) {
  462. LinkedList<Integer> membersLL = new LinkedList<Integer>();
  463. for(int i = 0; i < MemberArray.length; i++){
  464. if(MemberArray[i][0] == p.x && MemberArray[i][1] == p.y){
  465. membersLL.add(new Integer(i));
  466. }
  467. }
  468. int[] cellMembs = new int[membersLL.size()];
  469. for(int i = 0; i < cellMembs.length; i++){
  470. cellMembs[i] = membersLL.removeFirst();
  471. }
  472. return cellMembs;
  473. }
  474. /**
  475. * Returns the values of the given SOM node.
  476. * @param p
  477. * @return
  478. */
  479. float[] getCellVals(Point p) {
  480. return datasetScalar.unscalePoint(getMapWeights(p));
  481. }
  482. /**
  483. * Returns the number of dimensions.
  484. * @return
  485. */
  486. public int getDimensions() {
  487. return weighting.length;
  488. }
  489. /**
  490. * Returns the data set used to build the SOM.
  491. * @return
  492. */
  493. public DataSet getDataSet(){
  494. return datasetScalar.getDataSet();
  495. }
  496. public DataSetScalar getDataSetScalar(){
  497. return datasetScalar;
  498. }
  499. /**
  500. * Returns the given other data set.
  501. * @param dataSetNum
  502. * @return
  503. */
  504. DataSet getDataSet(int dataSetNum) {
  505. return dataSets.get(dataSetNum).getData();
  506. }
  507. /**
  508. * returns the length of the data set used to build the SOM.
  509. * @return
  510. */
  511. int dataLength() {
  512. return datasetScalar.length();
  513. }
  514. /**
  515. * Returns the length of the other data sets combined.
  516. * @return
  517. */
  518. synchronized int getOtherDataSetLength(){
  519. int totalLength = 0;
  520. if(dataSetDenseMaps!= null){
  521. for(int i = 0; i < dataSetDenseMaps.length; i++){
  522. if(dataSetDenseMaps[i] != null){
  523. totalLength += getDataSet(i).length();
  524. }
  525. }
  526. }
  527. return totalLength;
  528. }
  529. /**
  530. * returns the max value of the given dimension of the data used to build the SOM.
  531. * @param i
  532. * @return
  533. */
  534. float getMax(int i) {
  535. return datasetScalar.getMax(i);
  536. }
  537. /**
  538. * returns the min value of the given dimension of the data used to build the SOM.
  539. * @param i
  540. * @return
  541. */
  542. float getMin(int i) {
  543. return datasetScalar.getMin(i);
  544. }
  545. /**
  546. * returns the name of the data set used to build the SOM.
  547. * @return
  548. */
  549. String getDataSetName() {
  550. return datasetScalar.getName();
  551. }
  552. /**
  553. * Returns the names of the raw data sets that comprise the data set used to
  554. * build the SOM.
  555. * @return
  556. */
  557. LinkedList<String> getRawSetNames() {
  558. return this.rawSetNames;
  559. }
  560. }