PageRenderTime 99ms CodeModel.GetById 31ms RepoModel.GetById 1ms app.codeStats 0ms

/code/cs475/FeatureVector.java

https://github.com/dcrankshaw/cs475-machine_learning
Java | 199 lines | 154 code | 21 blank | 24 comment | 43 complexity | 820f41e42b277f32193c41d46e51be0a MD5 | raw file
  1. package cs475;
  2. import java.io.Serializable;
  3. import java.util.*;
  4. // Sparse vector implented as a Java TreeMap
  5. // NOTE All indices are 1-indexed
  6. public class FeatureVector implements Serializable, Iterable<Feature> {
  7. private static final long serialVersionUID = 1L;
  8. private TreeMap<Integer, Feature> vector;
  9. private int maxIndex_;
  10. private int numFeatures = 0;
  11. public FeatureVector() {
  12. vector = new TreeMap<Integer, Feature>();
  13. maxIndex_ = 0;
  14. }
  15. public int dimensionality() {
  16. return maxIndex_;
  17. }
  18. public boolean isEmpty() {
  19. return (numFeatures == 0);
  20. }
  21. public Set<Integer> getIndices() {
  22. return vector.keySet();
  23. }
  24. public void add(int index, double value) {
  25. Feature newFeature = new Feature(index, value);
  26. vector.put(index, newFeature);
  27. if (index > maxIndex_) {
  28. maxIndex_ = index;
  29. }
  30. ++numFeatures;
  31. }
  32. public double get(int index) {
  33. Feature feature = vector.get(index);
  34. if (feature == null) {
  35. return 0;
  36. } else {
  37. return feature.value_;
  38. }
  39. }
  40. // removes the feature at the given index
  41. // returns the removed feature, or null if the feature
  42. // was not there
  43. public Feature remove(int index) {
  44. Feature f = vector.remove(index);
  45. if (f != null) {
  46. numFeatures -= 1;
  47. }
  48. return f;
  49. }
  50. public Feature getFeature(int index) {
  51. return vector.get(index);
  52. }
  53. public Iterator<Feature> iterator() {
  54. return new FeatureVectorIterator();
  55. }
  56. public double computeDistance(FeatureVector other) {
  57. double norm = 0;
  58. Iterator<Feature> thisIter = this.iterator();
  59. boolean updateThis = true;
  60. Iterator<Feature> otherIter = other.iterator();
  61. boolean updateOther = true;
  62. if (thisIter.hasNext() && otherIter.hasNext()) {
  63. Feature thisFeature = thisIter.next();
  64. updateThis = false;
  65. Feature otherFeature = otherIter.next();
  66. updateOther = false;
  67. while ((thisIter.hasNext() || !updateThis) && (otherIter.hasNext() || !updateOther)) {
  68. if (updateThis) {
  69. thisFeature = thisIter.next();
  70. updateThis = false;
  71. }
  72. if (updateOther) {
  73. otherFeature = otherIter.next();
  74. updateOther = false;
  75. }
  76. if (thisFeature.index_ == otherFeature.index_) {
  77. double diff = thisFeature.value_ - otherFeature.value_;
  78. norm += diff*diff;
  79. updateThis = true;
  80. updateOther = true;
  81. } else if (thisFeature.index_ < otherFeature.index_) {
  82. norm += thisFeature.value_*thisFeature.value_;
  83. updateThis = true;
  84. while (thisIter.hasNext() || !updateThis) {
  85. if (updateThis) {
  86. thisFeature = thisIter.next();
  87. updateThis = false;
  88. }
  89. if (thisFeature.index_ == otherFeature.index_) {
  90. double diff = thisFeature.value_ - otherFeature.value_;
  91. norm += diff*diff;
  92. updateThis = true;
  93. updateOther = true;
  94. break;
  95. } else if (thisFeature.index_ > otherFeature.index_) {
  96. break;
  97. } else {
  98. norm += thisFeature.value_*thisFeature.value_;
  99. updateThis = true;
  100. }
  101. }
  102. } else if (otherFeature.index_ < thisFeature.index_) {
  103. norm += otherFeature.value_*otherFeature.value_;
  104. updateOther = true;
  105. while (otherIter.hasNext() || !updateOther) {
  106. if (updateOther) {
  107. otherFeature = otherIter.next();
  108. updateOther = false;
  109. }
  110. if (thisFeature.index_ == otherFeature.index_) {
  111. double diff = thisFeature.value_ - otherFeature.value_;
  112. norm += diff*diff;
  113. updateOther = true;
  114. updateThis = true;
  115. break;
  116. } else if (thisFeature.index_ < otherFeature.index_) {
  117. break;
  118. } else {
  119. norm += otherFeature.value_*otherFeature.value_;
  120. updateOther = true;
  121. }
  122. }
  123. }
  124. }
  125. //will only go into one of these loops
  126. while (thisIter.hasNext() || !updateThis) {
  127. if (updateThis) {
  128. thisFeature = thisIter.next();
  129. updateThis = false;
  130. }
  131. norm += thisFeature.value_*thisFeature.value_;
  132. updateThis = true;
  133. }
  134. while (otherIter.hasNext() || !updateOther) {
  135. if (updateOther) {
  136. otherFeature = otherIter.next();
  137. updateOther = false;
  138. }
  139. norm += otherFeature.value_*otherFeature.value_;
  140. updateOther = true;
  141. }
  142. }
  143. return Math.sqrt(norm);
  144. }
  145. /*
  146. public double computeDistance(FeatureVector second) {
  147. int largest_index = this.dimensionality();
  148. if (second.dimensionality() > largest_index) {
  149. largest_index = second.dimensionality();
  150. }
  151. HashSet<Integer> indices = new HashSet<Integer>(this.getIndices());
  152. Set<Integer> extraIndices = second.getIndices();
  153. for (Integer index : extraIndices) {
  154. indices.add(index);
  155. }
  156. double norm = 0;
  157. for (Integer ind : indices) {
  158. double diff = this.get(ind) - second.get(ind);
  159. norm += diff*diff;
  160. }
  161. return Math.sqrt(norm);
  162. }*/
  163. private class FeatureVectorIterator implements Iterator<Feature> {
  164. private Iterator<Feature> internalIterator;
  165. public FeatureVectorIterator() {
  166. internalIterator = vector.values().iterator();
  167. }
  168. public boolean hasNext() {
  169. return internalIterator.hasNext();
  170. }
  171. public Feature next() {
  172. return internalIterator.next();
  173. }
  174. public void remove() {
  175. internalIterator.remove();
  176. }
  177. }
  178. }