PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/JJIL-OCR/src/jjil/j2se/ocr/Features.java

http://jjil.googlecode.com/
Java | 376 lines | 241 code | 32 blank | 103 comment | 56 complexity | 6c69a5523154f2aa54ab60e931783fb7 MD5 | raw file
  1. /*
  2. * This program is free software: you can redistribute it and/or modify
  3. * it under the terms of the GNU Lesser General Public License as published by
  4. * the Free Software Foundation, either version 3 of the License, or
  5. * (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU Lesser General Public License for more details.
  11. *
  12. * You should have received a copy of the Lesser GNU General Public License
  13. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. package jjil.j2se.ocr;
  16. import java.util.ArrayList;
  17. import java.util.List;
  18. import java.util.ListIterator;
  19. import jjil.algorithm.Gray8OtsuThreshold;
  20. import jjil.core.Point;
  21. import jjil.core.Vec2;
  22. import jjil.j2se.algorithm.CircularList;
  23. import jjil.j2se.algorithm.Pair;
  24. // TODO: Auto-generated Javadoc
  25. /**
  26. * The Class Features transforms a list of edge points into a collection
  27. * of features, which can then be matched with a learnt character template
  28. * (PrototypesCollection) in the CharMatcher class.
  29. *
  30. * @author webb
  31. */
  32. public class Features extends ArrayList<Feature> {
  /** Serialization version identifier. */
  private static final long serialVersionUID = -5447662603546860723L;

  /**
   * Capacity of the shared feature pool. The pool has a fixed size so that no {@code Feature}
   * is allocated with {@code new} during character matching, which avoids garbage collection
   * and keeps performance predictable under Android.
   */
  private static final int MAX_FEATURES = 2000;

  /**
   * Number of slots of {@link #srFeatures} handed out since the last call to
   * {@link #resetFeatures()}.
   */
  private static int snFeaturesUsed = 0;

  /**
   * Shared pool of preallocated features. Every element stored in a {@code Features} list is
   * taken from this array; it is static so that all instances reuse the same objects instead
   * of allocating new ones. The pool is not synchronized.
   */
  private static final Feature[] srFeatures = new Feature[MAX_FEATURES];

  /**
   * Otsu threshold calculator. {@link #testAndSplit(int)} uses it to choose the X position at
   * which a character made of two accidentally touching glyphs is split.
   */
  private final Gray8OtsuThreshold mGray8OtsuThreshold =
      new Gray8OtsuThreshold(true, 256);

  /** The outlines (lists of edge points) that define the boundary of the character. */
  private List<EdgePts> mleps;

  /** Number of outlines that were passed to the constructor. */
  int mnComponents = 0;

  /** Total length of the outline segments that were long enough to be counted. */
  int mnLength = 0;

  // The four values below come from computeRadiusGyration() and normalize point coordinates
  // in computeFeatures(): an offset from the centre is multiplied by the "Inv" value and then
  // shifted right by the matching "Exp" value.

  /** Multiplier applied to the Y offset of a point from the centre. */
  int mnRxInv;

  /** Right shift applied to the Y offset after multiplying by {@link #mnRxInv}. */
  int mnRxExp;

  /** Multiplier applied to the X offset of a point from the centre. */
  int mnRyInv;

  /** Right shift applied to the X offset after multiplying by {@link #mnRyInv}. */
  int mnRyExp;

  /** Length-weighted centre of the outline, computed by the constructor. */
  Vec2 mvMean;

  /**
   * Precomputed math tables, used to eliminate some lengthy computations. Starts out
   * {@code null} and is installed with {@link #setPrecomputeMath(PrecomputeMath)}; it is
   * dereferenced while features are computed.
   */
  static PrecomputeMath mPrecomputeMath = null;

  // Fill the pool once, at class-load time, so that no Feature is allocated afterwards.
  static {
    for (int slot = 0; slot < srFeatures.length; slot++) {
      srFeatures[slot] = new Feature();
    }
  }
  69. /**
  70. * Instantiates a new compute features.
  71. *
  72. * @param leps the leps
  73. */
  74. public Features(List<EdgePts> leps) {
  75. this.mnComponents = leps.size();
  76. this.mleps = leps;
  77. /* find this.mvMean.getX(), this.mvMean.getY() */
  78. Vec2 Sum = new Vec2(0, 0);
  79. int LengthSum = 0;
  80. for (CircularList<EdgePt> OutLine : leps) {
  81. if (OutLine.size() <= 1) {
  82. continue;
  83. }
  84. CircularList<EdgePt>.CircListIter li = OutLine.circListIterator();
  85. Point Last = li.getNext().getPos();
  86. while (li.hasNext()) {
  87. li.next();
  88. Point Norm = li.getNext().getPos();
  89. int n = 1;
  90. Vec2 Delta = new Vec2(Last, Norm);
  91. int Length = 0;
  92. try {
  93. Length = Delta.length();
  94. } catch (jjil.core.Error ex) {
  95. }
  96. n = ((Length << 2) + Length + 32) >> 6;
  97. if (n != 0) {
  98. Sum.add(((Last.getX()<<1)+Delta.getX())*Length,
  99. ((Last.getY()<<1)+Delta.getY())*Length);
  100. LengthSum += Length;
  101. }
  102. if (n != 0) {
  103. Last = Norm;
  104. }
  105. }
  106. }
  107. if (LengthSum == 0) {
  108. return;
  109. }
  110. this.mnLength = LengthSum;
  111. this.mvMean = Sum.div(LengthSum).rsh(1);
  112. /* Find 2nd moments & radius of gyration */
  113. computeRadiusGyration(leps);
  114. /* extract character normalized features */
  115. computeFeatures(leps);
  116. }
  117. /**
  118. * Compute features. Transformas a list of EdgePts into features.
  119. *
  120. * @param leps the list of EdgePts. Each EdgePts object is a closed
  121. * boundary (internal or external) of the character to be recognized.
  122. */
  123. private void computeFeatures(List<EdgePts> leps) {
  124. for (CircularList<EdgePt> Loop : leps) {
  125. /* Check for bad loops */
  126. if (Loop.size() <= 1) {
  127. return;
  128. }
  129. Point Last = null;
  130. for (ListIterator<EdgePt> li = Loop.loopIterator();
  131. li.hasNext();) {
  132. EdgePt Segment = li.next();
  133. int LastX = (Segment.getPos().getX() - this.mvMean.getX()) *
  134. this.mnRyInv;
  135. int LastY = (Segment.getPos().getY() - this.mvMean.getY()) *
  136. this.mnRxInv;
  137. LastX >>= this.mnRyExp;
  138. LastY >>= this.mnRxExp;
  139. Point Norm = new Point(LastX, LastY);
  140. if (Last == null) {
  141. Last = Norm;
  142. } else {
  143. int n = 1;
  144. Vec2 Delta = new Vec2(Last, Norm);
  145. int Length = 0;
  146. try {
  147. Length = Delta.length();
  148. } catch (jjil.core.Error ex) {
  149. }
  150. n = ((Length << 2) + Length + 32) >> 6;
  151. if (n != 0) {
  152. short Theta = mPrecomputeMath.TableLookup(Delta);
  153. Vec2 d = Delta.lsh(8).div(n);
  154. Vec2 pf = new Vec2(Last).lsh(8).add(d.clone().rsh(1));
  155. for (int i = 0; i < n; i++) {
  156. // check to see if we're out of static feature
  157. // slots
  158. if (snFeaturesUsed == MAX_FEATURES) {
  159. return;
  160. }
  161. Feature f = srFeatures[snFeaturesUsed++];
  162. f.set((short) (pf.getX() >> 8),
  163. (short) ((pf.getY() >> 8)),
  164. Theta);
  165. if (!this.add(f)) {
  166. return;
  167. }
  168. pf.add(d);
  169. }
  170. }
  171. if (n != 0) { /* Throw away a point that is too close */
  172. Last = Norm;
  173. }
  174. }
  175. }
  176. }
  177. }
  178. /**
  179. * Compute the radius of gyration.
  180. *
  181. * @param leps the leps
  182. */
  183. private void computeRadiusGyration(List<EdgePts> leps) {
  184. Vec2 vMeanShift = this.mvMean.clone().lsh(8);
  185. int nBLFeat = 0;
  186. Vec2 I = new Vec2(0, 0);
  187. for (CircularList<EdgePt> Outline : leps) {
  188. if (Outline.size() <= 1) {
  189. continue;
  190. }
  191. Point Last = null;
  192. for (ListIterator<EdgePt> li = Outline.loopIterator();
  193. li.hasNext();) {
  194. EdgePt Segment = li.next();
  195. Point Norm = Segment.getPos().clone();
  196. if (Last == null) {
  197. Last = Norm;
  198. } else {
  199. int n = 1;
  200. Vec2 Delta = new Vec2(Last, Norm);
  201. int Length = 0;
  202. try {
  203. Length = Delta.length();
  204. } catch (jjil.core.Error ex) {
  205. }
  206. n = ((Length << 2) + Length + 32) >> 6;
  207. nBLFeat += n;
  208. if (n != 0) {
  209. Vec2 d = Delta.lsh(8).div(n);
  210. Vec2 pf = new Vec2(Last).lsh(8).add(d.clone().rsh(1)).
  211. sub(vMeanShift);
  212. long lX = (long) pf.getX() * pf.getX() * n +
  213. (long) pf.getX() * d.getX() * n * (n - 1) +
  214. (long) d.getX() * d.getX() * n * (n - 1) * (2 * n - 1) / 6;
  215. long lY = (long) pf.getY() * pf.getY() * n +
  216. (long) pf.getY() * d.getY() * n * (n - 1) +
  217. (long) d.getY() * d.getY() * n * (n - 1) * (2 * n - 1) / 6;
  218. I.add((int) (lX >> 16), (int) (lY >> 16));
  219. Last = Norm;
  220. }
  221. }
  222. }
  223. }
  224. Pair<Vec2, Vec2> p = mPrecomputeMath.getResult(nBLFeat,
  225. I.getY() == 0 ? 1 : I.getY(), I.getX() == 0 ? 1 : I.getX());
  226. this.mnRxInv = p.getFirst().getX();
  227. this.mnRxExp = p.getFirst().getY();
  228. this.mnRyInv = p.getSecond().getX();
  229. this.mnRyExp = p.getSecond().getY();
  230. }
  231. /**
  232. * Gets the components.
  233. *
  234. * @return the components
  235. */
  236. public int getComponents() {
  237. return this.mnComponents;
  238. }
  239. /**
  240. * Gets the length.
  241. *
  242. * @return the length
  243. */
  244. public int getLength() {
  245. return this.mnLength;
  246. }
  247. /**
  248. * Gets the xmean.
  249. *
  250. * @return the xmean
  251. */
  252. public int getXmean() {
  253. return this.mvMean.getX();
  254. }
  255. /**
  256. * Gets the ymean.
  257. *
  258. * @return the ymean
  259. */
  260. public int getYmean() {
  261. return this.mvMean.getY();
  262. }
  263. /**
  264. * Reset features.
  265. */
  266. static public void resetFeatures() {
  267. snFeaturesUsed = 0;
  268. }
  269. /**
  270. * Sets the precompute math.
  271. *
  272. * @param pm the new precompute math
  273. */
  274. public static void setPrecomputeMath(PrecomputeMath pm) {
  275. mPrecomputeMath = pm;
  276. }
  277. /**
  278. * Split this Features into two Features's, recomputing the features.
  279. *
  280. * @param nX X position to split on. Points < this X will get put in the first
  281. * Features, points >= this X will get put in the second/
  282. *
  283. * @return the pair< compute features, compute features>
  284. */
  285. private Pair<Features, Features> split(int nX) {
  286. List<EdgePts> lepsLeft = null, lepsRight = null;
  287. for (EdgePts eps : this.mleps) {
  288. Pair<List<EdgePts>, List<EdgePts>> pr = eps.split(nX);
  289. if (pr.getFirst() != null) {
  290. if (lepsLeft == null) {
  291. lepsLeft = pr.getFirst();
  292. } else {
  293. lepsLeft.addAll(pr.getFirst());
  294. }
  295. }
  296. if (pr.getSecond() != null) {
  297. if (lepsRight == null) {
  298. lepsRight = pr.getSecond();
  299. } else {
  300. lepsRight.addAll(pr.getSecond());
  301. }
  302. }
  303. }
  304. Features nfsLeft = null, nfsRight = null;
  305. nfsLeft = new Features(lepsLeft);
  306. if (nfsLeft.size() == 0) {
  307. nfsLeft = null;
  308. }
  309. nfsRight = new Features(lepsRight);
  310. if (nfsRight.size() == 0) {
  311. nfsRight = null;
  312. }
  313. return new Pair<Features,Features>(nfsLeft, nfsRight);
  314. }
  315. /**
  316. * Test and split.
  317. *
  318. * @param nMinWidth the n min width
  319. *
  320. * @return the pair< compute features, compute features>
  321. */
  322. public Pair<Features, Features> testAndSplit(int nMinWidth) {
  323. int nMinX = Integer.MAX_VALUE;
  324. int nMaxX = Integer.MIN_VALUE;
  325. for (EdgePts eps : this.mleps) {
  326. for (EdgePt ep : eps) {
  327. nMinX = Math.min(nMinX, ep.getPos().getX());
  328. nMaxX = Math.max(nMaxX, ep.getPos().getX());
  329. }
  330. }
  331. int rnHistogram[] = new int[nMaxX - nMinX + 1];
  332. for (EdgePts eps : this.mleps) {
  333. for (EdgePt ep : eps) {
  334. rnHistogram[ep.getPos().getX() - nMinX]++;
  335. }
  336. }
  337. int nSplitPoint = this.mGray8OtsuThreshold.calculateOtsuThreshold(rnHistogram);
  338. if (nSplitPoint > nMinWidth && nSplitPoint < rnHistogram.length-nMinWidth) {
  339. return this.split(nSplitPoint + nMinX);
  340. } else {
  341. return null;
  342. }
  343. }
  344. }