/ocr/ocrservice/src/com/googlecode/eyesfree/opticflow/TextTrackerProcessor.java

http://eyes-free.googlecode.com/ · Java · 482 lines · 273 code · 104 blank · 105 comment · 36 complexity · a107501c7e64bc813f053a4114d627da MD5 · raw file

  1. /*
  2. * Copyright (C) 2011 Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. package com.googlecode.eyesfree.opticflow;
  17. import android.graphics.Canvas;
  18. import android.graphics.Color;
  19. import android.graphics.Matrix;
  20. import android.graphics.Paint;
  21. import android.graphics.Paint.Align;
  22. import android.graphics.Paint.Style;
  23. import android.graphics.PointF;
  24. import android.graphics.Rect;
  25. import android.graphics.RectF;
  26. import android.os.SystemClock;
  27. import android.util.Log;
  28. import com.googlecode.eyesfree.env.Size;
  29. import com.googlecode.leptonica.android.Pix;
  30. import com.googlecode.leptonica.android.Pixa;
  31. import java.util.LinkedList;
  32. import java.util.ListIterator;
  33. import java.util.Vector;
  34. /**
  35. * Frame processor that tracks positioning and visibility of text areas.
  36. *
  37. * @author alanv@google.com (Alan Viverette)
  38. */
  39. public class TextTrackerProcessor extends FrameProcessor {
  40. private static final String TAG = "OcrProcessor";
  41. /**
  42. * The minimum amount of overlap allowed between an existing text area
  43. * (after optical flow considerations) and a potential match.
  44. */
  45. private static final float MIN_OVERLAP = 0.50f;
  46. /**
  47. * The maximum amount of normalized error in aspect ratio allowed between an
  48. * existing text area and a potential match.
  49. */
  50. private static final float MAX_ASPECT_ERROR = 0.10f;
  51. /**
  52. * Minimum amount of time is milliseconds that a text area must persist in
  53. * order for it to be OCR'ed.
  54. */
  55. private static final long MIN_PRESENCE = 500;
  56. /**
  57. * Maximum amount of time in milliseconds that a text area may be absent in
  58. * order for it to remain in the OCR queue.
  59. */
  60. private static final long MAX_ABSENCE = 1500;
  61. /** Native optical flow tracker. */
  62. private final OpticalFlow mOpticalFlow;
  63. /** List of tracked text areas. */
  64. private final LinkedList<TrackedRect> mTrackedRects;
  65. /** List of new OCR candidates. */
  66. private LinkedList<TrackedRect> mOcrAdd;
  67. /** List of text areas to be removed from the OCR queue. */
  68. private LinkedList<TrackedRect> mOcrRemove;
  69. private RectF mBounds;
  70. /** Text detection listener. */
  71. private Listener mListener;
  72. /**
  73. * @param opticalFlow
  74. */
  75. public TextTrackerProcessor(OpticalFlow opticalFlow) {
  76. mOpticalFlow = opticalFlow;
  77. mTrackedRects = new LinkedList<TrackedRect>();
  78. mOcrAdd = new LinkedList<TrackedRect>();
  79. mOcrRemove = new LinkedList<TrackedRect>();
  80. }
  81. @Override
  82. public void onInit(Size size) {
  83. mBounds = new RectF(0, 0, size.width, size.height);
  84. }
  85. public void setListener(Listener listener) {
  86. mListener = listener;
  87. }
  88. @Override
  89. protected void onProcessFrame(TimestampedFrame frame) {
  90. if (frame.isBlurred() || frame.takenWhileFocusing()) {
  91. return;
  92. }
  93. Pixa pixa = frame.getDetectedText();
  94. float[] conf = frame.getTextConfidences();
  95. float angle = frame.getAngle();
  96. if (pixa == null)
  97. return;
  98. processResults(pixa, conf, angle);
  99. pixa.recycle();
  100. frame.recycleDetectedText();
  101. if (mListener != null) {
  102. mListener.onTextDetected(mBounds, mTrackedRects);
  103. }
  104. }
  105. @Override
  106. protected void onDrawDebug(final Canvas canvas) {
  107. Paint paint = new Paint();
  108. long current = SystemClock.uptimeMillis();
  109. for (TrackedRect trackedRect : mTrackedRects) {
  110. trackedRect.onDrawDebug(canvas, current);
  111. }
  112. }
  113. @Override
  114. protected Vector<String> getDebugText() {
  115. Vector<String> debugText = new Vector<String>();
  116. debugText.add("Tracking: " + mTrackedRects.size());
  117. return debugText;
  118. }
  119. private void processResults(Pixa textAreas, float[] textConfs, float angle) {
  120. LinkedList<TrackedRect> unmatchedRects = new LinkedList<TrackedRect>();
  121. LinkedList<TrackedRect> newRects = new LinkedList<TrackedRect>();
  122. long timestamp = SystemClock.uptimeMillis();
  123. updateTrackedRects(timestamp);
  124. matchExistingRects(textAreas, textConfs, angle, timestamp);
  125. }
  126. private void updateTrackedRects(long timestamp) {
  127. if (mOpticalFlow == null) {
  128. // No optical flow detection!
  129. return;
  130. }
  131. for (TrackedRect tracked : mTrackedRects) {
  132. PointF delta = mOpticalFlow.getAccumulatedDelta(tracked.timestamp,
  133. tracked.rect.centerX(), tracked.rect.centerY(), tracked.radius());
  134. tracked.rect.offset(delta.x, delta.y);
  135. tracked.timestamp = timestamp;
  136. }
  137. Log.i(TAG, "Updated " + mTrackedRects.size() + " tracked rects");
  138. }
  139. /**
  140. * Attempts to match the text areas in textAreas with the currently tracked
  141. * rectangles.
  142. *
  143. * @param textAreas A Pixa containing the detected text areas.
  144. * @param textConfs An array of text confidences corresponding to the text
  145. * areas.
  146. * @param timestamp The current system uptime in milliseconds.
  147. */
  148. private void matchExistingRects(Pixa textAreas, float[] textConfs, float angle, long timestamp) {
  149. int count = textConfs.length;
  150. boolean[] matchFlags = new boolean[count];
  151. ListIterator<TrackedRect> iterator = mTrackedRects.listIterator();
  152. // Matching algorithm runs in O(n*m) time, but we probably won't have
  153. // n*m > 100.
  154. while (iterator.hasNext()) {
  155. TrackedRect rect = iterator.next();
  156. int matchIndex = findBestMatch(rect, textAreas, matchFlags);
  157. if (matchIndex >= 0) {
  158. matchFlags[matchIndex] = true;
  159. boolean enqueue = onRectMatched(rect, textAreas, matchIndex, angle, timestamp);
  160. if (enqueue) {
  161. rect.firstTimestamp = -1;
  162. mOcrAdd.add(rect);
  163. }
  164. } else {
  165. boolean remove = onRectUnmatched(rect, timestamp);
  166. if (remove) {
  167. iterator.remove();
  168. mOcrRemove.add(rect);
  169. }
  170. }
  171. }
  172. // Go back through the list of matched Pix and add the unmatched ones to
  173. // the list of new tracked rects.
  174. for (int i = 0; i < count; i++) {
  175. if (matchFlags[i]) {
  176. continue;
  177. }
  178. Pix pix = textAreas.getPix(i);
  179. float quality = textConfs[i];
  180. Rect rect = textAreas.getBoxRect(i);
  181. TrackedRect newRect = new TrackedRect(pix, angle, quality, rect, timestamp);
  182. onRectDiscovered(newRect);
  183. }
  184. }
  185. /**
  186. * Searches textAreas for the text area most similar to rect.
  187. *
  188. * @param rect The tracked rect to match.
  189. * @param textAreas The Pixa containing potential matches.
  190. * @param matchFlags A boolean array marking matched Pix within textAreas.
  191. * @return Returns the index of the best match.
  192. */
  193. private int findBestMatch(TrackedRect rect, Pixa textAreas, boolean[] matchFlags) {
  194. float maxSimilarity = 0.0f;
  195. float rectAspect = rect.aspect();
  196. int size = textAreas.size();
  197. int maxIndex = -1;
  198. for (int i = 0; i < size; i++) {
  199. // Don't check if this Pix has already been claimed. Technically we
  200. // should check every pairing and minimize a cost function, but this
  201. // is easier.
  202. if (matchFlags[i]) {
  203. continue;
  204. }
  205. Rect boxRect = textAreas.getBoxRect(i);
  206. float overlap = rect.getOverlap(boxRect);
  207. // TODO(alanv): Ideally the OpticalFlow tracker will ensure that
  208. // identical rects overlap, but we can't count on it for dense text.
  209. // Remove this (and optical flow?) once we have a better way to
  210. // compute visual similarity.
  211. if (overlap < MIN_OVERLAP) {
  212. // Log.e(TAG, i + " failed with overlap=" + overlap);
  213. // continue;
  214. }
  215. float boxAspect = (boxRect.width() / (float) boxRect.height());
  216. float aspectError = Math.abs(boxAspect - rectAspect) / Math.max(boxAspect, rectAspect);
  217. // Aspect ratio should be constant even after zoom; however, it
  218. // results in split clusters appearing as two entirely new clusters.
  219. // This might not be so bad.
  220. if (aspectError > MAX_ASPECT_ERROR) {
  221. continue;
  222. }
  223. float similarity = overlap / (aspectError + 1) + (1 - aspectError);
  224. if (similarity > maxSimilarity) {
  225. maxIndex = i;
  226. maxSimilarity = similarity;
  227. }
  228. }
  229. return maxIndex;
  230. }
  231. /**
  232. * @param rect
  233. * @param textAreas
  234. * @param matchIndex
  235. * @param timestamp
  236. * @return true if rect needs to be added to queue
  237. */
  238. private boolean onRectMatched(TrackedRect rect, Pixa textAreas, int matchIndex, float angle,
  239. long timestamp) {
  240. Rect newRect = textAreas.getBoxRect(matchIndex);
  241. rect.missingTimestamp = -1;
  242. rect.timestamp = timestamp;
  243. rect.rect = new RectF(newRect);
  244. rect.rotation.setRotate(angle, newRect.exactCenterX(), newRect.exactCenterY());
  245. long presentSince = rect.firstTimestamp;
  246. if (presentSince < 0) {
  247. // We've already marked this rect as present and queued it for OCR.
  248. // TODO(alanv): If we've queued the rect but not finished OCR,
  249. // update the rect's Pix with a higher quality Pix (if one is
  250. // available).
  251. return false;
  252. }
  253. long presence = timestamp - presentSince;
  254. if (presence < MIN_PRESENCE) {
  255. return false;
  256. }
  257. return true;
  258. }
  259. /**
  260. * @param rect
  261. * @param timestamp
  262. * @return true if rect needs to be removed
  263. */
  264. private boolean onRectUnmatched(TrackedRect rect, long timestamp) {
  265. long missingSince = rect.missingTimestamp;
  266. if (missingSince < 0) {
  267. rect.missingTimestamp = timestamp;
  268. return false;
  269. }
  270. long absence = timestamp - missingSince;
  271. if (absence < MAX_ABSENCE) {
  272. return false;
  273. }
  274. return true;
  275. }
  276. /**
  277. * @param newRect
  278. */
  279. private void onRectDiscovered(TrackedRect newRect) {
  280. mTrackedRects.add(newRect);
  281. }
  282. public LinkedList<TrackedRect> getOcrAdd() {
  283. LinkedList<TrackedRect> temp = mOcrAdd;
  284. mOcrAdd = new LinkedList<TrackedRect>();
  285. return temp;
  286. }
  287. public LinkedList<TrackedRect> getOcrRemove() {
  288. LinkedList<TrackedRect> temp = mOcrRemove;
  289. mOcrRemove = new LinkedList<TrackedRect>();
  290. return temp;
  291. }
  292. /**
  293. * A huge mess containing everything the app needs to know about a tracked
  294. * text area. TODO(alanv): Refactor this.
  295. *
  296. * @author alanv@google.com (Alan Viverette)
  297. */
  298. public static class TrackedRect {
  299. public Pix pix;
  300. public float quality;
  301. public RectF rect;
  302. public String text;
  303. public Matrix rotation;
  304. public long firstTimestamp;
  305. public long timestamp;
  306. public long missingTimestamp;
  307. private Paint paint;
  308. public boolean queued;
  309. public TrackedRect(Pix pix, float quality, float angle, Rect rect, long timestamp) {
  310. this.pix = pix;
  311. this.quality = quality;
  312. this.rect = new RectF(rect);
  313. this.text = null;
  314. this.firstTimestamp = timestamp;
  315. this.timestamp = timestamp;
  316. this.missingTimestamp = -1;
  317. rotation = new Matrix();
  318. rotation.setRotate(angle, rect.exactCenterX(), rect.exactCenterY());
  319. paint = new Paint();
  320. paint.setTextAlign(Align.CENTER);
  321. }
  322. public float radius() {
  323. return (rect.width() + rect.height()) / 4;
  324. }
  325. public float aspect() {
  326. return (rect.width() / rect.height());
  327. }
  328. public float getOverlap(Rect other) {
  329. RectF otherF = new RectF(other);
  330. RectF isect = new RectF();
  331. if (isect.setIntersect(otherF, rect)) {
  332. float areaA = rect.width() * rect.height();
  333. float areaB = otherF.width() * otherF.height();
  334. float maxArea = Math.max(areaA, areaB);
  335. float isectArea = isect.width() * isect.height();
  336. return isectArea / maxArea;
  337. }
  338. return 0;
  339. }
  340. public void onDrawDebug(final Canvas canvas, final long timestamp) {
  341. int color = Color.BLACK;
  342. long alpha = 0xFF;
  343. // Apply rotation matrix.
  344. int saveCount = canvas.save();
  345. canvas.concat(rotation);
  346. if (missingTimestamp >= 0) {
  347. color = Color.YELLOW;
  348. long missing = timestamp - missingTimestamp;
  349. alpha = alpha * Math.max(0, (MAX_ABSENCE - missing)) / MAX_ABSENCE;
  350. color = ((int) alpha << 24) | (0xFFFFFF & color);
  351. } else if (firstTimestamp >= 0) {
  352. color = Color.RED;
  353. long present = timestamp - firstTimestamp;
  354. alpha = alpha * Math.max(0, present) / MIN_PRESENCE;
  355. } else {
  356. color = Color.GREEN;
  357. }
  358. color = ((int) alpha << 24) | (0xFFFFFF & color);
  359. if (text == null || text.length() > 0) {
  360. paint.setColor(color);
  361. canvas.drawRect(rect, paint);
  362. }
  363. if (text != null) {
  364. float cx = rect.left + rect.width() / 2.0f;
  365. float cy = rect.top + rect.height() * (2.0f / 3.0f);
  366. paint.setColor(Color.BLACK);
  367. paint.setStyle(Style.FILL);
  368. paint.setTextSize(2.0f * rect.height() / 3.0f);
  369. canvas.drawText(text, cx, cy, paint);
  370. }
  371. // Restore previous matrix.
  372. canvas.restoreToCount(saveCount);
  373. }
  374. }
  375. public interface Listener {
  376. public void onTextDetected(RectF bounds, LinkedList<TrackedRect> trackedRects);
  377. }
  378. }