/ocr/ocrservice/src/com/googlecode/eyesfree/opticflow/TextTrackerProcessor.java
Java | 482 lines | 273 code | 104 blank | 105 comment | 36 complexity | a107501c7e64bc813f053a4114d627da MD5 | raw file
/*
 * Copyright (C) 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.googlecode.eyesfree.opticflow;

import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import android.graphics.Paint.Align;
import android.graphics.Paint.Style;
import android.graphics.PointF;
import android.graphics.Rect;
import android.graphics.RectF;
import android.os.SystemClock;
import android.util.Log;

import com.googlecode.eyesfree.env.Size;
import com.googlecode.leptonica.android.Pix;
import com.googlecode.leptonica.android.Pixa;

import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Vector;

/**
 * Frame processor that tracks positioning and visibility of text areas.
 * <p>
 * Each processed frame's detected text areas are matched against the list of
 * currently tracked rectangles. Rectangles that have been matched for at least
 * {@link #MIN_PRESENCE} milliseconds are placed on the "add" list (see
 * {@link #getOcrAdd()}); rectangles that have gone unmatched for at least
 * {@link #MAX_ABSENCE} milliseconds are dropped from tracking and placed on
 * the "remove" list (see {@link #getOcrRemove()}).
 *
 * @author alanv@google.com (Alan Viverette)
 */
public class TextTrackerProcessor extends FrameProcessor {
    private static final String TAG = "OcrProcessor";

    /**
     * The minimum amount of overlap allowed between an existing text area
     * (after optical flow considerations) and a potential match.
     * <p>
     * NOTE: this threshold is not currently enforced; the check in
     * {@link #findBestMatch} is disabled.
     */
    private static final float MIN_OVERLAP = 0.50f;

    /**
     * The maximum amount of normalized error in aspect ratio allowed between an
     * existing text area and a potential match.
     */
    private static final float MAX_ASPECT_ERROR = 0.10f;

    /**
     * Minimum amount of time in milliseconds that a text area must persist in
     * order for it to be OCR'ed.
     */
    private static final long MIN_PRESENCE = 500;

    /**
     * Maximum amount of time in milliseconds that a text area may be absent in
     * order for it to remain in the OCR queue.
     */
    private static final long MAX_ABSENCE = 1500;

    /** Native optical flow tracker. May be null, in which case rects are not shifted. */
    private final OpticalFlow mOpticalFlow;

    /** List of tracked text areas. */
    private final LinkedList<TrackedRect> mTrackedRects;

    /** List of new OCR candidates. */
    private LinkedList<TrackedRect> mOcrAdd;

    /** List of text areas to be removed from the OCR queue. */
    private LinkedList<TrackedRect> mOcrRemove;

    /** Frame bounds, set in {@link #onInit(Size)} and passed to the listener. */
    private RectF mBounds;

    /** Text detection listener. */
    private Listener mListener;

    /**
     * Creates a processor with empty tracking and OCR queues.
     *
     * @param opticalFlow The optical flow tracker used to shift tracked rects
     *            between frames, or null to skip that step.
     */
    public TextTrackerProcessor(OpticalFlow opticalFlow) {
        mOpticalFlow = opticalFlow;
        mTrackedRects = new LinkedList<TrackedRect>();
        mOcrAdd = new LinkedList<TrackedRect>();
        mOcrRemove = new LinkedList<TrackedRect>();
    }

    @Override
    public void onInit(Size size) {
        mBounds = new RectF(0, 0, size.width, size.height);
    }

    /**
     * Sets the listener notified after each processed frame.
     *
     * @param listener The listener, or null to disable notifications.
     */
    public void setListener(Listener listener) {
        mListener = listener;
    }

    @Override
    protected void onProcessFrame(TimestampedFrame frame) {
        if (frame.isBlurred() || frame.takenWhileFocusing()) {
            return;
        }

        Pixa pixa = frame.getDetectedText();
        float[] conf = frame.getTextConfidences();
        float angle = frame.getAngle();

        if (pixa == null) {
            return;
        }

        processResults(pixa, conf, angle);

        pixa.recycle();
        frame.recycleDetectedText();

        if (mListener != null) {
            mListener.onTextDetected(mBounds, mTrackedRects);
        }
    }

    @Override
    protected void onDrawDebug(final Canvas canvas) {
        long current = SystemClock.uptimeMillis();

        for (TrackedRect trackedRect : mTrackedRects) {
            trackedRect.onDrawDebug(canvas, current);
        }
    }

    @Override
    protected Vector<String> getDebugText() {
        Vector<String> debugText = new Vector<String>();
        debugText.add("Tracking: " + mTrackedRects.size());
        return debugText;
    }

    /**
     * Shifts the tracked rects by optical flow and then matches them against
     * the newly detected text areas.
     *
     * @param textAreas A Pixa containing the detected text areas.
     * @param textConfs An array of text confidences corresponding to the text
     *            areas.
     * @param angle The angle reported by the frame, used for each rect's
     *            rotation matrix.
     */
    private void processResults(Pixa textAreas, float[] textConfs, float angle) {
        long timestamp = SystemClock.uptimeMillis();

        updateTrackedRects(timestamp);
        matchExistingRects(textAreas, textConfs, angle, timestamp);
    }

    /**
     * Offsets every tracked rect by the delta accumulated by the optical flow
     * tracker since the rect's last timestamp, then stamps it with
     * {@code timestamp}. Does nothing when no optical flow tracker was
     * supplied.
     *
     * @param timestamp The current system uptime in milliseconds.
     */
    private void updateTrackedRects(long timestamp) {
        if (mOpticalFlow == null) {
            // No optical flow detection!
            return;
        }

        for (TrackedRect tracked : mTrackedRects) {
            PointF delta = mOpticalFlow.getAccumulatedDelta(tracked.timestamp,
                    tracked.rect.centerX(), tracked.rect.centerY(), tracked.radius());

            tracked.rect.offset(delta.x, delta.y);
            tracked.timestamp = timestamp;
        }

        Log.i(TAG, "Updated " + mTrackedRects.size() + " tracked rects");
    }

    /**
     * Attempts to match the text areas in textAreas with the currently tracked
     * rectangles.
     *
     * @param textAreas A Pixa containing the detected text areas.
     * @param textConfs An array of text confidences corresponding to the text
     *            areas.
     * @param angle The angle reported by the frame, used for each rect's
     *            rotation matrix.
     * @param timestamp The current system uptime in milliseconds.
     */
    private void matchExistingRects(Pixa textAreas, float[] textConfs, float angle, long timestamp) {
        // Size the flags from the Pixa itself: findBestMatch() indexes them by
        // Pix index, so sizing them from textConfs could run out of bounds if
        // the two ever disagree in length.
        int count = textAreas.size();
        int confCount = (textConfs == null) ? 0 : textConfs.length;
        boolean[] matchFlags = new boolean[count];

        ListIterator<TrackedRect> iterator = mTrackedRects.listIterator();

        // Matching algorithm runs in O(n*m) time, but we probably won't have
        // n*m > 100.
        while (iterator.hasNext()) {
            TrackedRect rect = iterator.next();

            int matchIndex = findBestMatch(rect, textAreas, matchFlags);

            if (matchIndex >= 0) {
                matchFlags[matchIndex] = true;

                boolean enqueue = onRectMatched(rect, textAreas, matchIndex, angle, timestamp);

                if (enqueue) {
                    // A negative firstTimestamp marks the rect as already
                    // queued so that it is not enqueued again.
                    rect.firstTimestamp = -1;
                    mOcrAdd.add(rect);
                }
            } else {
                boolean remove = onRectUnmatched(rect, timestamp);

                if (remove) {
                    iterator.remove();
                    mOcrRemove.add(rect);
                }
            }
        }

        // Go back through the list of matched Pix and add the unmatched ones to
        // the list of new tracked rects.
        for (int i = 0; i < count; i++) {
            if (matchFlags[i]) {
                continue;
            }

            Pix pix = textAreas.getPix(i);
            // Fall back to zero confidence when none was reported for this Pix.
            float quality = (i < confCount) ? textConfs[i] : 0.0f;
            Rect rect = textAreas.getBoxRect(i);

            // Argument order must follow the constructor: quality, then angle.
            TrackedRect newRect = new TrackedRect(pix, quality, angle, rect, timestamp);

            onRectDiscovered(newRect);
        }
    }

    /**
     * Searches textAreas for the text area most similar to rect.
     *
     * @param rect The tracked rect to match.
     * @param textAreas The Pixa containing potential matches.
     * @param matchFlags A boolean array marking matched Pix within textAreas.
     * @return Returns the index of the best match, or -1 if no candidate is
     *         within {@link #MAX_ASPECT_ERROR}.
     */
    private int findBestMatch(TrackedRect rect, Pixa textAreas, boolean[] matchFlags) {
        float maxSimilarity = 0.0f;
        float rectAspect = rect.aspect();
        // Never index past the flags array, whatever the Pixa reports.
        int size = Math.min(textAreas.size(), matchFlags.length);
        int maxIndex = -1;

        for (int i = 0; i < size; i++) {
            // Don't check if this Pix has already been claimed. Technically we
            // should check every pairing and minimize a cost function, but this
            // is easier.
            if (matchFlags[i]) {
                continue;
            }

            Rect boxRect = textAreas.getBoxRect(i);
            float overlap = rect.getOverlap(boxRect);

            // TODO(alanv): Ideally the OpticalFlow tracker will ensure that
            // identical rects overlap, but we can't count on it for dense text.
            // Remove this (and optical flow?) once we have a better way to
            // compute visual similarity.
            //
            // The MIN_OVERLAP rejection is intentionally disabled; overlap only
            // contributes to the similarity score below.

            float boxAspect = (boxRect.width() / (float) boxRect.height());
            float aspectError = Math.abs(boxAspect - rectAspect) / Math.max(boxAspect, rectAspect);

            // Aspect ratio should be constant even after zoom; however, it
            // results in split clusters appearing as two entirely new clusters.
            // This might not be so bad.
            if (aspectError > MAX_ASPECT_ERROR) {
                continue;
            }

            float similarity = overlap / (aspectError + 1) + (1 - aspectError);

            if (similarity > maxSimilarity) {
                maxIndex = i;
                maxSimilarity = similarity;
            }
        }

        return maxIndex;
    }

    /**
     * Updates a tracked rect with the geometry of its matched text area and
     * decides whether it has been present long enough to be queued for OCR.
     *
     * @param rect The tracked rect that was matched.
     * @param textAreas The Pixa containing the matched text area.
     * @param matchIndex The index of the matched text area within textAreas.
     * @param angle The angle used to rebuild the rect's rotation matrix.
     * @param timestamp The current system uptime in milliseconds.
     * @return true if rect needs to be added to queue
     */
    private boolean onRectMatched(TrackedRect rect, Pixa textAreas, int matchIndex, float angle,
            long timestamp) {
        Rect newRect = textAreas.getBoxRect(matchIndex);

        rect.missingTimestamp = -1;
        rect.timestamp = timestamp;
        rect.rect = new RectF(newRect);
        rect.rotation.setRotate(angle, newRect.exactCenterX(), newRect.exactCenterY());

        long presentSince = rect.firstTimestamp;

        if (presentSince < 0) {
            // We've already marked this rect as present and queued it for OCR.
            // TODO(alanv): If we've queued the rect but not finished OCR,
            // update the rect's Pix with a higher quality Pix (if one is
            // available).

            return false;
        }

        long presence = timestamp - presentSince;

        return presence >= MIN_PRESENCE;
    }

    /**
     * Records that a tracked rect was not matched in the current frame and
     * decides whether it has been missing long enough to be dropped.
     *
     * @param rect The tracked rect that was not matched.
     * @param timestamp The current system uptime in milliseconds.
     * @return true if rect needs to be removed
     */
    private boolean onRectUnmatched(TrackedRect rect, long timestamp) {
        long missingSince = rect.missingTimestamp;

        if (missingSince < 0) {
            // First frame in which the rect is missing; start the clock.
            rect.missingTimestamp = timestamp;

            return false;
        }

        long absence = timestamp - missingSince;

        return absence >= MAX_ABSENCE;
    }

    /**
     * Starts tracking a newly discovered text area.
     *
     * @param newRect The rect to add to the tracked list.
     */
    private void onRectDiscovered(TrackedRect newRect) {
        mTrackedRects.add(newRect);
    }

    /**
     * Returns the rects queued for OCR since the previous call and replaces
     * the internal queue with a new empty list.
     *
     * @return The accumulated OCR candidates; never null.
     */
    public LinkedList<TrackedRect> getOcrAdd() {
        LinkedList<TrackedRect> temp = mOcrAdd;
        mOcrAdd = new LinkedList<TrackedRect>();

        return temp;
    }

    /**
     * Returns the rects dropped from tracking since the previous call and
     * replaces the internal queue with a new empty list.
     *
     * @return The accumulated removed rects; never null.
     */
    public LinkedList<TrackedRect> getOcrRemove() {
        LinkedList<TrackedRect> temp = mOcrRemove;
        mOcrRemove = new LinkedList<TrackedRect>();

        return temp;
    }

    /**
     * A huge mess containing everything the app needs to know about a tracked
     * text area. TODO(alanv): Refactor this.
     *
     * @author alanv@google.com (Alan Viverette)
     */
    public static class TrackedRect {
        /** Image of the text area, as supplied at construction. */
        public Pix pix;

        /** Confidence value supplied for the text area at construction. */
        public float quality;

        /** Current bounds of the text area. */
        public RectF rect;

        /** Text associated with the area; null until assigned by a caller. */
        public String text;

        /** Rotation about the rect's center, applied when drawing. */
        public Matrix rotation;

        /**
         * Time in milliseconds at which the rect was first seen, or negative
         * once the rect has been queued for OCR.
         */
        public long firstTimestamp;

        /** Time in milliseconds at which the rect was last updated. */
        public long timestamp;

        /**
         * Time in milliseconds at which the rect was first found missing, or
         * negative while the rect is being matched.
         */
        public long missingTimestamp;

        private Paint paint;

        public boolean queued;

        /**
         * @param pix The image of the text area.
         * @param quality The confidence value for the text area.
         * @param angle The rotation angle, applied about the rect's center.
         * @param rect The bounds of the text area.
         * @param timestamp The time in milliseconds at which the area was
         *            detected.
         */
        public TrackedRect(Pix pix, float quality, float angle, Rect rect, long timestamp) {
            this.pix = pix;
            this.quality = quality;
            this.rect = new RectF(rect);
            this.text = null;
            this.firstTimestamp = timestamp;
            this.timestamp = timestamp;
            this.missingTimestamp = -1;

            rotation = new Matrix();
            rotation.setRotate(angle, rect.exactCenterX(), rect.exactCenterY());

            paint = new Paint();
            paint.setTextAlign(Align.CENTER);
        }

        /**
         * @return One quarter of the sum of the rect's width and height, i.e.
         *         the mean of its half-width and half-height.
         */
        public float radius() {
            return (rect.width() + rect.height()) / 4;
        }

        /**
         * @return The rect's width divided by its height.
         */
        public float aspect() {
            return (rect.width() / rect.height());
        }

        /**
         * Computes how much this rect overlaps another.
         *
         * @param other The rect to compare against.
         * @return The intersection area divided by the larger of the two
         *         rects' areas, or 0 if they do not intersect.
         */
        public float getOverlap(Rect other) {
            RectF otherF = new RectF(other);
            RectF isect = new RectF();

            if (isect.setIntersect(otherF, rect)) {
                float areaA = rect.width() * rect.height();
                float areaB = otherF.width() * otherF.height();
                float maxArea = Math.max(areaA, areaB);

                float isectArea = isect.width() * isect.height();

                return isectArea / maxArea;
            }

            return 0;
        }

        /**
         * Draws this rect (and its text, if any) onto the canvas. The color
         * encodes state: yellow fading out while missing, red fading in while
         * waiting for {@code MIN_PRESENCE}, green once queued.
         *
         * @param canvas The canvas to draw on.
         * @param timestamp The current system uptime in milliseconds.
         */
        public void onDrawDebug(final Canvas canvas, final long timestamp) {
            int color;
            long alpha = 0xFF;

            // Apply rotation matrix.
            int saveCount = canvas.save();
            canvas.concat(rotation);

            if (missingTimestamp >= 0) {
                color = Color.YELLOW;
                long missing = timestamp - missingTimestamp;
                alpha = alpha * Math.max(0L, MAX_ABSENCE - missing) / MAX_ABSENCE;
            } else if (firstTimestamp >= 0) {
                color = Color.RED;
                long present = timestamp - firstTimestamp;
                alpha = alpha * Math.max(0L, present) / MIN_PRESENCE;
            } else {
                color = Color.GREEN;
            }

            // Clamp to a valid 8-bit alpha. Drawing can happen after the rect
            // has been present longer than MIN_PRESENCE but before the next
            // frame re-classifies it; without the clamp the shift below would
            // wrap and corrupt the alpha channel.
            alpha = Math.max(0L, Math.min(0xFFL, alpha));
            color = ((int) alpha << 24) | (0xFFFFFF & color);

            if (text == null || text.length() > 0) {
                paint.setColor(color);
                canvas.drawRect(rect, paint);
            }

            if (text != null) {
                float cx = rect.left + rect.width() / 2.0f;
                float cy = rect.top + rect.height() * (2.0f / 3.0f);

                paint.setColor(Color.BLACK);
                paint.setStyle(Style.FILL);
                paint.setTextSize(2.0f * rect.height() / 3.0f);

                canvas.drawText(text, cx, cy, paint);
            }

            // Restore previous matrix.
            canvas.restoreToCount(saveCount);
        }
    }

    /** Receives the tracked text areas after each processed frame. */
    public interface Listener {
        /**
         * Called after a frame's detected text has been matched against the
         * tracked rects.
         *
         * @param bounds The frame bounds established in {@code onInit}.
         * @param trackedRects The processor's live list of tracked rects.
         */
        public void onTextDetected(RectF bounds, LinkedList<TrackedRect> trackedRects);
    }
}