PageRenderTime 39ms CodeModel.GetById 12ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 0ms

/ocr/ocrservice/src/com/googlecode/eyesfree/opticflow/TextTrackerProcessor.java

http://eyes-free.googlecode.com/
Java | 482 lines | 273 code | 104 blank | 105 comment | 36 complexity | a107501c7e64bc813f053a4114d627da MD5 | raw file
  1/*
  2 * Copyright (C) 2011 Google Inc.
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5 * use this file except in compliance with the License. You may obtain a copy of
  6 * the License at
  7 *
  8 * http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 13 * License for the specific language governing permissions and limitations under
 14 * the License.
 15 */
 16
 17package com.googlecode.eyesfree.opticflow;
 18
 19import android.graphics.Canvas;
 20import android.graphics.Color;
 21import android.graphics.Matrix;
 22import android.graphics.Paint;
 23import android.graphics.Paint.Align;
 24import android.graphics.Paint.Style;
 25import android.graphics.PointF;
 26import android.graphics.Rect;
 27import android.graphics.RectF;
 28import android.os.SystemClock;
 29import android.util.Log;
 30
 31import com.googlecode.eyesfree.env.Size;
 32import com.googlecode.leptonica.android.Pix;
 33import com.googlecode.leptonica.android.Pixa;
 34
 35import java.util.LinkedList;
 36import java.util.ListIterator;
 37import java.util.Vector;
 38
 39/**
 40 * Frame processor that tracks positioning and visibility of text areas.
 41 *
 42 * @author alanv@google.com (Alan Viverette)
 43 */
 44public class TextTrackerProcessor extends FrameProcessor {
 45    private static final String TAG = "OcrProcessor";
 46
 47    /**
 48     * The minimum amount of overlap allowed between an existing text area
 49     * (after optical flow considerations) and a potential match.
 50     */
 51    private static final float MIN_OVERLAP = 0.50f;
 52
 53    /**
 54     * The maximum amount of normalized error in aspect ratio allowed between an
 55     * existing text area and a potential match.
 56     */
 57    private static final float MAX_ASPECT_ERROR = 0.10f;
 58
 59    /**
     * Minimum amount of time in milliseconds that a text area must persist in
 61     * order for it to be OCR'ed.
 62     */
 63    private static final long MIN_PRESENCE = 500;
 64
 65    /**
 66     * Maximum amount of time in milliseconds that a text area may be absent in
 67     * order for it to remain in the OCR queue.
 68     */
 69    private static final long MAX_ABSENCE = 1500;
 70
 71    /** Native optical flow tracker. */
 72    private final OpticalFlow mOpticalFlow;
 73
 74    /** List of tracked text areas. */
 75    private final LinkedList<TrackedRect> mTrackedRects;
 76
 77    /** List of new OCR candidates. */
 78    private LinkedList<TrackedRect> mOcrAdd;
 79
 80    /** List of text areas to be removed from the OCR queue. */
 81    private LinkedList<TrackedRect> mOcrRemove;
 82
 83    private RectF mBounds;
 84
 85    /** Text detection listener. */
 86    private Listener mListener;
 87
 88    /**
 89     * @param opticalFlow
 90     */
 91    public TextTrackerProcessor(OpticalFlow opticalFlow) {
 92        mOpticalFlow = opticalFlow;
 93        mTrackedRects = new LinkedList<TrackedRect>();
 94        mOcrAdd = new LinkedList<TrackedRect>();
 95        mOcrRemove = new LinkedList<TrackedRect>();
 96    }
 97
 98    @Override
 99    public void onInit(Size size) {
100        mBounds = new RectF(0, 0, size.width, size.height);
101    }
102
103    public void setListener(Listener listener) {
104        mListener = listener;
105    }
106
107    @Override
108    protected void onProcessFrame(TimestampedFrame frame) {
109        if (frame.isBlurred() || frame.takenWhileFocusing()) {
110            return;
111        }
112
113        Pixa pixa = frame.getDetectedText();
114        float[] conf = frame.getTextConfidences();
115        float angle = frame.getAngle();
116
117        if (pixa == null)
118            return;
119
120        processResults(pixa, conf, angle);
121
122        pixa.recycle();
123        frame.recycleDetectedText();
124
125        if (mListener != null) {
126            mListener.onTextDetected(mBounds, mTrackedRects);
127        }
128    }
129
130    @Override
131    protected void onDrawDebug(final Canvas canvas) {
132        Paint paint = new Paint();
133        long current = SystemClock.uptimeMillis();
134
135        for (TrackedRect trackedRect : mTrackedRects) {
136            trackedRect.onDrawDebug(canvas, current);
137        }
138    }
139
140    @Override
141    protected Vector<String> getDebugText() {
142        Vector<String> debugText = new Vector<String>();
143        debugText.add("Tracking: " + mTrackedRects.size());
144        return debugText;
145    }
146
147    private void processResults(Pixa textAreas, float[] textConfs, float angle) {
148        LinkedList<TrackedRect> unmatchedRects = new LinkedList<TrackedRect>();
149        LinkedList<TrackedRect> newRects = new LinkedList<TrackedRect>();
150
151        long timestamp = SystemClock.uptimeMillis();
152
153        updateTrackedRects(timestamp);
154        matchExistingRects(textAreas, textConfs, angle, timestamp);
155    }
156
157    private void updateTrackedRects(long timestamp) {
158        if (mOpticalFlow == null) {
159            // No optical flow detection!
160            return;
161        }
162
163        for (TrackedRect tracked : mTrackedRects) {
164            PointF delta = mOpticalFlow.getAccumulatedDelta(tracked.timestamp,
165                    tracked.rect.centerX(), tracked.rect.centerY(), tracked.radius());
166
167            tracked.rect.offset(delta.x, delta.y);
168            tracked.timestamp = timestamp;
169        }
170
171        Log.i(TAG, "Updated " + mTrackedRects.size() + " tracked rects");
172    }
173
174    /**
175     * Attempts to match the text areas in textAreas with the currently tracked
176     * rectangles.
177     *
178     * @param textAreas A Pixa containing the detected text areas.
179     * @param textConfs An array of text confidences corresponding to the text
180     *            areas.
181     * @param timestamp The current system uptime in milliseconds.
182     */
183    private void matchExistingRects(Pixa textAreas, float[] textConfs, float angle, long timestamp) {
184        int count = textConfs.length;
185        boolean[] matchFlags = new boolean[count];
186
187        ListIterator<TrackedRect> iterator = mTrackedRects.listIterator();
188
189        // Matching algorithm runs in O(n*m) time, but we probably won't have
190        // n*m > 100.
191        while (iterator.hasNext()) {
192            TrackedRect rect = iterator.next();
193
194            int matchIndex = findBestMatch(rect, textAreas, matchFlags);
195
196            if (matchIndex >= 0) {
197                matchFlags[matchIndex] = true;
198
199                boolean enqueue = onRectMatched(rect, textAreas, matchIndex, angle, timestamp);
200
201                if (enqueue) {
202                    rect.firstTimestamp = -1;
203                    mOcrAdd.add(rect);
204                }
205            } else {
206                boolean remove = onRectUnmatched(rect, timestamp);
207
208                if (remove) {
209                    iterator.remove();
210                    mOcrRemove.add(rect);
211                }
212            }
213        }
214
215        // Go back through the list of matched Pix and add the unmatched ones to
216        // the list of new tracked rects.
217        for (int i = 0; i < count; i++) {
218            if (matchFlags[i]) {
219                continue;
220            }
221
222            Pix pix = textAreas.getPix(i);
223            float quality = textConfs[i];
224            Rect rect = textAreas.getBoxRect(i);
225
226            TrackedRect newRect = new TrackedRect(pix, angle, quality, rect, timestamp);
227
228            onRectDiscovered(newRect);
229        }
230    }
231
232    /**
233     * Searches textAreas for the text area most similar to rect.
234     *
235     * @param rect The tracked rect to match.
236     * @param textAreas The Pixa containing potential matches.
237     * @param matchFlags A boolean array marking matched Pix within textAreas.
238     * @return Returns the index of the best match.
239     */
240    private int findBestMatch(TrackedRect rect, Pixa textAreas, boolean[] matchFlags) {
241        float maxSimilarity = 0.0f;
242        float rectAspect = rect.aspect();
243        int size = textAreas.size();
244        int maxIndex = -1;
245
246        for (int i = 0; i < size; i++) {
247            // Don't check if this Pix has already been claimed. Technically we
248            // should check every pairing and minimize a cost function, but this
249            // is easier.
250            if (matchFlags[i]) {
251                continue;
252            }
253
254            Rect boxRect = textAreas.getBoxRect(i);
255            float overlap = rect.getOverlap(boxRect);
256
257            // TODO(alanv): Ideally the OpticalFlow tracker will ensure that
258            // identical rects overlap, but we can't count on it for dense text.
259            // Remove this (and optical flow?) once we have a better way to
260            // compute visual similarity.
261            if (overlap < MIN_OVERLAP) {
262                // Log.e(TAG, i + " failed with overlap=" + overlap);
263                // continue;
264            }
265
266            float boxAspect = (boxRect.width() / (float) boxRect.height());
267            float aspectError = Math.abs(boxAspect - rectAspect) / Math.max(boxAspect, rectAspect);
268
269            // Aspect ratio should be constant even after zoom; however, it
270            // results in split clusters appearing as two entirely new clusters.
271            // This might not be so bad.
272            if (aspectError > MAX_ASPECT_ERROR) {
273                continue;
274            }
275
276            float similarity = overlap / (aspectError + 1) + (1 - aspectError);
277
278            if (similarity > maxSimilarity) {
279                maxIndex = i;
280                maxSimilarity = similarity;
281            }
282        }
283
284        return maxIndex;
285    }
286
287    /**
288     * @param rect
289     * @param textAreas
290     * @param matchIndex
291     * @param timestamp
292     * @return true if rect needs to be added to queue
293     */
294    private boolean onRectMatched(TrackedRect rect, Pixa textAreas, int matchIndex, float angle,
295            long timestamp) {
296        Rect newRect = textAreas.getBoxRect(matchIndex);
297
298        rect.missingTimestamp = -1;
299        rect.timestamp = timestamp;
300        rect.rect = new RectF(newRect);
301        rect.rotation.setRotate(angle, newRect.exactCenterX(), newRect.exactCenterY());
302
303        long presentSince = rect.firstTimestamp;
304
305        if (presentSince < 0) {
306            // We've already marked this rect as present and queued it for OCR.
307            // TODO(alanv): If we've queued the rect but not finished OCR,
308            // update the rect's Pix with a higher quality Pix (if one is
309            // available).
310
311            return false;
312        }
313
314        long presence = timestamp - presentSince;
315
316        if (presence < MIN_PRESENCE) {
317            return false;
318        }
319
320        return true;
321    }
322
323    /**
324     * @param rect
325     * @param timestamp
326     * @return true if rect needs to be removed
327     */
328    private boolean onRectUnmatched(TrackedRect rect, long timestamp) {
329        long missingSince = rect.missingTimestamp;
330
331        if (missingSince < 0) {
332            rect.missingTimestamp = timestamp;
333
334            return false;
335        }
336
337        long absence = timestamp - missingSince;
338
339        if (absence < MAX_ABSENCE) {
340            return false;
341        }
342
343        return true;
344    }
345
346    /**
347     * @param newRect
348     */
349    private void onRectDiscovered(TrackedRect newRect) {
350        mTrackedRects.add(newRect);
351    }
352
353    public LinkedList<TrackedRect> getOcrAdd() {
354        LinkedList<TrackedRect> temp = mOcrAdd;
355        mOcrAdd = new LinkedList<TrackedRect>();
356
357        return temp;
358    }
359
360    public LinkedList<TrackedRect> getOcrRemove() {
361        LinkedList<TrackedRect> temp = mOcrRemove;
362        mOcrRemove = new LinkedList<TrackedRect>();
363
364        return temp;
365    }
366
367    /**
368     * A huge mess containing everything the app needs to know about a tracked
369     * text area. TODO(alanv): Refactor this.
370     *
371     * @author alanv@google.com (Alan Viverette)
372     */
373    public static class TrackedRect {
374        public Pix pix;
375
376        public float quality;
377
378        public RectF rect;
379
380        public String text;
381
382        public Matrix rotation;
383
384        public long firstTimestamp;
385
386        public long timestamp;
387
388        public long missingTimestamp;
389
390        private Paint paint;
391
392        public boolean queued;
393
394        public TrackedRect(Pix pix, float quality, float angle, Rect rect, long timestamp) {
395            this.pix = pix;
396            this.quality = quality;
397            this.rect = new RectF(rect);
398            this.text = null;
399            this.firstTimestamp = timestamp;
400            this.timestamp = timestamp;
401            this.missingTimestamp = -1;
402
403            rotation = new Matrix();
404            rotation.setRotate(angle, rect.exactCenterX(), rect.exactCenterY());
405
406            paint = new Paint();
407            paint.setTextAlign(Align.CENTER);
408        }
409
410        public float radius() {
411            return (rect.width() + rect.height()) / 4;
412        }
413
414        public float aspect() {
415            return (rect.width() / rect.height());
416        }
417
418        public float getOverlap(Rect other) {
419            RectF otherF = new RectF(other);
420            RectF isect = new RectF();
421
422            if (isect.setIntersect(otherF, rect)) {
423                float areaA = rect.width() * rect.height();
424                float areaB = otherF.width() * otherF.height();
425                float maxArea = Math.max(areaA, areaB);
426
427                float isectArea = isect.width() * isect.height();
428
429                return isectArea / maxArea;
430            }
431
432            return 0;
433        }
434
435        public void onDrawDebug(final Canvas canvas, final long timestamp) {
436            int color = Color.BLACK;
437            long alpha = 0xFF;
438
439            // Apply rotation matrix.
440            int saveCount = canvas.save();
441            canvas.concat(rotation);
442
443            if (missingTimestamp >= 0) {
444                color = Color.YELLOW;
445                long missing = timestamp - missingTimestamp;
446                alpha = alpha * Math.max(0, (MAX_ABSENCE - missing)) / MAX_ABSENCE;
447                color = ((int) alpha << 24) | (0xFFFFFF & color);
448            } else if (firstTimestamp >= 0) {
449                color = Color.RED;
450                long present = timestamp - firstTimestamp;
451                alpha = alpha * Math.max(0, present) / MIN_PRESENCE;
452            } else {
453                color = Color.GREEN;
454            }
455
456            color = ((int) alpha << 24) | (0xFFFFFF & color);
457
458            if (text == null || text.length() > 0) {
459                paint.setColor(color);
460                canvas.drawRect(rect, paint);
461            }
462
463            if (text != null) {
464                float cx = rect.left + rect.width() / 2.0f;
465                float cy = rect.top + rect.height() * (2.0f / 3.0f);
466
467                paint.setColor(Color.BLACK);
468                paint.setStyle(Style.FILL);
469                paint.setTextSize(2.0f * rect.height() / 3.0f);
470
471                canvas.drawText(text, cx, cy, paint);
472            }
473
474            // Restore previous matrix.
475            canvas.restoreToCount(saveCount);
476        }
477    }
478
479    public interface Listener {
480        public void onTextDetected(RectF bounds, LinkedList<TrackedRect> trackedRects);
481    }
482}