PageRenderTime 53ms CodeModel.GetById 16ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/ocr/ocrservice/jni/hydrogen/src/validator.cpp

http://eyes-free.googlecode.com/
C++ | 382 lines | 235 code | 90 blank | 57 comment | 29 complexity | 65edaa423730d4348d5119e4d42b7a01 MD5 | raw file
  1/*
  2 * Copyright 2011, Google Inc.
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *     http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 */
 16
 17#include <math.h>
 18
 19#include "leptonica.h"
 20#include "validator.h"
 21#include "thresholder.h"
 22#include "utilities.h"
 23#include "hydrogentextdetector.h"
 24
 25l_int32 BBoxHDist(BOX *b1, BOX *b2) {
 26  return L_MAX(b1->x, b2->x) - L_MIN(b1->x + b1->w, b2->x + b2->w);
 27}
 28
 29l_int32 BBoxVDist(BOX *b1, BOX *b2) {
 30  return L_MAX(b1->y, b2->y) - L_MIN(b1->y + b1->h, b2->y + b2->h);
 31}
 32
 33l_float32 RelativeDiff(l_int32 v1, l_int32 v2) {
 34  return L_ABS(v1 - v2) / (L_MIN(v1, v2) + 1.0);
 35}
 36
 37#define OLDPAIR_MIN_HPAIR_RATIO 0.5
 38#define OLDPAIR_MIN_WPAIR_RATIO 0.1
 39#define OLDPAIR_MAX_HDIST_RATIO 3.0
 40#define OLDPAIR_MAX_VDIST_RATIO 0.5
 41
 42/**
 43 * Test whether b1 and b2 are close enough to be a character pair.
 44 */
 45bool ValidatePairOld(BOX *b1, BOX *b2) {
 46  l_int32 max_w = L_MAX(b1->w, b2->w);
 47  l_int32 centerx1 = b1->x + b1->w / 2;
 48  l_int32 centerx2 = b2->x + b2->w / 2;
 49  l_int32 h_dist = L_ABS(centerx1 - centerx2);
 50
 51  /* Horizontal distance between centers is
 52   * less than twice the wider character */
 53  if (h_dist > max_w * OLDPAIR_MAX_HDIST_RATIO)
 54    return false;
 55
 56  l_int32 max_h = L_MAX(b1->h, b2->h);
 57  l_int32 centery1 = b1->y + b1->h / 2;
 58  l_int32 centery2 = b2->y + b2->h / 2;
 59  l_int32 v_dist = L_ABS(centery1 - centery2);
 60
 61  /* Vertical distance between centers is
 62   less than 50% of the taller character */
 63  if (v_dist > max_h * OLDPAIR_MAX_VDIST_RATIO)
 64    return false;
 65
 66  l_int32 min_h = L_MIN(b1->h, b2->h);
 67  l_float32 h_ratio = min_h / (max_h + 1.0);
 68
 69  /* Height ratio is between 0.5 and 2 */
 70  if (h_ratio < OLDPAIR_MIN_HPAIR_RATIO)
 71    return false;
 72
 73  l_int32 min_w = L_MIN(b1->w, b2->w);
 74  l_float32 w_ratio = min_w / (max_w + 1.0);
 75
 76  /* Width ratio is between 0.1 and 10 */
 77  if (w_ratio < OLDPAIR_MIN_WPAIR_RATIO)
 78    return false;
 79
 80  return true;
 81}
 82
 83l_float32 ComputeFDR(PIX *cc8) {
 84  l_float32 fdr;
 85
 86  pixGetFisherThresh(cc8, 0.0, &fdr, NULL);
 87
 88  return fdr;
 89}
 90
 91l_float32 ComputeGradientEnergy(PIX *cc8, PIX *cc) {
 92  l_float32 energy;
 93
 94  pixGradientEnergy(cc8, cc, &energy);
 95
 96  return energy;
 97}
 98
 99l_float32 ComputeCCDensity(PIX *pix) {
100  l_int32 area = pix->w * pix->h;
101  l_int32 pixel_count;
102
103  pixCountPixels(pix, &pixel_count, NULL);
104
105  return pixel_count / (l_float32) area;
106}
107
108l_float32 ComputeCCEdgeMax(PIX *pix8) {
109  l_int32 max;
110  l_int32 avg;
111
112  pixEdgeMax(pix8, &max, &avg);
113
114  return (l_float32) max;
115}
116
117l_float32 ComputeSingletonConfidence(PIX *pix, BOX *box, PIX *pix8) {
118  l_float32 aspect_ratio = box->w / (l_float32) box->h;
119  l_float32 density = ComputeCCDensity(pix);
120  l_float32 gradient = ComputeGradientEnergy(pix8, pix);
121  l_float32 edgemax = ComputeCCEdgeMax(pix8);
122
123  /* Compute features for confidence */
124  l_float32 features[7];
125  features[0] = 1.0;
126  features[1] = aspect_ratio;
127  features[2] = aspect_ratio * aspect_ratio;
128  features[3] = gradient;
129  features[4] = aspect_ratio / density;
130  features[5] = edgemax;
131
132  l_float32 beta[5];
133  beta[0] = -3.099;
134  beta[1] = 1.244;
135  beta[2] = -0.1142;
136  beta[3] = 39.86;
137  beta[4] = -0.4005;
138  beta[5] = 0;
139
140  l_float32 confidence = 0.0;
141  for (int i = 0; i < 6; i++) {
142    confidence += features[i] * beta[i];
143  }
144
145  return confidence;
146}
147
148bool ValidateSingleton(PIX *pix, BOX *box, PIX *pix8, l_float32 *pconf,
149                       HydrogenTextDetector::TextDetectorParameters &params) {
150  l_float32 aspect_ratio = box->w / (l_float32) box->h;
151  l_float32 density = ComputeCCDensity(pix);
152
153  *pconf = 0.0;
154
155  /* Aspect ratio */
156  if (aspect_ratio > params.single_max_aspect)
157    return false;
158
159  if (aspect_ratio < params.single_min_aspect)
160    return false;
161
162  /* Pixel density */
163  if (density < params.single_min_density)
164    return false;
165
166  l_int32 area = box->w * box->h;
167
168  /* Area */
169  if (area < params.single_min_area)
170    return false;
171
172  *pconf = 1.0; //ComputeSingletonConfidence(pix, box, pix8);
173
174  return true;
175}
176
177/**
178 * Test whether b1 and b2 are close enough to be a character pair.
179 */
180bool ValidatePair(BOX *b1, BOX *b2, l_float32 *pconf,
181                  HydrogenTextDetector::TextDetectorParameters &params) {
182  *pconf = 0.0;
183
184  l_int32 max_h = L_MAX(b1->h, b2->h);
185  l_int32 h_dist = BBoxHDist(b1, b2);
186  l_int32 v_dist = BBoxVDist(b1, b2);
187  l_float32 h_ratio = RelativeDiff(b1->h, b2->h);
188  l_int32 d1 = L_MAX(b1->h, b1->w);
189  l_int32 d2 = L_MAX(b2->h, b2->w);
190  l_float32 d_ratio = RelativeDiff(d1, d2);
191
192  /* Horizontal spacing less than 2x taller edge */
193  if (h_dist > params.pair_h_dist_ratio * max_h)
194    return false;
195
196  /* Must share at least 0.25x the larger vertical edge */
197  if (v_dist > 0 || L_ABS(v_dist) < max_h * params.pair_h_shared)
198    return false;
199
200  /* Heights must be at least 2x tolerance */
201  if (h_ratio > params.pair_h_ratio)
202    return false;
203
204  /* Maximum dimensions must be within 3x tolerance */
205  if (d_ratio > params.pair_d_ratio)
206    return false;
207
208  // TODO(alanv): Does this need to return a confidence value?
209  *pconf = 1.0;
210
211  return true;
212}
213
214l_float32 ComputePairNormalizedOverlapArea(BOX *b1, BOX *b2) {
215  BOX *overlap = boxOverlapRegion(b1, b2);
216
217  if (!overlap || overlap->w == 0.0 || overlap->h == 0.0) return 0.0;
218
219  l_float32 area0 = overlap->w * overlap->h;
220  l_float32 area1 = b1->w * b1->h;
221  l_float32 area2 = b2->w * b2->h;
222  l_float32 oarea = 2.0 * area0 / (area1 + area2);
223
224  return oarea;
225}
226
227l_float32 ComputePairNormalizedBaselineDistance(BOX *b1, BOX *b2) {
228  l_float32 dy = (b1->y + b1->h) - (b2->y  + b2->h);
229  l_float32 vdist = 2.0 * L_ABS(dy) / (b1->h + b2->h);
230
231  return vdist;
232}
233
234l_float32 ComputePairNormalizedToplineDistance(BOX *b1, BOX *b2) {
235  l_float32 dy = b1->y - b2->y;
236  l_float32 vdist = 2.0 * L_ABS(dy) / (b1->h + b2->h);
237
238  return vdist;
239}
240
241l_float32 ComputePairNormalizedHorizontalDistance(BOX *b1, BOX *b2) {
242  l_float32 dx = (b1->x - b2->x) + (b1->w - b2->w) / 2.0;
243  l_float32 hdist = 2.0 * L_ABS(dx) / (b1->w + b2->w);
244
245  return hdist;
246}
247
248l_float32 ComputePairAreaRatio(BOX *b1, BOX *b2) {
249  l_float32 area1 = b1->w * b1->h;
250  l_float32 area2 = b2->w * b2->h;
251  l_float32 ratio = L_MIN(area1, area2) / L_MAX(area1, area2);
252
253  return ratio;
254}
255
256l_float32 ComputePairWidthRatio(BOX *b1, BOX *b2) {
257  l_float32 ratio = L_MIN(b1->w, b2->w) / L_MAX(b1->w, b2->w);
258
259  return ratio;
260}
261
262l_float32 ComputePairHeightRatio(BOX *b1, BOX *b2) {
263  l_float32 ratio = L_MIN(b1->h, b2->h) / L_MAX(b1->h, b2->h);
264
265  return ratio;
266}
267
268l_float32 ComputePairContainmentCheck(BOX *b1, BOX *b2) {
269  l_int32 contains1, contains2;
270
271  boxContains(b1, b2, &contains1);
272  boxContains(b2, b1, &contains2);
273
274  l_float32 contains = (l_float32) (contains1 || contains2);
275
276  return contains;
277}
278
279l_float32 ComputePairConfidence(BOX *b1, BOX *b2) {
280  l_float32 features[9];
281  features[0] = 1.0;
282  features[1] = ComputePairNormalizedOverlapArea(b1, b2);
283  features[2] = ComputePairNormalizedBaselineDistance(b1, b2);
284  features[3] = ComputePairNormalizedToplineDistance(b1, b2);
285  features[4] = ComputePairNormalizedHorizontalDistance(b1, b2);
286  features[5] = ComputePairAreaRatio(b1, b2);
287  features[6] = ComputePairWidthRatio(b1, b2);
288  features[7] = ComputePairHeightRatio(b1, b2);
289  features[8] = ComputePairContainmentCheck(b1, b2);
290
291  l_float32 beta[9];
292  beta[0] = 3.987;
293  beta[1] = -9.681;
294  beta[2] = -5.804;
295  beta[3] = -4.857;
296  beta[4] = -2.906;
297  beta[5] = -1.813;
298  beta[6] = 3.481;
299  beta[7] = 3.983;
300  beta[8] = -39.24;
301
302  l_float32 confidence = 0.0;
303  for (int i = 0; i < 5; i++) {
304    confidence += features[i] * beta[i];
305  }
306
307  return confidence;
308}
309
310/**
311 * Test whether b1 and b2 are close enough to be clustered. More relaxed constraints than ValidatePair().
312 */
313bool ValidateClusterPair(BOX *b1, BOX *b2, bool *too_far, l_float32 *pconf,
314                         HydrogenTextDetector::TextDetectorParameters &params) {
315  *pconf = 0.0;
316
317  l_int32 max_d = L_MAX(b1->w, b1->h);
318  l_float32 h_ratio = RelativeDiff(b1->h, b2->h);
319
320  // If we're already too far out, quit
321  if (b2->x > b1->x + b1->w + params.cluster_width_spacing * max_d) {
322    *too_far = true;
323
324    return false;
325  }
326
327  *too_far = false;
328
329  // Must share at least 0.25x the larger vertical edge
330  //l_int32 v_dist = BBoxVDist(b1, b2);
331  //if (v_dist > 0 || L_ABS(v_dist) < L_MIN(min_h, max_h) * PAIR_H_SHARED)
332  //  return false;
333
334  // i and j must share at least half an edge
335  if (b2->y + b2->h * params.cluster_shared_edge < b1->y)
336    return false;
337  if (b1->y + b1->h * params.cluster_shared_edge < b2->y)
338    return false;
339
340  // Heights must be at least 2x tolerance
341  if (h_ratio > params.pair_h_ratio)
342    return false;
343
344  *pconf = 1.0; //ComputePairConfidence(b1, b2);
345
346  return true;
347}
348
349/**
350 * Test whether a finalized cluster is valid.
351 */
352bool ValidateCluster(PIX *pix8, PIXA *pixa, BOX *box, l_float32 *pconf,
353                     HydrogenTextDetector::TextDetectorParameters &params) {
354  *pconf = 0.0;
355
356  l_float32 aspect = box->w / (l_float32) box->h;
357  l_int32 count = pixaGetCount(pixa);
358  l_float32 fdr = ComputeFDR(pix8);
359
360  if (box->h < 15)
361    return false;
362
363  if (aspect < params.cluster_min_aspect)
364    return false;
365
366  if (count < params.cluster_min_blobs)
367    return false;
368
369  if (fdr < params.cluster_min_fdr)
370    return false;
371/*
372  l_int32 edge_max, edge_avg;
373  pixEdgeMax(pix8, &edge_max, &edge_avg);
374
375  if (edge_max < params.cluster_min_edge || edge_avg < params.cluster_min_edge_avg)
376    return false;
377*/
378  // TODO(alanv): Combine all of these into a confidence score, higher = better
379  *pconf = log(fdr); //log(fdr * edge_max * edge_avg);
380
381  return true;
382}