/ocr/ocrservice/jni/hydrogen/src/validator.cpp

http://eyes-free.googlecode.com/ · C++ · 382 lines · 235 code · 90 blank · 57 comment · 29 complexity · 65edaa423730d4348d5119e4d42b7a01 MD5 · raw file

  1. /*
  2. * Copyright 2011, Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <math.h>
  17. #include "leptonica.h"
  18. #include "validator.h"
  19. #include "thresholder.h"
  20. #include "utilities.h"
  21. #include "hydrogentextdetector.h"
  22. l_int32 BBoxHDist(BOX *b1, BOX *b2) {
  23. return L_MAX(b1->x, b2->x) - L_MIN(b1->x + b1->w, b2->x + b2->w);
  24. }
  25. l_int32 BBoxVDist(BOX *b1, BOX *b2) {
  26. return L_MAX(b1->y, b2->y) - L_MIN(b1->y + b1->h, b2->y + b2->h);
  27. }
  28. l_float32 RelativeDiff(l_int32 v1, l_int32 v2) {
  29. return L_ABS(v1 - v2) / (L_MIN(v1, v2) + 1.0);
  30. }
  31. #define OLDPAIR_MIN_HPAIR_RATIO 0.5
  32. #define OLDPAIR_MIN_WPAIR_RATIO 0.1
  33. #define OLDPAIR_MAX_HDIST_RATIO 3.0
  34. #define OLDPAIR_MAX_VDIST_RATIO 0.5
  35. /**
  36. * Test whether b1 and b2 are close enough to be a character pair.
  37. */
  38. bool ValidatePairOld(BOX *b1, BOX *b2) {
  39. l_int32 max_w = L_MAX(b1->w, b2->w);
  40. l_int32 centerx1 = b1->x + b1->w / 2;
  41. l_int32 centerx2 = b2->x + b2->w / 2;
  42. l_int32 h_dist = L_ABS(centerx1 - centerx2);
  43. /* Horizontal distance between centers is
  44. * less than twice the wider character */
  45. if (h_dist > max_w * OLDPAIR_MAX_HDIST_RATIO)
  46. return false;
  47. l_int32 max_h = L_MAX(b1->h, b2->h);
  48. l_int32 centery1 = b1->y + b1->h / 2;
  49. l_int32 centery2 = b2->y + b2->h / 2;
  50. l_int32 v_dist = L_ABS(centery1 - centery2);
  51. /* Vertical distance between centers is
  52. less than 50% of the taller character */
  53. if (v_dist > max_h * OLDPAIR_MAX_VDIST_RATIO)
  54. return false;
  55. l_int32 min_h = L_MIN(b1->h, b2->h);
  56. l_float32 h_ratio = min_h / (max_h + 1.0);
  57. /* Height ratio is between 0.5 and 2 */
  58. if (h_ratio < OLDPAIR_MIN_HPAIR_RATIO)
  59. return false;
  60. l_int32 min_w = L_MIN(b1->w, b2->w);
  61. l_float32 w_ratio = min_w / (max_w + 1.0);
  62. /* Width ratio is between 0.1 and 10 */
  63. if (w_ratio < OLDPAIR_MIN_WPAIR_RATIO)
  64. return false;
  65. return true;
  66. }
  67. l_float32 ComputeFDR(PIX *cc8) {
  68. l_float32 fdr;
  69. pixGetFisherThresh(cc8, 0.0, &fdr, NULL);
  70. return fdr;
  71. }
  72. l_float32 ComputeGradientEnergy(PIX *cc8, PIX *cc) {
  73. l_float32 energy;
  74. pixGradientEnergy(cc8, cc, &energy);
  75. return energy;
  76. }
  77. l_float32 ComputeCCDensity(PIX *pix) {
  78. l_int32 area = pix->w * pix->h;
  79. l_int32 pixel_count;
  80. pixCountPixels(pix, &pixel_count, NULL);
  81. return pixel_count / (l_float32) area;
  82. }
  83. l_float32 ComputeCCEdgeMax(PIX *pix8) {
  84. l_int32 max;
  85. l_int32 avg;
  86. pixEdgeMax(pix8, &max, &avg);
  87. return (l_float32) max;
  88. }
  89. l_float32 ComputeSingletonConfidence(PIX *pix, BOX *box, PIX *pix8) {
  90. l_float32 aspect_ratio = box->w / (l_float32) box->h;
  91. l_float32 density = ComputeCCDensity(pix);
  92. l_float32 gradient = ComputeGradientEnergy(pix8, pix);
  93. l_float32 edgemax = ComputeCCEdgeMax(pix8);
  94. /* Compute features for confidence */
  95. l_float32 features[7];
  96. features[0] = 1.0;
  97. features[1] = aspect_ratio;
  98. features[2] = aspect_ratio * aspect_ratio;
  99. features[3] = gradient;
  100. features[4] = aspect_ratio / density;
  101. features[5] = edgemax;
  102. l_float32 beta[5];
  103. beta[0] = -3.099;
  104. beta[1] = 1.244;
  105. beta[2] = -0.1142;
  106. beta[3] = 39.86;
  107. beta[4] = -0.4005;
  108. beta[5] = 0;
  109. l_float32 confidence = 0.0;
  110. for (int i = 0; i < 6; i++) {
  111. confidence += features[i] * beta[i];
  112. }
  113. return confidence;
  114. }
  115. bool ValidateSingleton(PIX *pix, BOX *box, PIX *pix8, l_float32 *pconf,
  116. HydrogenTextDetector::TextDetectorParameters &params) {
  117. l_float32 aspect_ratio = box->w / (l_float32) box->h;
  118. l_float32 density = ComputeCCDensity(pix);
  119. *pconf = 0.0;
  120. /* Aspect ratio */
  121. if (aspect_ratio > params.single_max_aspect)
  122. return false;
  123. if (aspect_ratio < params.single_min_aspect)
  124. return false;
  125. /* Pixel density */
  126. if (density < params.single_min_density)
  127. return false;
  128. l_int32 area = box->w * box->h;
  129. /* Area */
  130. if (area < params.single_min_area)
  131. return false;
  132. *pconf = 1.0; //ComputeSingletonConfidence(pix, box, pix8);
  133. return true;
  134. }
  135. /**
  136. * Test whether b1 and b2 are close enough to be a character pair.
  137. */
  138. bool ValidatePair(BOX *b1, BOX *b2, l_float32 *pconf,
  139. HydrogenTextDetector::TextDetectorParameters &params) {
  140. *pconf = 0.0;
  141. l_int32 max_h = L_MAX(b1->h, b2->h);
  142. l_int32 h_dist = BBoxHDist(b1, b2);
  143. l_int32 v_dist = BBoxVDist(b1, b2);
  144. l_float32 h_ratio = RelativeDiff(b1->h, b2->h);
  145. l_int32 d1 = L_MAX(b1->h, b1->w);
  146. l_int32 d2 = L_MAX(b2->h, b2->w);
  147. l_float32 d_ratio = RelativeDiff(d1, d2);
  148. /* Horizontal spacing less than 2x taller edge */
  149. if (h_dist > params.pair_h_dist_ratio * max_h)
  150. return false;
  151. /* Must share at least 0.25x the larger vertical edge */
  152. if (v_dist > 0 || L_ABS(v_dist) < max_h * params.pair_h_shared)
  153. return false;
  154. /* Heights must be at least 2x tolerance */
  155. if (h_ratio > params.pair_h_ratio)
  156. return false;
  157. /* Maximum dimensions must be within 3x tolerance */
  158. if (d_ratio > params.pair_d_ratio)
  159. return false;
  160. // TODO(alanv): Does this need to return a confidence value?
  161. *pconf = 1.0;
  162. return true;
  163. }
  164. l_float32 ComputePairNormalizedOverlapArea(BOX *b1, BOX *b2) {
  165. BOX *overlap = boxOverlapRegion(b1, b2);
  166. if (!overlap || overlap->w == 0.0 || overlap->h == 0.0) return 0.0;
  167. l_float32 area0 = overlap->w * overlap->h;
  168. l_float32 area1 = b1->w * b1->h;
  169. l_float32 area2 = b2->w * b2->h;
  170. l_float32 oarea = 2.0 * area0 / (area1 + area2);
  171. return oarea;
  172. }
  173. l_float32 ComputePairNormalizedBaselineDistance(BOX *b1, BOX *b2) {
  174. l_float32 dy = (b1->y + b1->h) - (b2->y + b2->h);
  175. l_float32 vdist = 2.0 * L_ABS(dy) / (b1->h + b2->h);
  176. return vdist;
  177. }
  178. l_float32 ComputePairNormalizedToplineDistance(BOX *b1, BOX *b2) {
  179. l_float32 dy = b1->y - b2->y;
  180. l_float32 vdist = 2.0 * L_ABS(dy) / (b1->h + b2->h);
  181. return vdist;
  182. }
  183. l_float32 ComputePairNormalizedHorizontalDistance(BOX *b1, BOX *b2) {
  184. l_float32 dx = (b1->x - b2->x) + (b1->w - b2->w) / 2.0;
  185. l_float32 hdist = 2.0 * L_ABS(dx) / (b1->w + b2->w);
  186. return hdist;
  187. }
  188. l_float32 ComputePairAreaRatio(BOX *b1, BOX *b2) {
  189. l_float32 area1 = b1->w * b1->h;
  190. l_float32 area2 = b2->w * b2->h;
  191. l_float32 ratio = L_MIN(area1, area2) / L_MAX(area1, area2);
  192. return ratio;
  193. }
  194. l_float32 ComputePairWidthRatio(BOX *b1, BOX *b2) {
  195. l_float32 ratio = L_MIN(b1->w, b2->w) / L_MAX(b1->w, b2->w);
  196. return ratio;
  197. }
  198. l_float32 ComputePairHeightRatio(BOX *b1, BOX *b2) {
  199. l_float32 ratio = L_MIN(b1->h, b2->h) / L_MAX(b1->h, b2->h);
  200. return ratio;
  201. }
  202. l_float32 ComputePairContainmentCheck(BOX *b1, BOX *b2) {
  203. l_int32 contains1, contains2;
  204. boxContains(b1, b2, &contains1);
  205. boxContains(b2, b1, &contains2);
  206. l_float32 contains = (l_float32) (contains1 || contains2);
  207. return contains;
  208. }
  209. l_float32 ComputePairConfidence(BOX *b1, BOX *b2) {
  210. l_float32 features[9];
  211. features[0] = 1.0;
  212. features[1] = ComputePairNormalizedOverlapArea(b1, b2);
  213. features[2] = ComputePairNormalizedBaselineDistance(b1, b2);
  214. features[3] = ComputePairNormalizedToplineDistance(b1, b2);
  215. features[4] = ComputePairNormalizedHorizontalDistance(b1, b2);
  216. features[5] = ComputePairAreaRatio(b1, b2);
  217. features[6] = ComputePairWidthRatio(b1, b2);
  218. features[7] = ComputePairHeightRatio(b1, b2);
  219. features[8] = ComputePairContainmentCheck(b1, b2);
  220. l_float32 beta[9];
  221. beta[0] = 3.987;
  222. beta[1] = -9.681;
  223. beta[2] = -5.804;
  224. beta[3] = -4.857;
  225. beta[4] = -2.906;
  226. beta[5] = -1.813;
  227. beta[6] = 3.481;
  228. beta[7] = 3.983;
  229. beta[8] = -39.24;
  230. l_float32 confidence = 0.0;
  231. for (int i = 0; i < 5; i++) {
  232. confidence += features[i] * beta[i];
  233. }
  234. return confidence;
  235. }
  236. /**
  237. * Test whether b1 and b2 are close enough to be clustered. More relaxed constraints than ValidatePair().
  238. */
  239. bool ValidateClusterPair(BOX *b1, BOX *b2, bool *too_far, l_float32 *pconf,
  240. HydrogenTextDetector::TextDetectorParameters &params) {
  241. *pconf = 0.0;
  242. l_int32 max_d = L_MAX(b1->w, b1->h);
  243. l_float32 h_ratio = RelativeDiff(b1->h, b2->h);
  244. // If we're already too far out, quit
  245. if (b2->x > b1->x + b1->w + params.cluster_width_spacing * max_d) {
  246. *too_far = true;
  247. return false;
  248. }
  249. *too_far = false;
  250. // Must share at least 0.25x the larger vertical edge
  251. //l_int32 v_dist = BBoxVDist(b1, b2);
  252. //if (v_dist > 0 || L_ABS(v_dist) < L_MIN(min_h, max_h) * PAIR_H_SHARED)
  253. // return false;
  254. // i and j must share at least half an edge
  255. if (b2->y + b2->h * params.cluster_shared_edge < b1->y)
  256. return false;
  257. if (b1->y + b1->h * params.cluster_shared_edge < b2->y)
  258. return false;
  259. // Heights must be at least 2x tolerance
  260. if (h_ratio > params.pair_h_ratio)
  261. return false;
  262. *pconf = 1.0; //ComputePairConfidence(b1, b2);
  263. return true;
  264. }
  265. /**
  266. * Test whether a finalized cluster is valid.
  267. */
  268. bool ValidateCluster(PIX *pix8, PIXA *pixa, BOX *box, l_float32 *pconf,
  269. HydrogenTextDetector::TextDetectorParameters &params) {
  270. *pconf = 0.0;
  271. l_float32 aspect = box->w / (l_float32) box->h;
  272. l_int32 count = pixaGetCount(pixa);
  273. l_float32 fdr = ComputeFDR(pix8);
  274. if (box->h < 15)
  275. return false;
  276. if (aspect < params.cluster_min_aspect)
  277. return false;
  278. if (count < params.cluster_min_blobs)
  279. return false;
  280. if (fdr < params.cluster_min_fdr)
  281. return false;
  282. /*
  283. l_int32 edge_max, edge_avg;
  284. pixEdgeMax(pix8, &edge_max, &edge_avg);
  285. if (edge_max < params.cluster_min_edge || edge_avg < params.cluster_min_edge_avg)
  286. return false;
  287. */
  288. // TODO(alanv): Combine all of these into a confidence score, higher = better
  289. *pconf = log(fdr); //log(fdr * edge_max * edge_avg);
  290. return true;
  291. }