/ocr/ocrservice/jni/hydrogen/src/hydrogentextdetector.cpp

http://eyes-free.googlecode.com/ · C++ · 248 lines · 167 code · 61 blank · 20 comment · 46 complexity · 8632811c5b3426c0b9b4155b7bd02923 MD5 · raw file

  1. /*
  2. * Copyright 2011, Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <ctime>
  #include "leptonica.h"
  #include "hydrogentextdetector.h"
  #include "clusterer.h"
  #include "thresholder.h"
  #include "utilities.h"
  24. HydrogenTextDetector::HydrogenTextDetector() {
  25. pixs_ = NULL;
  26. text_areas_ = NULL;
  27. text_confs_ = NULL;
  28. }
  29. HydrogenTextDetector::~HydrogenTextDetector() {
  30. Clear();
  31. }
  32. PIXA *HydrogenTextDetector::ExtractTextRegions(PIX *pix8, PIX *edges, NUMA **pconfs) {
  33. l_int32 result;
  34. if (parameters_.debug) fprintf(stderr, "ExtractTextRegions()\n");
  35. // TODO(alanv): More error checking for invalid arguments
  36. if (!pconfs) {
  37. return NULL;
  38. }
  39. clock_t timer = clock();
  40. NUMA *connconfs;
  41. PIXA *conncomp;
  42. if (parameters_.debug) fprintf(stderr, "ConnCompValidPixa()\n");
  43. result = ConnCompValidPixa(pix8, edges, &conncomp, &connconfs, parameters_);
  44. if (parameters_.debug) fprintf(stderr, "Found %d connected components\n", result);
  45. if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
  46. PIX *temp = pixaDisplayHeatmap(conncomp, pix8->w, pix8->h, connconfs);
  47. char filename[255];
  48. sprintf(filename, "%s/%d_validsingles.jpg", parameters_.out_dir, (int) timer);
  49. pixWriteImpliedFormat(filename, temp, 85, 0);
  50. }
  51. l_int32 count = pixaGetCount(conncomp);
  52. l_uint8 *remove = (l_uint8 *) calloc(count, sizeof(l_uint8));
  53. if (parameters_.debug) fprintf(stderr, "RemoveInvalidPairs()\n");
  54. result = RemoveInvalidPairs(pix8, conncomp, connconfs, remove, parameters_);
  55. if (parameters_.debug) fprintf(stderr, "Removed %d invalid pairs\n", result);
  56. if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
  57. PIX *temp = pixaDisplayRandomCmapFiltered(conncomp, pix8->w, pix8->h, remove);
  58. char filename[255];
  59. sprintf(filename, "%s/%d_validpairs.jpg", parameters_.out_dir, (int) timer);
  60. pixWriteImpliedFormat(filename, temp, 85, 0);
  61. }
  62. NUMA *clusterconfs;
  63. PIXA *clusters;
  64. if (parameters_.debug) fprintf(stderr, "ClusterValidComponents()\n");
  65. result = ClusterValidComponents(pix8, conncomp, connconfs, remove, &clusters, &clusterconfs, parameters_);
  66. if (parameters_.debug) fprintf(stderr, "Created %d clusters\n", result);
  67. if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
  68. PIX *temp = pixaDisplayHeatmap(clusters, pix8->w, pix8->h, clusterconfs);
  69. char filename[255];
  70. sprintf(filename, "%s/%d_validclusters.jpg", parameters_.out_dir, (int) timer);
  71. pixWriteImpliedFormat(filename, temp, 85, 0);
  72. }
  73. // Merge unused components that are contained inside the detected text areas.
  74. // This typically catches punctuation and dots over i's and j's.
  75. if (parameters_.debug) fprintf(stderr, "MergePairFragments()\n");
  76. result = MergePairFragments(pix8, clusters, conncomp, remove);
  77. *pconfs = clusterconfs;
  78. pixaDestroy(&conncomp);
  79. free(remove);
  80. return clusters;
  81. }
  82. PIX *HydrogenTextDetector::DetectAndFixSkew(PIX *pixs) {
  83. l_float32 angle, conf;
  84. skew_angle_ = 0.0;
  85. if (!parameters_.skew_enabled) {
  86. if (parameters_.debug) fprintf(stderr, "Bypassed skew (skew detection is disabled)\n");
  87. return pixClone(pixs);
  88. }
  89. if (pixFindSkewSweepAndSearch(pixs, &angle, &conf, parameters_.skew_sweep_reduction,
  90. parameters_.skew_search_reduction, parameters_.skew_sweep_range,
  91. parameters_.skew_sweep_delta, parameters_.skew_search_min_delta)) {
  92. if (parameters_.debug) fprintf(stderr, "Bypassed skew (failed sweep and search)\n");
  93. return pixClone(pixs);
  94. }
  95. if (conf <= 0 || L_ABS(angle) < parameters_.skew_min_angle) {
  96. if (parameters_.debug) fprintf(stderr, "Bypassed skew (low confidence or small angle)\n");
  97. return pixClone(pixs);
  98. }
  99. if (parameters_.debug) fprintf(stderr, "Found %f degree skew with confidence %f\n", angle, conf);
  100. // The detected angle is the one required to align the text,
  101. // which is the opposite of the angle of the text itself.
  102. skew_angle_ = -angle;
  103. l_float32 deg2rad = 3.1415926535 / 180.0;
  104. l_float32 radians = angle * deg2rad;
  105. PIX *pixd = pixRotate(pixs, radians, L_ROTATE_SAMPLING, L_BRING_IN_WHITE, 0, 0);
  106. return pixd;
  107. }
  108. void HydrogenTextDetector::SetSourceImage(PIX *pixs) {
  109. pixs_ = pixClone(pixs);
  110. }
  111. void HydrogenTextDetector::DetectText() {
  112. if (parameters_.debug) fprintf(stderr, "DetectText()\n");
  113. clock_t timer = clock();
  114. PIX *pix8 = pixConvertTo8(pixs_, false);
  115. if (parameters_.debug && parameters_.out_dir[0] != '\0') {
  116. char filename[255];
  117. sprintf(filename, "%s/%d_input.jpg", parameters_.out_dir, (int) timer);
  118. pixWriteImpliedFormat(filename, pix8, 85, 0);
  119. }
  120. PIX *edges;
  121. pixEdgeAdaptiveThreshold(pix8, &edges, parameters_.edge_tile_x, parameters_.edge_tile_y,
  122. parameters_.edge_thresh, parameters_.edge_avg_thresh);
  123. if (parameters_.debug && parameters_.out_dir[0] != '\0') {
  124. char filename[255];
  125. sprintf(filename, "%s/%d_edges.jpg", parameters_.out_dir, (int) timer);
  126. PIX *edges8 = pixConvertTo8(edges, false);
  127. pixWriteImpliedFormat(filename, edges8, 85, 0);
  128. pixDestroy(&edges8);
  129. }
  130. PIX *deskew = DetectAndFixSkew(edges);
  131. pixDestroy(&edges);
  132. if (parameters_.debug && parameters_.out_dir[0] != '\0') {
  133. char filename[255];
  134. sprintf(filename, "%s/%d_deskew.jpg", parameters_.out_dir, (int) timer);
  135. PIX *deskew8 = pixConvertTo8(deskew, false);
  136. pixWriteImpliedFormat(filename, deskew8, 85, 0);
  137. pixDestroy(&deskew8);
  138. }
  139. NUMA *confs;
  140. PIXA *clusters = ExtractTextRegions(pix8, deskew, &confs);
  141. if (parameters_.debug) fprintf(stderr, "Inverting image...\n");
  142. pixInvert(deskew, deskew);
  143. NUMA *invconfs;
  144. PIXA *invclusters = ExtractTextRegions(pix8, deskew, &invconfs);
  145. pixDestroy(&deskew);
  146. pixDestroy(&pix8);
  147. pixaJoin(clusters, invclusters, 0, 0);
  148. pixaDestroy(&invclusters);
  149. numaJoin(confs, invconfs);
  150. numaDestroy(&invconfs);
  151. text_areas_ = pixaCopy(clusters, L_CLONE);
  152. pixaDestroy(&clusters);
  153. text_confs_ = numaClone(confs);
  154. numaDestroy(&confs);
  155. if (parameters_.debug && parameters_.out_dir[0] != '\0') {
  156. PIX *temp = pixaDisplayHeatmap(text_areas_, pixs_->w, pixs_->h, text_confs_);
  157. char filename[255];
  158. sprintf(filename, "%s/heatmap.jpg", parameters_.out_dir);
  159. pixWriteImpliedFormat(filename, temp, 85, 0);
  160. }
  161. }
  162. void HydrogenTextDetector::Clear() {
  163. if (text_confs_) {
  164. numaDestroy(&text_confs_);
  165. }
  166. if (text_areas_) {
  167. pixaDestroy(&text_areas_);
  168. }
  169. if (pixs_) {
  170. pixDestroy(&pixs_);
  171. }
  172. }
  173. PIXA *HydrogenTextDetector::GetTextAreas() {
  174. return pixaCopy(text_areas_, L_CLONE);
  175. }
  176. l_float32 HydrogenTextDetector::GetSkewAngle() {
  177. return skew_angle_;
  178. }
  179. NUMA *HydrogenTextDetector::GetTextConfs() {
  180. return numaClone(text_confs_);
  181. }
  182. PIX *HydrogenTextDetector::GetSourceImage() {
  183. return pixClone(pixs_);
  184. }
  185. HydrogenTextDetector::TextDetectorParameters *HydrogenTextDetector::GetMutableParameters() {
  186. return &parameters_;
  187. }