PageRenderTime 68ms CodeModel.GetById 12ms app.highlight 52ms RepoModel.GetById 1ms app.codeStats 0ms

/ocr/ocrservice/jni/hydrogen/src/hydrogentextdetector.cpp

http://eyes-free.googlecode.com/
C++ | 248 lines | 167 code | 61 blank | 20 comment | 46 complexity | 8632811c5b3426c0b9b4155b7bd02923 MD5 | raw file
  1/*
  2 * Copyright 2011, Google Inc.
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *     http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 */
 16
 17#include <ctime>
 18#include <cstring>
 19#include <cstdlib>
 20
 21#include "leptonica.h"
 22#include "hydrogentextdetector.h"
 23#include "clusterer.h"
 24#include "thresholder.h"
 25#include "utilities.h"
 26
 27HydrogenTextDetector::HydrogenTextDetector() {
 28  pixs_ = NULL;
 29  text_areas_ = NULL;
 30  text_confs_ = NULL;
 31}
 32
 33HydrogenTextDetector::~HydrogenTextDetector() {
 34  Clear();
 35}
 36
 37PIXA *HydrogenTextDetector::ExtractTextRegions(PIX *pix8, PIX *edges, NUMA **pconfs) {
 38  l_int32 result;
 39
 40  if (parameters_.debug) fprintf(stderr, "ExtractTextRegions()\n");
 41
 42  // TODO(alanv): More error checking for invalid arguments
 43  if (!pconfs) {
 44    return NULL;
 45  }
 46
 47  clock_t timer = clock();
 48  NUMA *connconfs;
 49  PIXA *conncomp;
 50
 51  if (parameters_.debug) fprintf(stderr, "ConnCompValidPixa()\n");
 52  result = ConnCompValidPixa(pix8, edges, &conncomp, &connconfs, parameters_);
 53
 54  if (parameters_.debug) fprintf(stderr, "Found %d connected components\n", result);
 55
 56  if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
 57    PIX *temp = pixaDisplayHeatmap(conncomp, pix8->w, pix8->h, connconfs);
 58    char filename[255];
 59    sprintf(filename, "%s/%d_validsingles.jpg", parameters_.out_dir, (int) timer);
 60    pixWriteImpliedFormat(filename, temp, 85, 0);
 61  }
 62
 63  l_int32 count = pixaGetCount(conncomp);
 64  l_uint8 *remove = (l_uint8 *) calloc(count, sizeof(l_uint8));
 65
 66  if (parameters_.debug) fprintf(stderr, "RemoveInvalidPairs()\n");
 67  result = RemoveInvalidPairs(pix8, conncomp, connconfs, remove, parameters_);
 68
 69  if (parameters_.debug) fprintf(stderr, "Removed %d invalid pairs\n", result);
 70
 71  if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
 72    PIX *temp = pixaDisplayRandomCmapFiltered(conncomp, pix8->w, pix8->h, remove);
 73    char filename[255];
 74    sprintf(filename, "%s/%d_validpairs.jpg", parameters_.out_dir, (int) timer);
 75    pixWriteImpliedFormat(filename, temp, 85, 0);
 76  }
 77
 78  NUMA *clusterconfs;
 79  PIXA *clusters;
 80  if (parameters_.debug) fprintf(stderr, "ClusterValidComponents()\n");
 81  result = ClusterValidComponents(pix8, conncomp, connconfs, remove, &clusters, &clusterconfs, parameters_);
 82
 83  if (parameters_.debug) fprintf(stderr, "Created %d clusters\n", result);
 84
 85  if (parameters_.debug && parameters_.out_dir[0] != '\0' && result > 0) {
 86    PIX *temp = pixaDisplayHeatmap(clusters, pix8->w, pix8->h, clusterconfs);
 87    char filename[255];
 88    sprintf(filename, "%s/%d_validclusters.jpg", parameters_.out_dir, (int) timer);
 89    pixWriteImpliedFormat(filename, temp, 85, 0);
 90  }
 91
 92  // Merge unused components that are contained inside the detected text areas.
 93  // This typically catches punctuation and dots over i's and j's.
 94  if (parameters_.debug) fprintf(stderr, "MergePairFragments()\n");
 95  result = MergePairFragments(pix8, clusters, conncomp, remove);
 96
 97  *pconfs = clusterconfs;
 98
 99  pixaDestroy(&conncomp);
100  free(remove);
101
102  return clusters;
103}
104
105PIX *HydrogenTextDetector::DetectAndFixSkew(PIX *pixs) {
106  l_float32 angle, conf;
107
108  skew_angle_ = 0.0;
109
110  if (!parameters_.skew_enabled) {
111    if (parameters_.debug) fprintf(stderr, "Bypassed skew (skew detection is disabled)\n");
112
113    return pixClone(pixs);
114  }
115
116  if (pixFindSkewSweepAndSearch(pixs, &angle, &conf, parameters_.skew_sweep_reduction,
117                                parameters_.skew_search_reduction, parameters_.skew_sweep_range,
118                                parameters_.skew_sweep_delta, parameters_.skew_search_min_delta)) {
119    if (parameters_.debug) fprintf(stderr, "Bypassed skew (failed sweep and search)\n");
120
121    return pixClone(pixs);
122  }
123
124  if (conf <= 0 || L_ABS(angle) < parameters_.skew_min_angle) {
125    if (parameters_.debug) fprintf(stderr, "Bypassed skew (low confidence or small angle)\n");
126
127    return pixClone(pixs);
128  }
129
130  if (parameters_.debug) fprintf(stderr, "Found %f degree skew with confidence %f\n", angle, conf);
131
132  // The detected angle is the one required to align the text,
133  // which is the opposite of the angle of the text itself.
134  skew_angle_ = -angle;
135
136  l_float32 deg2rad = 3.1415926535 / 180.0;
137  l_float32 radians = angle * deg2rad;
138
139  PIX *pixd = pixRotate(pixs, radians, L_ROTATE_SAMPLING, L_BRING_IN_WHITE, 0, 0);
140
141  return pixd;
142}
143
144void HydrogenTextDetector::SetSourceImage(PIX *pixs) {
145  pixs_ = pixClone(pixs);
146}
147
148void HydrogenTextDetector::DetectText() {
149  if (parameters_.debug) fprintf(stderr, "DetectText()\n");
150
151  clock_t timer = clock();
152
153  PIX *pix8 = pixConvertTo8(pixs_, false);
154
155  if (parameters_.debug && parameters_.out_dir[0] != '\0') {
156    char filename[255];
157    sprintf(filename, "%s/%d_input.jpg", parameters_.out_dir, (int) timer);
158    pixWriteImpliedFormat(filename, pix8, 85, 0);
159  }
160
161  PIX *edges;
162  pixEdgeAdaptiveThreshold(pix8, &edges, parameters_.edge_tile_x, parameters_.edge_tile_y,
163                           parameters_.edge_thresh, parameters_.edge_avg_thresh);
164
165  if (parameters_.debug && parameters_.out_dir[0] != '\0') {
166    char filename[255];
167    sprintf(filename, "%s/%d_edges.jpg", parameters_.out_dir, (int) timer);
168    PIX *edges8 = pixConvertTo8(edges, false);
169    pixWriteImpliedFormat(filename, edges8, 85, 0);
170    pixDestroy(&edges8);
171  }
172
173  PIX *deskew = DetectAndFixSkew(edges);
174  pixDestroy(&edges);
175
176  if (parameters_.debug && parameters_.out_dir[0] != '\0') {
177    char filename[255];
178    sprintf(filename, "%s/%d_deskew.jpg", parameters_.out_dir, (int) timer);
179
180    PIX *deskew8 = pixConvertTo8(deskew, false);
181    pixWriteImpliedFormat(filename, deskew8, 85, 0);
182    pixDestroy(&deskew8);
183  }
184
185  NUMA *confs;
186  PIXA *clusters = ExtractTextRegions(pix8, deskew, &confs);
187
188  if (parameters_.debug) fprintf(stderr, "Inverting image...\n");
189  pixInvert(deskew, deskew);
190
191  NUMA *invconfs;
192  PIXA *invclusters = ExtractTextRegions(pix8, deskew, &invconfs);
193  pixDestroy(&deskew);
194  pixDestroy(&pix8);
195
196  pixaJoin(clusters, invclusters, 0, 0);
197  pixaDestroy(&invclusters);
198
199  numaJoin(confs, invconfs);
200  numaDestroy(&invconfs);
201
202  text_areas_ = pixaCopy(clusters, L_CLONE);
203  pixaDestroy(&clusters);
204
205  text_confs_ = numaClone(confs);
206  numaDestroy(&confs);
207
208  if (parameters_.debug && parameters_.out_dir[0] != '\0') {
209    PIX *temp = pixaDisplayHeatmap(text_areas_, pixs_->w, pixs_->h, text_confs_);
210    char filename[255];
211    sprintf(filename, "%s/heatmap.jpg", parameters_.out_dir);
212    pixWriteImpliedFormat(filename, temp, 85, 0);
213  }
214}
215
216void HydrogenTextDetector::Clear() {
217  if (text_confs_) {
218    numaDestroy(&text_confs_);
219  }
220
221  if (text_areas_) {
222    pixaDestroy(&text_areas_);
223  }
224
225  if (pixs_) {
226    pixDestroy(&pixs_);
227  }
228}
229
230PIXA *HydrogenTextDetector::GetTextAreas() {
231  return pixaCopy(text_areas_, L_CLONE);
232}
233
234l_float32 HydrogenTextDetector::GetSkewAngle() {
235  return skew_angle_;
236}
237
238NUMA *HydrogenTextDetector::GetTextConfs() {
239  return numaClone(text_confs_);
240}
241
242PIX *HydrogenTextDetector::GetSourceImage() {
243  return pixClone(pixs_);
244}
245
246HydrogenTextDetector::TextDetectorParameters *HydrogenTextDetector::GetMutableParameters() {
247  return &parameters_;
248}