/ocr/ocrservice/jni/hydrogen/src/clusterer.cpp

http://eyes-free.googlecode.com/ · C++ · 650 lines · 449 code · 138 blank · 63 comment · 89 complexity · b72477e2f5f5c62e749603712c703baf MD5 · raw file

  1. /*
  2. * Copyright 2011, Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <malloc.h>
  17. #include "leptonica.h"
  18. #include "clusterer.h"
  19. #include "validator.h"
  /*
   * Connectivity passed to pixSeedfillBB() when extracting connected components:
   * 4 joins pixels that touch up/down/left/right only, 8 also joins diagonals.
   */
  #define CONN_COMP 8
  22. l_int32 ConnCompValidPixa(PIX *pix8, PIX *pix, PIXA **ppixa, NUMA **pconfs,
  23. HydrogenTextDetector::TextDetectorParameters &params) {
  24. l_int32 h, iszero;
  25. l_int32 x, y, xstart, ystart;
  26. l_float32 singleton_conf;
  27. PIX *pixt1, *pixt2, *pixt3, *pixt4, *pixt5;
  28. PIXA *pixa, *pixasort;
  29. NUMA *confs, *confsort;
  30. BOX *box;
  31. BOXA *boxa;
  32. L_STACK *lstack, *auxstack;
  33. PROCNAME("pixConnCompValidPixa");
  34. if (!ppixa)
  35. return ERROR_INT("&pixa not defined", procName, 1);
  36. if (!pconfs)
  37. return ERROR_INT("&confs not defined", procName, 1);
  38. *ppixa = NULL;
  39. *pconfs = NULL;
  40. if (!pix || pixGetDepth(pix) != 1)
  41. return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
  42. pixa = pixaCreate(0);
  43. confs = numaCreate(0);
  44. pixZero(pix, &iszero);
  45. if (iszero) {
  46. *ppixa = pixa;
  47. return 0;
  48. }
  49. if ((pixt1 = pixCopy(NULL, pix)) == NULL)
  50. return ERROR_INT("pixt1 not made", procName, 1);
  51. if ((pixt2 = pixCopy(NULL, pix)) == NULL)
  52. return ERROR_INT("pixt2 not made", procName, 1);
  53. h = pixGetHeight(pix);
  54. if ((lstack = lstackCreate(h)) == NULL)
  55. return ERROR_INT("lstack not made", procName, 1);
  56. if ((auxstack = lstackCreate(0)) == NULL)
  57. return ERROR_INT("auxstack not made", procName, 1);
  58. lstack->auxstack = auxstack;
  59. if ((boxa = boxaCreate(0)) == NULL)
  60. return ERROR_INT("boxa not made", procName, 1);
  61. xstart = 0;
  62. ystart = 0;
  63. while (1) {
  64. if (!nextOnPixelInRaster(pixt1, xstart, ystart, &x, &y))
  65. break;
  66. if ((box = pixSeedfillBB(pixt1, lstack, x, y, CONN_COMP)) == NULL)
  67. return ERROR_INT("box not made", procName, 1);
  68. /* Save the c.c. and remove from pixt2 as well */
  69. pixt3 = pixClipRectangle(pixt1, box, NULL);
  70. pixt4 = pixClipRectangle(pixt2, box, NULL);
  71. pixt5 = pixClipRectangle(pix8, box, NULL);
  72. pixXor(pixt3, pixt3, pixt4);
  73. pixRasterop(pixt2, box->x, box->y, box->w, box->h, PIX_SRC ^ PIX_DST, pixt3, 0, 0);
  74. pixDestroy(&pixt4);
  75. if (ValidateSingleton(pixt3, box, pixt5, &singleton_conf, params)) {
  76. boxaAddBox(boxa, box, L_INSERT);
  77. pixaAddPix(pixa, pixt3, L_INSERT);
  78. numaAddNumber(confs, singleton_conf);
  79. } else {
  80. boxDestroy(&box);
  81. pixDestroy(&pixt3);
  82. }
  83. pixDestroy(&pixt5);
  84. xstart = x;
  85. ystart = y;
  86. }
  87. /* Remove old boxa of pixa and replace with a clone copy */
  88. boxaDestroy(&pixa->boxa);
  89. pixa->boxa = boxaCopy(boxa, L_CLONE);
  90. /* Sort pixa, then destroy old pixa */
  91. NUMA *naindex;
  92. if ((pixasort = pixaSort(pixa, L_SORT_BY_X, L_SORT_INCREASING, &naindex, L_CLONE)) == NULL)
  93. return ERROR_INT("pixasort not made", procName, 1);
  94. confsort = numaSortByIndex(confs, naindex);
  95. /* Cleanup, freeing the fillsegs on each stack */
  96. lstackDestroy(&lstack, TRUE);
  97. pixDestroy(&pixt1);
  98. pixDestroy(&pixt2);
  99. boxaDestroy(&boxa);
  100. pixaDestroy(&pixa);
  101. *ppixa = pixasort;
  102. *pconfs = confsort;
  103. return 0;
  104. }
  105. l_int32 MergePix(PIXA *pixad, l_int32 d_idx, PIXA *pixas, l_int32 s_idx) {
  106. l_int32 op;
  107. l_int32 x, y, w, h;
  108. l_int32 dx, dy, dw, dh;
  109. PIX *pixd, *pixs, *pixmerge;
  110. BOX *boxd, *boxs, *boxmerge;
  111. PROCNAME("pixMergePix");
  112. if (!pixad)
  113. return ERROR_INT("pixad not defined", procName, 1);
  114. if (!pixas)
  115. return ERROR_INT("pixas not defined", procName, 1);
  116. boxd = pixaGetBox(pixad, d_idx, L_CLONE);
  117. boxs = pixaGetBox(pixas, s_idx, L_CLONE);
  118. boxmerge = boxBoundingRegion(boxd, boxs);
  119. boxGetGeometry(boxmerge, &x, &y, &w, &h);
  120. pixmerge = pixCreate(w, h, 1);
  121. op = PIX_SRC | PIX_DST;
  122. pixs = pixaGetPix(pixas, s_idx, L_CLONE);
  123. if (!pixs)
  124. return ERROR_INT("s_idx not valid", procName, 1);
  125. boxGetGeometry(boxs, &dx, &dy, &dw, &dh);
  126. pixRasterop(pixmerge, dx - x, dy - y, dw, dh, op, pixs, 0, 0);
  127. pixDestroy(&pixs);
  128. boxDestroy(&boxs);
  129. pixd = pixaGetPix(pixad, d_idx, L_CLONE);
  130. if (!pixd)
  131. return ERROR_INT("d_idx not valid", procName, 1);
  132. boxGetGeometry(boxd, &dx, &dy, &dw, &dh);
  133. pixRasterop(pixmerge, dx - x, dy - y, dw, dh, op, pixd, 0, 0);
  134. pixDestroy(&pixd);
  135. boxDestroy(&boxd);
  136. pixaReplacePix(pixad, d_idx, pixmerge, boxmerge);
  137. return 0;
  138. }
  139. l_int32 MergePairFragments(PIX *pix8, PIXA *clusters, PIXA *pixa, l_uint8 *remove) {
  140. l_uint8 setj;
  141. l_int32 i, j, real_j, contains, n, count, num_clusters, initj;
  142. l_int32 xi, yi, wi, hi;
  143. l_int32 xj, yj, wj, hj;
  144. BOX *boxi, *boxj;
  145. PIXA *pixasort;
  146. NUMA *numa;
  147. PROCNAME("pixMergePairFragments");
  148. if (!pixa)
  149. return ERROR_INT("pixa not defined", procName, -1);
  150. if (!remove)
  151. return ERROR_INT("remove not defined", procName, -1);
  152. n = pixaGetCount(pixa);
  153. num_clusters = pixaGetCount(clusters);
  154. if (!n) {
  155. L_INFO("pixa contained 0 pix", procName);
  156. return 0;
  157. }
  158. if (!num_clusters) {
  159. L_INFO("clusters contained 0 pix", procName);
  160. return 0;
  161. }
  162. if ((pixasort = pixaSort(pixa, L_SORT_BY_Y, L_SORT_INCREASING, &numa, L_CLONE)) == NULL)
  163. return ERROR_INT("failed to sort pixa", procName, -1);
  164. count = 0;
  165. initj = 0;
  166. setj = 0;
  167. for (i = 0; i < num_clusters; i++) {
  168. pixaGetBoxGeometry(clusters, i, &xi, &yi, &wi, &hi);
  169. boxi = pixaGetBox(clusters, i, L_CLONE);
  170. setj = 0;
  171. for (j = initj; j < n; j++) {
  172. numaGetIValue(numa, j, &real_j);
  173. // Only consider removed pix
  174. if (!remove[real_j])
  175. continue;
  176. pixaGetBoxGeometry(pixasort, j, &xj, &yj, &wj, &hj);
  177. // If the top of this pix is above the top of the cluster, skip
  178. if (yj < yi)
  179. continue;
  180. if (!setj) {
  181. initj = j;
  182. setj = 1;
  183. }
  184. // If the bottom of this pix is below the bottom of the cluster, stop
  185. if (yj > yi + hi)
  186. break;
  187. boxj = pixaGetBox(pixasort, j, L_CLONE);
  188. boxIntersects(boxi, boxj, &contains);
  189. if (contains) {
  190. MergePix(clusters, i, pixasort, j);
  191. //remove[real_j] = 0; // TODO eliminates duplicates
  192. count++;
  193. }
  194. boxDestroy(&boxj);
  195. }
  196. boxDestroy(&boxi);
  197. }
  198. pixaDestroy(&pixasort);
  199. numaDestroy(&numa);
  200. return count;
  201. }
  202. l_int32 RemoveInvalidPairs(PIX *pix8, PIXA *pixa, NUMA *confs, l_uint8 *remove,
  203. HydrogenTextDetector::TextDetectorParameters &params) {
  204. l_int32 i, j, n, count;
  205. l_float32 pair_conf;
  206. l_uint8 *has_partner;
  207. BOX *b1, *b2;
  208. PROCNAME("pixRemoveInvalidPairs");
  209. if (!pixa)
  210. return ERROR_INT("pixa not defined", procName, -1);
  211. if (!remove)
  212. return ERROR_INT("remove not defined", procName, -1);
  213. n = pixaGetCount(pixa);
  214. if (!n) {
  215. L_INFO("pixa contained 0 pix", procName);
  216. return 0;
  217. }
  218. has_partner = (l_uint8 *) calloc(n, sizeof(l_uint8));
  219. count = 0;
  220. for (i = 0; i < n; i++) {
  221. if (remove[i])
  222. continue;
  223. b1 = pixaGetBox(pixa, i, L_CLONE);
  224. /* Search right for a partner for i */
  225. for (j = i + 1; j < n; j++) {
  226. if (remove[j])
  227. continue;
  228. b2 = pixaGetBox(pixa, j, L_CLONE);
  229. /* Check whether this is a valid pair */
  230. if (!ValidatePair(b1, b2, &pair_conf, params)) {
  231. boxDestroy(&b2);
  232. continue;
  233. }
  234. // We don't need to adjust confidence values here, since we'll
  235. // generate cluster pairs and use those later.
  236. boxDestroy(&b2);
  237. has_partner[i] = 1;
  238. has_partner[j] = 1;
  239. break;
  240. }
  241. boxDestroy(&b1);
  242. }
  243. for (i = 0; i < n; i++) {
  244. if (!has_partner[i]) {
  245. remove[i] = 1;
  246. count++;
  247. }
  248. }
  249. free(has_partner);
  250. return count;
  251. }
  252. // Clustering pass
  253. l_int32 GenerateClusterPartners(PIX *pix8, PIXA *pixa, NUMA *confs, l_uint8 *remove, l_int32 **pleft,
  254. l_int32 **pright, HydrogenTextDetector::TextDetectorParameters &params) {
  255. l_int32 n, i, j;
  256. l_int32 xi, yi, wi, hi, maxd;
  257. l_int32 xj, yj, wj, hj;
  258. l_int32 dx, dy, d, mind, minj;
  259. l_int32 *left, *right;
  260. l_float32 clusterpair_conf, minconf;
  261. BOX *b1, *b2;
  262. bool too_far;
  263. PROCNAME("GenerateClusterPartners");
  264. if (!pixa)
  265. return ERROR_INT("pixa not defined", procName, -1);
  266. if (!pright)
  267. return ERROR_INT("&right not defined", procName, -1);
  268. if (!pleft)
  269. return ERROR_INT("&left not defined", procName, -1);
  270. n = pixaGetCount(pixa);
  271. if (!n) {
  272. L_INFO("pixa contained 0 pix", procName);
  273. return 0;
  274. }
  275. /* If n == 0, remove may be NULL. Since we have already checked for that,
  276. * any NULL arrays signal an error condition.
  277. */
  278. if (!remove)
  279. return ERROR_INT("remove not defined", procName, -1);
  280. left = (l_int32 *) malloc(n * sizeof(l_int32));
  281. right = (l_int32 *) malloc(n * sizeof(l_int32));
  282. /* Initialize left and right arrays */
  283. for (i = 0; i < n; i++) {
  284. left[i] = -2;
  285. right[i] = -2;
  286. }
  287. /* For each component, check all possible neighbors to find the most likely
  288. * right neighbor. If that right neighbor already has a left neighbor, insert
  289. * the component to the right of the existing neighbor and the left of the
  290. * right neighbor.
  291. */
  292. for (i = 0; i < n; i++) {
  293. if (remove[i])
  294. continue;
  295. pixaGetBoxGeometry(pixa, i, &xi, &yi, &wi, &hi);
  296. b1 = pixaGetBox(pixa, i, L_CLONE);
  297. mind = -1;
  298. minj = -1;
  299. maxd = L_MAX(wi, hi);
  300. minconf = 0.0;
  301. /* Search for closest right neighbor */
  302. for (j = i + 1; j < n; j++) {
  303. if (remove[j])
  304. continue;
  305. pixaGetBoxGeometry(pixa, j, &xj, &yj, &wj, &hj);
  306. b2 = pixaGetBox(pixa, j, L_CLONE);
  307. if (!ValidateClusterPair(b1, b2, &too_far, &clusterpair_conf, params)) {
  308. if (too_far)
  309. break;
  310. else
  311. continue;
  312. }
  313. /* calculate spacing between i and j */
  314. dx = xj - (xi + wi);
  315. dy = (yj + hj) - (yi + hi);
  316. d = dx * dx + dy * dy;
  317. /* If we haven't found a neighbor OR we're the closest neighbor, update
  318. * i's record for most likely neighbor.
  319. */
  320. if (mind < 0 || d < mind) {
  321. mind = d;
  322. minj = j;
  323. minconf = clusterpair_conf;
  324. }
  325. }
  326. /* If we found a valid neighbor, go ahead and use it. */
  327. if (mind >= 0) {
  328. j = left[minj];
  329. /* If minj already had a left neighbor, replace it with i */
  330. // TODO(alanv): Insertion fudges the partner confidence value
  331. if (j >= 0) {
  332. left[i] = j;
  333. right[j] = i;
  334. }
  335. left[minj] = i;
  336. right[i] = minj;
  337. // Adjust confidence to reflect partner confidence
  338. l_float32 conf;
  339. numaGetFValue(confs, i, &conf);
  340. conf *= minconf;
  341. numaReplaceNumber(confs, i, conf);
  342. }
  343. }
  344. *pleft = left;
  345. *pright = right;
  346. return 0;
  347. }
  348. l_int32 MergeClusterPartners(PIX *pix8, PIXA *pixa, NUMA *confs, l_uint8 *remove, l_int32 *left, l_int32 *right,
  349. PIXA **ppixad, NUMA **pclusterconfs, HydrogenTextDetector::TextDetectorParameters &params) {
  350. l_int32 n, count, i, j, temp;
  351. l_uint32 x, y, w, h;
  352. l_int32 xi, yi, wi, hi;
  353. l_int32 xj, yj, wj, hj;
  354. PIXA *pixad, *pixa_cluster;
  355. NUMA *confd, *numa_cluster;
  356. PIX *pix, *pixd, *pix_cluster;
  357. BOX *box, *boxd;
  358. PROCNAME("ClusterValidComponents");
  359. if (!ppixad)
  360. return ERROR_INT("&pixad not defined", procName, -1);
  361. if (!pclusterconfs)
  362. return ERROR_INT("&clusterconfs not defined", procName, -1);
  363. n = pixaGetCount(pixa);
  364. pixad = pixaCreate(0);
  365. confd = numaCreate(0);
  366. *ppixad = pixad;
  367. *pclusterconfs = confd;
  368. if (!n) {
  369. L_INFO("pixa contained 0 pix", procName);
  370. return 0;
  371. }
  372. /* If n == 0, then left, right, and remove may be NULL. Since we have
  373. * already checked for that, any NULL arrays signal an error condition.
  374. */
  375. if (!left)
  376. return ERROR_INT("left not defined", procName, -1);
  377. if (!right)
  378. return ERROR_INT("right not defined", procName, -1);
  379. if (!remove)
  380. return ERROR_INT("remove not defined", procName, -1);
  381. count = 0;
  382. /* Starting from the first component, generate a cluster by traveling
  383. * left and right as far as possible. Ignore components that have no
  384. * neighbors.
  385. */
  386. for (i = 0; i < n; i++) {
  387. if (remove[i])
  388. continue;
  389. if (left[i] < -1 && right[i] < -1)
  390. remove[i] = 1;
  391. if (left[i] < 0 && right[i] < 0)
  392. continue;
  393. pixa_cluster = pixaCreate(1);
  394. numa_cluster = numaCreate(1);
  395. /* We don't need to destroy this pix and box since pixa_cluster
  396. * takes ownership with L_INSERT.
  397. */
  398. pix = pixaGetPix(pixa, i, L_CLONE);
  399. box = pixaGetBox(pixa, i, L_CLONE);
  400. pixaAddPix(pixa_cluster, pix, L_INSERT);
  401. pixaAddBox(pixa_cluster, box, L_INSERT);
  402. numaAddNumber(numa_cluster, i);
  403. boxGetGeometry(box, &xi, &yi, &wi, &hi);
  404. x = xi;
  405. y = yi;
  406. w = xi + wi;
  407. h = yi + hi;
  408. /* Move along left neighbors */
  409. j = left[i];
  410. left[i] = -1;
  411. while (j >= 0) {
  412. pix = pixaGetPix(pixa, j, L_CLONE);
  413. box = pixaGetBox(pixa, j, L_CLONE);
  414. pixaAddPix(pixa_cluster, pix, L_INSERT);
  415. pixaAddBox(pixa_cluster, box, L_INSERT);
  416. numaAddNumber(numa_cluster, j);
  417. boxGetGeometry(box, &xj, &yj, &wj, &hj);
  418. x = L_MIN(x, (l_uint32) xj);
  419. y = L_MIN(y, (l_uint32) yj);
  420. w = L_MAX(w, (l_uint32) (xj + wj));
  421. h = L_MAX(h, (l_uint32) (yj + hj));
  422. right[j] = -1;
  423. temp = left[j];
  424. left[j] = -1;
  425. j = temp;
  426. }
  427. /* Move along right neighbors */
  428. j = right[i];
  429. right[i] = -1;
  430. while (j >= 0) {
  431. pix = pixaGetPix(pixa, j, L_CLONE);
  432. box = pixaGetBox(pixa, j, L_CLONE);
  433. pixaAddPix(pixa_cluster, pix, L_INSERT);
  434. pixaAddBox(pixa_cluster, box, L_INSERT);
  435. numaAddNumber(numa_cluster, j);
  436. boxGetGeometry(box, &xj, &yj, &wj, &hj);
  437. x = L_MIN(x, (l_uint32) xj);
  438. y = L_MIN(y, (l_uint32) yj);
  439. w = L_MAX(w, (l_uint32) xj + wj);
  440. h = L_MAX(h, (l_uint32) (yj + hj));
  441. left[j] = -1;
  442. temp = right[j];
  443. right[j] = -1;
  444. j = temp;
  445. }
  446. w = w - x;
  447. h = h - y;
  448. boxd = boxCreate(x, y, w, h);
  449. pix_cluster = pixClipRectangle(pix8, boxd, NULL);
  450. l_float32 temp_conf;
  451. l_float32 cluster_conf;
  452. /* If pixa seems valid, collapse its components to a single pix */
  453. if (ValidateCluster(pix_cluster, pixa_cluster, boxd, &cluster_conf, params)) {
  454. l_int32 num_comps = pixaGetCount(pixa_cluster);
  455. l_float32 avg_conf = 0.0;
  456. pixd = pixCreate(w, h, 1);
  457. for (int i = 0; i < num_comps; i++) {
  458. pix = pixaGetPix(pixa_cluster, i, L_CLONE);
  459. pixaGetBoxGeometry(pixa_cluster, i, &xj, &yj, &wj, &hj);
  460. pixRasterop(pixd, xj - x, yj - y, wj, hj, PIX_PAINT, pix, 0, 0);
  461. pixDestroy(&pix);
  462. numaGetFValue(confs, i, &temp_conf);
  463. avg_conf += temp_conf;
  464. }
  465. // Adjust average confidence to reflect overall cluster confidence
  466. avg_conf /= num_comps;
  467. avg_conf *= cluster_conf;
  468. pixaAddPix(pixad, pixd, L_INSERT);
  469. pixaAddBox(pixad, boxd, L_INSERT);
  470. numaAddNumber(confd, avg_conf);
  471. count++;
  472. } else {
  473. l_int32 num_nums = numaGetCount(numa_cluster);
  474. // Otherwise, mark its components as removed
  475. for (int i = 0; i < num_nums; i++) {
  476. if (!numaGetIValue(numa_cluster, i, &temp)) {
  477. remove[temp] = 1;
  478. }
  479. }
  480. boxDestroy(&boxd);
  481. }
  482. pixDestroy(&pix_cluster);
  483. pixaDestroy(&pixa_cluster);
  484. numaDestroy(&numa_cluster);
  485. }
  486. free(left);
  487. free(right);
  488. PIXA *pixasort;
  489. NUMA *confsort;
  490. /* Sort pixa, then destroy old pixa */
  491. NUMA *naindex;
  492. if ((pixasort = pixaSort(pixad, L_SORT_BY_Y, L_SORT_INCREASING, &naindex, L_CLONE)) == NULL)
  493. return ERROR_INT("pixasort not made", procName, 1);
  494. confsort = numaSortByIndex(confd, naindex);
  495. *ppixad = pixasort;
  496. *pclusterconfs = confsort;
  497. pixaDestroy(&pixad);
  498. numaDestroy(&confd);
  499. return count;
  500. }
  501. l_int32 ClusterValidComponents(PIX *pix8, PIXA *pixa, NUMA *confs, l_uint8 *remove, PIXA **ppixad,
  502. NUMA **pclusterconfs, HydrogenTextDetector::TextDetectorParameters &params) {
  503. l_int32 *left, *right;
  504. PIXA *pixad;
  505. NUMA *clusterconfs;
  506. if (GenerateClusterPartners(pix8, pixa, confs, remove, &left, &right, params))
  507. return -1;
  508. int count = MergeClusterPartners(pix8, pixa, confs, remove, left, right, &pixad, &clusterconfs, params);
  509. *ppixad = pixad;
  510. *pclusterconfs = clusterconfs;
  511. return count;
  512. }