/ocr/ocrservice/jni/hydrogen/src/thresholder.cpp

http://eyes-free.googlecode.com/ · C++ · 378 lines · 258 code · 59 blank · 61 comment · 56 complexity · 5f7ae4298b9f2baf21792093c687ae0d MD5 · raw file

  1. /*
  2. * Copyright 2011, Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <math.h>
  17. #include "leptonica.h"
  18. #include "thresholder.h"
  19. /*!
  20. * pixFisherAdaptiveThreshold()
  21. *
  22. * Input: pixs (8 bpp)
  23. * &pixd (<required return> thresholded input for pixs)
  24. * sx, sy (desired tile dimensions; actual size may vary)
  25. * scorefract (fraction of the max Otsu score; typ. 0.1)
  26. * fdrthresh (threshold for Fisher's Discriminant Rate; typ. 5.0)
  27. * Return: 0 if OK, 1 on error
  28. */
  29. l_int32 pixFisherAdaptiveThreshold(PIX *pixs, PIX **ppixd, l_int32 tile_x, l_int32 tile_y,
  30. l_float32 score_fract, l_float32 thresh) {
  31. l_float32 fdr;
  32. l_int32 w, h, d, nx, ny, x, y, t;
  33. PIX *pixb, *pixd, *pixt;
  34. PIXTILING *pt;
  35. PROCNAME("pixFisherAdaptiveThreshold");
  36. if (!pixs)
  37. return ERROR_INT("pixs not defined", procName, 1);
  38. if (!ppixd)
  39. return ERROR_INT("&ppixd not defined", procName, 1);
  40. pixGetDimensions(pixs, &w, &h, &d);
  41. if (d != 8)
  42. return ERROR_INT("pixs not 8 bpp", procName, 1);
  43. if (tile_x < 8 || tile_y < 8)
  44. return ERROR_INT("sx and sy must be >= 8", procName, 1);
  45. /* Compute FDR & threshold for individual tiles */
  46. nx = L_MAX(1, w / tile_x);
  47. ny = L_MAX(1, h / tile_y);
  48. pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
  49. pixd = pixCreate(w, h, 1);
  50. for (y = 0; y < ny; y++) {
  51. for (x = 0; x < nx; x++) {
  52. pixt = pixTilingGetTile(pt, y, x);
  53. pixGetFisherThresh(pixt, score_fract, &fdr, &t);
  54. if (fdr > thresh) {
  55. pixb = pixThresholdToBinary(pixt, t);
  56. pixTilingPaintTile(pixd, y, x, pixb, pt);
  57. pixDestroy(&pixb);
  58. }
  59. pixDestroy(&pixt);
  60. }
  61. }
  62. pixTilingDestroy(&pt);
  63. *ppixd = pixd;
  64. return 0;
  65. }
  66. /*!
  67. * pixGetFisherThresh()
  68. *
  69. * Input: pixs (any depth; cmapped ok)
  70. * scorefract (fraction of the max score, used to determine
  71. * the range over which the histogram min is searched)
  72. * &xfdr (<optional return> Fisher's Discriminate Rate value)
  73. * &xthresh (<optional return> Otsu threshold value)
  74. * Return: 0 if OK, 1 on error
  75. */
  76. l_int32 pixGetFisherThresh(PIX *pixs, l_float32 scorefract, l_float32 *pfdr, l_int32 *pthresh) {
  77. l_float32 mean1, mean2, sum, sum1, sum2, fract;
  78. l_float32 var, between, within, fdr;
  79. l_int32 thresh;
  80. NUMA *na;
  81. PROCNAME("pixGetFisherThresh");
  82. if (!pixs)
  83. return ERROR_INT("pixs not defined", procName, 1);
  84. if (!pfdr && !pthresh)
  85. return ERROR_INT("neither &pfdr nor &pthresh defined", procName, 1);
  86. na = pixGetGrayHistogram(pixs, 1);
  87. /* Compute Otsu threshold for histogram */
  88. numaSplitDistribution(na, scorefract, &thresh, &mean1, &mean2, &sum1, &sum2, NULL);
  89. /* Compute Fisher's Discriminant Rate if needed */
  90. if (pfdr) {
  91. numaGetHistogramStats(na, 0.0, 1.0, NULL, NULL, NULL, &var);
  92. numaGetSum(na, &sum);
  93. /* Between-class variance = sum of weighted squared distances
  94. between-class and overall means */
  95. fract = sum1 / sum;
  96. between = (fract * (1 - fract)) * (mean1 - mean2) * (mean1 - mean2);
  97. /* Within-class variance = difference between total variance
  98. and between-class variance */
  99. within = var - between;
  100. /* FDR = between-class variance over within-class variance */
  101. if (within <= 1) {
  102. fdr = between;
  103. } else {
  104. fdr = between / within;
  105. }
  106. *pfdr = fdr;
  107. }
  108. if (pthresh)
  109. *pthresh = thresh;
  110. numaDestroy(&na);
  111. return 0;
  112. }
  113. PIX *pixThreshedSobelEdgeFilter(PIX *pixs, l_int32 threshold) {
  114. l_uint8 bval, bidx;
  115. l_int32 w, h, d, i, j, wplt, wpld, gx, gy, vald;
  116. l_int32 val1, val2, val3, val4, val5, val6, val7, val8, val9;
  117. l_uint32 *datat, *linet, *datad, *lined;
  118. PIX *pixd;
  119. PROCNAME("pixThreshedSobelEdgeFilter");
  120. if (!pixs)
  121. return (PIX *) ERROR_PTR("pixs not defined", procName, NULL);
  122. pixGetDimensions(pixs, &w, &h, &d);
  123. if (d != 8)
  124. return (PIX *) ERROR_PTR("pixs not 8 bpp", procName, NULL);
  125. /* Compute filter output at each location. */
  126. pixd = pixCreateNoInit(w, h, 1);
  127. datat = pixGetData(pixs);
  128. wplt = pixGetWpl(pixs);
  129. datad = pixGetData(pixd);
  130. wpld = pixGetWpl(pixd);
  131. val1 = val2 = val3 = val4 = val5 = 0;
  132. val6 = val7 = val8 = val9 = 0;
  133. bval = bidx = 0;
  134. for (i = 0; i < h - 1; i++) {
  135. linet = datat + i * wplt;
  136. lined = datad + i * wpld;
  137. for (j = 0; j < w - 1; j++) {
  138. if (j == 0) { /* start a new row */
  139. val1 = GET_DATA_BYTE(linet, j);
  140. val2 = GET_DATA_BYTE(linet + wplt, j);
  141. val3 = GET_DATA_BYTE(linet + (wplt << 1), j);
  142. val4 = GET_DATA_BYTE(linet, j + 1);
  143. val5 = GET_DATA_BYTE(linet + wplt, j + 1);
  144. val6 = GET_DATA_BYTE(linet + (wplt << 1), j + 1);
  145. val7 = GET_DATA_BYTE(linet, j + 2);
  146. val8 = GET_DATA_BYTE(linet + wplt, j + 2);
  147. val9 = GET_DATA_BYTE(linet + (wplt << 1), j + 2);
  148. bval = 0;
  149. bidx = 0x80;
  150. } else { /* shift right by 1 pixel; update incrementally */
  151. val1 = val4;
  152. val2 = val5;
  153. val3 = val6;
  154. val4 = val7;
  155. val5 = val8;
  156. val6 = val9;
  157. val7 = GET_DATA_BYTE(linet, j + 2);
  158. val8 = GET_DATA_BYTE(linet + wplt, j + 2);
  159. val9 = GET_DATA_BYTE(linet + (wplt << 1), j + 2);
  160. }
  161. bval <<= 1;
  162. bidx >>= 1;
  163. gx = val1 + (val2 << 1) + val3 - val7 - (val8 << 1) - val9;
  164. gy = val1 + (val4 << 1) + val7 - val3 - (val6 << 1) - val9;
  165. vald = L_MIN(255, L_ABS(gx) + L_ABS(gy));
  166. /* Flip high bit if value exceeds threshold */
  167. if (vald >= threshold) {
  168. bval |= 1;
  169. }
  170. if (bidx == 0) {
  171. SET_DATA_BYTE(lined, j / 8, bval);
  172. bval = 0;
  173. bidx = 0x80;
  174. }
  175. }
  176. }
  177. return pixd;
  178. }
  179. l_uint8 pixGradientEnergy(PIX *pixs, PIX *mask, l_float32 *penergy) {
  180. l_int32 w, h, d;
  181. l_uint8 val1, val2;
  182. l_uint32 mask1, mask2;
  183. l_int32 wpls, wplm;
  184. l_uint32 *datas, *lines;
  185. l_uint32 *datam, *linem;
  186. l_int32 total, count;
  187. PROCNAME("pixGradientEnergy");
  188. if (!pixs)
  189. return ERROR_INT("pixs not defined", procName, -1);
  190. pixGetDimensions(pixs, &w, &h, &d);
  191. if (d != 8)
  192. return ERROR_INT("pixs not 8 bpp", procName, -1);
  193. /* Compute filter output at each location. */
  194. datas = pixGetData(pixs);
  195. wpls = pixGetWpl(pixs);
  196. datam = pixGetData(mask);
  197. wplm = pixGetWpl(mask);
  198. total = 0;
  199. count = 1;
  200. mask1 = mask2 = 0;
  201. val1 = val2 = 0;
  202. for (int y = 0; y < h; y++) {
  203. lines = datas + y * wpls;
  204. linem = datam + y * wplm;
  205. for (int x = 0; x < w - 1; x++) {
  206. if (x == 0) { /* start a new row */
  207. mask1 = GET_DATA_BIT(linem, x);
  208. mask2 = GET_DATA_BIT(linem, x + 1);
  209. val1 = GET_DATA_BYTE(lines, x);
  210. val2 = GET_DATA_BYTE(lines, x + 1);
  211. } else { /* shift right by 1 pixel; update incrementally */
  212. val1 = val2;
  213. val2 = GET_DATA_BYTE(lines, x + 1);
  214. mask1 = mask2;
  215. mask2 = GET_DATA_BIT(linem, x + 1);
  216. }
  217. /* If we're on an edge, add the gradient value and increment */
  218. if (mask1 != mask2) {
  219. total += L_ABS(val1 - val2);
  220. count += 1;
  221. }
  222. }
  223. }
  224. *penergy = total / (l_float32) count;
  225. return 0;
  226. }
  227. l_uint8 pixEdgeMax(PIX *pixs, l_int32 *pmax, l_int32 *pavg) {
  228. l_int32 w, h, d, wplt, vald;
  229. l_uint8 val1, val2, val3, val4, val5;
  230. l_uint32 *datat, *linet;
  231. l_int32 max, total;
  232. PROCNAME("pixEdgeMax");
  233. if (!pixs)
  234. return ERROR_INT("pixs not defined", procName, -1);
  235. pixGetDimensions(pixs, &w, &h, &d);
  236. if (d != 8)
  237. return ERROR_INT("pixs not 8 bpp", procName, -1);
  238. /* Compute filter output at each location. */
  239. datat = pixGetData(pixs);
  240. wplt = pixGetWpl(pixs);
  241. max = 0;
  242. total = 0;
  243. val1 = val2 = val3 = val4 = val5 = 0;
  244. for (int y = 0; y < h; y++) {
  245. linet = datat + y * wplt;
  246. for (int x = 0; x < w - 5; x++) {
  247. if (x == 0) { /* start a new row */
  248. val1 = GET_DATA_BYTE(linet, x);
  249. val2 = GET_DATA_BYTE(linet, x + 1);
  250. val3 = GET_DATA_BYTE(linet, x + 2);
  251. val4 = GET_DATA_BYTE(linet, x + 3);
  252. val5 = GET_DATA_BYTE(linet, x + 4);
  253. } else { /* shift right by 1 pixel; update incrementally */
  254. val1 = val2;
  255. val2 = val3;
  256. val3 = val4;
  257. val4 = val5;
  258. val5 = GET_DATA_BYTE(linet, x + 4);
  259. }
  260. //maxd = L_MAX(val5, L_MAX(val4, L_MAX(val3, L_MAX(val2, val1))));
  261. //mind = L_MIN(val5, L_MIN(val4, L_MIN(val3, L_MIN(val2, val1))));
  262. vald = L_ABS(val1 - val5); //maxd - mind;
  263. if (vald > max) {
  264. max = vald;
  265. }
  266. total += vald;
  267. }
  268. }
  269. *pmax = max;
  270. *pavg = total / (w * h);
  271. return 0;
  272. }
  273. /*!
  274. * pixEdgeAdaptiveThreshold()
  275. *
  276. * Input: pixs (8 bpp)
  277. * &pixd (<required return> thresholded input for pixs)
  278. * tile_x, tile_y (desired tile dimensions; actual size may vary)
  279. * thresh
  280. * avg_thresh
  281. * Return: 0 if OK, 1 on error
  282. */
  283. l_uint8 pixEdgeAdaptiveThreshold(PIX *pixs, PIX **ppixd, l_int32 tile_x, l_int32 tile_y,
  284. l_int32 thresh, l_int32 avg_thresh) {
  285. l_int32 w, h, d, nx, ny, x, y, t, max, avg;
  286. PIX *pixb, *pixd, *pixt;
  287. PIXTILING *pt;
  288. PROCNAME("pixEdgeAdaptiveThreshold");
  289. if (!pixs)
  290. return ERROR_INT("pixs not defined", procName, 1);
  291. if (!ppixd)
  292. return ERROR_INT("&ppixd not defined", procName, 1);
  293. pixGetDimensions(pixs, &w, &h, &d);
  294. if (d != 8)
  295. return ERROR_INT("pixs not 8 bpp", procName, 1);
  296. if (tile_x < 8 || tile_y < 8)
  297. return ERROR_INT("sx and sy must be >= 8", procName, 1);
  298. /* Compute FDR & threshold for individual tiles */
  299. nx = L_MAX(1, w / tile_x);
  300. ny = L_MAX(1, h / tile_y);
  301. pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
  302. pixd = pixCreate(w, h, 1);
  303. for (y = 0; y < ny; y++) {
  304. for (x = 0; x < nx; x++) {
  305. pixt = pixTilingGetTile(pt, y, x);
  306. pixEdgeMax(pixt, &max, &avg);
  307. if (max > thresh && avg > avg_thresh) {
  308. pixSplitDistributionFgBg(pixt, 0.0, 1, &t, NULL, NULL, 0);
  309. pixb = pixThresholdToBinary(pixt, t);
  310. pixTilingPaintTile(pixd, y, x, pixb, pt);
  311. pixDestroy(&pixb);
  312. }
  313. pixDestroy(&pixt);
  314. }
  315. }
  316. pixTilingDestroy(&pt);
  317. *ppixd = pixd;
  318. return 0;
  319. }