/ocr/ocrservice/jni/hydrogen/src/thresholder.cpp
C++ | 378 lines | 258 code | 59 blank | 61 comment | 56 complexity | 5f7ae4298b9f2baf21792093c687ae0d MD5 | raw file
1/* 2 * Copyright 2011, Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <math.h> 18 19#include "leptonica.h" 20#include "thresholder.h" 21 22/*! 23 * pixFisherAdaptiveThreshold() 24 * 25 * Input: pixs (8 bpp) 26 * &pixd (<required return> thresholded input for pixs) 27 * sx, sy (desired tile dimensions; actual size may vary) 28 * scorefract (fraction of the max Otsu score; typ. 0.1) 29 * fdrthresh (threshold for Fisher's Discriminant Rate; typ. 5.0) 30 * Return: 0 if OK, 1 on error 31 */ 32l_int32 pixFisherAdaptiveThreshold(PIX *pixs, PIX **ppixd, l_int32 tile_x, l_int32 tile_y, 33 l_float32 score_fract, l_float32 thresh) { 34 l_float32 fdr; 35 l_int32 w, h, d, nx, ny, x, y, t; 36 PIX *pixb, *pixd, *pixt; 37 PIXTILING *pt; 38 39 PROCNAME("pixFisherAdaptiveThreshold"); 40 41 if (!pixs) 42 return ERROR_INT("pixs not defined", procName, 1); 43 if (!ppixd) 44 return ERROR_INT("&ppixd not defined", procName, 1); 45 pixGetDimensions(pixs, &w, &h, &d); 46 if (d != 8) 47 return ERROR_INT("pixs not 8 bpp", procName, 1); 48 if (tile_x < 8 || tile_y < 8) 49 return ERROR_INT("sx and sy must be >= 8", procName, 1); 50 51 /* Compute FDR & threshold for individual tiles */ 52 nx = L_MAX(1, w / tile_x); 53 ny = L_MAX(1, h / tile_y); 54 pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); 55 pixd = pixCreate(w, h, 1); 56 for (y = 0; y < ny; y++) { 57 for (x = 0; x < nx; x++) { 58 pixt = pixTilingGetTile(pt, y, x); 59 pixGetFisherThresh(pixt, score_fract, &fdr, &t); 60 61 if (fdr > thresh) { 62 pixb = pixThresholdToBinary(pixt, t); 63 pixTilingPaintTile(pixd, y, x, pixb, pt); 64 pixDestroy(&pixb); 65 } 66 67 pixDestroy(&pixt); 68 } 69 } 70 71 pixTilingDestroy(&pt); 72 73 *ppixd = pixd; 74 75 return 0; 76} 77 78/*! 79 * pixGetFisherThresh() 80 * 81 * Input: pixs (any depth; cmapped ok) 82 * scorefract (fraction of the max score, used to determine 83 * the range over which the histogram min is searched) 84 * &xfdr (<optional return> Fisher's Discriminate Rate value) 85 * &xthresh (<optional return> Otsu threshold value) 86 * Return: 0 if OK, 1 on error 87 */ 88l_int32 pixGetFisherThresh(PIX *pixs, l_float32 scorefract, l_float32 *pfdr, l_int32 *pthresh) { 89 l_float32 mean1, mean2, sum, sum1, sum2, fract; 90 l_float32 var, between, within, fdr; 91 l_int32 thresh; 92 NUMA *na; 93 94 PROCNAME("pixGetFisherThresh"); 95 96 if (!pixs) 97 return ERROR_INT("pixs not defined", procName, 1); 98 if (!pfdr && !pthresh) 99 return ERROR_INT("neither &pfdr nor &pthresh defined", procName, 1); 100 101 na = pixGetGrayHistogram(pixs, 1); 102 103 /* Compute Otsu threshold for histogram */ 104 numaSplitDistribution(na, scorefract, &thresh, &mean1, &mean2, &sum1, &sum2, NULL); 105 106 /* Compute Fisher's Discriminant Rate if needed */ 107 if (pfdr) { 108 numaGetHistogramStats(na, 0.0, 1.0, NULL, NULL, NULL, &var); 109 numaGetSum(na, &sum); 110 111 /* Between-class variance = sum of weighted squared distances 112 between-class and overall means */ 113 fract = sum1 / sum; 114 between = (fract * (1 - fract)) * (mean1 - mean2) * (mean1 - mean2); 115 116 /* Within-class variance = difference between total variance 117 and between-class variance */ 118 within = var - between; 119 120 /* FDR = between-class variance over within-class variance */ 121 if (within <= 1) { 122 fdr = between; 123 } else { 124 fdr = between / within; 125 } 126 127 *pfdr = fdr; 128 } 129 130 if (pthresh) 131 *pthresh = thresh; 132 133 numaDestroy(&na); 134 135 return 0; 136} 137 138PIX *pixThreshedSobelEdgeFilter(PIX *pixs, l_int32 threshold) { 139 l_uint8 bval, bidx; 140 l_int32 w, h, d, i, j, wplt, wpld, gx, gy, vald; 141 l_int32 val1, val2, val3, val4, val5, val6, val7, val8, val9; 142 l_uint32 *datat, *linet, *datad, *lined; 143 PIX *pixd; 144 145 PROCNAME("pixThreshedSobelEdgeFilter"); 146 147 if (!pixs) 148 return (PIX *) ERROR_PTR("pixs not defined", procName, NULL); 149 pixGetDimensions(pixs, &w, &h, &d); 150 if (d != 8) 151 return (PIX *) ERROR_PTR("pixs not 8 bpp", procName, NULL); 152 153 /* Compute filter output at each location. */ 154 pixd = pixCreateNoInit(w, h, 1); 155 datat = pixGetData(pixs); 156 wplt = pixGetWpl(pixs); 157 datad = pixGetData(pixd); 158 wpld = pixGetWpl(pixd); 159 val1 = val2 = val3 = val4 = val5 = 0; 160 val6 = val7 = val8 = val9 = 0; 161 bval = bidx = 0; 162 for (i = 0; i < h - 1; i++) { 163 linet = datat + i * wplt; 164 lined = datad + i * wpld; 165 for (j = 0; j < w - 1; j++) { 166 if (j == 0) { /* start a new row */ 167 val1 = GET_DATA_BYTE(linet, j); 168 val2 = GET_DATA_BYTE(linet + wplt, j); 169 val3 = GET_DATA_BYTE(linet + (wplt << 1), j); 170 val4 = GET_DATA_BYTE(linet, j + 1); 171 val5 = GET_DATA_BYTE(linet + wplt, j + 1); 172 val6 = GET_DATA_BYTE(linet + (wplt << 1), j + 1); 173 val7 = GET_DATA_BYTE(linet, j + 2); 174 val8 = GET_DATA_BYTE(linet + wplt, j + 2); 175 val9 = GET_DATA_BYTE(linet + (wplt << 1), j + 2); 176 bval = 0; 177 bidx = 0x80; 178 } else { /* shift right by 1 pixel; update incrementally */ 179 val1 = val4; 180 val2 = val5; 181 val3 = val6; 182 val4 = val7; 183 val5 = val8; 184 val6 = val9; 185 val7 = GET_DATA_BYTE(linet, j + 2); 186 val8 = GET_DATA_BYTE(linet + wplt, j + 2); 187 val9 = GET_DATA_BYTE(linet + (wplt << 1), j + 2); 188 } 189 190 bval <<= 1; 191 bidx >>= 1; 192 193 gx = val1 + (val2 << 1) + val3 - val7 - (val8 << 1) - val9; 194 gy = val1 + (val4 << 1) + val7 - val3 - (val6 << 1) - val9; 195 vald = L_MIN(255, L_ABS(gx) + L_ABS(gy)); 196 197 /* Flip high bit if value exceeds threshold */ 198 if (vald >= threshold) { 199 bval |= 1; 200 } 201 202 if (bidx == 0) { 203 SET_DATA_BYTE(lined, j / 8, bval); 204 205 bval = 0; 206 bidx = 0x80; 207 } 208 } 209 } 210 211 return pixd; 212} 213 214l_uint8 pixGradientEnergy(PIX *pixs, PIX *mask, l_float32 *penergy) { 215 l_int32 w, h, d; 216 l_uint8 val1, val2; 217 l_uint32 mask1, mask2; 218 l_int32 wpls, wplm; 219 l_uint32 *datas, *lines; 220 l_uint32 *datam, *linem; 221 l_int32 total, count; 222 223 PROCNAME("pixGradientEnergy"); 224 225 if (!pixs) 226 return ERROR_INT("pixs not defined", procName, -1); 227 pixGetDimensions(pixs, &w, &h, &d); 228 if (d != 8) 229 return ERROR_INT("pixs not 8 bpp", procName, -1); 230 231 /* Compute filter output at each location. */ 232 datas = pixGetData(pixs); 233 wpls = pixGetWpl(pixs); 234 datam = pixGetData(mask); 235 wplm = pixGetWpl(mask); 236 total = 0; 237 count = 1; 238 mask1 = mask2 = 0; 239 val1 = val2 = 0; 240 for (int y = 0; y < h; y++) { 241 lines = datas + y * wpls; 242 linem = datam + y * wplm; 243 for (int x = 0; x < w - 1; x++) { 244 if (x == 0) { /* start a new row */ 245 mask1 = GET_DATA_BIT(linem, x); 246 mask2 = GET_DATA_BIT(linem, x + 1); 247 val1 = GET_DATA_BYTE(lines, x); 248 val2 = GET_DATA_BYTE(lines, x + 1); 249 } else { /* shift right by 1 pixel; update incrementally */ 250 val1 = val2; 251 val2 = GET_DATA_BYTE(lines, x + 1); 252 mask1 = mask2; 253 mask2 = GET_DATA_BIT(linem, x + 1); 254 } 255 256 /* If we're on an edge, add the gradient value and increment */ 257 if (mask1 != mask2) { 258 total += L_ABS(val1 - val2); 259 count += 1; 260 } 261 } 262 } 263 264 *penergy = total / (l_float32) count; 265 266 return 0; 267} 268 269l_uint8 pixEdgeMax(PIX *pixs, l_int32 *pmax, l_int32 *pavg) { 270 l_int32 w, h, d, wplt, vald; 271 l_uint8 val1, val2, val3, val4, val5; 272 l_uint32 *datat, *linet; 273 l_int32 max, total; 274 275 PROCNAME("pixEdgeMax"); 276 277 if (!pixs) 278 return ERROR_INT("pixs not defined", procName, -1); 279 pixGetDimensions(pixs, &w, &h, &d); 280 if (d != 8) 281 return ERROR_INT("pixs not 8 bpp", procName, -1); 282 283 /* Compute filter output at each location. */ 284 datat = pixGetData(pixs); 285 wplt = pixGetWpl(pixs); 286 max = 0; 287 total = 0; 288 val1 = val2 = val3 = val4 = val5 = 0; 289 for (int y = 0; y < h; y++) { 290 linet = datat + y * wplt; 291 for (int x = 0; x < w - 5; x++) { 292 if (x == 0) { /* start a new row */ 293 val1 = GET_DATA_BYTE(linet, x); 294 val2 = GET_DATA_BYTE(linet, x + 1); 295 val3 = GET_DATA_BYTE(linet, x + 2); 296 val4 = GET_DATA_BYTE(linet, x + 3); 297 val5 = GET_DATA_BYTE(linet, x + 4); 298 } else { /* shift right by 1 pixel; update incrementally */ 299 val1 = val2; 300 val2 = val3; 301 val3 = val4; 302 val4 = val5; 303 val5 = GET_DATA_BYTE(linet, x + 4); 304 } 305 306 //maxd = L_MAX(val5, L_MAX(val4, L_MAX(val3, L_MAX(val2, val1)))); 307 //mind = L_MIN(val5, L_MIN(val4, L_MIN(val3, L_MIN(val2, val1)))); 308 vald = L_ABS(val1 - val5); //maxd - mind; 309 310 if (vald > max) { 311 max = vald; 312 } 313 314 total += vald; 315 } 316 } 317 318 *pmax = max; 319 *pavg = total / (w * h); 320 321 return 0; 322} 323 324/*! 325 * pixEdgeAdaptiveThreshold() 326 * 327 * Input: pixs (8 bpp) 328 * &pixd (<required return> thresholded input for pixs) 329 * tile_x, tile_y (desired tile dimensions; actual size may vary) 330 * thresh 331 * avg_thresh 332 * Return: 0 if OK, 1 on error 333 */ 334l_uint8 pixEdgeAdaptiveThreshold(PIX *pixs, PIX **ppixd, l_int32 tile_x, l_int32 tile_y, 335 l_int32 thresh, l_int32 avg_thresh) { 336 l_int32 w, h, d, nx, ny, x, y, t, max, avg; 337 PIX *pixb, *pixd, *pixt; 338 PIXTILING *pt; 339 340 PROCNAME("pixEdgeAdaptiveThreshold"); 341 342 if (!pixs) 343 return ERROR_INT("pixs not defined", procName, 1); 344 if (!ppixd) 345 return ERROR_INT("&ppixd not defined", procName, 1); 346 pixGetDimensions(pixs, &w, &h, &d); 347 if (d != 8) 348 return ERROR_INT("pixs not 8 bpp", procName, 1); 349 if (tile_x < 8 || tile_y < 8) 350 return ERROR_INT("sx and sy must be >= 8", procName, 1); 351 352 /* Compute FDR & threshold for individual tiles */ 353 nx = L_MAX(1, w / tile_x); 354 ny = L_MAX(1, h / tile_y); 355 pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); 356 pixd = pixCreate(w, h, 1); 357 for (y = 0; y < ny; y++) { 358 for (x = 0; x < nx; x++) { 359 pixt = pixTilingGetTile(pt, y, x); 360 pixEdgeMax(pixt, &max, &avg); 361 362 if (max > thresh && avg > avg_thresh) { 363 pixSplitDistributionFgBg(pixt, 0.0, 1, &t, NULL, NULL, 0); 364 pixb = pixThresholdToBinary(pixt, t); 365 pixTilingPaintTile(pixd, y, x, pixb, pt); 366 pixDestroy(&pixb); 367 } 368 369 pixDestroy(&pixt); 370 } 371 } 372 373 pixTilingDestroy(&pt); 374 375 *ppixd = pixd; 376 377 return 0; 378}