/ocrorast/ocr-classify-zones.cc

https://code.google.com/p/ocropus/ · C++ · 695 lines · 583 code · 100 blank · 12 comment · 123 complexity · 1a8fbbd60f6e7e3e65c4ce1977b0cf36 MD5 · raw file

  1. #include "ocrorast.h"
  2. void ZoneFeatures::compressHist(intarray &histogram){
  3. intarray histogram_comp;
  4. histogram_comp.resize(MAX_LEN);
  5. fill(histogram_comp,0);
  6. // COMPRESSION
  7. int i = 0;
  8. int j = 0;
  9. int m = COMP_START;
  10. int k = COMP_LEN_START;
  11. while(i < MAX_LEN) {
  12. histogram_comp(j) = histogram(i);
  13. i++;
  14. if(i > COMP_START){
  15. m = min(MAX_LEN, m+k);
  16. while(i < m){
  17. histogram_comp(j)+= histogram(i);
  18. i++;
  19. }
  20. k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
  21. }
  22. j++;
  23. }
  24. histogram.clear();
  25. for(i = 0; i < j; i++)
  26. histogram.push(histogram_comp(i));
  27. }
  28. void ZoneFeatures::compress2DHist(intarray &histogram_2D){
  29. int steps[2];
  30. intarray histogram_2D_comp;
  31. histogram_2D_comp.resize(MAX_LEN,MAX_LEN);
  32. fill(histogram_2D_comp,0);
  33. int i, j, k, m, n;
  34. for(n = 0; n < MAX_LEN; n++) {
  35. i = 0;
  36. j = 0;
  37. m = COMP_START;
  38. k = COMP_LEN_START;
  39. while(i < MAX_LEN) {
  40. histogram_2D_comp(n,j) = histogram_2D(n,i);
  41. i++;
  42. if(i > COMP_START) {
  43. m = min(MAX_LEN, m+k);
  44. while(i < m) {
  45. histogram_2D_comp(n,j)+= histogram_2D(n,i);
  46. i++;
  47. }
  48. k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
  49. }
  50. j++;
  51. }
  52. }
  53. steps[0] = j;
  54. for(n = 0; n < steps[0]; n++) {
  55. i = 0;
  56. j = 0;
  57. m = COMP_START;
  58. k = COMP_LEN_START;
  59. while(i < MAX_LEN) {
  60. histogram_2D_comp(j,n) = histogram_2D_comp(i,n);
  61. i++;
  62. if(i > COMP_START) {
  63. m = min(MAX_LEN, m+k);
  64. while(i < m) {
  65. histogram_2D_comp(j,n)+= histogram_2D_comp(i,n);
  66. i++;
  67. }
  68. k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
  69. }
  70. j++;
  71. }
  72. }
  73. steps[1] = j;
  74. // move(histogram_2D, histogram_2D_comp);
  75. histogram_2D.clear();
  76. histogram_2D.resize(steps[0], steps[1]);
  77. for(i = 0; i < steps[0]; i++){
  78. for(j = 0; j < steps[1]; j++){
  79. histogram_2D(i,j) = histogram_2D_comp(i,j);
  80. }
  81. }
  82. }
  83. void ZoneFeatures::horizontalRunLengths(floatarray &resulthist,
  84. floatarray &resultstats,
  85. const bytearray &image){
  86. int imwidth = image.dim(0);
  87. int imheight = image.dim(1);
  88. intarray histogram_fg;
  89. histogram_fg.resize(MAX_LEN);
  90. fill(histogram_fg,0);
  91. intarray histogram_bg;
  92. histogram_bg.resize(MAX_LEN);
  93. fill(histogram_bg,0);
  94. int current_run_length_bg = 0;
  95. int current_run_length_fg = 0;
  96. int run_length_count_fg = 0;
  97. int run_length_count_bg = 0;
  98. bool end_fg = false;
  99. bool end_bg = false;
  100. // indicator that the end of the running segment occurs
  101. float mean_fg = 0, variance_fg = 0;
  102. float mean_bg = 0, variance_bg = 0;
  103. for(int j = 0; j < imheight; j++){
  104. for(int i = 0; i < imwidth; i++){
  105. if(image(i,j) == 0){
  106. current_run_length_fg++;
  107. end_fg = false;
  108. }
  109. else
  110. end_fg = true;
  111. if((current_run_length_fg > 0) && (end_fg || (i == imwidth-1))){
  112. current_run_length_fg = min(current_run_length_fg, MAX_LEN);
  113. histogram_fg(current_run_length_fg-1)++;
  114. run_length_count_fg++;
  115. mean_fg += current_run_length_fg;
  116. variance_fg += current_run_length_fg * current_run_length_fg;
  117. current_run_length_fg = 0;
  118. end_fg = false;
  119. }
  120. if(image(i,j) == 255){
  121. current_run_length_bg++;
  122. end_bg = false;
  123. }
  124. else
  125. end_bg = true;
  126. if((current_run_length_bg > 0) && (end_bg || (i == imwidth-1))){
  127. current_run_length_bg = min(current_run_length_bg, MAX_LEN);
  128. histogram_bg(current_run_length_bg-1)++;
  129. run_length_count_bg++;
  130. mean_bg += current_run_length_bg;
  131. variance_bg += current_run_length_bg * current_run_length_bg;
  132. current_run_length_bg = 0;
  133. end_bg = false;
  134. }
  135. }
  136. }
  137. compressHist(histogram_fg);
  138. compressHist(histogram_bg);
  139. for(int i=0, l=histogram_fg.length(); i<l; i++)
  140. resulthist.push(histogram_fg[i]);
  141. for(int i=0, l=histogram_bg.length(); i<l; i++)
  142. resulthist.push(histogram_bg[i]);
  143. if(run_length_count_fg){
  144. mean_fg/= run_length_count_fg;
  145. variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
  146. }
  147. else{
  148. mean_fg=0; variance_fg=0;
  149. }
  150. resultstats.push(run_length_count_fg);
  151. resultstats.push(mean_fg);
  152. resultstats.push(variance_fg);
  153. if(run_length_count_bg){
  154. mean_bg/= run_length_count_bg;
  155. variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
  156. }
  157. else{
  158. mean_bg=0; variance_bg=0;
  159. }
  160. resultstats.push(run_length_count_bg);
  161. resultstats.push(mean_bg);
  162. resultstats.push(variance_bg);
  163. }
  164. void ZoneFeatures::verticalRunLengths(floatarray &resulthist,
  165. floatarray &resultstats,
  166. const bytearray &image){
  167. int imwidth = image.dim(0);
  168. int imheight = image.dim(1);
  169. intarray histogram_fg;
  170. histogram_fg.resize(MAX_LEN);
  171. fill(histogram_fg,0);
  172. intarray histogram_bg;
  173. histogram_bg.resize(MAX_LEN);
  174. fill(histogram_bg,0);
  175. int current_run_length_bg = 0;
  176. int current_run_length_fg = 0;
  177. int run_length_count_fg = 0;
  178. int run_length_count_bg = 0;
  179. bool end_fg = false;
  180. bool end_bg = false;
  181. // indicator that the end of the running segment occurs
  182. float mean_fg = 0, variance_fg = 0;
  183. float mean_bg = 0, variance_bg = 0;
  184. for(int i = 0; i < imwidth; i++){
  185. for(int j = 0; j < imheight; j++){
  186. if(image(i,j) == 0){
  187. current_run_length_fg++;
  188. end_fg = false;
  189. }
  190. else
  191. end_fg = true;
  192. if((current_run_length_fg > 0) && (end_fg || (i == imheight-1))){
  193. current_run_length_fg = min(current_run_length_fg, MAX_LEN);
  194. histogram_fg(current_run_length_fg-1)++;
  195. run_length_count_fg++;
  196. mean_fg += current_run_length_fg;
  197. variance_fg += current_run_length_fg * current_run_length_fg;
  198. current_run_length_fg = 0;
  199. end_fg = false;
  200. }
  201. if(image(i,j) == 255){
  202. current_run_length_bg++;
  203. end_bg = false;
  204. }
  205. else
  206. end_bg = true;
  207. if((current_run_length_bg > 0) && (end_bg || (i == imheight-1))){
  208. current_run_length_bg = min(current_run_length_bg, MAX_LEN);
  209. histogram_bg(current_run_length_bg-1)++;
  210. run_length_count_bg++;
  211. mean_bg += current_run_length_bg;
  212. variance_bg += current_run_length_bg * current_run_length_bg;
  213. current_run_length_bg = 0;
  214. end_bg = false;
  215. }
  216. }
  217. }
  218. compressHist(histogram_fg);
  219. compressHist(histogram_bg);
  220. for(int i=0, l=histogram_fg.length(); i<l; i++)
  221. resulthist.push(histogram_fg[i]);
  222. for(int i=0, l=histogram_bg.length(); i<l; i++)
  223. resulthist.push(histogram_bg[i]);
  224. if(run_length_count_fg){
  225. mean_fg/= run_length_count_fg;
  226. variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
  227. }
  228. else{
  229. mean_fg=0; variance_fg=0;
  230. }
  231. resultstats.push(run_length_count_fg);
  232. resultstats.push(mean_fg);
  233. resultstats.push(variance_fg);
  234. if(run_length_count_bg){
  235. mean_bg/= run_length_count_bg;
  236. variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
  237. }
  238. else{
  239. mean_bg=0; variance_bg=0;
  240. }
  241. resultstats.push(run_length_count_bg);
  242. resultstats.push(mean_bg);
  243. resultstats.push(variance_bg);
  244. }
  245. void ZoneFeatures::mainDiagRunLengths(floatarray &resulthist,
  246. floatarray &resultstats,
  247. const bytearray &image){
  248. int imwidth = image.dim(0);
  249. int imheight = image.dim(1);
  250. intarray histogram_fg;
  251. histogram_fg.resize(MAX_LEN);
  252. fill(histogram_fg,0);
  253. intarray histogram_bg;
  254. histogram_bg.resize(MAX_LEN);
  255. fill(histogram_bg,0);
  256. int current_run_length_bg = 0;
  257. int current_run_length_fg = 0;
  258. int run_length_count_fg = 0;
  259. int run_length_count_bg = 0;
  260. bool end_fg = false;
  261. bool end_bg = false;
  262. // indicator that the end of the running segment occurs
  263. float mean_fg = 0, variance_fg = 0;
  264. float mean_bg = 0, variance_bg = 0;
  265. int pix = 0;
  266. for(int i = 0; i < imwidth + imheight; i++){
  267. for(int j = 0; j < min(imwidth, imheight); j++){
  268. if(i < imwidth){
  269. if(j < i+1)
  270. pix = image(i-j, j);
  271. else
  272. j = imwidth*imheight;
  273. }
  274. else{
  275. if(j < imwidth-1+imheight-i)
  276. pix = image(imwidth - j - 1, i - (imwidth-1) + j);
  277. else
  278. j = imwidth*imheight;
  279. }
  280. if((pix == 0) && (j != imwidth*imheight)){
  281. current_run_length_fg++;
  282. end_fg = false;
  283. }
  284. else
  285. end_fg = true;
  286. if( (current_run_length_fg > 0) &&
  287. (end_fg || (j == imwidth*imheight))){
  288. current_run_length_fg = min(current_run_length_fg, MAX_LEN);
  289. histogram_fg(current_run_length_fg-1)++;
  290. run_length_count_fg++;
  291. mean_fg += current_run_length_fg;
  292. variance_fg += current_run_length_fg * current_run_length_fg;
  293. current_run_length_fg = 0;
  294. end_fg = false;
  295. }
  296. if((pix == 255) && (j != imwidth*imheight)){
  297. current_run_length_bg++;
  298. end_bg = false;
  299. }
  300. else
  301. end_bg = true;
  302. if( (current_run_length_bg > 0) &&
  303. (end_bg || (j == imwidth*imheight))){
  304. current_run_length_bg = min(current_run_length_bg, MAX_LEN);
  305. histogram_bg(current_run_length_bg-1)++;
  306. run_length_count_bg++;
  307. mean_bg += current_run_length_bg;
  308. variance_bg += current_run_length_bg * current_run_length_bg;
  309. current_run_length_bg = 0;
  310. end_bg = false;
  311. }
  312. }
  313. }
  314. compressHist(histogram_fg);
  315. compressHist(histogram_bg);
  316. for(int i=0, l=histogram_fg.length(); i<l; i++)
  317. resulthist.push(histogram_fg[i]);
  318. for(int i=0, l=histogram_bg.length(); i<l; i++)
  319. resulthist.push(histogram_bg[i]);
  320. if(run_length_count_fg){
  321. mean_fg/= run_length_count_fg;
  322. variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
  323. }
  324. else{
  325. mean_fg=0; variance_fg=0;
  326. }
  327. resultstats.push(run_length_count_fg);
  328. resultstats.push(mean_fg);
  329. resultstats.push(variance_fg);
  330. if(run_length_count_bg){
  331. mean_bg/= run_length_count_bg;
  332. variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
  333. }
  334. else{
  335. mean_bg=0; variance_bg=0;
  336. }
  337. resultstats.push(run_length_count_bg);
  338. resultstats.push(mean_bg);
  339. resultstats.push(variance_bg);
  340. }
  341. void ZoneFeatures::sideDiagRunLengths(floatarray &resulthist,
  342. floatarray &resultstats,
  343. const bytearray &image){
  344. int imwidth = image.dim(0);
  345. int imheight = image.dim(1);
  346. intarray histogram_fg;
  347. histogram_fg.resize(MAX_LEN);
  348. fill(histogram_fg,0);
  349. intarray histogram_bg;
  350. histogram_bg.resize(MAX_LEN);
  351. fill(histogram_bg,0);
  352. int current_run_length_bg = 0;
  353. int current_run_length_fg = 0;
  354. int run_length_count_fg = 0;
  355. int run_length_count_bg = 0;
  356. bool end_fg = false;
  357. bool end_bg = false;
  358. // indicator that the end of the running segment occurs
  359. float mean_fg = 0, variance_fg = 0;
  360. float mean_bg = 0, variance_bg = 0;
  361. int pix = 0;
  362. for(int i = 0; i < imwidth + imheight; i++){
  363. for(int j = 0; j < min(imwidth, imheight); j++){
  364. if(i < imheight){
  365. if(j < i+1)
  366. pix = image(j, (imheight-1) - i + j);
  367. else
  368. j = imwidth*imheight;
  369. }
  370. else{
  371. if(j < imwidth-1+imheight-i)
  372. pix = image(i - (imheight-1) + j, j);
  373. else
  374. j = imwidth*imheight;
  375. }
  376. if((pix == 0) && (j != imwidth*imheight)){
  377. current_run_length_fg++;
  378. end_fg = false;
  379. }
  380. else
  381. end_fg = true;
  382. if( (current_run_length_fg > 0) &&
  383. (end_fg || (j == imwidth*imheight))){
  384. current_run_length_fg = min(current_run_length_fg, MAX_LEN);
  385. histogram_fg(current_run_length_fg-1)++;
  386. run_length_count_fg++;
  387. mean_fg += current_run_length_fg;
  388. variance_fg += current_run_length_fg * current_run_length_fg;
  389. current_run_length_fg = 0;
  390. end_fg = false;
  391. }
  392. if((pix == 255) && (j != imwidth*imheight)){
  393. current_run_length_bg++;
  394. end_bg = false;
  395. }
  396. else
  397. end_bg = true;
  398. if( (current_run_length_bg > 0) &&
  399. (end_bg || (j == imwidth*imheight))){
  400. current_run_length_bg = min(current_run_length_bg, MAX_LEN);
  401. histogram_bg(current_run_length_bg-1)++;
  402. run_length_count_bg++;
  403. mean_bg += current_run_length_bg;
  404. variance_bg += current_run_length_bg * current_run_length_bg;
  405. current_run_length_bg = 0;
  406. end_bg = false;
  407. }
  408. }
  409. }
  410. compressHist(histogram_fg);
  411. compressHist(histogram_bg);
  412. for(int i=0, l=histogram_fg.length(); i<l; i++)
  413. resulthist.push(histogram_fg[i]);
  414. for(int i=0, l=histogram_bg.length(); i<l; i++)
  415. resulthist.push(histogram_bg[i]);
  416. if(run_length_count_fg){
  417. mean_fg/= run_length_count_fg;
  418. variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
  419. }
  420. else{
  421. mean_fg=0; variance_fg=0;
  422. }
  423. resultstats.push(run_length_count_fg);
  424. resultstats.push(mean_fg);
  425. resultstats.push(variance_fg);
  426. if(run_length_count_bg){
  427. mean_bg/= run_length_count_bg;
  428. variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
  429. }
  430. else{
  431. mean_bg=0; variance_bg=0;
  432. }
  433. resultstats.push(run_length_count_bg);
  434. resultstats.push(mean_bg);
  435. resultstats.push(variance_bg);
  436. }
  437. void ZoneFeatures::concompHist(floatarray &result,
  438. rectarray &concomps){
  439. intarray histogram_width;
  440. histogram_width.resize(MAX_LEN);
  441. fill(histogram_width,0);
  442. intarray histogram_height;
  443. histogram_height.resize(MAX_LEN);
  444. fill(histogram_height,0);
  445. intarray histogram_2D;
  446. histogram_2D.resize(MAX_LEN,MAX_LEN);
  447. fill(histogram_2D,0);
  448. int width, height;
  449. for(int i=0, l=concomps.length(); i<l; i++){
  450. if(concomps[i].width() <=0 || concomps[i].height() <=0)
  451. continue;
  452. width = concomps[i].width();
  453. height = concomps[i].height();
  454. histogram_width(min(width, MAX_LEN)-1)++;
  455. histogram_height(min(height, MAX_LEN)-1)++;
  456. histogram_2D(min(width, MAX_LEN)-1,min(height, MAX_LEN)-1)++;
  457. }
  458. compressHist(histogram_width);
  459. compressHist(histogram_height);
  460. compress2DHist(histogram_2D);
  461. for(int i=0, l=histogram_width.length(); i<l; i++)
  462. result.push(histogram_width[i]);
  463. for(int i=0, l=histogram_height.length(); i<l; i++)
  464. result.push(histogram_height[i]);
  465. for(int i=0, l=histogram_2D.dim(0); i<l; i++)
  466. for(int j=0, k=histogram_2D.dim(1); j<k; j++)
  467. result.push(histogram_2D(i,j));
  468. }
  469. static inline double distance(float x1, float y1, float x2, float y2) {
  470. return (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1);
  471. }
  472. void ZoneFeatures::concompNeighbors(floatarray &result,
  473. rectarray &concomps){
  474. // bounding boxes nearest neighbor distance
  475. int num_boxes = concomps.length();
  476. intarray histogram;
  477. histogram.resize(MAX_LEN);
  478. fill(histogram,0);
  479. float xc, yc;
  480. double dist_min, dist;
  481. for(int i=0; i<num_boxes; i++) {
  482. xc = concomps[i].xcenter();
  483. yc = concomps[i].ycenter();
  484. dist_min = 100000;
  485. for(int j=0; j<num_boxes; j++) {
  486. dist = distance(xc, yc, concomps[j].xcenter(), concomps[j].ycenter());
  487. if( dist && dist<dist_min)
  488. dist_min = dist;
  489. }
  490. dist_min = sqrt(dist_min);
  491. if(dist_min<0 || dist_min>=MAX_LEN)
  492. continue;
  493. histogram( int (dist_min) )++;
  494. }
  495. compressHist(histogram);
  496. for(int i=0, l=histogram.length(); i<l; i++)
  497. result.push(histogram[i]);
  498. }
  499. void ZoneFeatures::extractFeatures(floatarray &feature, bytearray &image){
  500. if(!contains_only(image, byte(0), byte(255))){
  501. fprintf(stderr,"Binary image expected! ");
  502. fprintf(stderr,"skipping feature extraction...\n");
  503. return ;
  504. }
  505. // RUNNING LENGTHS
  506. floatarray rl_stats;
  507. horizontalRunLengths(feature,rl_stats,image);
  508. verticalRunLengths(feature,rl_stats,image);
  509. mainDiagRunLengths(feature,rl_stats,image);
  510. sideDiagRunLengths(feature,rl_stats,image);
  511. for(int index=0; index<rl_stats.length(); index++)
  512. feature.push(rl_stats[index]);
  513. // CONNECTED COMPONENTS
  514. bytearray in;
  515. copy(in, image);
  516. invert(in);
  517. // Do connected component analysis
  518. intarray charimage;
  519. copy(charimage,in);
  520. label_components(charimage,false);
  521. // Clean non-text and noisy boxes and get character statistics
  522. rectarray bboxes,boxes;
  523. bounding_boxes(bboxes,charimage);
  524. for(int i=0, l=bboxes.length(); i<l; i++)
  525. if(bboxes[i].area())
  526. boxes.push(bboxes[i]);
  527. concompHist(feature, boxes);
  528. concompNeighbors(feature, boxes);
  529. int z = 1;
  530. feature.push(z);
  531. }
  532. ZoneFeatures *make_ZoneFeatures() {
  533. return new ZoneFeatures();
  534. }
  535. void LogReg::loadData(){
  536. class_num = log_reg_class_num;
  537. factor = log_reg_factor;
  538. offset = log_reg_offset;
  539. feature_len = log_reg_feature_len;
  540. lambda.resize(class_num, feature_len);
  541. int index = 0;
  542. for(int i = 0; i < class_num; i++){
  543. for(int j = 0; j < feature_len; j++){
  544. lambda(i,j) = log_reg_data[index++];
  545. }
  546. }
  547. }
  548. zone_class LogReg::classify(floatarray &feature){
  549. float sum, image_probability;
  550. float sum_total = 0;
  551. float sum_max = 0;
  552. int image_class = -1;
  553. for(int k = 0; k < class_num; k++){
  554. sum = 0;
  555. for(int j = 0; j < feature_len; j++)
  556. sum += lambda(k,j) * feature(j);
  557. sum = exp(factor * sum + feature_len * offset);
  558. if (sum > sum_max){
  559. sum_max = sum;
  560. image_class = k;
  561. }
  562. sum_total += sum;
  563. }
  564. image_probability = sum_max / sum_total;
  565. //fprintf(stderr,"%d %f \n",image_class,image_probability);
  566. switch(image_class){
  567. case 0: return math;
  568. case 1: return logo;
  569. case 2: return text;
  570. case 3: return table;
  571. case 4: return drawing;
  572. case 5: return halftone;
  573. case 6: return ruling;
  574. case 7: return noise;
  575. default: return undefined;
  576. }
  577. }
  578. void LogReg::getClassProbabilities(floatarray &probability,
  579. floatarray &feature){
  580. float sum,sum_total = 0;
  581. probability.resize(class_num);
  582. for(int k = 0; k < class_num; k++){
  583. sum = 0;
  584. for(int j = 0; j < feature_len; j++)
  585. sum += lambda(k,j) * feature(j);
  586. sum = exp(factor * sum + feature_len * offset);
  587. probability[k] = sum;
  588. sum_total += sum;
  589. }
  590. for(int k = 0; k < class_num; k++)
  591. probability[k] /= sum_total;
  592. }
  593. LogReg *make_LogReg() {
  594. return new LogReg();
  595. }