/ocrorast/ocr-classify-zones.cc
https://code.google.com/p/ocropus/ · C++ · 695 lines · 583 code · 100 blank · 12 comment · 123 complexity · 1a8fbbd60f6e7e3e65c4ce1977b0cf36 MD5 · raw file
- #include "ocrorast.h"
- void ZoneFeatures::compressHist(intarray &histogram){
- intarray histogram_comp;
- histogram_comp.resize(MAX_LEN);
- fill(histogram_comp,0);
- // COMPRESSION
- int i = 0;
- int j = 0;
- int m = COMP_START;
- int k = COMP_LEN_START;
- while(i < MAX_LEN) {
- histogram_comp(j) = histogram(i);
- i++;
- if(i > COMP_START){
- m = min(MAX_LEN, m+k);
- while(i < m){
- histogram_comp(j)+= histogram(i);
- i++;
- }
- k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
- }
- j++;
- }
- histogram.clear();
- for(i = 0; i < j; i++)
- histogram.push(histogram_comp(i));
- }
- void ZoneFeatures::compress2DHist(intarray &histogram_2D){
- int steps[2];
- intarray histogram_2D_comp;
- histogram_2D_comp.resize(MAX_LEN,MAX_LEN);
- fill(histogram_2D_comp,0);
- int i, j, k, m, n;
- for(n = 0; n < MAX_LEN; n++) {
- i = 0;
- j = 0;
- m = COMP_START;
- k = COMP_LEN_START;
- while(i < MAX_LEN) {
- histogram_2D_comp(n,j) = histogram_2D(n,i);
- i++;
- if(i > COMP_START) {
- m = min(MAX_LEN, m+k);
- while(i < m) {
- histogram_2D_comp(n,j)+= histogram_2D(n,i);
- i++;
- }
- k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
- }
- j++;
- }
- }
- steps[0] = j;
- for(n = 0; n < steps[0]; n++) {
- i = 0;
- j = 0;
- m = COMP_START;
- k = COMP_LEN_START;
- while(i < MAX_LEN) {
- histogram_2D_comp(j,n) = histogram_2D_comp(i,n);
- i++;
- if(i > COMP_START) {
- m = min(MAX_LEN, m+k);
- while(i < m) {
- histogram_2D_comp(j,n)+= histogram_2D_comp(i,n);
- i++;
- }
- k = COMP_LEN_INC_A * k + COMP_LEN_INC_B;
- }
- j++;
- }
- }
- steps[1] = j;
- // move(histogram_2D, histogram_2D_comp);
- histogram_2D.clear();
- histogram_2D.resize(steps[0], steps[1]);
- for(i = 0; i < steps[0]; i++){
- for(j = 0; j < steps[1]; j++){
- histogram_2D(i,j) = histogram_2D_comp(i,j);
- }
- }
- }
- void ZoneFeatures::horizontalRunLengths(floatarray &resulthist,
- floatarray &resultstats,
- const bytearray &image){
- int imwidth = image.dim(0);
- int imheight = image.dim(1);
- intarray histogram_fg;
- histogram_fg.resize(MAX_LEN);
- fill(histogram_fg,0);
- intarray histogram_bg;
- histogram_bg.resize(MAX_LEN);
- fill(histogram_bg,0);
- int current_run_length_bg = 0;
- int current_run_length_fg = 0;
- int run_length_count_fg = 0;
- int run_length_count_bg = 0;
- bool end_fg = false;
- bool end_bg = false;
- // indicator that the end of the running segment occurs
- float mean_fg = 0, variance_fg = 0;
- float mean_bg = 0, variance_bg = 0;
- for(int j = 0; j < imheight; j++){
- for(int i = 0; i < imwidth; i++){
- if(image(i,j) == 0){
- current_run_length_fg++;
- end_fg = false;
- }
- else
- end_fg = true;
- if((current_run_length_fg > 0) && (end_fg || (i == imwidth-1))){
- current_run_length_fg = min(current_run_length_fg, MAX_LEN);
- histogram_fg(current_run_length_fg-1)++;
- run_length_count_fg++;
- mean_fg += current_run_length_fg;
- variance_fg += current_run_length_fg * current_run_length_fg;
- current_run_length_fg = 0;
- end_fg = false;
- }
- if(image(i,j) == 255){
- current_run_length_bg++;
- end_bg = false;
- }
- else
- end_bg = true;
- if((current_run_length_bg > 0) && (end_bg || (i == imwidth-1))){
- current_run_length_bg = min(current_run_length_bg, MAX_LEN);
- histogram_bg(current_run_length_bg-1)++;
- run_length_count_bg++;
- mean_bg += current_run_length_bg;
- variance_bg += current_run_length_bg * current_run_length_bg;
- current_run_length_bg = 0;
- end_bg = false;
- }
- }
- }
- compressHist(histogram_fg);
- compressHist(histogram_bg);
- for(int i=0, l=histogram_fg.length(); i<l; i++)
- resulthist.push(histogram_fg[i]);
- for(int i=0, l=histogram_bg.length(); i<l; i++)
- resulthist.push(histogram_bg[i]);
- if(run_length_count_fg){
- mean_fg/= run_length_count_fg;
- variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
- }
- else{
- mean_fg=0; variance_fg=0;
- }
- resultstats.push(run_length_count_fg);
- resultstats.push(mean_fg);
- resultstats.push(variance_fg);
- if(run_length_count_bg){
- mean_bg/= run_length_count_bg;
- variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
- }
- else{
- mean_bg=0; variance_bg=0;
- }
- resultstats.push(run_length_count_bg);
- resultstats.push(mean_bg);
- resultstats.push(variance_bg);
- }
- void ZoneFeatures::verticalRunLengths(floatarray &resulthist,
- floatarray &resultstats,
- const bytearray &image){
- int imwidth = image.dim(0);
- int imheight = image.dim(1);
- intarray histogram_fg;
- histogram_fg.resize(MAX_LEN);
- fill(histogram_fg,0);
- intarray histogram_bg;
- histogram_bg.resize(MAX_LEN);
- fill(histogram_bg,0);
- int current_run_length_bg = 0;
- int current_run_length_fg = 0;
- int run_length_count_fg = 0;
- int run_length_count_bg = 0;
- bool end_fg = false;
- bool end_bg = false;
- // indicator that the end of the running segment occurs
- float mean_fg = 0, variance_fg = 0;
- float mean_bg = 0, variance_bg = 0;
- for(int i = 0; i < imwidth; i++){
- for(int j = 0; j < imheight; j++){
- if(image(i,j) == 0){
- current_run_length_fg++;
- end_fg = false;
- }
- else
- end_fg = true;
- if((current_run_length_fg > 0) && (end_fg || (i == imheight-1))){
- current_run_length_fg = min(current_run_length_fg, MAX_LEN);
- histogram_fg(current_run_length_fg-1)++;
- run_length_count_fg++;
- mean_fg += current_run_length_fg;
- variance_fg += current_run_length_fg * current_run_length_fg;
- current_run_length_fg = 0;
- end_fg = false;
- }
- if(image(i,j) == 255){
- current_run_length_bg++;
- end_bg = false;
- }
- else
- end_bg = true;
- if((current_run_length_bg > 0) && (end_bg || (i == imheight-1))){
- current_run_length_bg = min(current_run_length_bg, MAX_LEN);
- histogram_bg(current_run_length_bg-1)++;
- run_length_count_bg++;
- mean_bg += current_run_length_bg;
- variance_bg += current_run_length_bg * current_run_length_bg;
- current_run_length_bg = 0;
- end_bg = false;
- }
- }
- }
- compressHist(histogram_fg);
- compressHist(histogram_bg);
- for(int i=0, l=histogram_fg.length(); i<l; i++)
- resulthist.push(histogram_fg[i]);
- for(int i=0, l=histogram_bg.length(); i<l; i++)
- resulthist.push(histogram_bg[i]);
- if(run_length_count_fg){
- mean_fg/= run_length_count_fg;
- variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
- }
- else{
- mean_fg=0; variance_fg=0;
- }
- resultstats.push(run_length_count_fg);
- resultstats.push(mean_fg);
- resultstats.push(variance_fg);
- if(run_length_count_bg){
- mean_bg/= run_length_count_bg;
- variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
- }
- else{
- mean_bg=0; variance_bg=0;
- }
- resultstats.push(run_length_count_bg);
- resultstats.push(mean_bg);
- resultstats.push(variance_bg);
- }
- void ZoneFeatures::mainDiagRunLengths(floatarray &resulthist,
- floatarray &resultstats,
- const bytearray &image){
- int imwidth = image.dim(0);
- int imheight = image.dim(1);
- intarray histogram_fg;
- histogram_fg.resize(MAX_LEN);
- fill(histogram_fg,0);
- intarray histogram_bg;
- histogram_bg.resize(MAX_LEN);
- fill(histogram_bg,0);
- int current_run_length_bg = 0;
- int current_run_length_fg = 0;
- int run_length_count_fg = 0;
- int run_length_count_bg = 0;
- bool end_fg = false;
- bool end_bg = false;
- // indicator that the end of the running segment occurs
- float mean_fg = 0, variance_fg = 0;
- float mean_bg = 0, variance_bg = 0;
- int pix = 0;
- for(int i = 0; i < imwidth + imheight; i++){
- for(int j = 0; j < min(imwidth, imheight); j++){
- if(i < imwidth){
- if(j < i+1)
- pix = image(i-j, j);
- else
- j = imwidth*imheight;
- }
- else{
- if(j < imwidth-1+imheight-i)
- pix = image(imwidth - j - 1, i - (imwidth-1) + j);
- else
- j = imwidth*imheight;
- }
- if((pix == 0) && (j != imwidth*imheight)){
- current_run_length_fg++;
- end_fg = false;
- }
- else
- end_fg = true;
- if( (current_run_length_fg > 0) &&
- (end_fg || (j == imwidth*imheight))){
- current_run_length_fg = min(current_run_length_fg, MAX_LEN);
- histogram_fg(current_run_length_fg-1)++;
- run_length_count_fg++;
- mean_fg += current_run_length_fg;
- variance_fg += current_run_length_fg * current_run_length_fg;
- current_run_length_fg = 0;
- end_fg = false;
- }
- if((pix == 255) && (j != imwidth*imheight)){
- current_run_length_bg++;
- end_bg = false;
- }
- else
- end_bg = true;
- if( (current_run_length_bg > 0) &&
- (end_bg || (j == imwidth*imheight))){
- current_run_length_bg = min(current_run_length_bg, MAX_LEN);
- histogram_bg(current_run_length_bg-1)++;
- run_length_count_bg++;
- mean_bg += current_run_length_bg;
- variance_bg += current_run_length_bg * current_run_length_bg;
- current_run_length_bg = 0;
- end_bg = false;
- }
- }
- }
- compressHist(histogram_fg);
- compressHist(histogram_bg);
- for(int i=0, l=histogram_fg.length(); i<l; i++)
- resulthist.push(histogram_fg[i]);
- for(int i=0, l=histogram_bg.length(); i<l; i++)
- resulthist.push(histogram_bg[i]);
- if(run_length_count_fg){
- mean_fg/= run_length_count_fg;
- variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
- }
- else{
- mean_fg=0; variance_fg=0;
- }
- resultstats.push(run_length_count_fg);
- resultstats.push(mean_fg);
- resultstats.push(variance_fg);
- if(run_length_count_bg){
- mean_bg/= run_length_count_bg;
- variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
- }
- else{
- mean_bg=0; variance_bg=0;
- }
- resultstats.push(run_length_count_bg);
- resultstats.push(mean_bg);
- resultstats.push(variance_bg);
- }
- void ZoneFeatures::sideDiagRunLengths(floatarray &resulthist,
- floatarray &resultstats,
- const bytearray &image){
- int imwidth = image.dim(0);
- int imheight = image.dim(1);
- intarray histogram_fg;
- histogram_fg.resize(MAX_LEN);
- fill(histogram_fg,0);
- intarray histogram_bg;
- histogram_bg.resize(MAX_LEN);
- fill(histogram_bg,0);
- int current_run_length_bg = 0;
- int current_run_length_fg = 0;
- int run_length_count_fg = 0;
- int run_length_count_bg = 0;
- bool end_fg = false;
- bool end_bg = false;
- // indicator that the end of the running segment occurs
- float mean_fg = 0, variance_fg = 0;
- float mean_bg = 0, variance_bg = 0;
- int pix = 0;
- for(int i = 0; i < imwidth + imheight; i++){
- for(int j = 0; j < min(imwidth, imheight); j++){
- if(i < imheight){
- if(j < i+1)
- pix = image(j, (imheight-1) - i + j);
- else
- j = imwidth*imheight;
- }
- else{
- if(j < imwidth-1+imheight-i)
- pix = image(i - (imheight-1) + j, j);
- else
- j = imwidth*imheight;
- }
- if((pix == 0) && (j != imwidth*imheight)){
- current_run_length_fg++;
- end_fg = false;
- }
- else
- end_fg = true;
- if( (current_run_length_fg > 0) &&
- (end_fg || (j == imwidth*imheight))){
- current_run_length_fg = min(current_run_length_fg, MAX_LEN);
- histogram_fg(current_run_length_fg-1)++;
- run_length_count_fg++;
- mean_fg += current_run_length_fg;
- variance_fg += current_run_length_fg * current_run_length_fg;
- current_run_length_fg = 0;
- end_fg = false;
- }
- if((pix == 255) && (j != imwidth*imheight)){
- current_run_length_bg++;
- end_bg = false;
- }
- else
- end_bg = true;
- if( (current_run_length_bg > 0) &&
- (end_bg || (j == imwidth*imheight))){
- current_run_length_bg = min(current_run_length_bg, MAX_LEN);
- histogram_bg(current_run_length_bg-1)++;
- run_length_count_bg++;
- mean_bg += current_run_length_bg;
- variance_bg += current_run_length_bg * current_run_length_bg;
- current_run_length_bg = 0;
- end_bg = false;
- }
- }
- }
- compressHist(histogram_fg);
- compressHist(histogram_bg);
- for(int i=0, l=histogram_fg.length(); i<l; i++)
- resulthist.push(histogram_fg[i]);
- for(int i=0, l=histogram_bg.length(); i<l; i++)
- resulthist.push(histogram_bg[i]);
- if(run_length_count_fg){
- mean_fg/= run_length_count_fg;
- variance_fg = variance_fg/run_length_count_fg - mean_fg*mean_fg;
- }
- else{
- mean_fg=0; variance_fg=0;
- }
- resultstats.push(run_length_count_fg);
- resultstats.push(mean_fg);
- resultstats.push(variance_fg);
- if(run_length_count_bg){
- mean_bg/= run_length_count_bg;
- variance_bg = variance_bg/run_length_count_bg - mean_bg*mean_bg;
- }
- else{
- mean_bg=0; variance_bg=0;
- }
- resultstats.push(run_length_count_bg);
- resultstats.push(mean_bg);
- resultstats.push(variance_bg);
- }
- void ZoneFeatures::concompHist(floatarray &result,
- rectarray &concomps){
- intarray histogram_width;
- histogram_width.resize(MAX_LEN);
- fill(histogram_width,0);
- intarray histogram_height;
- histogram_height.resize(MAX_LEN);
- fill(histogram_height,0);
- intarray histogram_2D;
- histogram_2D.resize(MAX_LEN,MAX_LEN);
- fill(histogram_2D,0);
- int width, height;
- for(int i=0, l=concomps.length(); i<l; i++){
- if(concomps[i].width() <=0 || concomps[i].height() <=0)
- continue;
- width = concomps[i].width();
- height = concomps[i].height();
- histogram_width(min(width, MAX_LEN)-1)++;
- histogram_height(min(height, MAX_LEN)-1)++;
- histogram_2D(min(width, MAX_LEN)-1,min(height, MAX_LEN)-1)++;
- }
- compressHist(histogram_width);
- compressHist(histogram_height);
- compress2DHist(histogram_2D);
- for(int i=0, l=histogram_width.length(); i<l; i++)
- result.push(histogram_width[i]);
- for(int i=0, l=histogram_height.length(); i<l; i++)
- result.push(histogram_height[i]);
- for(int i=0, l=histogram_2D.dim(0); i<l; i++)
- for(int j=0, k=histogram_2D.dim(1); j<k; j++)
- result.push(histogram_2D(i,j));
- }
- static inline double distance(float x1, float y1, float x2, float y2) {
- return (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1);
- }
- void ZoneFeatures::concompNeighbors(floatarray &result,
- rectarray &concomps){
- // bounding boxes nearest neighbor distance
- int num_boxes = concomps.length();
- intarray histogram;
- histogram.resize(MAX_LEN);
- fill(histogram,0);
- float xc, yc;
- double dist_min, dist;
- for(int i=0; i<num_boxes; i++) {
- xc = concomps[i].xcenter();
- yc = concomps[i].ycenter();
- dist_min = 100000;
- for(int j=0; j<num_boxes; j++) {
- dist = distance(xc, yc, concomps[j].xcenter(), concomps[j].ycenter());
- if( dist && dist<dist_min)
- dist_min = dist;
- }
- dist_min = sqrt(dist_min);
- if(dist_min<0 || dist_min>=MAX_LEN)
- continue;
- histogram( int (dist_min) )++;
- }
- compressHist(histogram);
- for(int i=0, l=histogram.length(); i<l; i++)
- result.push(histogram[i]);
- }
- void ZoneFeatures::extractFeatures(floatarray &feature, bytearray &image){
- if(!contains_only(image, byte(0), byte(255))){
- fprintf(stderr,"Binary image expected! ");
- fprintf(stderr,"skipping feature extraction...\n");
- return ;
- }
- // RUNNING LENGTHS
- floatarray rl_stats;
- horizontalRunLengths(feature,rl_stats,image);
- verticalRunLengths(feature,rl_stats,image);
- mainDiagRunLengths(feature,rl_stats,image);
- sideDiagRunLengths(feature,rl_stats,image);
- for(int index=0; index<rl_stats.length(); index++)
- feature.push(rl_stats[index]);
- // CONNECTED COMPONENTS
- bytearray in;
- copy(in, image);
- invert(in);
- // Do connected component analysis
- intarray charimage;
- copy(charimage,in);
- label_components(charimage,false);
- // Clean non-text and noisy boxes and get character statistics
- rectarray bboxes,boxes;
- bounding_boxes(bboxes,charimage);
- for(int i=0, l=bboxes.length(); i<l; i++)
- if(bboxes[i].area())
- boxes.push(bboxes[i]);
- concompHist(feature, boxes);
- concompNeighbors(feature, boxes);
- int z = 1;
- feature.push(z);
- }
- ZoneFeatures *make_ZoneFeatures() {
- return new ZoneFeatures();
- }
- void LogReg::loadData(){
- class_num = log_reg_class_num;
- factor = log_reg_factor;
- offset = log_reg_offset;
- feature_len = log_reg_feature_len;
- lambda.resize(class_num, feature_len);
- int index = 0;
- for(int i = 0; i < class_num; i++){
- for(int j = 0; j < feature_len; j++){
- lambda(i,j) = log_reg_data[index++];
- }
- }
- }
- zone_class LogReg::classify(floatarray &feature){
- float sum, image_probability;
- float sum_total = 0;
- float sum_max = 0;
- int image_class = -1;
- for(int k = 0; k < class_num; k++){
- sum = 0;
- for(int j = 0; j < feature_len; j++)
- sum += lambda(k,j) * feature(j);
- sum = exp(factor * sum + feature_len * offset);
- if (sum > sum_max){
- sum_max = sum;
- image_class = k;
- }
- sum_total += sum;
- }
- image_probability = sum_max / sum_total;
- //fprintf(stderr,"%d %f \n",image_class,image_probability);
- switch(image_class){
- case 0: return math;
- case 1: return logo;
- case 2: return text;
- case 3: return table;
- case 4: return drawing;
- case 5: return halftone;
- case 6: return ruling;
- case 7: return noise;
- default: return undefined;
- }
- }
- void LogReg::getClassProbabilities(floatarray &probability,
- floatarray &feature){
- float sum,sum_total = 0;
- probability.resize(class_num);
- for(int k = 0; k < class_num; k++){
- sum = 0;
- for(int j = 0; j < feature_len; j++)
- sum += lambda(k,j) * feature(j);
- sum = exp(factor * sum + feature_len * offset);
- probability[k] = sum;
- sum_total += sum;
- }
- for(int k = 0; k < class_num; k++)
- probability[k] /= sum_total;
- }
- LogReg *make_LogReg() {
- return new LogReg();
- }