/Spikes/GocrDemo/ocr0.c
C | 6756 lines | 5455 code | 438 blank | 863 comment | 4512 complexity | ffa277b2d87e6c78c5649170766ee916 MD5 | raw file
Possible License(s): Apache-2.0
Large files files are truncated, but you can click here to view the full file
- /*
- rule based OCR engine, partly rewritten for edges (old=pixel)
- */
- /*
- This is a Optical-Character-Recognition program
- Copyright (C) 2000-2009 Joerg Schulenburg
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- see README for email address
- >>> DO NOT EDIT THIS FILE IF YOU DO NOT REALLY KNOW WHAT YOU ARE DOING! <<<
-
- I have invested a lot of time in writing this part of the program.
- This engine should always recognize chars correctly or return UNKNOWN.
- If you change something, test all other example files too,
- to be sure that everything still works (or works better). (JoergS)
-
- This engine was pixel-based until 0.40, which was not successful enough.
- Also, code changes always had side effects. The vectorization of the code
- starts from version 0.41 with the chars XNz and seems to be much easier
- to handle. Vectorization means we frame each character by a chain of
- vectors and don't care about pixels anymore. Unfortunately I have to
- replace all the pixel code, which is a long process. Old code will be lost.
- (JorgS)
- ToDo:
- - if box1->p and b differ, reduce probability
- - probability makes life much easier here
- - use only one box!?, maybe the bits carry useful information
- - divide this file, suggestion: classify chars:
- high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz
- or
- often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,:
- or
- every char (large overhead)
- - two-pass version (first pass without tolerance)
- 2nd pass with tolerance (ex: one tiny more in sdata->holes)
- general feature extraction:
- - white holes at middle, upper, lower position (cost much time)
- - test lines and triangles instead of rectangles
-
- char is removed, wchar_t is used (better code)
-
- making a static global variable-set x.x0,x.x1, and call test_a,
- test_b ... (faster compilation, but not reentrant!)
-
- - adding slant-angle (if detected) to distinguish between l and / ?
- - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/";
- for better context correction or output: "Ha[lI][lI]o!"
- */
- #include <stdlib.h>
- #include <stdio.h>
- // #include "pgm2asc.h"
- #include "ocr0.h"
- // #include "ocr1.h"
- #include "amiga.h"
- #include "pnm.h"
- #include "gocr.h"
// Debug helper macros used by all rule functions in this file.
// IFV guards a statement so that it only runs when bit 2 (value 4) of
// JOB->cfg.verbose is set.
// MM prints a debug prefix (asked char, source line, box origin); it expects
// the identifiers c_ask and box1 to be visible where it is expanded.
- #define IFV if(JOB->cfg.verbose&4)
- #define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);}
- // the old debug mode (0.40) worked only for one special char; for another
- // char the code had to be recompiled with C_ASK='char'
- // the new debug mode (0.41) explains why a char is declined or accepted as ABC...
- // the output can be filtered by external scripts
- // ToDo: we could reduce the output to a filter string
- #ifndef DO_DEBUG /* can be defined outside (configure --with-debug) */
- #define DO_DEBUG 0 /* 0 is the default */
- #endif
- /* these macros are for debugging output: "if char is declined, why?" */
- #if DO_DEBUG /* 0=Work mode, 1=debugging mode */
- // Setac: report that the char is chosen with a probability, then call setac()
- // Break: report why the char is not chosen, then break out of the test loop
- // MSG: debugging statements for char c_ask, mostly messages
- // DBG: definitions useful only for debugging
- #define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); }
- #define Break { MM;IFV fprintf(stderr,"break"); break; } /* must stay a plain block: the break has to leave the caller's loop */
- #define MSG(x) { MM;IFV x }
- #define DBG(x) x
- #else
- #define Setac(box1,ac,ad) setac(box1,ac,ad) /* work mode: no debug output */
- #define Break break
- #define MSG(x) /* expands to nothing */
- #define DBG(x) /* expands to nothing */
- #endif
- /* extern "C"{ */
- // static inline int sq(int x) { return x*x; } /* square */
- /*
- * go from vector j1 to vector j2 and measure maximum deviation of
- * the steps from the line connecting j1 and j2
- * return the squared maximum distance
- * in units of the box size times 1024
- * ToDo: 1) better give back max-dx and max-dy ???
- * errors if j1 and j2 are in different frames or belong to
- * more then one frame?
- * 2) Better get deviation from a complete vector graphic?
- * The vectorgraphic is the ideal test char adapted to the
- * extrem vertices of the real char.
- */
- int line_deviation( struct box *box1, int j1, int j2 ) {
- int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2;
- r1x=box1->frame_vector[j1][0];
- r1y=box1->frame_vector[j1][1];
- r2x=box1->frame_vector[j2][0];
- r2y=box1->frame_vector[j2][1];
- if (!box1->num_frames) return(-1);
- if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
- j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
- fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__);
- return(-1);
- }
- /* get the frame the endvector belongs to */
- for (i=0;i<box1->num_frames;i++)
- if (j2<box1->num_frame_vectors[i]) break;
- frame=i;
- /* frame(j1)<=frame(j2) possible */
- for (i=j1;;i++) { // do it for each vector between j1 and j2
- if (i >= box1->num_frame_vectors[frame])
- i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
- if (i==j2) break;
- // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~}
- r3x=box1->frame_vector[i][0];
- r3y=box1->frame_vector[i][1];
- // Language=german
- // german: Abstand Punkt von Strecke, Laenge Lotrechte
- // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1
- // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x]
- // Schnittpunkt : l1=l2,
- // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0
- // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0
- // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x)
- // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y)
- // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly)
- l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1
- if (l2==0) {
- // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging
- d=-1024;
- } else
- d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x)
- +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024..
- if (d<=-1024) { x=r1x; y=r1y; } // starting point
- else {
- if (d>=1024) { x=r2x; y=r2y; } // end point
- else {
- x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048;
- y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048;
- /* we have the crossing point x,y now */
- }
- }
- dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1))
- +sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024)
- if (dist>maxdist) maxdist=dist;
- // for debugging:
- // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d"
- // " vector= %d %d crosspoint= %d %d ",
- // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y);
- }
- return maxdist;
- }
- /*
- * search vectors between j1 and j2 for nearest point a to point r
- * example:
- *
- * r-> $$...$$ $ - mark vectors
- * @@$..@@ @ - black pixels
- * @@$..@@ . - white pixels
- * @@@@.$@
- * a-> @@$@$@@
- * @$.@@@@
- * @@..$@@
- * @@..$@@
- * j1 --> $$...$$ <-- j2
- *
- * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry?
- * j1 and j2 must be in the same frame
- * return aa?
- * 2009-07:
- * - change from normalized (dx=128,dy=128) to absolute distance
- * - simpler and no squeeze effect (problem getting right i2 for "3")
- */
- int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) {
- int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */
- int frame=0;
- // int x0=box1->x0, y0=box1->y0,
- // x1=box1->x1, y1=box1->y1;
- // int dx=box1->x1-x0+1, dy=box1->y1-y0+1;
- if (!box1->num_frames) return(-1);
- if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
- j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
- fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2);
- out_x(box1);
- return(-1);
- }
- aa[0]=x=box1->frame_vector[j2][0]; /* x */
- aa[1]=y=box1->frame_vector[j2][1]; /* y */
- /* maximum is (distance*128)^2 if r is inside the box */
- // aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy);
- aa[2]=d=2*(sq(x-rx)+sq(y-ry)); /* must be greater than min. dist, Jul09 */
- aa[3]=j2; /* vector index */
- /* get the frame the endvector belongs to */
- for (i=0;i<box1->num_frames;i++)
- if (j2<box1->num_frame_vectors[i]) break;
- frame=i;
- /* frame(j1)<=frame(j2) possible */
- for (i=j1;;i++) {
- if (i >= box1->num_frame_vectors[frame])
- i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
- x=box1->frame_vector[i][0]; /* take a vector */
- y=box1->frame_vector[i][1];
- /* distance to upper left end, normalized to 128 */
- // d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); // old 2009-07
- d=sq(x-rx)+sq(y-ry);
- if (d<aa[2]) { aa[0]=x; aa[1]=y; aa[2]=d; aa[3]=i; }
- if (i==j2) break;
- }
- return aa[3];
- }
// testumlaut: look in the upper part of box1 for a detached mark (dots,
// accent, ring, ...) above the character and classify it.
//   box1     - box to inspect; it is written to only if m is non-zero
//   cs       - threshold handed to get_bw/loop/num_cross/num_obj/num_hole and
//              compared against getpixel() values
//   m        - non-zero: store the result in box1->dots and box1->modifier
//              (and move box1->y0 down to yu when r==0);
//              bit 1 (m&2): also move box1->y0 below the detected mark
//   modifier - if non-NULL, receives the modifier found on the final path;
//              the two early "return 0" paths leave *modifier untouched
// returns r: 0 = no mark, 1 = one mark, 2 = two side-by-side marks (DIAERESIS)
- // test for umlauts; if ret>0 and m is set, box1 is changed
- // m>0 modify box1->dots (and box1->modifier)
- // m&2 modify box1->y0
- // called by pgm2asc + ocr0(?)
- int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){
- // pix p=*(box1->p);
- int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3,
- xl,xr,yu,yl; // left, right, upper and lower border of dots
- wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */
- DBG( wchar_t c_ask='"'; )
- r=0;
- x0=box1->x0; x1=box1->x1; dx=x1-x0+1;
- y0=box1->y0; y1=box1->y1; dy=y1-y0+1;
- m1=box1->m1; m2=box1->m2; m3=box1->m3; // presumably text line metrics - TODO confirm
- xl=x0; xr=x1; yu=yl=y0; // start with an empty mark region (yu==yl)
- if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // no low chars: .,-= (too small or box starts too low)
- /* modifier in box included? */
- if( 2*y1 > m1+m2 ){ // box bottom lies below the middle of m1..m2
- /* search the upper half for the first row where get_bw() reports 0 */
- for(y=y0;2*y<y0+y1;y++)if( get_bw(xl,xr,y,y,box1->p,cs,1)==0 ) break;
- if( 2*y<y0+y1 ){ /* yes => extract */
- yl=y; // first gap row = lower border of the mark (exclusive)
- while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++; // skip the gap rows
- if( m&2 ) box1->y0=y; /* set new upper bound */
- }
- }
- if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */
- if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap?
- while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xl<x0+x1) xl++;
- for(;xl<x1;xl++)if( get_bw(xl,xl,yu,yl,box1->p,cs,1)==1 ) break; // tighten left border
- for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break; // tighten right border
- if ( yl-1>yu ) { // tall box ij"a"o"u
- #if 0
- x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x;
- fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
- fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
- #define DEBUG 1
- #endif
- {
- x=xl;y=yu;
- if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap?
- else
- if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0
- || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gaps to the neighbours
- if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0
- || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 )
- { int i,j,x; // note: this x shadows the function-level x
- r=1; // at least one mark
- // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE
- // ..@...@... // @@.@@@@@..
- // ..@...@... // @.........
- // ..@..@@...
- // ...@@@....
- for (i=yu;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==1) break; // first row of the mark
- for ( ;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==0) break; // first row after it
- for (j=xl;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==1) break; // start of 1st part
- for ( ;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==0) break; // j = start of vertical gap
- for ( x=j;x<xr;x++) if (get_bw(x,x,yu,i,box1->p,cs,1)==1) break; // x = start of 2nd part
- // vert. gap detected
- if( j<xr && x<xr && j<x && xr-xl>2
- && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!!
- && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2
- && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2
- ){ // maybe the following lines are not quite ok
- while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yl<y0+y1) yl++;
- r=2; // two separate marks side by side
- // out_x(box1);printf(" x,y=%d,%d i=%d xl=%d xr=%d yu=%d yl=%d",x0,y0,i-x0,xl-x0,xr-x0,yu-y0,yl-y0);
- mod = DIAERESIS;
- }
- if( m&2 ) box1->y0=yl;
- /* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */
- // out_x(box1);
- }
- if(r==0){ // divided fr != fi
- while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yu<y0+y1) yu++;
- if(m)box1->y0=yu;
- }
- if( r==1 ){ yl--; // from here on yl is the last row of the mark (inclusive)
- // .@@@. ..@@.
- // .@@.. .@@..
- // .@... .@@..
- //
- // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
- // > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
- // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
- // < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03
- // mark leans to the right: (left gap - right gap) is larger in the top row than in the bottom row
- if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
- - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
- > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
- - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03
- mod = ACUTE_ACCENT; // '
- if( xr-xl+1 > 3*(yl-yu+1) // more than 3 times wider than high
- && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 )
- mod = MACRON; // "-" above
- // .@@@. .@@..
- // ..@@. ..@@.
- // ...@. ..@@.
- //
- // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
- // < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
- // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
- // > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03
- // mirror case of the acute test: mark leans to the left
- if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
- - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
- < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
- - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03
- mod = GRAVE_ACCENT; // `
- #ifdef DEBUG
- fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
- fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
- #endif
- if( (xr-xl+1) < 2*(yl-yu+1)+2 // roughly as wide as high => dot candidate
- && 2*(xr-xl+1)+2 > (yl-yu+1) ) {
- int i,i1,i2,i3,i4; // i1/i2: horizontal/vertical run through the middle, i3/i4: runs along the two diagonals
- i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI);
- i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI);
- i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO);
- i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO);
- for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
- if (getpixel(box1->p,xl+i,yu+i)< cs) break; i3=i; // "i3=i" runs after the loop, not inside it
- for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
- if (getpixel(box1->p,xl+i,yu+i)>=cs) break; i3=i-i3; // length of the run on the \ diagonal
- for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
- if (getpixel(box1->p,xr-i,yu+i)< cs) break; i4=i; // same for the / diagonal
- for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
- if (getpixel(box1->p,xr-i,yu+i)>=cs) break; i4=i-i4;
- #ifdef DEBUG
- fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4);
- #endif
- if ( (xr-xl<5 && yl-yu<8) /* too small */
- || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */
- && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2
- && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4)
- )
- mod = DOT_ABOVE; // "." above, ToDo: improve it!
- }
- // both side gaps are (almost) not smaller at the top than at the bottom,
- // one crossing in the top row and two near the bottom row
- if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
- > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8
- || loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
- > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 )
- && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
- > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8
- || loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
- > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 )
- && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1
- && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2
- || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 ))
- mod = CIRCUMFLEX_ACCENT; // "^"
-
- // opposite shape: side gaps smaller at the top, two crossings near the
- // top row and one in the bottom row
- if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
- < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
- || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
- < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
- && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
- < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
- || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
- < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
- && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
- || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
- && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
- mod = CARON; // "v" above
-
- // same conditions as CARON, but only for wide marks whose left border is bowed
- if( /* test for bow (new0.3.6) */
- loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
- + loop(box1->p,xl,yl ,xr-xl,cs,0,RI)
- - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1
- && xr-xl>10)
- if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
- < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
- || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
- < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
- && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
- < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
- || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
- < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
- && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
- || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
- && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
- mod = BREVE; // round "u" above
-
- // two crossings in the top and bottom row, shifted against each other
- if( xr-xl>3 && yl-yu>1 )
- if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
- > loop(box1->p,xl,yl,xr-xl,cs,0,RI)
- && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
- < loop(box1->p,xr,yl,xr-xl,cs,0,LE)
- && num_cross(xl,xr,yu,yu,box1->p,cs) == 2
- && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 )
- mod = TILDE;
- // exactly one hole, crossed more than once horizontally and vertically
- if( xr-xl>2 && yl-yu>2)
- if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 )
- if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 )
- if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 )
- mod = RING_ABOVE;
- #ifdef DEBUG
// NOTE(review): the "x=" field below is fed with yu/yl and the "y=" field
// with xl/xr - the labels look swapped; confirm before relying on the output.
- printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s",
- (int)mod,yu-box1->y0,yl-box1->y0,
- xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON":
- ((mod==ACUTE_ACCENT)?"ACUTE":
- ((mod==TILDE)?"TILDE":"?"))));
- out_x(box1);
- #endif
- }
- }
- if (m) box1->dots=r; // set to 0 also possible after division
- if (m) box1->modifier=mod; /* should be reset after compose ??? */
- MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);)
- }
- // printf(" modifier=%c",mod);
- if (modifier) *modifier=mod; /* set modifier */
- return r;
- }
-
/*
 * ocr0_eE: rule-based tests whether the box sdata->box1 looks like a
 * lowercase 'e' or an uppercase 'E'.
 * Each test is a for(;;) block that is left via Break as soon as one rule
 * rejects the candidate. The probability ad starts at 100, is scaled down by
 * the "soft" rules and is finally reported through Setac().
 * Returns 'e' or 'E' immediately if the respective test ends with ad>=100,
 * otherwise box1->c.
 */
- static wchar_t ocr0_eE(ocr0_shared_t *sdata){
- struct box *box1=sdata->box1;
- int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0,
- x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
- int dx=x1-x0+1,dy=y1-y0+1, /* size */
- ad; /* tmp-vars */
- int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
- // --- most frequent letter e first!!!
- // --- test e ---------------------------------------------------
- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6)
- DBG( wchar_t c_ask='e'; )
- if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
- if (sdata->holes.num != 1) ad=97*ad/100; // anything but exactly one hole costs 3%
- /* ToDo: may be a two pass version intolerant/tolerant is better */
- if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test
- if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break;
- if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break;
- if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2
- && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt
// NOTE(review): the crossing count i collected in the next four lines is
// overwritten below without ever being tested - confirm whether a
// "if (i!=3) Break" got lost.
- x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40
- if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); }
- if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); }
- if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); }
- i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break; // left border: gap in the middle row ...
- j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( j<i ) Break; // ... must not exceed the gap in the top row
- j=loop(box1->p,x0,y1 ,x1-x0,cs,0,RI); if( j<i ) Break; // ... nor the gap in the bottom row
- i=loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break; // same idea from the top (note: vertical run compared with dx/2)
- j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( j<i ) i=j;
- j=loop(box1->p,x0 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
- j=loop(box1->p,x1 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
- i=loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break; // and from the bottom
- j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
- j=loop(box1->p,x1 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
- j=2*loop(box1->p,x0, (y0+y1)/2,x1-x0,cs,0,RI) // j = how far the left border is dented in at mid height
- -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI)
- -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI);
- if (dx>3 && j>=dx/4) Break; // ~g 4x6font
- for(y=1;y<dy/2;y++) if( num_cross(x0,x1,y0+y,y0+y,box1->p,cs) == 2 ) break; // need a row with 2 crossings in the upper half
- if( y==dy/2 ) Break; // v0.2.5 ~ bad_t
- for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++) // count middle columns with 3 crossings
- if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++;
- if( dx>4 && dy>5 && (i<dx/4-1 || i==0) ) Break; // ~g but 4x6-e
- // look for horizontal white line (right gap) => set x,y
- for(x=0,y=i=y0+dy/3;i<y1-dy/6;i++){
- j=loop(box1->p,x1,i,y1-y0,cs,0,LE);
- if(j>=x) { x=j;y=i; } // x = widest run from the right border, y = its row
- }
- if (x<dx/2){ // no gap found, fat font???
- // check smallest thickness left > 2* smallest thickness right
- for(i1=dx,i=y0+dy/3;i<y1-dy/6;i++){
- j =loop(box1->p,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break;
- j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI);
- if (j<i1) i1=j; // smallest thickness on left bow
- }
- for(i2=dx,y=i=y0+dy/3;i<y1-dy/6;i++){
- j =loop(box1->p,x1 ,i,y1-y0,cs,0,LE);
- j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE);
- if(j<i2) { i2=j;y=i; } // smallest thickness on the right side and its row
- } if (3*i2>2*i1) Break; // not accepted, if right line is not very thin
- x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE); // re-measure in row y: first run from the right ...
- x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE); // ... plus the following run of the other colour
- x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE); // ... plus the run behind it
- if (3*i2>i1) ad=99*ad/100;
- if (2*i2>i1) ad=99*ad/100;
- bad_e=60; // marks the fat-font case; relaxes the single-crossing test below
- }
- if (x<dx/2) Break;
- for(i=1,j=x0+dx/6;j<x1-dx/6 && i;j++) // need a column with >1 crossings between y0 and y
- if( num_cross(j,j,y0,y,box1->p,cs) > 1 ) i=0;
- if( i ) Break;
- // ..@@@@...<-
- // .@@@@@@;.
- // @@,...@@.
- // @@.....@,
- // @@@@@@@@@
- // @@.,;.@,. <- problem (y) == bad_e>50
- // @@.....@.
- // @@,...@@.
- // .@@@,@@@.
- // ..@@@@;..<-
- if (dy>11 && bad_e<50)
- if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except for a curved ("swung") e
- if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1
- && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break;
- // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){
- if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){ // no hole above row y (hole coordinates are compared box-relative)
- if( sdata->hchar ) Break; // ~ \it t
- // look if thin font (may be h-line is broken) Mai00
- for(j=0,i=x0+dx/8;i<x1-1;i++)
- if( get_bw(i,i,y0+dy/4,y,box1->p,cs,1) == 1 ) j++;
- if(j<2*dx/4) Break;
- }
- if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break; // holes must start at or above row y
- if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break;
- if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) {
- ad=95*ad/100; } /* 8*10 @ (=at) is not an e */
- // look for horizontal gap
- for(x=0,y=i=y0+dy/4;i<y1-dy/4;i++){
- j=loop(box1->p,x0,i,x1-x0,cs,0,RI);
- if(j>=x) { x=j;y=i; } // widest run from the left border and its row
- }
- if (y>y0+dy/4 && y<y1-dy/4 && x>dx/2) Break; // s
- if (x>dx/4) ad=99*ad/100;
- if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0 // nothing crossed in the lower right quarter
- && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0
- && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) {
- if (sdata->gchar) Break; // ~p
- ad=99*ad/100;
- }
- /* upper case is for 5x6 box */
- if( sdata->hchar // broken B ? should also work when linedetection fails
- && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) {
- x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
- if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x
- && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break;
- if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x
- && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break;
- }
- x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI); // sdata->bp is addressed with box-relative coordinates (0..dx-1, 0..dy-1)
- if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q
- if (box1->m2) { // only when box1->m2 is set (presumably: line metrics known - TODO confirm)
- if (sdata->gchar) ad=99*ad/100;
- if (sdata->hchar) ad=99*ad/100;
- } else ad=99*ad/100;
- Setac(box1,(wchar_t)'e',ad);
- if (ad>=100) return 'e';
- break;
- }
- // --- test E ---------------------------------------------------
- for(ad=d=100;dx>2 && dy>4 ;){ // min 3x5 (dx>2 && dy>4)
- // rewritten for vectors 0.43
- int i1, i2, i3, i4, i5; // line derivation + corners (shadow the outer i1,i2; used only for the MSG output below)
- DBG( wchar_t c_ask='E'; )
- if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
- /* half distance to the center */
- d=2*sq(128/4);
- /* all four corner points must lie close to the box corners */
- if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... (upper right end) */
- if (aa[0][2]>d/2) Break; /* upper left end */
- if (aa[1][2]>d/2) Break; /* lower left end */
- if (aa[2][2]>d/2) Break; /* lowerright end */
- /*
- E f near E
-
- OOOOOOOO OOOO
- O5 O O
- O4 O
- OOOO3 OOOOOO
- O2 O
- O O
- O1 O O
- OOOOOOOO OOOOOO
- */
- // check the bow from below: all vectors from the lower left to the lower
- // right corner must stay within dy/4 of the bottom (walks frame 0 only)
- for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
- if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal!
- } if (i!=aa[2][3]) Break; // ~AHKMNRX
- // search most left+down between bottom right and top right
- i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1);
- i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0);
- i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2);
- i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3);
- i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3);
- i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2); // leftmost point of the left stroke
- if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break; // left stroke must not bulge to the left
- if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f
- MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
- // holes right open?
- for( i=1,y=y0; y<y0+dy/4 && i; y++ ) // long black line
- if( get_bw(x0+dx/3,x1-dx/6,y,y,box1->p,cs,2) == 0 ) i=0;
- if( i ) Break;
- for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line
- if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0;
- if( i ) Break;
- for( i=1,y=y0+dy/3; y<y1-dy/3 && i; y++ ){ // black line
- j=loop(box1->p,x0 ,y,dx,cs,0,RI);
- j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0;
- } if( i ) Break;
- x=x1-dx/3; y=y0; // drill through from the top!
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break; // must leave the box to the right (upper opening)
- x=x1-dx/3; y=y1; // drill through from below!
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( y<y1-dy/4 ) Break;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( y<y0-dy/3 ) Break; // NOTE(review): y0-dy/3 lies above the box; y1-dy/3 may have been intended - confirm
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || y<y0+dy/2 ) Break; // must leave the box to the right (lower opening)
- x=x1-dx/3; y=y0; // drill through from the top again!
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
- y+=dy/15;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x0 ) Break; // go left to the vertical stroke
- if (dx>15 && x==x0) ad=99*ad/100; // too thin
- x+=dx/15+1;
- turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break; // must hit the middle bar
- // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break;
- if (sdata->holes.num > 0) Break;
- i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break;
- j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break; i=j;
- j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
- j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE);
- for( x=dx,y=y0+dy/6; y<y1-dy/9; y++ ) // left border straight
- { i=loop(box1->p,x0,y,dx,cs,0,RI);
- if (i>j/2 && ad>98) ad=99*ad/100;
- if (i>dx/4) break;
- if(i<x) x=i;
- } if( y<y1-dy/9 ) Break; // t
- if(dy>3*dx) // ~[
- if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break;
- if (box1->m2) {
- if (!hchar) ad=ad*99/100;
- if ( gchar) ad=ad*99/100;
- }
- Setac(box1,(wchar_t)'E',ad);
- if (ad>=100) return 'E';
- break;
- }
- return box1->c; // neither test was fully certain: hand back the box's current char
- }
/*
 * ocr0_n: rule-based test whether the box sdata->box1 looks like a
 * lowercase 'n' (printed or handwritten).
 * The test is a single for(;;) block that is left via Break as soon as one
 * rule rejects the candidate. The probability ad starts at 100, is scaled
 * down by the "soft" rules and is reported through Setac(); for a tiny box
 * (dy<=4) 'm' is registered as an alternative.
 * All pixel tests use sdata->bp with box-relative coordinates
 * (0..dx-1, 0..dy-1), except the "glued ri" test, which reads box1->p.
 * Always returns box1->c.
 */
- static wchar_t ocr0_n(ocr0_shared_t *sdata){
- struct box *box1=sdata->box1;
- int i,j,d,x,y,i1,i2,i3,handwritten=0,
- x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
- int dx=x1-x0+1,dy=y1-y0+1, /* size */
- ad; /* tmp-vars */
- // --- test n ---------------------------------------------------
- // glued rm is very similar to glued nn -> thickness of h-line should grow
- // may02: tested for 8x12 font
- for(ad=d=100;dx>2 && dy>3;){ // min 3x4
- DBG( wchar_t c_ask='n'; )
- if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
- i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs);
- j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs);
- if( (i<2 || i>3) && j!=2 ) Break; // need 2..3 crossings at dy/4 or exactly 2 at dy/2
- if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */
- y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */
- if( num_cross( 0,dx/2,y ,y ,sdata->bp,cs) != 1
- && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1
- && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr
- // ~thick_w
- y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break;
- if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break;
- y=3*dy/4;
- if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1
- && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p
- y=dy/2;
- if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2
- && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr
- /* printed n */
- x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
- x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
- x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line
- x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap (i3 is not used afterwards)
- i=dy/4; y=13*dy/16; // i = allowed depth of the gap from the top
- if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n
- if (i<2 && i<dy/2) i++; // correct for small fonts like 8x12
- // the same game for the lower part =>l1 l2 l3 l4 ???
- for(x=i1;x<i2;x++) if( loop(sdata->bp,x, 0,dy,cs,0,DO)>=i ) break; // between the v-lines: connected near the top?
- if(x <i2) Break; // gap detected
- for(x=i1;x<i2;x++) if( loop(sdata->bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break; // ... and open at the bottom?
- if(x==i2) Break; // no gap detected (glued serifs ??? )
- // glued rm as nn ???
- for(y=0,x=(i1+i2)/2;x<i2;x++){
- i=loop(sdata->bp,x,0,dy,cs,0,DO);
- i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness
- if( i>y ) y=i; if( i<y/2 ) break; // y = largest thickness so far; leave if it drops below half of it
- }
- if(x <i2) Break; // unusual property for n
- if( dy>7 )
- if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE)
- +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1
- > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE)
- +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o
- if( dy>7 && dx>7 )
- if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0
- && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o; NOTE(review): starts at the right border and scans RI - confirm the direction
- } else { /* check handwritten n */
- if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3
- && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break;
- i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break;
- i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break;
- i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI);
- if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break;
- i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI);
- if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break;
- handwritten=80; // set but not read again within this function
- }
- i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5)
- if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr
- i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE);
- if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv
- if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0
- && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0
- && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P
- // glued ri ???
- if( box1->dots>0 && box1->m1 )
- if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 ) // something above the right half of the box
- if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2
- || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break;
- i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2)
- i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE);
- x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
- if (sdata->hchar && i-x>1) Break; // ß
// NOTE(review): the next four loop() calls start at column 0 and scan LE;
// RI may have been intended for a serif check - confirm against loop().
- x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs
- i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (i<x) x=i;
- i=loop(sdata->bp, 0, 1,dx,cs,0,LE); if (i<x) x=i;
- i=loop(sdata->bp, 0, 2,dx,cs,0,LE); if (i<x) x=i;
- if (sdata->hchar && x>0) Break; // fl
- if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M
- if (sdata->hchar || 2*y0<box1->m1+box1->m2) ad=96*ad/100;
- if (sdata->gchar) ad=96*ad/100; // ß fl
- if (dx<5) { // for small fonts no middle line is possible for m
- ad=99*ad/100; // 4x6 m
- if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) {
- ad=97*ad/100; // ~m
- if (dy<=4) Setac(box1,'m',97); // only for 4x6 font!
- }
- }
- Setac(box1,'n',ad);
- break;
- }
- return box1->c;
- }
- /* ocr0_M: rule-based test whether the box in sdata looks like an 'M'.
-  *
-  * Reads box1, the bitmap bp, cs, the hchar/gchar flags and holes.num
-  * from sdata.  The tests live in a for-loop that is used as a breakable
-  * block: it is entered only if dx>3 && dy>3 and ends with an
-  * unconditional break.  A failing test leaves early via Break (a macro
-  * defined outside this chunk; presumably a break plus debug output --
-  * TODO confirm).  ad starts at 100 and is scaled down by soft hints;
-  * if no test rejects, Setac(box1,'M',ad) registers the candidate.
-  *
-  * Returns box1->c in every case.
-  *
-  * NOTE(review): num_cross() presumably counts line crossings on the
-  * given segment and loop() presumably returns a run length from a start
-  * point in the given direction (RI/LE/UP/DO); both are defined elsewhere,
-  * verify before relying on the shape descriptions in the comments below.
-  */ static wchar_t ocr0_M(ocr0_shared_t *sdata){
- struct box *box1=sdata->box1;
- pix *bp=sdata->bp;
- int d,x,y,i0,i1,i2,i3,t1,hchar=sdata->hchar,gchar=sdata->gchar,
- x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
- int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box (bounds inclusive) */
- ad; /* score: starts at 100, only ever scaled down, passed to Setac */
- // ------------------ test M ---------------------------
- for(ad=d=100;dx>3 && dy>3;){ // dy<=dx is not perfect! better to search
- // for the middle minimum for m (note: d is set here but not read again)
- DBG( wchar_t c_ask='M'; )
- if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
- for (y=dy/4;y<=3*dy/4;y++) // look for a row in [dy/4,3*dy/4] with >=3 crossings
- if (num_cross(0,dx-1,y,y,bp,cs)>=3) break;
- if (y>3*dy/4 && dx>4) Break; // no such row found (only enforced for dx>4)
- if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2
- && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */
- if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break; // lower part needs >=2 crossings
-
- x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl
- x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break;
- if( loop(bp, 0,7*dy/16,dx,cs,0,RI)
- + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K
- if( dy>8 /* following lines should be extend to range check */
- && loop(bp, dx/4,dy-1, dy,cs,0,UP)<dy/4
- && loop(bp,3*dx/8,dy-1, dy,cs,0,UP)<dy/4 )
- if( loop(bp, 0,dy-1-dy/ 8,dx,cs,0,RI)
- < loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)-dx/32 ) Break; // ~it_u
- if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)==2
- && num_cross(0,dx-1, dy/4, dy/4,bp,cs)> 2
- && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u
- if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2
- && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v
- if( loop(bp,3*dx/4, 0,dy,cs,0,DO)
- > loop(bp,2*dx/4, 0,dy,cs,0,DO)
- && loop(bp,3*dx/4,dy-1,dy,cs,0,UP)
- < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N
- if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO)
- > loop(bp,2*dx/4, dy/8,dy,cs,0,DO)
- && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP)
- < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N
- // i0 is lower end of upper serifs (widest gap? )
- i0=0; // NOTE(review): i0 is assigned here but not read again in this function
-
- if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ?
- if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){
- for(y=dy/2+1;y<dy;y++){ // walk down to the first row with <3 crossings
- if( num_cross(0,dx-1,y,y,bp,cs)<3 ) break;
- }
- if( num_cross(0,dx-1,y,y,bp,cs)==2 ){
- x =loop(bp,dx-1 ,y-1,dx,cs,0,LE); // three consecutive runs from the right edge on row y-1
- x+=loop(bp,dx-1-x,y-1,dx,cs,1,LE);
- x+=loop(bp,dx-1-x,y-1,dx,cs,0,LE);
- if( loop(bp,dx-x,y-1,dy,cs,0,UP)>y-2 ) Break; // ~N
- }
- }
- }
- // MNWK
- for(i2=0,i1=x=dx/2;x<dx-dx/4;x++){ // lowest pixel
- y=loop(bp,x,0,dy,cs,0,DO); if(y>i2) {i2=y;i1=x;} else break; } // (i1,i2) = column and depth of the deepest run from the top; stops at first non-increase
- i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO); // NOTE(review): i3 is not read again in this function
- if(i2<dy/4) {
- if (!sdata->hchar) Break; // rm
- ad=99*ad/100;
- }
- if (i2==0 && dx>8 && dy>12) Break; // glued and badly split serif MN
- // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A
- if (sdata->holes.num != 0) Break; // NOTE(review): stricter than the holes.num>1 test at the top
- t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI);
- t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line?
- if( 7*(t1+1)<dx ) // NOTE(review): i2 can still be 0 here for small boxes, giving row -1 below -- confirm num_cross tolerates that
- if( num_cross(i1,dx-1,i2-1,i2-1,bp,cs)!=2
- || num_cross(0 ,i1 ,i2-1,i2-1,bp,cs)!=2 ) Break; // too hard ???
- // ~u_n-pair
- if( num_cross(0,dx-1,0,0,bp,cs)!=2
- && num_cross(0,dx-1,1,1,bp,cs)!=2
- && num_cross(0,dx-1,2,2,bp,cs)!=2 ) Break; // one of the top three rows must have exactly 2 crossings
- // ~nn v0.2.4a3
- if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)==4
- && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)==4 ){
- i1 =loop(bp, 0, dy/4,dx,cs,0,RI); // i1/i2: accumulated run positions from the left on rows dy/4 and 3*dy/4
- i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
- i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
- i2 =loop(bp, 0,3*dy/4,dx,cs,0,RI);
- i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
- i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
- if( i1>=i2 ) Break; // no good M
- i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
- i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
- if( i1>=i2 ) Break; // no good M
- i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
- i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
- if( i1<=i2 ) Break; // no good M
- }
- if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2
- && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u
- if (dy<17) // from here on mostly soft hints that only scale ad down
- if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100;
- if (dx>5) /* 4x6 font has only 1 cross at y=1 */
- if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt
- if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt
- if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H
-
- if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2
- && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2
- && dx>8 && dy>12 ){
- ad=98*ad/100;
- for(y=5*dy/16;y<5*dy/8;y++) // look for H-line
- if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break;
- if( y<5*dy/8 ) ad=95*ad/100; // a row with a single crossing was found
- if( y<5*dy/8 )
- if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0
- || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad!
- }
- if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2
- && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100;
- if(!hchar){
- ad=98*ad/100; /* not sure */
- if( loop(bp,0, dy/4,dx,cs,0,RI)
- < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued
- }
- if( gchar ) ad=98*ad/100;
- if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */
- Setac(box1,'M',ad); // all hard tests passed: register 'M' with score ad
- break; // the for-loop body runs at most once
- }
- return box1->c;
- }
- /* ocr0_N: vector-based test whether the box in sdata looks like an 'N'.
-  *
-  * Works on the four corner points sdata->aa (x, y, dist^2, vector_idx)
-  * and on box1->frame_vector instead of pixels.  The tests live in a
-  * for-loop used as a breakable block: it is entered only if dx>3 && dy>3
-  * and ends with an unconditional break.  A failing test leaves early via
-  * Break (macro defined outside this chunk; presumably a break plus debug
-  * output -- TODO confirm).  ad starts at 100 and is scaled down by soft
-  * hints and by the four line deviations dbg[0..3]; if no test rejects,
-  * Setac(box1,'N',ad) registers the candidate.
-  *
-  * Returns box1->c in every case.
-  *
-  * NOTE(review): line_deviation() and nearest_frame_vector() are defined
-  * elsewhere; the comments below only restate how their results are used.
-  */ static wchar_t ocr0_N(ocr0_shared_t *sdata){
- struct box *box1=sdata->box1;
- int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
- x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
- int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box (bounds inclusive) */
- (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
- dbg[9], /* dbg[0..3] hold the four line deviations checked below */
- ad; /* score: starts at 100, only ever scaled down, passed to Setac */
- // --- test N ------- +hchar -gchar
- for(ad=d=100;dx>3 && dy>3;){ // 4x6font
- int j;
- DBG( wchar_t c_ask='N'; )
- if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
- if (sdata->holes.num > 0) ad=98*ad/100; /* # */
- if (dx<6) ad=99*ad/100;
- if (dx<5) ad=99*ad/100;
- /* half distance to the center */
- d=2*sq(128/4);
- /* now we check for the 4 ends of the x */
- if (aa[0][2]>d) Break;
- if (aa[1][2]>d) Break;
- if (aa[2][2]>d) Break;
- if (aa[3][2]>d) Break;
- if (aa[3][0]-aa[0][0]<dx/2) Break;
- if (aa[2][0]-aa[1][0]<dx/2) Break;
- if (aa[1][1]-aa[0][1]<dy/2) Break;
- if (aa[2][1]-aa[3][1]<dy/2) Break;
- if (aa[3][0]-aa[0][0]<4-1) Break; /* too small to hold an N */
- if (aa[2][0]-aa[1][0]<4-1) Break; /* too small */
- if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */
- if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */
- /* left and right vertical line */
- d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
- ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
- d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
- /* i1: uppermost left ^ from bottom (near 0,0) */
- i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0);
- x=box1->frame_vector[i1][0];
- y=box1->frame_vector[i1][1];
- MSG( fprintf(stderr,"i1= %d (%d,%d) left ^ from below", i1,x-x0,y-y0);)
- if (y-y0 > 5*dy/8) Break;
- if (x-x0 > 5*dx/8) Break;
- /* i3: uppermost right ^ ~H */
- i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
- MSG( fprintf(stderr,"i3= %d (%d,%d) right ^ (ad=%d)",\
- i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)
- /* check lower border of diagonal line, may fail on fonts where
- * line ends on middle of right vertical line (screen font) */
- dbg[0]=d=line_deviation(box1,i1, aa[2][3]);
- /* check right border of left vertical line */
- /* but split to possible lower left serif + vert. line */
- j=nearest_frame_vector(box1,aa[1][3],i1, x0+dx/2, y1+dy/2);
- dbg[1]=d=line_deviation(box1, aa[1][3],j ) // sum of both parts (serif + vertical line),
- +line_deviation(box1, j,i1); // formed the same way as dbg[2] and dbg[3] below
- MSG(fprintf(stderr," i1-a2 %d a1_serif-i1 %d ad=%d",dbg[0],dbg[1],ad);)
- if (dbg[0] > sq(1024/4)) Break;
- if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024)
- if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024)
- // serif N has d=sq(1024/3)=116508
- MSG( fprintf(stderr,"ad %d", ad); )
- /* i2: lowest right v from top, same frame? N-tilde etc.? */
- i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8);
- x=box1->frame_vector[i2][0];
- y=box1->frame_vector[i2][1];
- MSG( fprintf(stderr,"i2= %d (%d,%d) lowest right v from top",\
- i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);)
- if (y-y0 < 3*dy/8) Break;
- if (x-x0 < 3*dx/8) Break;
- // test H
- if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4
- && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8
- && y<=box1->frame_vector[i1][1]) Break;
- /* check if upper left and lower right point are joined directly */
- /* but split to possible upper right serif + down line */
- j=nearest_frame_vector(box1,i2,aa[0][3], x0+dx/2, y0-dy/2);
- dbg[2]=d=line_deviation(box1,i2, j)
- +line_deviation(box1, j, aa[0][3]);
- /* check if upper right and lower right point are joined directly */
- /* but split to possible upper right serif + vert. line */
- j=nearest_frame_vector(box1,aa[3][3],i2, x0+dx/2, y0-dy/2);
- dbg[3]=d=line_deviation(box1, aa[3][3],j)
- +line_deviation(box1, j,i2); // ToDo: split once more?
- MSG( fprintf(stderr," i2-a0 %d a3-i2 %d ad %d",dbg[2],dbg[3], ad); )
- if (dbg[2] > sq(1024/4)) Break;
- // serif N, ToDo: do it better
- if (dbg[3] > sq(1024/4)) ad=97*ad/100;
- if (dbg[3] > sq(1024/3)) Break;
- MSG( fprintf(stderr,"ad %d", ad); )
- MSG( fprintf(stderr,"check against melted tu"); )
- // i1 = left ^ from below, i2 = lowest right v from top
- // sample gocr_Device*: 3-8,dy=27
- if ( (box1->frame_vector[i1][1]-y0)
- -(y1-box1->frame_vector[i2][1])>dy/8) ad=99*ad/100; /* ~ tu */
- MSG( fprintf(stderr,"tu ad %d", ad); )
- if (box1->frame_vector[i2][0]
- -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */
- MSG( fprintf(stderr,"i2-i1<=dx/8 ad %d", ad); )
- /* i1: uppermost left ^ from bottom (near 0,0) */
- /* i2: lowest right v from top, same frame? N-tilde etc.? */
- if (box1->frame_vector[i2][1]
- -box1->frame_vector[i1][1]<=dy/8) {
- // may happen on screen fonts 7x10
- if (dx>8) ad=97*ad/100; /* too flat (ff,H) */
- }
- MSG( fprintf(stderr,"i2-i1<=dy/8 ad %d", ad); )
- if (box1->frame_vector[i2][1]
- -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100;
- MSG( \
- fprintf(stderr,"^v %d %d %d %d line dev %d %d %d %d max %d %d ad %d",\
- box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
- box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
- dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024),ad);)
- ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; // fold each of the four deviations into the score
- MSG( fprintf(stderr,"ad %d", ad); )
- ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
- MSG( fprintf(stderr,"ad %d", ad); )
- ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
- MSG( fprintf(stderr,"ad %d", ad); )
- ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
- MSG( fprintf(stderr,"ad %d", ad); )
- if (!hchar) ad=99*ad/100;
- if ( gchar) ad=98*ad/100; // \sc N
- Setac(box1,'N',ad); // all hard tests passed: register 'N' with score ad
- break; // the for-loop body runs at most once
- }
- return box1->c;
- }
- static wchar_t ocr0_h(ocr0_shared_t *sdata){
- struct box *box1=sdata->box1;
- pix *bp=sdata->bp;
- int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
- x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
- int dx=x1-x0+1,dy=y1-y0+1, /* size */
- ad; /* tmp-vars */
- int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
-
- // --- test h ---------------------------------------------------
- for(ad=d=100;dx>2 && dy>3;){ // min 3x4
- // rewritten for vectors 0.42
- int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
- DBG( wchar_t c_ask='h'; )
- if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
- /* half distance to the center */
- d=2*sq(128/4);
- /* now we check for the upper right end of the h */
- if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
- if (aa[0][2]>d/2) Break; /* upper left end */
- if (aa[1][2]>d/2) Break; /* lower left end */
- if (aa[2][2]>d/2) Break; /* lowerright end */
- /*
- type A B=italic ???
- 18 OOO
- O O O
- O O
- O7OOO OOOO
- O4 O O O
- O O O O
- O O O O O
- 2O3 5O6 O OOO
- */
- i1=i8=aa[0][3];
- i2=i3=aa[1][3];
- i5=i6=aa[2][3];
- // check the bow from below (fails on melted serifs)
- for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
- if (box1->frame_vector[ i][1]
- <box1->frame_vector[i4][1]) i4=i; // get next maximum
- if (box1->frame_vector[ i][1]<=y0) break; // fatal!
- }
- if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
- if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
- // two steps for i7 to go around pitfalls on italic h
- i7=nearest_frame_vector(box1, i6, i8, (x0+x1)/2, (y0+y1)/2);
- i7=nearest_frame_vector(box1, i6, i7, x0, (y0+y1)/2);
- i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
- i5=nearest_frame_vector(box1, i4, i6, (x0+x1)/2, y1);
- MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
- /* ... new part /// old obsolete part ... */
- if( get_bw(0 ,dx/2,dy/8 ,dy/8 ,bp,cs,1) != 1 ) Break;
- if( get_bw(0 ,dx/2,dy/2 ,dy/2 ,bp,cs,1) != 1 ) Break;
- if( get_bw(dx/2 ,dx-1,dy-1-dy/3,dy-1-dy/3,bp,cs,1) != 1 ) Break;
- if( get_bw(dx/2 ,dx/2,dy/5 ,dy-1-dy/3,bp,cs,1) != 1 ) Break;
- if( get_bw(dx-1-dx/3,dx-1,0 ,1 ,bp,cs,1) == 1 ) Break;
- if( get_bw(dx-1-dx/3,dx-1,1 ,dy/6 ,bp,cs,1) == 1 ) Break;
- if( dy>18 )
- if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break;
- if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s-
- for( x=x0+dx/3;x<x1-dx/3;x++)
- if( get_bw(x, x,y1-dy/4, y1, box1->p,cs,1) == 0 ) break;
- if( x>=x1-dx/3 ) Break;
- for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){
- …
Large files are truncated, but you can click here to view the full file