PageRenderTime 66ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 1ms

/Spikes/GocrDemo/ocr0.c

http://stp-iphone.googlecode.com/
C | 6756 lines | 5455 code | 438 blank | 863 comment | 4512 complexity | ffa277b2d87e6c78c5649170766ee916 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. rule based OCR engine, partly rewritten for edges (old=pixel)
  3. */
  4. /*
  5. This is a Optical-Character-Recognition program
  6. Copyright (C) 2000-2009 Joerg Schulenburg
  7. This program is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU General Public License
  9. as published by the Free Software Foundation; either version 2
  10. of the License, or (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18. see README for email address
  19. >>> DO NOT EDIT THIS FILE IF YOU NOT REALLY KNOW WHAT YOU ARE DOING! <<<
  20. I have invested lot of time, to write this part of the program.
  21. This engine should always recognize chars correctly or return UNKNOWN.
  22. If you change something, test all other example files too,
  23. to be sure that all things work better. (JoergS)
  24. This engine was pixel-based until 0.40, which was not successful enough.
  25. Also, code changes always had side effects. The vectorization of the code
  26. starts from version 0.41 with the chars XNz and seems to be much easier
  27. to handle. Vectorization means we frame each character by a chain of
  28. vectors and don't care about pixels anymore. Unfortunately I have to
  29. replace all the pixel code, which is a long process. Old code will be lost.
  30. (JorgS)
  31. ToDo:
  32. - if box1->p and b differ, reduce probability
  33. - probability makes life much easier here
  34. - use only one box!?, may be bits have usefull infos
  35. - divide this file, suggestion: classify chars:
  36. high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz
  37. or
  38. often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,:
  39. or
  40. every char (large overhead)
  41. - two-pass version (first pass without tolerance)
  42. 2nd pass with tolerance (ex: one tiny more in sdata->holes)
  43. general feature extraction:
  44. - white holes at middle, upper, lower position (cost much time)
  45. - test lines and triangles insteat of rectangles
  46. char is removed, wchar_t is used (better code)
  47. making a static global variable-set x.x0,x.x1, and call test_a,
  48. test_b ... (faster compilation, but not reentrant!)
  49. - adding slant-angle (if detected) to distinguish between l and / ?
  50. - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/";
  51. for better context correction or output: "Ha[lI][lI]o!"
  52. */
  53. #include <stdlib.h>
  54. #include <stdio.h>
  55. // #include "pgm2asc.h"
  56. #include "ocr0.h"
  57. // #include "ocr1.h"
  58. #include "amiga.h"
  59. #include "pnm.h"
  60. #include "gocr.h"
  61. #define IFV if(JOB->cfg.verbose&4)
  62. #define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);}
  63. // the old debug mode (0.40) was only for a special char, for another char
  64. // code must be recompiled with C_ASK='char'
  65. // new debug mode (0.41) explains why char is declined or accepted as ABC...
  66. // the output can be filtered by external scripts
  67. // ToDo: we could reduce output to filter string
  68. #ifndef DO_DEBUG /* can be defined outside (configure --with-debug) */
  69. #define DO_DEBUG 0 /* 0 is the default */
  70. #endif
  71. /* this macro is for debugging output: "if char is declined, why?" */
  72. #if DO_DEBUG /* 0=Work mode, 1=debugging mode */
  73. // Setac: output, that char is choosen with a probability
  74. // Break: output, why the char is not choosen
  75. // MSG: debugging functions for char C_ASK, mostly messages
  76. // DBG: definitions usefull only for debugging
  77. #define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); }
  78. #define Break { MM;IFV fprintf(stderr,"break"); break; }
  79. #define MSG(x) { MM;IFV x }
  80. #define DBG(x) x
  81. #else
  82. #define Setac(box1,ac,ad) setac(box1,ac,ad)
  83. #define Break break
  84. #define MSG(x)
  85. #define DBG(x)
  86. #endif
  87. /* extern "C"{ */
  88. // static inline int sq(int x) { return x*x; } /* square */
  89. /*
  90. * go from vector j1 to vector j2 and measure maximum deviation of
  91. * the steps from the line connecting j1 and j2
  92. * return the squared maximum distance
  93. * in units of the box size times 1024
  94. * ToDo: 1) better give back max-dx and max-dy ???
  95. * errors if j1 and j2 are in different frames or belong to
  96. * more then one frame?
  97. * 2) Better get deviation from a complete vector graphic?
  98. * The vectorgraphic is the ideal test char adapted to the
  99. * extrem vertices of the real char.
  100. */
  101. int line_deviation( struct box *box1, int j1, int j2 ) {
  102. int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2;
  103. r1x=box1->frame_vector[j1][0];
  104. r1y=box1->frame_vector[j1][1];
  105. r2x=box1->frame_vector[j2][0];
  106. r2y=box1->frame_vector[j2][1];
  107. if (!box1->num_frames) return(-1);
  108. if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
  109. j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
  110. fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__);
  111. return(-1);
  112. }
  113. /* get the frame the endvector belongs to */
  114. for (i=0;i<box1->num_frames;i++)
  115. if (j2<box1->num_frame_vectors[i]) break;
  116. frame=i;
  117. /* frame(j1)<=frame(j2) possible */
  118. for (i=j1;;i++) { // do it for each vector between j1 and j2
  119. if (i >= box1->num_frame_vectors[frame])
  120. i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
  121. if (i==j2) break;
  122. // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~}
  123. r3x=box1->frame_vector[i][0];
  124. r3y=box1->frame_vector[i][1];
  125. // Language=german
  126. // german: Abstand Punkt von Strecke, Laenge Lotrechte
  127. // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1
  128. // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x]
  129. // Schnittpunkt : l1=l2,
  130. // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0
  131. // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0
  132. // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x)
  133. // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y)
  134. // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly)
  135. l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1
  136. if (l2==0) {
  137. // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging
  138. d=-1024;
  139. } else
  140. d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x)
  141. +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024..
  142. if (d<=-1024) { x=r1x; y=r1y; } // starting point
  143. else {
  144. if (d>=1024) { x=r2x; y=r2y; } // end point
  145. else {
  146. x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048;
  147. y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048;
  148. /* we have the crossing point x,y now */
  149. }
  150. }
  151. dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1))
  152. +sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024)
  153. if (dist>maxdist) maxdist=dist;
  154. // for debugging:
  155. // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d"
  156. // " vector= %d %d crosspoint= %d %d ",
  157. // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y);
  158. }
  159. return maxdist;
  160. }
  161. /*
  162. * search vectors between j1 and j2 for nearest point a to point r
  163. * example:
  164. *
  165. * r-> $$...$$ $ - mark vectors
  166. * @@$..@@ @ - black pixels
  167. * @@$..@@ . - white pixels
  168. * @@@@.$@
  169. * a-> @@$@$@@
  170. * @$.@@@@
  171. * @@..$@@
  172. * @@..$@@
  173. * j1 --> $$...$$ <-- j2
  174. *
  175. * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry?
  176. * j1 and j2 must be in the same frame
  177. * return aa?
  178. * 2009-07:
  179. * - change from normalized (dx=128,dy=128) to absolute distance
  180. * - simpler and no squeeze effect (problem getting right i2 for "3")
  181. */
  182. int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) {
  183. int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */
  184. int frame=0;
  185. // int x0=box1->x0, y0=box1->y0,
  186. // x1=box1->x1, y1=box1->y1;
  187. // int dx=box1->x1-x0+1, dy=box1->y1-y0+1;
  188. if (!box1->num_frames) return(-1);
  189. if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
  190. j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
  191. fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2);
  192. out_x(box1);
  193. return(-1);
  194. }
  195. aa[0]=x=box1->frame_vector[j2][0]; /* x */
  196. aa[1]=y=box1->frame_vector[j2][1]; /* y */
  197. /* maximum is (distance*128)^2 if r is inside the box */
  198. // aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy);
  199. aa[2]=d=2*(sq(x-rx)+sq(y-ry)); /* must be greater than min. dist, Jul09 */
  200. aa[3]=j2; /* vector index */
  201. /* get the frame the endvector belongs to */
  202. for (i=0;i<box1->num_frames;i++)
  203. if (j2<box1->num_frame_vectors[i]) break;
  204. frame=i;
  205. /* frame(j1)<=frame(j2) possible */
  206. for (i=j1;;i++) {
  207. if (i >= box1->num_frame_vectors[frame])
  208. i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
  209. x=box1->frame_vector[i][0]; /* take a vector */
  210. y=box1->frame_vector[i][1];
  211. /* distance to upper left end, normalized to 128 */
  212. // d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); // old 2009-07
  213. d=sq(x-rx)+sq(y-ry);
  214. if (d<aa[2]) { aa[0]=x; aa[1]=y; aa[2]=d; aa[3]=i; }
  215. if (i==j2) break;
  216. }
  217. return aa[3];
  218. }
  219. // test for umlauts, if ret>0 and m==1 box1 is changed
  220. // m>0 modify box1->dots
  221. // m==2 modify box1->y0
  222. // called by pgm2asc + ocr0(?)
       //
       // Looks for a separate mark (dots, accent, ring, ...) in the upper half
       // of box1 and tries to classify it as a modifier.
       //   box1     - box to inspect; x0,x1,y0,y1,m1,m2,m3 and p are read
       //   cs       - threshold handed to get_bw/loop/num_cross/num_obj/num_hole
       //              and compared with getpixel() values
       //   m        - flags: nonzero allows writing box1->dots and
       //              box1->modifier (and box1->y0 when the mark is rejected,
       //              r==0); bit 1 (m&2) additionally moves box1->y0 below
       //              the mark
       //   modifier - if not NULL it receives the detected modifier ('\0' if
       //              none); it is NOT written on the two early "return 0"
       // Returns r: 0 = no mark accepted, 1 = single mark,
       //            2 = mark with a vertical gap, classified as DIAERESIS.
  223. int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){
  224. // pix p=*(box1->p);
  225. int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3,
  226. xl,xr,yu,yl; // left, right, upper and lower border of dots
  227. wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */
  228. DBG( wchar_t c_ask='"'; )
  229. r=0;
  230. x0=box1->x0; x1=box1->x1; dx=x1-x0+1;
  231. y0=box1->y0; y1=box1->y1; dy=y1-y0+1;
  232. m1=box1->m1; m2=box1->m2; m3=box1->m3;
       // the candidate mark starts as the empty region yu==yl==y0
  233. xl=x0; xr=x1; yu=yl=y0;
  234. if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // no low chars: .,-=
  235. /* modifier in box included? */
  236. if( 2*y1 > m1+m2 ){
  237. /* modifier in box included? */
       // scan the rows of the upper half for the first row where get_bw()==0
       // (presumably a row without black pixels -- confirm against get_bw)
  238. for(y=y0;2*y<y0+y1;y++)if( get_bw(xl,xr,y,y,box1->p,cs,1)==0 ) break;
  239. if( 2*y<y0+y1 ){ /* yes => extract */
       // yl = first gap row, so the mark occupies the rows yu..yl-1
  240. yl=y;
       // skip the remaining gap rows; y ends on the first row below the gap
  241. while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++;
  242. if( m&2 ) box1->y0=y; /* set new upper bond */
  243. }
  244. }
  245. if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */
       // shrink xl..xr to the columns of the mark
  246. if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap?
  247. while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xl<x0+x1) xl++;
  248. for(;xl<x1;xl++)if( get_bw(xl,xl,yu,yl,box1->p,cs,1)==1 ) break;
  249. for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break;
       // only marks at least two rows high are analysed further; box1->dots
       // and box1->modifier are written only inside this branch
  250. if ( yl-1>yu ) { // tall box ij"a"o"u
  251. #if 0
  252. x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x;
  253. fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
  254. fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
  255. #define DEBUG 1
  256. #endif
  257. {
  258. x=xl;y=yu;
  259. if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap?
  260. else
       // the two unbraced ifs below are nested: the block that follows runs
       // only if both the left and the right side test succeed
  261. if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0
  262. || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours
  263. if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0
  264. || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 )
       // note: this x shadows the function-level x inside the block
  265. { int i,j,x;
  266. r=1;
  267. // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE
  268. // ..@...@... // @@.@@@@@..
  269. // ..@...@... // @.........
  270. // ..@..@@...
  271. // ...@@@....
       // i: row just below the first run of rows with get_bw()==1
  272. for (i=yu;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==1) break;
  273. for ( ;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==0) break;
       // j: first column (rows yu..i) after the first run of set columns,
       // x: first set column at or after j -> j<x means a vertical gap
  274. for (j=xl;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==1) break;
  275. for ( ;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==0) break;
  276. for ( x=j;x<xr;x++) if (get_bw(x,x,yu,i,box1->p,cs,1)==1) break;
  277. // vert. gap detected
  278. if( j<xr && x<xr && j<x && xr-xl>2
  279. && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!!
  280. && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2
  281. && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2
  282. ){ // may be the following lines are not quite ok
  283. while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yl<y0+y1) yl++;
  284. r=2;
  285. // out_x(box1);printf(" x,y=%d,%d i=%d xl=%d xr=%d yu=%d yl=%d",x0,y0,i-x0,xl-x0,xr-x0,yu-y0,yl-y0);
  286. mod = DIAERESIS;
  287. }
  288. if( m&2 ) box1->y0=yl;
  289. /* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */
  290. // out_x(box1);
  291. }
       // mark rejected: any nonzero m moves box1->y0 to the first row from yu
       // on where get_bw()!=0 (bounded by the upper half of the box)
  292. if(r==0){ // divided fr != fi
  293. while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yu<y0+y1) yu++;
  294. if(m)box1->y0=yu;
  295. }
       // single mark: yl now becomes the last row of the mark (inclusive).
       // The tests below run in sequence and each may overwrite mod, so the
       // last matching test decides.
  296. if( r==1 ){ yl--;
  297. // .@@@. ..@@.
  298. // .@@.. .@@..
  299. // .@... .@@..
  300. //
  301. // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
  302. // > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
  303. // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
  304. // < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // &eacute; Nov03
  305. if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
  306. - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
  307. > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
  308. - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // &eacute; Nov03
  309. mod = ACUTE_ACCENT; // '
       // more than three times as wide as high
  310. if( xr-xl+1 > 3*(yl-yu+1)
  311. && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 )
  312. mod = MACRON; // "-" above
  313. // .@@@. .@@..
  314. // ..@@. ..@@.
  315. // ...@. ..@@.
  316. //
  317. // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
  318. // < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
  319. // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
  320. // > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) &agrave; Nov03
  321. if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
  322. - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
  323. < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
  324. - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) &agrave; Nov03
  325. mod = GRAVE_ACCENT; // `
  326. #ifdef DEBUG
  327. fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
  328. fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
  329. #endif
       // roughly as wide as high: candidate for a single dot
  330. if( (xr-xl+1) < 2*(yl-yu+1)+2
  331. && 2*(xr-xl+1)+2 > (yl-yu+1) ) {
  332. int i,i1,i2,i3,i4;
       // i1/i2: second loop() result along the middle row / middle column,
       // started where the first loop() call stopped
  333. i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI);
  334. i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI);
  335. i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO);
  336. i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO);
       // i3/i4: run length of pixels with value <cs along the two diagonals.
       // Mind the layout: "i3=i;", "i3=i-i3;", "i4=i;" and "i4=i-i4;" are
       // separate statements executed after their for loop has finished.
  337. for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
  338. if (getpixel(box1->p,xl+i,yu+i)< cs) break; i3=i;
  339. for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
  340. if (getpixel(box1->p,xl+i,yu+i)>=cs) break; i3=i-i3;
  341. for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
  342. if (getpixel(box1->p,xr-i,yu+i)< cs) break; i4=i;
  343. for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
  344. if (getpixel(box1->p,xr-i,yu+i)>=cs) break; i4=i-i4;
  345. #ifdef DEBUG
  346. fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4);
  347. #endif
  348. if ( (xr-xl<5 && yl-yu<8) /* to small */
  349. || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */
  350. && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2
  351. && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4)
  352. )
  353. mod = DOT_ABOVE; // "." above, ToDo: improve it!
  354. }
       // both loop() values (from left and from right) are larger on the top
       // row than near the bottom row, with one crossing at the top and two
       // at the bottom
  355. if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
  356. > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8
  357. || loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
  358. > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 )
  359. && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
  360. > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8
  361. || loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
  362. > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 )
  363. && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1
  364. && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2
  365. || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 ))
  366. mod = CIRCUMFLEX_ACCENT; // "^"
       // mirrored test: two crossings near the top, one at the bottom
  367. if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
  368. < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
  369. || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
  370. < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
  371. && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
  372. < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
  373. || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
  374. < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
  375. && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
  376. || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
  377. && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
  378. mod = CARON; // "v" above
       // same condition as the CARON test, but guarded by the bow test and
       // xr-xl>10; when it matches it overwrites CARON with BREVE
  379. if( /* test for bow (new0.3.6) */
  380. loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
  381. + loop(box1->p,xl,yl ,xr-xl,cs,0,RI)
  382. - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1
  383. && xr-xl>10)
  384. if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
  385. < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
  386. || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
  387. < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
  388. && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
  389. < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
  390. || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
  391. < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
  392. && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
  393. || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
  394. && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
  395. mod = BREVE; // round "u" above
  396. if( xr-xl>3 && yl-yu>1 )
  397. if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
  398. > loop(box1->p,xl,yl,xr-xl,cs,0,RI)
  399. && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
  400. < loop(box1->p,xr,yl,xr-xl,cs,0,LE)
  401. && num_cross(xl,xr,yu,yu,box1->p,cs) == 2
  402. && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 )
  403. mod = TILDE;
       // more than one crossing on the middle row and the middle column plus
       // exactly one hole
  404. if( xr-xl>2 && yl-yu>2)
  405. if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 )
  406. if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 )
  407. if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 )
  408. mod = RING_ABOVE;
  409. #ifdef DEBUG
       // NOTE(review): the format labels look swapped -- the values printed
       // after "x=" are yu/yl offsets and those after "y=" are xl/xr offsets
  410. printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s",
  411. (int)mod,yu-box1->y0,yl-box1->y0,
  412. xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON":
  413. ((mod==ACUTE_ACCENT)?"ACUTE":
  414. ((mod==TILDE)?"TILDE":"?"))));
  415. out_x(box1);
  416. #endif
  417. }
  418. }
  419. if (m) box1->dots=r; // set to 0 also possible after division
  420. if (m) box1->modifier=mod; /* should be resetted after compose ??? */
  421. MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);)
  422. }
  423. // printf(" modifier=%c",mod);
  424. if (modifier) *modifier=mod; /* set modifier */
  425. return r;
  426. }
       // ocr0_eE: rule based test of sdata->box1 for the letters 'e' and 'E'.
       // Each test is a for(...) block whose body ends with "break", so it
       // runs at most once; "Break" leaves the block as soon as a rule
       // rejects the letter.  ad starts at 100 and is scaled down by soft
       // rules (e.g. ad=99*ad/100); Setac() then records the letter together
       // with ad.  Returns 'e' or 'E' if the respective test ends with
       // ad>=100, otherwise box1->c.
  427. static wchar_t ocr0_eE(ocr0_shared_t *sdata){
  428. struct box *box1=sdata->box1;
  429. int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0,
  430. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  431. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  432. ad; /* tmp-vars */
  433. int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
  434. // --- most frequent letter e first!!!
  435. // --- test e ---------------------------------------------------
  436. for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6)
  437. DBG( wchar_t c_ask='e'; )
  438. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  439. if (sdata->holes.num != 1) ad=97*ad/100;
  440. /* ToDo: may be a two pass version intolerant/tolerant is better */
  441. if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test
  442. if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break;
  443. if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break;
  444. if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2
  445. && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt
       // NOTE(review): the crossing count i collected by the next four lines
       // is overwritten by the following i=loop(...) before it is tested
  446. x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40
  447. if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); }
  448. if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); }
  449. if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); }
       // the loop() value at the middle row (from the left) resp. the middle
       // column (from top and from bottom) must not exceed the values at the
       // box edges
  450. i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break;
  451. j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( j<i ) Break;
  452. j=loop(box1->p,x0,y1 ,x1-x0,cs,0,RI); if( j<i ) Break;
  453. i=loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break;
  454. j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( j<i ) i=j;
  455. j=loop(box1->p,x0 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
  456. j=loop(box1->p,x1 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
  457. i=loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break;
  458. j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
  459. j=loop(box1->p,x1 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
       // j: loop() value at the middle row compared with the mean of the
       // values at 1/4 and 3/4 of the height
  460. j=2*loop(box1->p,x0, (y0+y1)/2,x1-x0,cs,0,RI)
  461. -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI)
  462. -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI);
  463. if (dx>3 && j>=dx/4) Break; // ~g 4x6font
       // the upper half needs at least one row with exactly two crossings
  464. for(y=1;y<dy/2;y++) if( num_cross(x0,x1,y0+y,y0+y,box1->p,cs) == 2 ) break;
  465. if( y==dy/2 ) Break; // v0.2.5 ~ bad_t
       // i: number of columns in the middle half with exactly three crossings
  466. for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++)
  467. if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++;
  468. if( dx>4 && dy>5 && (i<dx/4-1 || i==0) ) Break; // ~g but 4x6-e
  469. // look for horizontal white line (right gap) => set x,y
       // x = largest loop() value from the right edge, y = its row (the
       // lowest such row on ties).
       // NOTE(review): these horizontal scans pass y1-y0 as loop() length
       // while the scans above pass x1-x0 -- confirm this is intended
  470. for(x=0,y=i=y0+dy/3;i<y1-dy/6;i++){
  471. j=loop(box1->p,x1,i,y1-y0,cs,0,LE);
  472. if(j>=x) { x=j;y=i; }
  473. }
  474. if (x<dx/2){ // no gap found, fat font???
  475. // check smallest thickness left > 2* smallest thickness right
  476. for(i1=dx,i=y0+dy/3;i<y1-dy/6;i++){
  477. j =loop(box1->p,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break;
  478. j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI);
  479. if (j<i1) i1=j; // smallest thickness on left bow
  480. }
  481. for(i2=dx,y=i=y0+dy/3;i<y1-dy/6;i++){
  482. j =loop(box1->p,x1 ,i,y1-y0,cs,0,LE);
  483. j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE);
  484. if(j<i2) { i2=j;y=i; }
  485. } if (3*i2>2*i1) Break; // not accepted, if right line is not very thinn
       // x = sum of three consecutive loop() runs from the right edge in
       // row y; it is tested against dx/2 right after this block
  486. x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE);
  487. x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE);
  488. x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE);
  489. if (3*i2>i1) ad=99*ad/100;
  490. if (2*i2>i1) ad=99*ad/100;
  491. bad_e=60; // used later?
  492. }
  493. if (x<dx/2) Break;
       // at least one column in the middle needs more than one crossing
       // between the top row and row y
  494. for(i=1,j=x0+dx/6;j<x1-dx/6 && i;j++)
  495. if( num_cross(j,j,y0,y,box1->p,cs) > 1 ) i=0;
  496. if( i ) Break;
  497. // ..@@@@...<-
  498. // .@@@@@@;.
  499. // @@,...@@.
  500. // @@.....@,
  501. // @@@@@@@@@
  502. // @@.,;.@,. <- problem (y) == bad_e>50
  503. // @@.....@.
  504. // @@,...@@.
  505. // .@@@,@@@.
  506. // ..@@@@;..<-
  507. if (dy>11 && bad_e<50)
  508. if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except a curved ("geschwungenes") e
  509. if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1
  510. && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break;
  511. // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){
       // no hole, or the first hole reaches down to row y or below (hole
       // coordinates are compared with y-y0, i.e. treated as box relative)
  512. if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){
  513. if( sdata->hchar ) Break; // ~ \it t
  514. // look if thinn font (may be h-line is broken) Mai00
  515. for(j=0,i=x0+dx/8;i<x1-1;i++)
  516. if( get_bw(i,i,y0+dy/4,y,box1->p,cs,1) == 1 ) j++;
  517. if(j<2*dx/4) Break;
  518. }
  519. if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break;
  520. if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break;
  521. if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) {
  522. ad=95*ad/100; } /* 8*10 @ (=at) is not an e */
  523. // look for horizontal gap
       // same search as above, but measured from the left edge
  524. for(x=0,y=i=y0+dy/4;i<y1-dy/4;i++){
  525. j=loop(box1->p,x0,i,x1-x0,cs,0,RI);
  526. if(j>=x) { x=j;y=i; }
  527. }
  528. if (y>y0+dy/4 && y<y1-dy/4 && x>dx/2) Break; // s
  529. if (x>dx/4) ad=99*ad/100;
       // no crossings at all in the lower right quarter
  530. if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0
  531. && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0
  532. && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) {
  533. if (sdata->gchar) Break; // ~p
  534. ad=99*ad/100;
  535. }
  536. /* upper case is for 5x6 box */
  537. if( sdata->hchar // broken B ? should also work when linedetection fails
  538. && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) {
  539. x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
  540. if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x
  541. && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break;
  542. if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x
  543. && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break;
  544. }
       // these two scans use sdata->bp with box relative coordinates
       // (0..dx-1, 0..dy-1) instead of box1->p
  545. x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI);
  546. if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q
       // context penalty: with box1->m2 set, gchar and hchar each cost 1%;
       // without m2 a flat 1% is taken off
  547. if (box1->m2) {
  548. if (sdata->gchar) ad=99*ad/100;
  549. if (sdata->hchar) ad=99*ad/100;
  550. } else ad=99*ad/100;
  551. Setac(box1,(wchar_t)'e',ad);
  552. if (ad>=100) return 'e';
  553. break;
  554. }
  555. // --- test E ---------------------------------------------------
  556. for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4
  557. // rewritten for vectors 0.43
       // these i1,i2 shadow the function-level i1,i2; i2 and i4 are only
       // used in the MSG() debug output below
  558. int i1, i2, i3, i4, i5; // line derivation + corners
  559. DBG( wchar_t c_ask='E'; )
  560. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  561. /* half distance to the center */
  562. d=2*sq(128/4);
  563. /* now we check that all four corner points aa[0..3] are near the box corners */
  564. if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */
  565. if (aa[0][2]>d/2) Break; /* upper left end */
  566. if (aa[1][2]>d/2) Break; /* lower left end */
  567. if (aa[2][2]>d/2) Break; /* lowerright end */
  568. /*
  569. E f near E
  570. OOOOOOOO OOOO
  571. O5 O O
  572. O4 O
  573. OOOO3 OOOOOO
  574. O2 O
  575. O O
  576. O1 O O
  577. OOOOOOOO OOOOOO
  578. */
  579. // check the bow from below
       // every vector from the lower left to the lower right corner point
       // must stay within dy/4 of the bottom; the index wraps at
       // num_frame_vectors[0], i.e. the walk assumes the first frame
  580. for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
  581. if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal!
  582. } if (i!=aa[2][3]) Break; // ~AHKMNRX
  583. // search most left+down between bottom right and top right
       // i1..i5 are the vector indices marked 1..5 in the sketch above
  584. i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1);
  585. i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0);
  586. i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2);
  587. i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3);
  588. i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3);
       // i: vector on the left side nearest to a point left of the box at
       // half height; its x is compared with the mean x of the two left
       // corner points
  589. i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2);
  590. if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break;
  591. if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f
  592. MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
  593. // holes right open?
       // three flag loops: i stays 1 (=> Break) unless a matching row is found
  594. for( i=1,y=y0; y<y0+dy/4 && i; y++ ) // long black line
  595. if( get_bw(x0+dx/3,x1-dx/6,y,y,box1->p,cs,2) == 0 ) i=0;
  596. if( i ) Break;
  597. for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line
  598. if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0;
  599. if( i ) Break;
  600. for( i=1,y=y0+dy/3; y<y1-dy/3 && i; y++ ){ // black line
  601. j=loop(box1->p,x0 ,y,dx,cs,0,RI);
  602. j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0;
  603. } if( i ) Break;
  604. x=x1-dx/3; y=y0; // drill through from above!
  605. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
  606. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
  607. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break;
  608. x=x1-dx/3; y=y1; // drill through from below!
  609. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( y<y1-dy/4 ) Break;
       // NOTE(review): the limit below is y0-dy/3 although the mirrored
       // check from above uses y0+dy/3; y1-dy/3 may have been intended --
       // confirm before changing
  610. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( y<y0-dy/3 ) Break;
  611. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || y<y0+dy/2 ) Break;
  612. x=x1-dx/3; y=y0; // drill through from above!
  613. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
  614. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
  615. y+=dy/15;
  616. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x0 ) Break;
  617. if (dx>15 && x==x0) ad=99*ad/100; // to thin
  618. x+=dx/15+1;
  619. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break;
  620. // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break;
       // stricter than the tolerant "num > 1" check at the start of the test
  621. if (sdata->holes.num > 0) Break;
       // the loop() value from the left must be similar at 1/4, 1/2 and 3/4
       // of the height
  622. i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break;
  623. j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break; i=j;
  624. j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
  625. j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE);
       // x collects the smallest left loop() value but is not read afterwards
  626. for( x=dx,y=y0+dy/6; y<y1-dy/9; y++ ) // left border straight
  627. { i=loop(box1->p,x0,y,dx,cs,0,RI);
  628. if (i>j/2 && ad>98) ad=99*ad/100;
  629. if (i>dx/4) break;
  630. if(i<x) x=i;
  631. } if( y<y1-dy/9 ) Break; // t
  632. if(dy>3*dx) // ~[
  633. if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break;
       // context penalty, applied only when box1->m2 is set
  634. if (box1->m2) {
  635. if (!hchar) ad=ad*99/100;
  636. if ( gchar) ad=ad*99/100;
  637. }
  638. Setac(box1,(wchar_t)'E',ad);
  639. if (ad>=100) return 'E';
  640. break;
  641. }
       // neither test ended with ad>=100: hand back the box's current char
  642. return box1->c;
  643. }
  /*
   * ocr0_n: rule-based test whether the box sdata->box1 looks like a
   * lowercase 'n'.
   *
   * sdata: shared recognition state; this function reads box1, bp, cs,
   *        holes.num, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all for boxes smaller than 3x4).
   * Hard checks leave through the Break macro (presumably a break plus debug
   * output); soft checks only scale the weight ad (starting at 100) that is
   * finally handed to Setac(box1,'n',ad).  If dx<5, at least two crossings
   * are counted at row dy/8 and dy<=4, 'm' is additionally proposed with 97.
   * Calls on sdata->bp use box-relative coordinates (0..dx-1, 0..dy-1),
   * calls on box1->p use absolute ones (x0..x1, y0..y1).
   * NOTE(review): loop(), num_cross(), get_bw(), Break, DBG and Setac are
   * defined outside this chunk -- confirm their exact semantics there.
   */
  644. static wchar_t ocr0_n(ocr0_shared_t *sdata){
  645. struct box *box1=sdata->box1;
  646. int i,j,d,x,y,i1,i2,i3,handwritten=0,
  647. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  648. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  649. ad; /* tmp-vars */
  650. // --- test n ---------------------------------------------------
  651. // glued rm is very similar to glued nn -> thickness of h-line should grow
  652. // may02: tested for 8x12 font
  653. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  654. DBG( wchar_t c_ask='n'; )
  655. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  /* i, j: crossing counts of the full-width rows at dy/4 and dy/2 */
  656. i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs);
  657. j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs);
  658. if( (i<2 || i>3) && j!=2 ) Break;
  659. if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */
  660. y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */
  661. if( num_cross( 0,dx/2,y ,y ,sdata->bp,cs) != 1
  662. && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1
  663. && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr
  664. // ~thick_w
  665. y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break;
  666. if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break;
  667. y=3*dy/4;
  668. if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1
  669. && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p
  670. y=dy/2;
  /* two crossings at both dy/2-dy/8 and dy/2 select the "printed n" branch,
     everything else is tried as handwritten n */
  671. if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2
  672. && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr
  673. /* printed n */
  /* walk along row y=dy/2 accumulating run lengths in x; i1, i2, i3 keep
     the accumulated x after the 2nd, 3rd and 4th run (i3 is not read later) */
  674. x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
  675. x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
  676. x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line
  677. x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap
  678. i=dy/4; y=13*dy/16;
  679. if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n
  680. if (i<2 && i<dy/2) i++; // correct for small fonts like 8x12
  681. // the same game for the lower part =>l1 l2 l3 l4 ???
  /* columns i1..i2-1: from the top the scan must stay below i everywhere,
     from the bottom at least one column must exceed dy/4 */
  682. for(x=i1;x<i2;x++) if( loop(sdata->bp,x, 0,dy,cs,0,DO)>=i ) break;
  683. if(x <i2) Break; // gap detected
  684. for(x=i1;x<i2;x++) if( loop(sdata->bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break;
  685. if(x==i2) Break; // no gap detected (glued serifs ??? )
  686. // glued rm as nn ???
  /* y is reused here as the running maximum of the measured thickness i;
     the loop stops early once a column is thinner than half that maximum */
  687. for(y=0,x=(i1+i2)/2;x<i2;x++){
  688. i=loop(sdata->bp,x,0,dy,cs,0,DO);
  689. i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness
  690. if( i>y ) y=i; if( i<y/2 ) break;
  691. }
  692. if(x <i2) Break; // unusual property for n
  693. if( dy>7 )
  694. if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE)
  695. +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1
  696. > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE)
  697. +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o
  /* NOTE(review): the second loop() below starts at column dx-1 with
     direction RI -- confirm against loop() that this is intended */
  698. if( dy>7 && dx>7 )
  699. if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0
  700. && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o
  701. } else { /* check handwritten n */
  702. if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3
  703. && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break;
  704. i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break;
  705. i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break;
  706. i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI);
  707. if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break;
  708. i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI);
  709. if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break;
  /* handwritten is assigned here but not read again inside this function */
  710. handwritten=80;
  711. }
  /* checks shared by the printed and the handwritten branch */
  712. i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5)
  713. if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr
  714. i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE);
  715. if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv
  716. if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0
  717. && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0
  718. && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P
  719. // glued ri ???
  720. if( box1->dots>0 && box1->m1 )
  721. if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 )
  722. if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2
  723. || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break;
  724. i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2)
  725. i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE);
  726. x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
  727. if (sdata->hchar && i-x>1) Break; // &szlig;
  /* x becomes the minimum of four scans on rows dy-1, dy-2, 1 and 2.
     NOTE(review): all four start at column 0 with direction LE -- confirm
     against loop() whether RI was meant */
  728. x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs
  729. i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (i<x) x=i;
  730. i=loop(sdata->bp, 0, 1,dx,cs,0,LE); if (i<x) x=i;
  731. i=loop(sdata->bp, 0, 2,dx,cs,0,LE); if (i<x) x=i;
  732. if (sdata->hchar && x>0) Break; // fl
  /* soft penalties: each one scales ad down by a few percent */
  733. if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M
  734. if (sdata->hchar || 2*y0<box1->m1+box1->m2) ad=96*ad/100;
  735. if (sdata->gchar) ad=96*ad/100; // &szlig; fl
  736. if (dx<5) { // for small fonts no middle line is possible for m
  737. ad=99*ad/100; // 4x6 m
  738. if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) {
  739. ad=97*ad/100; // ~m
  740. if (dy<=4) Setac(box1,'m',97); // only for 4x6 font!
  741. }
  742. }
  743. Setac(box1,'n',ad);
  744. break;
  745. }
  746. return box1->c;
  747. }
  /*
   * ocr0_M: rule-based test whether the box sdata->box1 looks like 'M'.
   *
   * sdata: shared recognition state; this function reads box1, bp, cs,
   *        holes.num, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all unless dx>3 and dy>3).  Hard
   * checks leave through the Break macro (presumably a break plus debug
   * output); soft checks scale the weight ad (starting at 100) that is
   * finally passed to Setac(box1,'M',ad).
   * All pixel helpers are called on bp with box-relative coordinates
   * (0..dx-1, 0..dy-1).
   * NOTE(review): loop(), num_cross(), Break, DBG and Setac are defined
   * outside this chunk -- confirm their exact semantics there.
   */
  748. static wchar_t ocr0_M(ocr0_shared_t *sdata){
  749. struct box *box1=sdata->box1;
  750. pix *bp=sdata->bp;
  751. int d,x,y,i0,i1,i2,i3,t1,hchar=sdata->hchar,gchar=sdata->gchar,
  752. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  753. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  754. ad; /* tmp-vars */
  755. // ------------------ test M ---------------------------
  756. for(ad=d=100;dx>3 && dy>3;){ // dy<=dx not perfect! better search for
  757. // the middle minimum for m
  758. DBG( wchar_t c_ask='M'; )
  759. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  /* some row between dy/4 and 3*dy/4 must show at least 3 crossings
     (only enforced when dx>4) */
  760. for (y=dy/4;y<=3*dy/4;y++)
  761. if (num_cross(0,dx-1,y,y,bp,cs)>=3) break;
  762. if (y>3*dy/4 && dx>4) Break;
  763. if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2
  764. && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */
  765. if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break;
  766. x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl
  767. x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break;
  768. if( loop(bp, 0,7*dy/16,dx,cs,0,RI)
  769. + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K
  770. if( dy>8 /* following lines should be extend to range check */
  771. && loop(bp, dx/4,dy-1, dy,cs,0,UP)<dy/4
  772. && loop(bp,3*dx/8,dy-1, dy,cs,0,UP)<dy/4 )
  773. if( loop(bp, 0,dy-1-dy/ 8,dx,cs,0,RI)
  774. < loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)-dx/32 ) Break; // ~it_u
  775. if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)==2
  776. && num_cross(0,dx-1, dy/4, dy/4,bp,cs)> 2
  777. && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u
  778. if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2
  779. && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v
  780. if( loop(bp,3*dx/4, 0,dy,cs,0,DO)
  781. > loop(bp,2*dx/4, 0,dy,cs,0,DO)
  782. && loop(bp,3*dx/4,dy-1,dy,cs,0,UP)
  783. < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N
  784. if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO)
  785. > loop(bp,2*dx/4, dy/8,dy,cs,0,DO)
  786. && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP)
  787. < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N
  788. // i0 is lower end of upper serifen (widest gap? )
  /* i0 is only assigned here; it is not read again inside this function */
  789. i0=0;
  790. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ?
  791. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){
  792. for(y=dy/2+1;y<dy;y++){
  793. if( num_cross(0,dx-1,y,y,bp,cs)<3 ) break;
  794. }
  795. if( num_cross(0,dx-1,y,y,bp,cs)==2 ){
  796. x =loop(bp,dx-1 ,y-1,dx,cs,0,LE);
  797. x+=loop(bp,dx-1-x,y-1,dx,cs,1,LE);
  798. x+=loop(bp,dx-1-x,y-1,dx,cs,0,LE);
  799. if( loop(bp,dx-x,y-1,dy,cs,0,UP)>y-2 ) Break; // ~N
  800. }
  801. }
  802. }
  803. // MNWK
  /* scan columns dx/2 .. dx-dx/4-1 from the top while the measured depth
     keeps growing: i2 = largest depth, i1 = its column; i3 adds the length
     of the following run in that column (i3 is not read afterwards) */
  804. for(i2=0,i1=x=dx/2;x<dx-dx/4;x++){ // lowest pixel
  805. y=loop(bp,x,0,dy,cs,0,DO); if(y>i2) {i2=y;i1=x;} else break; }
  806. i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO);
  807. if(i2<dy/4) {
  808. if (!sdata->hchar) Break; // rm
  809. ad=99*ad/100;
  810. }
  811. if (i2==0 && dx>8 && dy>12) Break; // glued and bad splitted serifen-MN
  812. // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A
  /* from here on no hole at all is accepted (the entry check allowed one) */
  813. if (sdata->holes.num != 0) Break;
  814. t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI);
  815. t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line?
  816. if( 7*(t1+1)<dx )
  817. if( num_cross(i1,dx-1,i2-1,i2-1,bp,cs)!=2
  818. || num_cross(0 ,i1 ,i2-1,i2-1,bp,cs)!=2 ) Break; // too hard ???
  819. // ~u_n-pair
  820. if( num_cross(0,dx-1,0,0,bp,cs)!=2
  821. && num_cross(0,dx-1,1,1,bp,cs)!=2
  822. && num_cross(0,dx-1,2,2,bp,cs)!=2 ) Break;
  823. // ~nn v0.2.4a3
  /* with 4 crossings on both rows dy/4 and 3*dy/4, i1 (upper row) and i2
     (lower row) accumulate run lengths from the left and are compared
     after the 3rd, 4th and 5th run */
  824. if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)==4
  825. && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)==4 ){
  826. i1 =loop(bp, 0, dy/4,dx,cs,0,RI);
  827. i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
  828. i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
  829. i2 =loop(bp, 0,3*dy/4,dx,cs,0,RI);
  830. i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
  831. i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
  832. if( i1>=i2 ) Break; // no good M
  833. i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
  834. i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
  835. if( i1>=i2 ) Break; // no good M
  836. i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
  837. i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
  838. if( i1<=i2 ) Break; // no good M
  839. }
  840. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2
  841. && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u
  /* soft penalties: each one scales ad down by a few percent */
  842. if (dy<17)
  843. if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100;
  844. if (dx>5) /* 4x6 font has only 1 cross at y=1 */
  845. if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt
  846. if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt
  847. if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H
  848. if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2
  849. && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2
  850. && dx>8 && dy>12 ){
  851. ad=98*ad/100;
  852. for(y=5*dy/16;y<5*dy/8;y++) // look for H-line
  853. if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break;
  854. if( y<5*dy/8 ) ad=95*ad/100;
  855. if( y<5*dy/8 )
  856. if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0
  857. || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad!
  858. }
  859. if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2
  860. && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100;
  861. if(!hchar){
  862. ad=98*ad/100; /* not sure */
  863. if( loop(bp,0, dy/4,dx,cs,0,RI)
  864. < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued
  865. }
  866. if( gchar ) ad=98*ad/100;
  867. if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */
  868. Setac(box1,'M',ad);
  869. break;
  870. }
  871. return box1->c;
  872. }
  /*
   * ocr0_N: vector-based test whether the box sdata->box1 looks like 'N'.
   *
   * sdata: shared recognition state; this function reads box1, aa,
   *        holes.num, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all unless dx>3 and dy>3).  Hard
   * checks leave through the Break macro (presumably a break plus debug
   * output); soft checks scale the weight ad (starting at 100) that is
   * finally passed to Setac(box1,'N',ad).
   *
   * aa[k] holds (x, y, dist^2, vector_idx) of corner point k.  The code
   * below requires aa[3] to lie right of aa[0], aa[2] right of aa[1],
   * aa[1] below aa[0] and aa[2] below aa[3].  dbg[0..3] collect four
   * line-deviation measures which are range checked and then folded
   * into ad; dbg[4..8] are unused.
   * NOTE(review): line_deviation(), nearest_frame_vector(), sq(), Break,
   * MSG, DBG and Setac are defined outside this chunk -- confirm their
   * exact semantics there.
   */
  873. static wchar_t ocr0_N(ocr0_shared_t *sdata){
  874. struct box *box1=sdata->box1;
  875. int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  876. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  877. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  878. (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
  879. dbg[9],
  880. ad; /* tmp-vars */
  881. // --- test N ------- +hchar -gchar
  882. for(ad=d=100;dx>3 && dy>3;){ // 4x6font
  883. int j;
  884. DBG( wchar_t c_ask='N'; )
  885. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  886. if (sdata->holes.num > 0) ad=98*ad/100; /* # */
  887. if (dx<6) ad=99*ad/100;
  888. if (dx<5) ad=99*ad/100;
  889. /* half distance to the center */
  890. d=2*sq(128/4);
  891. /* now we check for the 4 ends of the x */
  892. if (aa[0][2]>d) Break;
  893. if (aa[1][2]>d) Break;
  894. if (aa[2][2]>d) Break;
  895. if (aa[3][2]>d) Break;
  896. if (aa[3][0]-aa[0][0]<dx/2) Break;
  897. if (aa[2][0]-aa[1][0]<dx/2) Break;
  898. if (aa[1][1]-aa[0][1]<dy/2) Break;
  899. if (aa[2][1]-aa[3][1]<dy/2) Break;
  900. if (aa[3][0]-aa[0][0]<4-1) Break; /* to small to hold an N */
  901. if (aa[2][0]-aa[1][0]<4-1) Break; /* to small */
  902. if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */
  903. if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */
  904. /* left and right vertical line */
  905. d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
  906. ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
  907. d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
  908. /* i1: uppermost left ^ from bottom (near 0,0) */
  909. i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0);
  910. x=box1->frame_vector[i1][0];
  911. y=box1->frame_vector[i1][1];
  912. MSG( fprintf(stderr,"i1= %d (%d,%d) left ^ from below", i1,x-x0,y-y0);)
  913. if (y-y0 > 5*dy/8) Break;
  914. if (x-x0 > 5*dx/8) Break;
  915. /* i3: uppermost right ^ ~H */
  916. i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
  917. MSG( fprintf(stderr,"i3= %d (%d,%d) right ^ (ad=%d)",\
  918. i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)
  919. /* check lower border of diagonal line, may fail on fonts where
  920. * line ends on middle of right vertical line (screen font) */
  921. dbg[0]=d=line_deviation(box1,i1, aa[2][3]);
  922. /* check right border of left vertical line */
  923. /* but split to possible lower left serif + vert. line */
  /* dbg[1] is the sum over both segments (aa[1] -> j and j -> i1), like the
     dbg[2] and dbg[3] sums further down.  A stray ';' after the first call
     used to end the statement early, so the second deviation was computed
     and then thrown away. */
  924. j=nearest_frame_vector(box1,aa[1][3],i1, x0+dx/2, y1+dy/2);
  925. dbg[1]=d=line_deviation(box1, aa[1][3],j )
  926. +line_deviation(box1, j,i1);
  927. MSG(fprintf(stderr," i1-a2 %d a1_serif-i1 %d ad=%d",dbg[0],dbg[1],ad);)
  928. if (dbg[0] > sq(1024/4)) Break;
  929. if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024)
  930. if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024)
  931. // serif N has d=sq(1024/3)=116508
  932. MSG( fprintf(stderr,"ad %d", ad); )
  933. /* i2: lowest right v from top, same frame? N-tilde etc.? */
  934. i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8);
  935. x=box1->frame_vector[i2][0];
  936. y=box1->frame_vector[i2][1];
  937. MSG( fprintf(stderr,"i2= %d (%d,%d) lowest right v from top",\
  938. i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);)
  939. if (y-y0 < 3*dy/8) Break;
  940. if (x-x0 < 3*dx/8) Break;
  941. // test H
  942. if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4
  943. && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8
  944. && y<=box1->frame_vector[i1][1]) Break;
  945. /* check if upper left and lower right point are joined directly */
  946. /* but split to possible upper right serif + down line */
  947. j=nearest_frame_vector(box1,i2,aa[0][3], x0+dx/2, y0-dy/2);
  948. dbg[2]=d=line_deviation(box1,i2, j)
  949. +line_deviation(box1, j, aa[0][3]);
  950. /* check if upper right and lower right point are joined directly */
  951. /* but split to possible upper right serif + vert. line */
  952. j=nearest_frame_vector(box1,aa[3][3],i2, x0+dx/2, y0-dy/2);
  953. dbg[3]=d=line_deviation(box1, aa[3][3],j)
  954. +line_deviation(box1, j,i2); // ToDo: split once more?
  955. MSG( fprintf(stderr," i2-a0 %d a3-i2 %d ad %d",dbg[2],dbg[3], ad); )
  956. if (dbg[2] > sq(1024/4)) Break;
  957. // serif N, ToDo: do it better
  958. if (dbg[3] > sq(1024/4)) ad=97*ad/100;
  959. if (dbg[3] > sq(1024/3)) Break;
  960. MSG( fprintf(stderr,"ad %d", ad); )
  961. MSG( fprintf(stderr,"check against melted tu"); )
  962. // i1 = left ^ from below, i2 = lowest right v from top
  963. // sample gocr_Device*: 3-8,dy=27
  964. if ( (box1->frame_vector[i1][1]-y0)
  965. -(y1-box1->frame_vector[i2][1])>dy/8) ad=99*ad/100; /* ~ tu */
  966. MSG( fprintf(stderr,"tu ad %d", ad); )
  967. if (box1->frame_vector[i2][0]
  968. -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */
  969. MSG( fprintf(stderr,"i2-i1<=dx/8 ad %d", ad); )
  970. /* i1: uppermost left ^ from bottom (near 0,0) */
  971. /* i2: lowest right v from top, same frame? N-tilde etc.? */
  972. if (box1->frame_vector[i2][1]
  973. -box1->frame_vector[i1][1]<=dy/8) {
  974. // may happen on screen fonts 7x10
  975. if (dx>8) ad=97*ad/100; /* too flat (ff,H) */
  976. }
  977. MSG( fprintf(stderr,"i2-i1<=dy/8 ad %d", ad); )
  978. if (box1->frame_vector[i2][1]
  979. -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100;
  980. MSG( \
  981. fprintf(stderr,"^v %d %d %d %d line dev %d %d %d %d max %d %d ad %d",\
  982. box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
  983. box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
  984. dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024),ad);)
  /* fold each of the four deviations into the weight with the same formula */
  985. ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100;
  986. MSG( fprintf(stderr,"ad %d", ad); )
  987. ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
  988. MSG( fprintf(stderr,"ad %d", ad); )
  989. ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
  990. MSG( fprintf(stderr,"ad %d", ad); )
  991. ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
  992. MSG( fprintf(stderr,"ad %d", ad); )
  993. if (!hchar) ad=99*ad/100;
  994. if ( gchar) ad=98*ad/100; // \sc N
  995. Setac(box1,'N',ad);
  996. break;
  997. }
  998. return box1->c;
  999. }
  /*
   * ocr0_h: test whether the box sdata->box1 looks like a lowercase 'h'.
   *
   * sdata: shared recognition state; this function reads box1, bp, cs, aa,
   *        holes, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all for boxes smaller than 3x4).
   * Hard checks leave through the Break macro (presumably a break plus debug
   * output); soft checks scale the weight ad (starting at 100) that is
   * finally passed to Setac(box1,'h',ad).  The first part works on the frame
   * vectors and corner points aa[k] = (x, y, dist^2, vector_idx); the part
   * after the "new part /// old obsolete part" marker works on pixels.
   * Calls on bp use box-relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use absolute ones (x0..x1, y0..y1).
   * NOTE(review): loop(), num_cross(), get_bw(), nearest_frame_vector(),
   * sq(), Break, MSG, DBG and Setac are defined outside this chunk --
   * confirm their exact semantics there.
   */
  1000. static wchar_t ocr0_h(ocr0_shared_t *sdata){
  1001. struct box *box1=sdata->box1;
  1002. pix *bp=sdata->bp;
  1003. int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1004. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1005. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1006. ad; /* tmp-vars */
  1007. int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
  1008. // --- test h ---------------------------------------------------
  1009. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  1010. // rewritten for vectors 0.42
  1011. int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
  1012. DBG( wchar_t c_ask='h'; )
  1013. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1014. /* half distance to the center */
  1015. d=2*sq(128/4);
  1016. /* now we check for the upper right end of the h */
  1017. if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
  1018. if (aa[0][2]>d/2) Break; /* upper left end */
  1019. if (aa[1][2]>d/2) Break; /* lower left end */
  1020. if (aa[2][2]>d/2) Break; /* lowerright end */
  1021. /*
  1022. type A B=italic ???
  1023. 18 OOO
  1024. O O O
  1025. O O
  1026. O7OOO OOOO
  1027. O4 O O O
  1028. O O O O
  1029. O O O O O
  1030. 2O3 5O6 O OOO
  1031. */
  1032. i1=i8=aa[0][3];
  1033. i2=i3=aa[1][3];
  1034. i5=i6=aa[2][3];
  1035. // check the bow from below (fails on melted serifs)
  /* walk the frame vectors from i2 towards i5 (wrapping at
     num_frame_vectors[0]); i4 tracks the vector with the smallest y seen */
  1036. for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
  1037. if (box1->frame_vector[ i][1]
  1038. <box1->frame_vector[i4][1]) i4=i; // get next maximum
  1039. if (box1->frame_vector[ i][1]<=y0) break; // fatal!
  1040. }
  1041. if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
  1042. if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
  1043. // two steps for i7 to go around pitfalls on italic h
  1044. i7=nearest_frame_vector(box1, i6, i8, (x0+x1)/2, (y0+y1)/2);
  1045. i7=nearest_frame_vector(box1, i6, i7, x0, (y0+y1)/2);
  1046. i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
  1047. i5=nearest_frame_vector(box1, i4, i6, (x0+x1)/2, y1);
  1048. MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
  1049. /* ... new part /// old obsolete part ... */
  1050. if( get_bw(0 ,dx/2,dy/8 ,dy/8 ,bp,cs,1) != 1 ) Break;
  1051. if( get_bw(0 ,dx/2,dy/2 ,dy/2 ,bp,cs,1) != 1 ) Break;
  1052. if( get_bw(dx/2 ,dx-1,dy-1-dy/3,dy-1-dy/3,bp,cs,1) != 1 ) Break;
  1053. if( get_bw(dx/2 ,dx/2,dy/5 ,dy-1-dy/3,bp,cs,1) != 1 ) Break;
  1054. if( get_bw(dx-1-dx/3,dx-1,0 ,1 ,bp,cs,1) == 1 ) Break;
  1055. if( get_bw(dx-1-dx/3,dx-1,1 ,dy/6 ,bp,cs,1) == 1 ) Break;
  1056. if( dy>18 )
  1057. if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break;
  1058. if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s-
  /* some column in the middle third must have get_bw()==0 over the
     lowest quarter */
  1059. for( x=x0+dx/3;x<x1-dx/3;x++)
  1060. if( get_bw(x, x,y1-dy/4, y1, box1->p,cs,1) == 0 ) break;
  1061. if( x>=x1-dx/3 ) Break;
  /* i counts down from dy/4: that many rows with exactly 2 crossings are
     required from y0+dy/3 downwards */
  1062. for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){
  1063. if( num_cross(x0,x1 ,y,y, box1->p,cs) == 2 ) i--;
  1064. } if( i ) Break;
  /* likewise dy/4 rows with exactly 1 crossing in the left half of the
     upper half */
  1065. for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){
  1066. if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--;
  1067. } if( i ) Break;
  1068. // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen
  1069. if (sdata->holes.num > 0)
  1070. if (sdata->holes.hole[0].y0 > dy/3
  1071. && sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
  1072. // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini
  1073. if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8
  1074. < loop(bp,dx-1,dy/2,dx,cs,0,LE)
  1075. && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8
  1076. < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00
  1077. i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
  1078. if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu
  /* The penalty grows as 2^i.  i is a run length bounded only by dx, so the
     shift count is clamped to 7: for every i>=7 the factor 99-(1<<i) is
     already negative, ad drops below 1 and the plain break below is taken,
     whereas an unclamped 1<<i is undefined for i>=31 and the product
     overflows int even earlier. */
  1079. ad=(99-(1<<(i<7?i:7)))*ad/100;
  1080. if (num_cross(x0,x0,y0,y0+dy/8+2, box1->p,cs) == 0 ) ad=97*ad/100;
  1081. if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100;
  /* plain break (not Break): leaves without calling Setac */
  1082. if (ad<1) break;
  1083. }
  1084. i =loop(bp,0,dy/4,dx,cs,0,RI);
  1085. i+=loop(bp,i,dy/4,dx,cs,1,RI)+1;
  /* the penalty below is applied once per matching column, not just once */
  1086. for ( ; i<dx-dx/3; i++ )
  1087. if( loop(bp,i,0,dy,cs,0,DO)>5*dy/8 ) {
  1088. ad=98*ad/100; // melted hi, li, but handwritten h
  1089. MSG(fprintf(stderr,"ad=%d",ad);) }
  1090. if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) {
  1091. ad=98*ad/100; // melted fr
  1092. MSG(fprintf(stderr,"ad=%d",ad);) }
  1093. i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertikal letters
  1094. if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) {
  1095. ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
  1096. i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i<dx/4) {
  1097. ad=98*ad/100;
  1098. MSG(fprintf(stderr,"ad=%d",ad);) }
  /* NOTE(review): for small i the column dx-i+1+dx/8 is >= dx, i.e. outside
     the 0..dx-1 range used elsewhere -- confirm num_cross() tolerates that */
  1099. if( num_cross(dx-i+1+dx/8,dx-i+1+dx/8,0,1+dy/16,bp,cs) > 0 ) {
  1100. ad=95*ad/100; // melted fi
  1101. MSG(fprintf(stderr,"ad=%d",ad);) }
  1102. if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)<dx/4) {
  1103. ad=98*ad/100; // fi
  1104. MSG(fprintf(stderr,"ad=%d",ad);) }
  1105. if (loop(box1->p,x1,y0 ,dx,cs,0,LE)<dx/4
  1106. || loop(box1->p,x1,y0+1,dx,cs,0,LE)<dx/4) {
  1107. ad=98*ad/100; // li
  1108. MSG(fprintf(stderr,"ad=%d",ad);) }
  1109. if (sdata->holes.num > 0) ad=97*ad/100;
  /* hchar/gchar are only used for the weight when box1->m2 is set */
  1110. if (box1->m2) {
  1111. if ( gchar) ad=98*ad/100;
  1112. if (!hchar) ad=97*ad/100;
  1113. } else ad=99*ad/100;
  1114. Setac(box1,'h',ad);
  1115. break;
  1116. }
  1117. return box1->c;
  1118. }
  /*
   * ocr0_H: pixel-based test whether the box sdata->box1 looks like 'H'.
   *
   * sdata: shared recognition state; this function reads box1, bp, cs,
   *        holes.num, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all for boxes smaller than 3x4).
   * Hard checks leave through the Break macro (presumably a break plus debug
   * output); soft checks scale the weight ad (starting at 100) that is
   * finally passed to Setac(box1,'H',ad).
   * Calls on bp use box-relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use absolute ones (x0..x1, y0..y1).
   * NOTE(review): loop(), num_cross(), get_bw(), Break, MSG, DBG and Setac
   * are defined outside this chunk -- confirm their exact semantics there.
   */
  1119. static wchar_t ocr0_H(ocr0_shared_t *sdata){
  1120. struct box *box1=sdata->box1;
  1121. pix *bp=sdata->bp;
  1122. int i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  1123. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1124. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1125. ad; /* tmp-vars */
  1126. // --- test H ---------------------------------------------------
  1127. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  1128. DBG( wchar_t c_ask='H'; )
  1129. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1130. if ( num_cross(0,dx-1,dy/4 ,dy/4 ,bp,cs) != 2
  1131. && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break;
  1132. if ( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
  1133. && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break;
  1134. if ( loop(bp,0 ,dy/8,dx,cs,0,RI)
  1135. + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A
  /* j = left scan + right scan per row; i is cleared as soon as one row
     exceeds dx/2, j1 remembers the largest j seen */
  1136. for ( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertikal lines
  1137. { j=loop(box1->p,x0 ,y,dx,cs,0,RI)
  1138. +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>dx/2 ) i=0; if(j>j1)j1=j; }
  1139. if ( !i ) Break;
  /* no row in the middle half may fall more than dx/5 below that maximum */
  1140. for ( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
  1141. { j=loop(bp,0 ,y,dx,cs,0,RI)
  1142. +loop(bp,dx-1,y,dx,cs,0,LE); if( j1-j>dx/5 ) i=0; }
  1143. if (!i) Break; // ~K Jul00
  /* i = longest second run from the left within the middle third of rows,
     ya = its row; ya is converted to box-relative coordinates afterwards */
  1144. for (i=0,ya=y=y0+dy/3; y<y1-dy/3; y++ ) // horizontal line
  1145. { j=loop(box1->p,x0 ,y,dx,cs,0,RI);
  1146. j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
  1147. if (i<=dx/2) Break; ya-=y0;
  1148. if (num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
  1149. && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */
  1150. for (y=ya; y<dy-dy/4; y++ ) // ~M Dec00
  1151. if (num_cross(0,dx-1,y ,y ,bp,cs) > 2
  1152. && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
  1153. if (y<dy-dy/4) Break;
  /* the five loops below share one pattern: i starts at 1 and is cleared by
     the first column/row that satisfies the test; if none does -> Break */
  1154. for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  1155. if (get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
  1156. } if (i) Break;
  1157. for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  1158. if (get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
  1159. } if (i) Break;
  1160. for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  1161. if (num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
  1162. } if (i) Break;
  1163. for (i=1,y=y0;y<=y0+dy/4 && i;y++){
  1164. if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  1165. } if (i) Break;
  1166. for(i=1,y=y1-dy/4;y<=y1 && i;y++){
  1167. if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  1168. } if (i) Break;
  1169. if (get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break;
  1170. if (get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) Break;
  /* i1, i2, i3: scans from the right edge at rows dy/4, dy/2 and dy-1-dy/4;
     they must stay close to each other and nearly collinear */
  1171. i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) Break;
  1172. i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) Break;
  1173. i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) Break;
  1174. if (abs(i1+i3-2*i2)>dx/16+1) Break;
  1175. // test for thick tall N looking like a H
  1176. if (num_cross(x0,x1,y0, y1 , box1->p,cs) < 2 ) Break; // sure N
  1177. if (num_cross(x0,x1,y0,(y0+y1)/2, box1->p,cs) < 2 ) Break; // sure N
  /* i1, i2, i3 are reused here for run lengths of the vertical strokes */
  1178. i1=loop(bp, 0, dy/4,dx,cs,0,RI);
  1179. i1=loop(bp, i1, dy/4,dx,cs,1,RI); // right side of left vert. line
  1180. i2=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
  1181. i2=loop(bp, i2,dy-1-dy/4,dx,cs,1,RI); // right side of left vert. line
  1182. i3=loop(bp,dx-1 ,dy-1-dy/4,dx,cs,0,LE);
  1183. i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE); // left side of right vert. line
  1184. if (dx<10 && i1-i2>dx/4) Break;
  1185. if (dx<10 && i1-i2>dx/8) ad=99*ad/100; // 7x10 ~N
  /* NOTE(review): the third statement assigns (=) where the second
     accumulates (+=); the original author also marked the check with ??? */
  1186. i =loop(bp, 0,dy/2+1+dy/8,dx,cs,0,RI);
  1187. i+=loop(bp, i,dy/2+1+dy/8,dx,cs,1,RI);
  1188. i =loop(bp, i,dy/2+1+dy/8,dx,cs,0,RI);
  1189. if (i<dx/2-1 && 5*i1>6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2) Break; // ???
  1190. if (dx>8)
  1191. if (loop(bp,dx-1, 3*dy/8,dx,cs,0,LE)
  1192. -loop(bp,dx-1, dy/8,dx,cs,0,LE)>dx/4
  1193. && loop(bp,dx-1, 3*dy/8,dx,cs,0,LE)
  1194. -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K
  1195. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break;
  /* from here on no hole at all is accepted (the entry check allowed one) */
  1196. if (sdata->holes.num != 0) Break;
  1197. MSG( fprintf(stderr,"i123 %d %d %d",i1,i2,i3); )
  1198. if ( gchar) ad=99*ad/100;
  1199. if (!hchar) ad=98*ad/100;
  1200. Setac(box1,'H',ad);
  1201. break;
  1202. }
  1203. return box1->c;
  1204. }
  /*
   * ocr0_k: test whether the box sdata->box1 looks like a lowercase 'k'.
   *
   * sdata: shared recognition state; this function reads box1, bp, cs, aa,
   *        holes, hchar and gchar from it.
   * Returns box1->c in every case.
   *
   * The for(;;) below has no increment and ends with a plain break, so its
   * body runs at most once (and not at all for boxes smaller than 3x4).
   * Hard checks leave through the Break macro (presumably a break plus debug
   * output); soft checks scale the weight ad (starting at 100) that is
   * finally passed to Setac(box1,'k',ad).  The first part works on the frame
   * vectors and corner points aa[k] = (x, y, dist^2, vector_idx), the rest
   * on pixels.
   * Calls on bp use box-relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use absolute ones (x0..x1, y0..y1).
   * NOTE(review): loop(), num_cross(), get_bw(), get_line(),
   * nearest_frame_vector(), sq(), Break, MSG, DBG and Setac are defined
   * outside this chunk -- confirm their exact semantics there.
   */
  1205. static wchar_t ocr0_k(ocr0_shared_t *sdata){
  1206. struct box *box1=sdata->box1;
  1207. pix *bp=sdata->bp;
  1208. int i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1209. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1210. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1211. ad; /* tmp-vars */
  1212. int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
  1213. // --- test k ---------------------------------------------------
  1214. for(ad=100;dx>2 && dy>3;){ // min 3x4
  1215. // rewritten for vectors 0.43
  1216. int d, i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
  1217. DBG( wchar_t c_ask='k'; )
  1218. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1219. /* half distance to the center */
  1220. d=2*sq(128/4);
  1221. /* now we check for the upper right end of the k */
  1222. if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
  1223. if (aa[0][2]>d/2) Break; /* upper left end */
  1224. if (aa[1][2]>d/2) Break; /* lower left end */
  1225. if (aa[2][2]>d/2) Break; /* lowerright end */
  1226. /*
  1227. type A B=italic ???
  1228. 18 OOO
  1229. O O O
  1230. O O6 O
  1231. O7 OO O OO
  1232. O4OO OO OO
  1233. O OO O O
  1234. O OO O O O
  1235. 2O3 O5 O OOO
  1236. */
  1237. i1=i8=aa[0][3];
  1238. i2=i3=aa[1][3];
  1239. i5= aa[2][3];
  1240. // check the bow from below
  /* walk the frame vectors from i2 towards i5 (wrapping at
     num_frame_vectors[0]); i4 tracks the vector with the smallest y seen */
  1241. for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
  1242. if (box1->frame_vector[ i][1]
  1243. <box1->frame_vector[i4][1]) i4=i; // get next maximum
  1244. if (box1->frame_vector[ i][1]<=y0) break; // fatal!
  1245. }
  1246. if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
  1247. if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
  1248. i6=nearest_frame_vector(box1, i5, i8, x1, (2*y0+y1)/3);
  1249. // two steps for i7 to go around pitfalls on italic h
  1250. i7=nearest_frame_vector(box1, i6, i8, x0, y1);
  1251. i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
  1252. i =nearest_frame_vector(box1, i5, i6, x0, (y0+2*y1)/3);
  1253. if (x1-box1->frame_vector[i][0]<dx/4) Break; // h
  1254. if (x1-box1->frame_vector[i][0]<dx/2) ad=98*ad/100;
  1255. MSG(fprintf(stderr,"i1-7 ad %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,ad);)
  /* pixel based part: i1, i2, i3 are reused as plain scan results below */
  1256. if( num_cross(0, dx-1,0,0,bp,cs) != 1
  1257. && num_cross(0, dx-1,1,1,bp,cs) != 1 ) Break;
  1258. if( num_cross(0,3*dx/4, dy/8 , dy/8 ,bp,cs) != 1
  1259. || num_cross(0,3*dx/4,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
  1260. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
  1261. && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
  1262. if( dx<8
  1263. && num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) != 2
  1264. && num_cross(dx-2,dx-2,dy/4,dy-1,bp,cs) != 2 ) Break;
  1265. i1=loop(bp,0,dy/2-dy/4,dx,cs,0,RI);
  1266. i2=loop(bp,0,dy/2 ,dx,cs,0,RI);if(i2>dx/2) Break;
  1267. i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI);
  1268. if (abs(i1+i3-2*i2)>(dx+8)/16+1 || i1<i3-1) Break; // v-line on left side?
  1269. if( get_bw(x0 ,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
  1270. if( get_bw(x0+dx/2,x1, y1-dy/3,y1 ,box1->p,cs,1) != 1 ) Break;
  1271. if( get_bw(x1-dx/4,x1, y0 ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break;
  1272. if( get_bw(x1-dx/4,x1, y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1
  1273. if( get_bw(x1-dx/4,x1, y1-dy/8,y1 ,box1->p,cs,1) != 1 ) Break;
  1274. if (sdata->holes.num > 0)
  1275. if (sdata->holes.hole[0].y0 > dy/4) Break;
  1276. // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break;
  1277. for(y=y0+1;y<y0+dy/2;y++) // gap ???
  1278. if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
  1279. if( y<y0+dy/2 ) Break;
  /* i is cleared by the first column in the left half for which get_line()
     over the full height exceeds 50 */
  1280. for(i=1,x=x0;x<=x0+dx/2 && i;x++)
  1281. if(get_line(x,y0 ,x ,y1,box1->p,cs,100)>50) i=0;
  1282. if( i ) Break; // no vertikal line!
  1283. /* check for falling line in the lower left corner */
  /* x = largest scan from the right edge over rows 5*dy/8..7*dy/8-1,
     j = the row where it was found */
  1284. for (j=x=0,y=5*dy/8;y<7*dy/8;y++) {
  1285. i= loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; }
  1286. } // x=dx/6 on fat k
  /* NOTE(review): y still holds the loop end value 7*dy/8 in the next two
     lines, not the row j found above -- confirm that this is intended */
  1287. if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 <dx/4) Break;
  1288. if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 <dx/2) ad=98*ad/100;
  1289. x=dx-1-x; y=j;
  1290. i =loop(bp,dx-1,dy-1,dx,cs,0,LE); if(i>dx/2)
  1291. i =loop(bp,dx-1,dy-2,dx,cs,0,LE); if(i>dx/2) Break;
  1292. i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2;
  1293. if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break;
  1294. for(y=y0+dy/3;y<y1;y++) if( num_cross(x0,x1,y,y,box1->p,cs)==2 ) break;
  1295. if( y==y1 ) Break;
  1296. if(
  1297. // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens!
  1298. sdata->holes.num > 0 )
  1299. if (sdata->holes.hole[0].x1>dx-1-dx/4
  1300. || sdata->holes.hole[0].y1>dy-1-dy/4
  1301. || sdata->holes.hole[0].y0< dy/4) Break;
  1302. // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break;
  1303. i=loop(bp,0,dy-1,dx,cs,0,RI);
  1304. i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz
  /* soft penalties: each one scales ad down by a few percent */
  1305. i =loop(bp,0,dy/4,dx,cs,0,RI);
  1306. if (i>dx/4
  1307. && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2
  1308. && loop(bp, 0,0,dx,cs,0,RI)<=dx/4
  1309. && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q
  1310. if( 2*y0>(box1->m1+box1->m2) ) ad=99*ad/100;
  1311. if ( gchar) ad=99*ad/100;
  1312. if (!hchar) ad=99*ad/100;
  1313. Setac(box1,'k',ad);
  1314. break;
  1315. }
  1316. return box1->c;
  1317. }
  /*
   * ocr0_K -- rule-based test for the capital letter 'K'.
   *
   * Inputs are taken from sdata: the box under test (box1), its pixel map
   * (bp), the hchar/gchar flags, the threshold cs and the hole list.
   * All rules live in a run-once for(;;) block: a failed hard rule leaves the
   * block through Break, soft rules only scale ad (starting at 100) down, and
   * if the end of the block is reached the candidate is registered with
   * Setac(box1,'K',ad).
   * Returns box1->c.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), get_line2() and the
   * Break/Setac/DBG/MSG macros are defined outside this block; the comments
   * here only describe how their results are used -- confirm details there.
   */
  1318. static wchar_t ocr0_K(ocr0_shared_t *sdata){
  1319. struct box *box1=sdata->box1;
  1320. pix *bp=sdata->bp;
  1321. int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1322. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1323. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1324. ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* tmp-vars */
  1325. // --- test K ---------------------------------------------------
  1326. for(ad=d=100;dx>2 && dy>3;){ // Mar2000 perfect??? no ocr-a X Jul09
  1327. DBG( wchar_t c_ask='K'; )
  1328. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  /* every row from dy/8 up to dy-dy/8 must pass get_bw() on the left half */
  1329. for(y=dy/8;y<dy-dy/8;y++)
  1330. if( !get_bw(0,dx/2,y,y,bp,cs,1) ) break;
  1331. if( y<dy-dy/8 ) Break;
  /* upper gap: some column in the middle half must give y>=dy/4 from the top.
     NOTE(review): j starts at 0 and is only assigned inside the
     "dy>15 && j>dy/8" branch below, so that branch cannot be entered as
     written -- confirm the intended condition. */
  1332. for(j=0,i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  1333. y= loop(box1->p,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; }
  1334. if (dy>15 && j>dy/8){
  1335. j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2;
  1336. y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1;
  1337. }
  1338. if(y>=dy/4) i=0; /* ok, found gap */
  1339. } if( i ) Break;
  /* lower gap: y collects the largest upward loop() result over the middle
     columns; a result <=dy/8 rejects, <dy/4 costs 20%.
     NOTE(review): i1 is stored here but not read again in this function. */
  1340. for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
  1341. i=loop(box1->p,x,y1,dy,cs,0,UP);
  1342. /* on small chars bypass possible low left serifs */
  1343. if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
  1344. if (i2>1) i+=i2-1; }
  1345. if (i>y) { y=i; i1=x; }
  1346. } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
  /* the following scans each require at least one line with the stated
     num_cross() count, otherwise the candidate is rejected */
  1347. for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){
  1348. if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
  1349. } if( i ) Break;
  1350. for(i=1,y=y0;y<=y0+dy/4 && i;y++){
  1351. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  1352. } if( i ) Break;
  1353. if( dx<10 ){
  1354. for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){
  1355. if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
  1356. } if( i ) Break;
  1357. }
  1358. for(i=1,y=y1-dy/4;y<=y1 && i;y++){
  1359. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  1360. } if( i ) Break;
  1361. if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k
  1362. if( loop(bp,0, dy/4,dx,cs,0,RI)
  1363. +loop(bp,0,3*dy/4,dx,cs,0,RI)
  1364. <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 )
  1365. { if (dy>=16) { Break; } else ad=98*ad/100; } // ~X Jul09
  /* right-hand profile: the loop(..LE) result from x1 must be larger around
     the middle (dy/2 or 3*dy/8) than at dy/4 and than at y1-dy/4 */
  1366. i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break;
  1367. j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE);
  1368. x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x;
  1369. if(j<=i ) Break; i=j;
  1370. j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break;
  1371. // out_x(box1); // detailed analysis
  1372. //
  1373. // a d <= that are main points of K
  1374. // | /
  1375. // b/e
  1376. // | \ .
  1377. // c f
  /* compute the points a..f of the sketch above from bp */
  1378. ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2;
  1379. yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2;
  1380. yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE);
  /* d and f keep the smallest, e the largest loop(..LE) result seen from
     column dx-1; b keeps the smallest x candidate around the middle rows */
  1381. for(yd=ye=yf=xe=y=i=0,xf=xd=dx;y<dy/4;y++){ // range 0..1/4
  1382. x =loop(bp,dx-1, y,dx,cs,0,LE); if(x<xd){ xd=x;yd= y; }
  1383. x =loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(x<xf){ xf=x;yf=dy-1-y; }
  1384. x =loop(bp,dx-1,dy/2+y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2+y; }
  1385. x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; }
  1386. #if 0 // removed v0.2.4a2
  1387. x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border
  1388. x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross
  1389. x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2+y; }
  1390. #endif
  1391. x =loop(bp,0 ,dy/2-y,dx,cs,0,RI);
  1392. x+=loop(bp,x ,dy/2-y,dx,cs,1,RI); // test 2nd cross
  1393. x+=loop(bp,x ,dy/2-y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2-y; }
  1394. x =dx-1-loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x<xb){ xb=x;yb=dy/2-y; }
  1395. }
  /* xd, xe, xf were collected as loop() results counted from column dx-1;
     turn them into x positions */
  1396. xd=dx-1-xd;xe=dx-1-xe;xf=dx-1-xf;
  1397. xb+=loop(bp,xb,yb,dx,cs,1,RI)/4; // detect center of line
  1398. xe-=loop(bp,xe,ye,dx,cs,1,LE)/4;
  1399. xd-=loop(bp,xd,yd,dx,cs,1,LE)/4;
  1400. xf-=loop(bp,xf,yf,dx,cs,1,LE)/4;
  1401. #if 0
  1402. MSG( \
  1403. printf("a=%d %d b=%d %d c=%d %d d=%d %d e=%d %d f=%d %d dxdy %d %d",\
  1404. xa,ya,xb,yb,xc,yc,xd,yd,xe,ye,xf,yf,dx,dy);\
  1405. )
  1406. #endif
  /* hard rules on the connections a-c, b-d and e-f: a get_line2() result
     below 95 rejects (b-d gets a second try shifted left by i/2) */
  1407. if( get_line2(xa,ya,xc,yc,bp,cs,100)<95 ) Break;
  1408. if( dx>8 ){ // example szaka0103
  1409. if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n}
  1410. i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22
  1411. if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up
  1412. if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break;
  1413. if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) Break; // right down
  1414. xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n}
  1415. } else {
  1416. if( dy<16 && !hchar ) Break;
  1417. if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4
  1418. && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4
  1419. && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x
  1420. }
  1421. if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){
  1422. ad=99*ad/100; /* broken B ? */
  1423. if (sdata->holes.num > 0)
  1424. if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
  1425. // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B
  1426. }
  /* soft penalties that apply only when box1->m3 is non-zero */
  1427. if(box1->m3 && !hchar) ad=99*ad/100;
  1428. if(box1->m3 && gchar) ad=99*ad/100;
  1429. // printf(" ok xe=%d",xe);
  1430. Setac(box1,'K',ad);
  1431. break;
  1432. }
  1433. return box1->c;
  1434. }
  /*
   * ocr0_f -- rule-based test for the lowercase letter 'f' (written, per the
   * comment below, along the lines of the 't' test).
   *
   * Inputs are taken from sdata: the box under test (box1), its pixel map
   * (bp), the hchar/gchar flags, the threshold cs, the hole list and aa, a
   * table of four entries (x,y,dist^2,vector_idx).
   * All rules live in a run-once for(;;) block: a failed hard rule leaves the
   * block through Break, soft rules adjust ad (starting at 100), and if the
   * end of the block is reached the candidate is registered with
   * Setac(box1,'f',ad).
   * Returns box1->c.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), getpixel(), sq(),
   * nearest_frame_vector() and the Break/Setac/DBG/MSG macros are defined
   * outside this block; confirm their exact semantics there.
   */
  1435. static wchar_t ocr0_f(ocr0_shared_t *sdata){
  1436. struct box *box1=sdata->box1;
  1437. pix *bp=sdata->bp;
  1438. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1439. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1440. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1441. (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
  1442. ab[8][4], /* special points (x,y,dist^2,vector_idx) */
  1443. ad; /* tmp-vars */
  1444. /* x=mindist_to_a y=0 "t"
  1445. 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. y>0 "f"
  1446. 1>.$..$ 1>.$.. 1>.$$$ start right bow .$7. .$..
  1447. .@... .@.. 2>.@@. start upper end .@.. .@..
  1448. 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. $$$.
  1449. 3>$@$$. 3>$@$. $@@$ $@$. .@..
  1450. 4>.$... 4>.$.. 4>.$$. lower end .$.. .$..
  1451. .@... .@.. .@@. .@.. .@..
  1452. .@... .@.. .@@. .@.. .@..
  1453. 5>.$... 5>.$.. 5>.$$. lower start .$.. .$..
  1454. 6>..... 6>$... 6>.... optional left bow
  1455. */
  1456. // --- test f like t ---------------------------------------------------
  1457. for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar!
  1458. // rewritten for vectors 0.43
  /* NOTE(review): the d declared on the next line shadows the function-scope
     d that the for-init above set to 100. */
  1459. int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners
  1460. DBG( wchar_t c_ask='f'; )
  1461. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1462. /* half distance to the center */
  1463. d=2*sq(128/4);
  1464. /* now we check for the upper right end of the h */
  1465. if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */
  1466. if (aa[0][2]>d ) Break; /* upper left end */
  1467. /*
  1468. 9
  1469. OOO
  1470. O 7 O8
  1471. O6
  1472. 1OOOO5
  1473. O4
  1474. O
  1475. 2O3
  1476. OOOOO
  1477. */
  /* i1..i9 are frame-vector indices returned by nearest_frame_vector();
     presumably they correspond to the digits in the sketch above -- confirm */
  1478. i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8);
  1479. /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */
  1480. i =aa[1][3]; if (box1->frame_vector[i][1]<y1-dy/8)
  1481. i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y1+dy/4);
  1482. i2=nearest_frame_vector(box1, i1, i, x1, y1);
  1483. i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y1+dy/4);
  1484. i3=nearest_frame_vector(box1, i,aa[3][3], x0, y1);
  1485. i7=nearest_frame_vector(box1, i3,aa[3][3],(x0+x1)/2, y0);
  1486. i8=nearest_frame_vector(box1, i7,aa[0][3], x1, (3*y0+y1)/4);
  1487. i9=nearest_frame_vector(box1,aa[3][3],aa[0][3],(x0+2*x1)/3,y0-dy/4);
  1488. i5=nearest_frame_vector(box1, i3, i7, x1+dx/4, (5*y0+3*y1)/8);
  1489. i4=nearest_frame_vector(box1, i3, i5, x0, (3*y0+y1)/4);
  1490. i6=nearest_frame_vector(box1, i5, i7, x0, (y0+3*y1)/4);
  1491. MSG(fprintf(stderr,"i1-9 %d %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,i8,i9);)
  1492. // check if vertical line is near to the left side
  1493. if (box1->frame_vector[i2][0]-x0>dx/2) Break; // ~3
  1494. i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2);
  1495. // MSG(fprintf(stderr,"i %d",i);)
  1496. if (box1->frame_vector[i ][0]
  1497. -box1->frame_vector[i9][0]>dx/8) Break; // ~3
  1498. if( (box1->dots) ) Break; // Bold-face is gchar
  1499. if (dy<=box1->m3-box1->m2+1) Break;
  /* scan rows from the top: x keeps the largest second loop() result seen
     (documented below as the thickest width) and j the row where it occurred;
     the candidate is rejected if the scan stops before passing 5*dy/8 */
  1500. for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2
  1501. i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break;
  1502. i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; }
  1503. if( y<3*dy/4 && y>dy/4
  1504. && num_cross(0,dx-1,y ,y ,bp,cs) != 1
  1505. && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise
  1506. ) break;
  1507. } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || y<dy/8 ) Break;
  1508. // x is thickest width of vertical line here
  1509. i=loop(bp,(dx+1)/2,0,dy,cs,0,DO)/2;
  1510. if( i>dy/8
  1511. && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0
  1512. && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y
  1513. if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8
  1514. && loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow
  1515. i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1;
  1516. if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break;
  1517. // the middle bar appear in a wide vertical range, get part below
  /* from here on i1/i2 are reused as plain ints: i1 = smallest run length
     found below row y, i2 = the row j where it was found */
  1518. for (i1=dx,i2=y,j=y+1;j<dy-dy/4;j++){
  1519. i=loop(bp,0,j,dx,cs,0,RI);
  1520. i=loop(bp,i,j,dx,cs,1,RI); // thickness vert. line
  1521. if (i<i1) { i1=i; i2=j; if (2*i<=x) break; }
  1522. } i=i1; j=i2; /* i=dx, j=y below horiz-bar */
  1523. MSG(fprintf(stderr,"j=%d i=%d y=%d x=%d",j,i,y,x);)
  1524. // bar should have twice of the thickness of v-line
  1525. if (x<2*i && x<dx) Break;
  1526. if (x<i+2+dx/8) ad=97*ad/100; // fat f
  1527. // check for the upper bow to the right top side
  /* i3 is reassigned here; its coordinates and index are stored in ab[7] */
  1528. i3=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0);
  1529. MSG(fprintf(stderr,"xy= %d %d %d %d",x0,y0,\
  1530. box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
  1531. ab[7][0]=box1->frame_vector[i3][0];
  1532. ab[7][1]=box1->frame_vector[i3][1];
  1533. ab[7][3]=i3;
  1534. if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t
  1535. // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0)
  1536. // as the upper right end of the "t"
  1537. if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t
  1538. j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f
  1539. if ((2*x<dx && j<=dx/2) || 3*x<dx) Break; // bar should be not to small
  1540. for(i=dy/8;i<dy;i++)
  1541. if (loop(bp,0,i,dx,cs,0,RI)>(j+dx/4)) break;
  1542. if (i<dy) Break; // check for v-line
  1543. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<dx/2 )
  1544. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-1
  1545. <=loop(bp,dx-1, y ,dx,cs,0,LE) )
  1546. if( loop(bp,dx-1, y-1,dx,cs,0,LE)
  1547. <=loop(bp,dx-1, y ,dx,cs,0,LE) ) Break; // ~1
  1548. if( loop(bp,0,dy/2,dx,cs,0,RI)-1
  1549. >loop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X
  1550. i=y;j=1; // j used as flag
  1551. if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r
  1552. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1
  1553. && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc.
  1554. // check for upper bow to right
  /* j is cleared as soon as a row above i has num_cross()==2; in that case ad
     is set to (ad+101)/2 and the pixel scan below is skipped (it needs j!=0) */
  1555. for(y=1;j && y<i; y++) // no @@ pattern
  1556. if( num_cross(0,dx-1,y ,y ,bp,cs) ==2 ) j=0;
  1557. if (j==0) { ad=(ad+101)/2; }
  /* NOTE(review): x runs up to dx-1 in the scan below, so
     getpixel(bp,x+1,..) is asked for column dx -- confirm that getpixel()
     tolerates coordinates outside the box. */
  1558. for(y=1;j && y<i; y++) // no @@ pattern, try to detect it
  1559. for(x=0;j && x<dx ;x++){ // ..
  1560. if( (getpixel(bp,x ,y )>=cs || dx<7) && getpixel(bp,x+1,y )>=cs
  1561. && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs )
  1562. { j=0;break; }
  1563. } if(j) ad=98*ad/100; // not detected
  1564. // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e
  1565. if (sdata->holes.num != 0) Break; // ~e
  /* i1 / i2: smallest loop() result from the left / right border over the
     rows 7*dy/8 .. dy-1 */
  1566. for(i1=i2=dx,y=7*dy/8;y<dy;y++){
  1567. x=loop(bp,0 ,y,dx,cs,0,RI);if(x<i1)i1=x;
  1568. x=loop(bp,dx-1,y,dx,cs,0,LE);if(x<i2)i2=x;
  1569. }
  1570. if(i1>i2+dx/4) Break; // ~t ~e
  1571. if(i1>i2+1) ad=96*ad/100; // ~t ~e
  1572. if( loop(bp,0,3*dy/4,dx,cs,0,RI)<i1-dx/4 ) Break;
  1573. if( dx>5 && !hchar)
  1574. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 )
  1575. if( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dx/2 ) Break; // ~c
  1576. if( dx>8 )
  1577. if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3
  1578. || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 )
  1579. if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S
  1580. if (!hchar)
  1581. if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0
  1582. && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2
  1583. && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r
  1584. if (dy>15)
  1585. if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1
  1586. && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H
  /* the remaining rules are soft: they only scale ad down (or, at the very
     end, add one point) and never reject */
  1587. if( dx>4 )
  1588. if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)-
  1589. loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1
  1590. && loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)<dy/4 ) {
  1591. if( loop(bp,dx-1 ,5*dy/16,dx,cs,0,LE)-
  1592. loop(bp,0 ,5*dy/16,dx,cs,0,RI)>=dx/5+1) ad=98*ad/100; // ~E
  1593. i=loop(bp,dx/8,0,dy,cs,0,DO);
  1594. if (i<dy/8 || i>dy/2) {
  1595. ad=98*ad/100; // ~E, could also be a "f" with big serifs
  1596. MSG(fprintf(stderr,"ad=%d",ad);) }
  1597. if (!gchar) { ad=98*ad/100;
  1598. MSG(fprintf(stderr,"ad=%d",ad);) }
  1599. }
  1600. i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2;
  1601. if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)<dy/4)
  1602. if (loop(bp,0 ,3*dy/4,dx ,cs,0,RI)<dx/4) {
  1603. ad=98*ad/100; // ~E but serif-f
  1604. MSG(fprintf(stderr,"ad=%d",ad);) }
  1605. if( loop(bp,0,dy/4,dx ,cs,0,RI)>1
  1606. && loop(bp,0, 0,dy/4,cs,0,DO)<dy/4 ) {
  1607. ad=95*ad/100; // ~I
  1608. MSG(fprintf(stderr,"ad=%d",ad);) }
  1609. if (get_bw(x0+dx/16,x1-dx/16,y0,y0,box1->p,cs,2) == 0) { // white pixels?
  1610. ad=98*ad/100; // F
  1611. MSG(fprintf(stderr,"ad=%d",ad);) }
  1612. if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% !
  1613. if (box1->m4>0 && gchar && ad<99 &&
  1614. 8*box1->y1 >= box1->m4*7+box1->m3) ad++;
  1615. Setac(box1,'f',ad);
  1616. break;
  1617. }
  1618. return box1->c;
  1619. }
  /*
   * ocr0_bB -- rule-based tests for the letters 'B' and 'b'.
   *
   * Inputs are taken from sdata: the box under test (box1), its pixel map
   * (bp), the hchar/gchar flags, the threshold cs and the hole list.
   * The function runs two independent run-once for(;;) blocks, first for 'B'
   * (needs exactly two holes), then for 'b' (needs exactly one hole). In each
   * block a failed hard rule leaves the block through Break, soft rules scale
   * ad (starting at 100) down, and reaching the end registers the candidate
   * with Setac().
   * Returns 'b' directly when the 'b' test finishes with ad>=100, otherwise
   * box1->c.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), num_hole() and the
   * Break/Setac/DBG macros are defined outside this block; confirm their
   * exact semantics there.
   */
  1620. static wchar_t ocr0_bB(ocr0_shared_t *sdata){
  1621. struct box *box1=sdata->box1;
  1622. pix *bp=sdata->bp;
  1623. int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  1624. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1625. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1626. ad; /* tmp-vars */
  1627. // --- test B ---------------------------------------------------
  1628. for(ad=d=100;dx>2 && dy>4;){ // min 3x4
  1629. DBG( wchar_t c_ask='B'; )
  1630. if (sdata->holes.num < 2) Break; /* tolerant against a tiny hole */
  /* left side: every row must pass get_bw() within the left half (upper
     rows) resp. the left third (lower rows) */
  1631. for(i=1,y=y0;y<y1-dy/2 && i;y++)
  1632. if( get_bw(x0,x0+dx/2, y , y ,box1->p,cs,1) != 1 ) i=0;
  1633. if( !i ) Break;
  1634. for(i=1,y=y1-dy/2;y<y1 && i;y++)
  1635. if( get_bw(x0,x0+dx/3, y , y ,box1->p,cs,1) != 1 ) i=0;
  1636. if( !i ) Break;
  1637. if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break;
  1638. if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 )
  1639. if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break;
  1640. /* --- detect center of lower hole --- */
  /* from here y is an absolute image row (derived from y1), used below both
     as y-y0 with bp and directly against the hole bounds */
  1641. y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break;
  1642. y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break;
  1643. y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (y<y0+3*dy/8) Break;
  1644. if (y<y0+dy/2) ad=96*ad/100;
  1645. if( num_cross(0,dx-1,y-y0 ,y-y0 ,bp,cs) != 2 )
  1646. if( num_cross(0,dx-1,y-y0+1,y-y0+1,bp,cs) != 2 ) Break;
  1647. if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
  1648. if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 )
  1649. if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 2 ) Break;
  /* y is reused: first row in dy/4..3*dy/4 (relative to bp) with
     num_cross()==1; reject if there is none */
  1650. for( y=dy/4;y<3*dy/4;y++ ) if( num_cross(0,dx-1,y,y,bp,cs)==1 ) break;
  1651. if( y==3*dy/4 ) Break;
  1652. if( loop(box1->p,x0,y0+ y ,dx,cs,0,RI)
  1653. > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 )
  1654. if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 )
  1655. if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8
  /* i1, i2, i3: loop() results from the left border at about 1/4, 1/2 (the
     largest of three nearby rows) and 3/4 of the height */
  1656. i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI);
  1657. i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
  1658. i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i;
  1659. i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i;
  1660. i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI);
  1661. if(dy>16 && i3<i2 && i1+i3<2*i2){
  1662. if (i3+i1<2*i2-dx/16) ad=98*ad/100; // ~8
  1663. if (i3+i1<2*i2-dx/8 ) ad=96*ad/100;
  1664. if( loop(box1->p,x0,y0+ 1 ,dx,cs,0,RI)
  1665. >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
  1666. if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI)
  1667. > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
  1668. if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI)
  1669. > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 )
  1670. if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI)
  1671. > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00
  1672. }
  /* exactly two holes are required; reject if both start above y-1 or both
     end below y+1 (y is still the row found by the num_cross()==1 scan) */
  1673. if (sdata->holes.num != 2) Break;
  1674. if (sdata->holes.hole[0].y0 < y-1
  1675. && sdata->holes.hole[1].y0 < y-1 ) Break;
  1676. if (sdata->holes.hole[0].y1 > y+1
  1677. && sdata->holes.hole[1].y1 > y+1 ) Break;
  1678. // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break;
  1679. // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break;
  1680. // out_x(box1);
  1681. for( x=dx,y=dy/6; y<dy-dy/8; y++ ) // left border straight
  1682. { i=loop(box1->p,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break;
  1683. if(i<x) x=i;
  1684. } if( y<dy-dy/8 ) Break; // ~8 bad_a
  1685. for( x=dx,y=1;y<dy/4;y++ ) // right border straight
  1686. { i=loop(bp,dx-1,dy-y,dx,cs,0,LE);
  1687. if( i<x ) x=i; else if( i>x )break;
  1688. } if( y<dy/4 ) Break; // ~ff (serifs?)
  1689. x=loop(bp,0,dy/2 ,dx,cs,0,RI);
  1690. i=loop(bp,0,dy/2-1,dx,cs,0,RI); if (i>x) x=i; // allow dust
  1691. i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i;
  1692. if ( loop(bp,0, dy/8,dx,cs,0,RI)
  1693. +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not convex!
  1694. if(!hchar){ // ~ fat_a
  1695. ad=99*ad/100;
  1696. x =loop(bp,0,dy/4,dx,cs,0,RI);
  1697. if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100;
  1698. }
  1699. if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality
  1700. if (gchar) ad=99*ad/100;
  1701. Setac(box1,'B',ad);
  1702. break;
  1703. }
  1704. // --- test b ---------------------------------------------------
  1705. for(ad=d=100;dx>3 && dy>4;){ // min 3x4
  1706. DBG( wchar_t c_ask='b'; )
  1707. if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
  /* NOTE(review): the Break in the scan below sits inside the inner for loop,
     so (assuming Break expands to a break statement) it only ends the scan;
     the test on y that follows decides whether the candidate is dropped. */
  1708. for(y=y0;y<y1;y++)
  1709. if( get_bw(x0 , x0+dx/2, y , y ,box1->p,cs,1) != 1 ) Break;
  1710. if(y<y1-dy/32-1) Break;
  1711. if( get_bw(x0+ dx/2, x0+dx/2, y1-dy/3, y1 ,box1->p,cs,1) != 1 ) Break;
  1712. if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
  1713. if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break;
  1714. if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break;
  1715. if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // &
  1716. if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 )
  1717. if( dy<16 ||
  1718. num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b
  /* in the rows dy/2 .. dy-dy/8, rows with num_cross()==2 (counted in i) must
     be at least twice as many as the others (counted in j) */
  1719. for(i=j=0,y=dy/2;y<dy-dy/8;y++)
  1720. if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i++; else j++;
  1721. if( i<2*j ) Break; // v024a4
  1722. if (sdata->holes.num != 1) Break;
  1723. if (sdata->holes.hole[0].y0 < dy/4) Break;
  /* soft rule: hole bounding-box area below 1/16 of dx*dy costs 10% */
  1724. if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1)
  1725. *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16
  1726. < dx*dy) ad=90*ad/100; // hole to small
  1727. if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break;
  1728. i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
  1729. j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break;
  1730. if (!hchar) ad=99*ad/100;
  1731. if ( gchar) ad=99*ad/100;
  1732. Setac(box1,'b',ad);
  /* a 'b' with no penalty at all is returned immediately */
  1733. if (ad>=100) return 'b';
  1734. break;
  1735. }
  1736. return box1->c;
  1737. }
  /*
   * ocr0_dD -- rule-based tests for the letters 'D' and 'd'.
   *
   * Inputs are taken from sdata: the box under test (box1), its pixel map
   * (bp), the hchar/gchar flags, the threshold cs and the hole list.
   * The function runs two independent run-once for(;;) blocks, first for 'D',
   * then for 'd'. In each block a failed hard rule leaves the block through
   * Break, soft rules scale ad (starting at 100) down, and reaching the end
   * registers the candidate with Setac().
   * Returns box1->c.
   *
   * NOTE(review): loop(), get_bw(), num_cross() and the Break/Setac/DBG/MSG
   * macros are defined outside this block; confirm their exact semantics
   * there.
   */
  1738. static wchar_t ocr0_dD(ocr0_shared_t *sdata){
  1739. struct box *box1=sdata->box1;
  1740. pix *bp=sdata->bp;
  1741. int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar,
  1742. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1743. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1744. ad; /* tmp-vars */
  1745. // --- test D ---------------------------------------------------
  1746. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  1747. DBG( wchar_t c_ask='D'; )
  1748. if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
  1749. if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
  1750. if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
  1751. if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break;
  1752. if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break;
  1753. if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 )
  1754. if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break;
  1755. if( num_cross(x0 ,x1 ,y0+dy/3,y0+dy/3,box1->p,cs) != 2 ) Break;
  1756. if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break;
  /* exactly one hole, and it must reach from the upper third into the lower
     third (hole y0/y1 are compared against dy-based values here) */
  1757. if (sdata->holes.num != 1) Break;
  1758. if (sdata->holes.hole[0].y0 > dy/3) Break;
  1759. if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
  1760. // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
  1761. // test if left edge is straight
  /* scanning upwards, x keeps the largest loop() result so far; a row that
     is 2+dx/16 or more below that maximum stops the scan and rejects */
  1762. for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){
  1763. i=loop(bp,0,y,x1-x0,cs,0,RI);
  1764. if( i+2+dx/16<=x ) break;
  1765. if( i>x ) x=i;
  1766. }
  1767. if (y>=dy/5 ) Break;
  1768. /* test if right edge is falling */
  1769. for(x=dx,y=0;y<dy/3;y++){
  1770. i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
  1771. if( i>x+dx/16 ) break;
  1772. if( i<x ) x=i;
  1773. }
  1774. if (y<dy/3 ) Break;
  1775. /* test if right edge is raising */
  1776. for(x=dx,y=bp->y-1;y>2*dy/3;y--){
  1777. i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
  1778. if( i>x+dx/16 ) break;
  1779. if( i<x ) x=i;
  1780. }
  1781. if (y>2*dy/3 ) Break;
  1782. if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <=
  1783. loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P
  1784. y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2;
  1785. if ( y>=dy/16 ) { y-=dy/16;
  1786. if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A
  1787. }
  /* ya / yb: upward loop() results from the bottom row at column 0 and at
     column dx/16+1 */
  1788. ya=loop(bp, 0,dy-1,dy,cs,0,UP);
  1789. yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP);
  1790. if (ya<dy/2 && ya>dy/16 && ya>yb) Break; // ~O
  /* the remaining rules of the D test are soft penalties only */
  1791. if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>dx/16) ad=99*ad/100; // O? 0907
  1792. if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>=
  1793. loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) ad=97*ad/100; // ~O 0907
  1794. if ( loop(bp, dx/2, 0,dy,cs,0,DO)
  1795. -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b
  1796. if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2
  1797. && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2
  1798. && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O
  1799. if(box1->dots) ad=ad*94/100;
  1800. if ( gchar) ad=99*ad/100;
  1801. if (!hchar) ad=99*ad/100;
  1802. Setac(box1,'D',ad);
  1803. break;
  1804. }
  1805. // --- test d ---------------------------------------------------
  /* unlike the D test, ad is reset to 100 inside the block rather than in the
     for-init */
  1806. for(d=100;dx>2 && dy>3;){ // min 3x4
  1807. DBG( wchar_t c_ask='d'; )
  1808. ad=100;
  1809. if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
  1810. if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break;
  1811. if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
  1812. if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
  1813. if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break;
  1814. if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break;
  1815. if(dy>19) // 0..dx/3 dont work on serif d, 0..dx/4 is more safe
  1816. if( get_bw(x0 , x0+dx/4, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break;
  1817. if( get_bw(x0 , x0+dx/4, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break;
  1818. if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break;
  1819. if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A
  1820. if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) >
  1821. loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break;
  1822. // more than dx/8 of width should show two horizontal lines (o-like)
  /* i counts down from dx/8+1 for every column with num_cross()==2; more
     than one missing column rejects, exactly one costs 1% */
  1823. for (i=dx/8+1,x=0;x<dx && i;x++) {
  1824. if (num_cross(x, x, 0, dy-1, bp, cs) == 2) i--; // fails for serif?
  1825. } if (i>1) Break; // "d as a" patch
  1826. if (i==1) ad=99*ad/100; // "d as a" patch, serif?
  /* same idea per row from dy/4 down: rows with 2 crossings decrement i, rows
     with more than 3 increment it; a non-zero rest costs 2%.
     NOTE(review): num_cross() is evaluated twice per row with identical
     arguments here. */
  1827. for(i=dy/6+1,y=dy/4;y<dy && i;y++){
  1828. if( num_cross(0 ,dx-1,y ,y , bp,cs) == 2 ) i--;
  1829. if( num_cross(0 ,dx-1,y ,y , bp,cs) > 3 ) i++; // ~al
  1830. } if( i ) ad=98*ad/100;
  /* the upper half needs dy/8+1 rows whose single crossing lies in the right
     half, otherwise reject */
  1831. for(i=dy/8+1,y=0;y<dy/2 && i;y++){
  1832. if( num_cross(0 ,dx-1,y ,y , bp,cs) == 1 )
  1833. if( num_cross(dx/2,dx-1,y ,y , bp,cs) == 1 ) i--;
  1834. } if( i ) Break;
  /* NOTE(review): holes.num<1 was already rejected at the top of this block,
     so the next check repeats it. */
  1835. if (sdata->holes.num<1) Break;
  1836. if (sdata->holes.num>1) {
  1837. if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample)
  1838. MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1););
  1839. if ( sdata->holes.hole[0].y0 < dy/4 ) Break;
  1840. if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et
  1841. // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break;
  1842. if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al
  1843. if (dy>15) { Break; } else ad=96*ad/100;
  1844. }
  1845. if (!hchar) ad=98*ad/100;
  1846. if ( gchar) ad=99*ad/100;
  1847. Setac(box1,'d',ad);
  1848. break;
  1849. }
  1850. return box1->c;
  1851. }
  /*
   * ocr0_F -- rule-based test for the capital letter 'F'.
   *
   * Inputs are taken from sdata: the box under test (box1), its pixel map
   * (bp), the hchar/gchar flags, the threshold cs and the hole list.
   * All rules live in a run-once for(;;) block: a failed hard rule leaves the
   * block through Break, soft rules scale ad (starting at 100) down, and if
   * the end of the block is reached the candidate is registered with
   * Setac(box1,'F',ad).
   * Returns box1->c.
   *
   * NOTE(review): loop(), get_bw(), num_cross() and the Break/Setac/DBG
   * macros are defined outside this block; confirm their exact semantics
   * there.
   */
  1852. static wchar_t ocr0_F(ocr0_shared_t *sdata){
  1853. struct box *box1=sdata->box1;
  1854. pix *bp=sdata->bp;
  1855. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1856. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1857. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1858. ad; /* tmp-vars */
  1859. // --- test F ---------------------------------------------------
  1860. for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx
  1861. DBG( wchar_t c_ask='F'; )
  1862. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1863. if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break;
  1864. if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break;
  1865. if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break;
  /* bottom dy/4 rows, scanned from the right border: every row must give a
     loop() result of at least 3 and at least dx/3, and the largest result (x)
     must reach dx/2 */
  1866. for (x=0,y=0;y<dy/4;y++) {
  1867. j=loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(j<3 || 3*j<dx) break; // ~f Jun00
  1868. if (j>x) x=j;
  1869. } if (y<dy/4 || x<dx/2) Break;
  1870. for( i=1,y=0; y<dy/4 && i; y++ ){ // long black line
  1871. j=loop(bp,0,y,dx,cs,0,RI);
  1872. j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/2 ) i=0; }
  1873. if( i ) Break;
  1874. x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
  1875. x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // stroke thickness
  1876. for( i=1,y=dy/3; y<dy-1-dy/3 && i; y++ ) // black line
  1877. { j=loop(bp,0,y,dx,cs,0,RI);
  1878. j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; }
  1879. if( i ) Break;
  1880. y=dy/8; if (y<1) y=1;
  1881. for( i=1; y<dy-1-dy/2; y++ ){ // search horizontal white gap
  1882. x =loop(bp,dx-1,y,dx,cs,0,LE); if(x<2) continue; // skip serifs
  1883. j =loop(bp,dx-x,y,dy/4,cs,0,UP);
  1884. x+=loop(bp,dx-x,y-j+1,dx,cs,0,LE); if (x>=dx/3) { i=0; break; }
  1885. }
  1886. if( i ) Break;
  1887. // check for vertical line on left side
  1888. for(i=1,y=1;y<=dy/2 && i;y++)
  1889. if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0;
  1890. if( !i ) Break;
  1891. for(i=1,y=dy/2;y<dy && i;y++)
  1892. if( get_bw(0,dx/3,y,y,bp,cs,1) != 1 ) i=0;
  1893. if( !i ) Break;
  /* NOTE(review): the rejection inside the next if uses a plain "break"
     instead of the Break macro used everywhere else in this function --
     confirm whether that is intentional. */
  1894. i=loop(bp,dx-1,dy-1,dx,cs,0,LE); // serif or E ?
  1895. if (i<=dx/3) {
  1896. if (loop(bp,dx-1,(dy+4)/8,dx,cs,0,LE)>dx/8 // no serif
  1897. || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break;
  1898. ad=99*ad/100;
  1899. }
  1900. if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E
  1901. if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break;
  1902. if( loop(bp,0, bp->y/4,dx,cs,0,RI) <
  1903. loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break;
  1904. // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break;
  1905. if (sdata->holes.num > 0) Break;
  /* at least one column from dx/4 rightwards must give num_cross()==2 */
  1906. for(i=0,x=dx/4;x<dx-1;x++)
  1907. if( num_cross(x,x,0,dy-2,bp,cs) == 2 ) i++;
  1908. if ( i<1 ) Break; // 0.2.4a4
  1909. if(dy<20) /* special case of small fi, not very elegant */
  1910. if( get_bw( 1, 1,1,1,bp,cs,1) == 1
  1911. && get_bw( 0, 0,2,2,bp,cs,1) == 1
  1912. && get_bw(dx-2,dx-1,0,0,bp,cs,1) == 0
  1913. && get_bw( 0, 1,0,0,bp,cs,1) == 0
  1914. && get_bw( 0, 0,0,1,bp,cs,1) == 0 ) Break;
  1915. // check for screen font f
  /* NOTE(review): the loop(..UP) call below passes dy-1 where the other
     loop() calls in this function pass an x position, and i (derived from a
     horizontal loop() result) where they pass a row -- the two arguments may
     be swapped; confirm against loop()'s signature. */
  1916. i= loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1;
  1917. if (i>=0 && loop(bp,dy-1,i,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100;
  1918. // check for screen font P
  1919. i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE);
  1920. if (i<1) {
  1921. j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE);
  1922. j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO);
  1923. if (j<=dy/2) {
  1924. i=loop(bp,bp->x-1,0,dx,cs,0,LE);
  1925. ad=ad*98/100;
  1926. if (i>dx/8) Break;
  1927. if (i) ad=98*ad/100;
  1928. }
  1929. }
  1930. if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4
  1931. if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r
  1932. }
  1933. if (gchar) ad=99*ad/100;
  1934. Setac(box1,'F',ad);
  1935. break;
  1936. }
  1937. return box1->c;
  1938. }
  1939. static wchar_t ocr0_uU(ocr0_shared_t *sdata){
  1940. struct box *box1=sdata->box1;
  1941. pix *bp=sdata->bp;
  1942. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  1943. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  1944. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  1945. ad; /* tmp-vars */
  1946. wchar_t bc=UNKNOWN;
  1947. // --- test uU ---------------------------------------------------
  1948. // in Mitte so breit wie oben (bei V kontinuierlich schmaler)
  1949. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  1950. DBG( wchar_t c_ask='u'; )
  1951. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  1952. for(y=y0+dy/4;y<y1-dy/4;y++) /* also handwritten u */
  1953. if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
  1954. if( y<y1-dy/4 ) Break;
  1955. if( get_bw(dx/2,dx/2,dy/2,dy-1,bp,cs,1)==0 ) Break;
  1956. if( get_bw(dx/2,dx-1,dy/2,dy/2,bp,cs,1)==0 ) Break;
  1957. for(i=0,x=3*dx/8;x<dx-dx/4;x++){
  1958. y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
  1959. } if( i<dy/4 ) Break; x--;
  1960. if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) Break;
  1961. if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) Break;
  1962. for(i=dy/8+2,y=dy/8;y<dy-(dy+2)/4 && i;y++){ // 12%+1 Fehler
  1963. j=num_cross(0,dx/2-((y>dy/2)?dx/8:0),y,y,bp,cs);
  1964. if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
  1965. if( y<dy/2 && (j<1 && j>2) ) { i--; ad=90*ad/100; }
  1966. if( y>dy/2 && j!=1 ) { i--; ad=95*ad/100; }
  1967. } if( !i ) Break;
  1968. for(i=dy/16+1,y=dy/8;y<dy-dy/4 && i;y++){ // 12%+1 Fehler
  1969. j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
  1970. if( y>dy/2 && (j<1 && j>2) ) i--;
  1971. if( y<dy/2 && j!=1 ) i--;
  1972. } if( !i ) Break;
  1973. for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
  1974. if( get_bw( x, x, y0, y0+dy/3,box1->p,cs,1) != 1 ) i=0;
  1975. } if( i ) Break;
  1976. for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
  1977. if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--;
  1978. } if( !i ) Break;
  1979. for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
  1980. if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0;
  1981. if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) ad=98*ad/100;
  1982. } if( !i ) Break;
  1983. if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
  1984. && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v}
  1985. i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
  1986. j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI);
  1987. if( i<j ) Break; // ~ll v0.2.4a3
  1988. if(dy>15)
  1989. if( loop(bp,dx-1,dy/16,dx,cs,0,LE)
  1990. > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thinn)
  1991. if( hchar && dy>7)
  1992. if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx
  1993. && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16
  1994. && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16
  1995. && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16
  1996. && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16
  1997. ) Break; // melted ll
  1998. i=loop(bp, 0,dy-2-dy/8,dx,cs,0,RI);
  1999. j=loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE);
  2000. if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v
  2001. if (i+j>=dx/2) ad=97*ad/100;
  2002. if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w
  2003. if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>0 ) ad=98*ad/100; // w
  2004. if (ad==100) ad=99; // ToDo: only if lines.wt<100
  2005. bc='u';
  2006. if (gchar) ad=98*ad/100;
  2007. if (hchar) bc='U';
  2008. if (box1->dots>0) ad=99*ad/100;
  2009. Setac(box1,bc,ad);
  2010. break;
  2011. }
  2012. return box1->c;
  2013. }
  2014. static wchar_t ocr0_micro(ocr0_shared_t *sdata){
  2015. struct box *box1=sdata->box1;
  2016. pix *bp=sdata->bp;
  2017. int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar,
  2018. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2019. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2020. ad; /* tmp-vars */
  2021. // --- test \mu &micro; MICRO_SIGN --------------------------------------
  2022. // in Mitte so breit wie oben (bei V kontinuierlich schmaler)
  2023. if( gchar && !hchar )
  2024. for(ad=d=100;dx>2 && dy>4;){ // min 3x4
  2025. DBG( wchar_t c_ask='u'; )
  2026. if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
  2027. for(y=y0+dy/8;y<box1->m3-dy/4;y++)
  2028. if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
  2029. if( y<box1->m3-dy/4 ) break;
  2030. if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
  2031. if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
  2032. for(y=dy/2;y<dy;y++){
  2033. x=loop(bp,dx-1,y,dx,cs,0,LE); if(8*x>5*dx) break;
  2034. } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y;
  2035. for(i=0,x=2*dx/8;x<dx-1-dx/4;x++){
  2036. y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
  2037. } if( i<dy/4 ) break; x--;
  2038. if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) break;
  2039. if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) break;
  2040. for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler
  2041. j=num_cross(0,dx/2,y,y,bp,cs);
  2042. if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
  2043. if( y<dy/2 && (j<1 && j>2) ) i--;
  2044. if( y>dy/2 && j!=1 ) i--;
  2045. } if( !i ) break;
  2046. for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler
  2047. j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
  2048. if( y>dy/2 && (j<1 && j>2) ) i--;
  2049. if( y<dy/2 && j!=1 ) i--;
  2050. } if( !i ) break;
  2051. for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
  2052. if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
  2053. } if( i ) break;
  2054. for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
  2055. if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--;
  2056. } if( !i ) break;
  2057. if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break;
  2058. if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break;
  2059. if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break;
  2060. if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break;
  2061. Setac(box1,MICRO_SIGN,ad);
  2062. break;
  2063. }
  2064. return box1->c;
  2065. }
  /*
   * ocr0_vV - rule-based test for the letters 'v' and 'V'.
   *
   * Reads the candidate box (sdata->box1), the pixel buffer sdata->bp, the
   * threshold sdata->cs, the hchar/gchar flags and sdata->holes.num.
   * bp is queried with box-relative coordinates (0..dx-1, 0..dy-1), while
   * box1->p is queried with the absolute coordinates x0..x1, y0..y1.
   *
   * The for(...) below has no increment and ends in an unconditional break,
   * so it is a single-pass block that failing rules leave early via Break
   * (macro defined outside this chunk; presumably break plus debug output).
   * ad starts at 100 and is scaled down (e.g. 99*ad/100) by rules that find
   * the shape less typical. If no rule rejects, Setac() is called with 'V'
   * when hchar is set and with 'v' otherwise.
   *
   * Returns box1->c (whatever it holds after the optional Setac call).
   */
  2066. static wchar_t ocr0_vV(ocr0_shared_t *sdata){
  2067. struct box *box1=sdata->box1;
  2068. pix *bp=sdata->bp;
  2069. int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  2070. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2071. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2072. ad; /* tmp-vars */
  2073. wchar_t bc=UNKNOWN;
  2074. // --- test v -------------------------------------------------
  2075. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  2076. DBG( wchar_t c_ask='v'; )
  2077. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  /* pick a box-relative point (x,y) that should lie inside the upper opening */
  2078. x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap
  2079. y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap
  2080. if (x>3*dx/4 || y<dy/4) Break;
  /* the same point, now in absolute coordinates on box1->p: get_bw must
     report 1 to its left, to its right and below it */
  2081. if( get_bw(x0,x0+x,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
  2082. if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
  2083. if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break;
  2084. if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v?
  2085. if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break;
  2086. // UVW
  2087. if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1)
  2088. && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v
  2089. && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */
  2090. || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1)
  2091. && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break;
  2092. // UV
  2093. if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
  2094. if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
  /* compare the runs measured from the left/right edge at several heights
     (upper row dy/6, lower rows dy-1-dy/3 and dy-1-dy/8) */
  2095. if( loop(bp,0 ,dy/6 ,dx,cs,0,RI)
  2096. >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) && dy>6 ) Break;
  2097. if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
  2098. >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
  2099. && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
  2100. >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ?
  2101. if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
  2102. >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
  2103. && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
  2104. >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21
  2105. if( loop(bp,dx-1,dy/6 ,dx,cs,0,LE)
  2106. >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && dy>6 ) Break;
  2107. x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS
  2108. x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U
  2109. if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
  2110. && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u}
  2111. #if 0
  2112. // measure thickness of lower v
  2113. i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
  2114. +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
  2115. j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI)
  2116. +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE);
  2117. if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y
  2118. #endif
  2119. /* V has serifs only on upper side! Y also on bottom, check it. Oct00 */
  /* i1 = larger of the two measurements on rows 0 and 1, i2 = measurement
     on row dy/4, i3 = larger of the measurements on rows dy and dy-1 */
  2120. i=loop(bp, 0, 0,dx,cs,0,RI);
  2121. i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness
  2122. i=loop(bp, 0, 1,dx,cs,0,RI);
  2123. i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thickest
  2124. i=loop(bp, 0,dy/4,dx,cs,0,RI);
  2125. i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i;
  /* NOTE(review): the next two calls probe row dy, one past the last box row
     dy-1 (dy = y1-y0+1). The top side uses rows 0 and 1, so dy-2 may have
     been intended; confirm how loop() treats an out-of-range row before
     changing it. */
  2126. i=loop(bp, 0,dy ,dx,cs,0,RI);
  2127. i=loop(bp, i,dy ,dx,cs,1,RI); i3=i; // thickness
  2128. i=loop(bp, 0,dy-1,dx,cs,0,RI);
  2129. i=loop(bp, i,dy-1,dx,cs,1,RI); if(i>i3) i3=i; // thickest
  2130. if( y0 < box1->m2 )
  2131. if( i1-i2 > dx/32+2
  2132. && i3-i2 > dx/32+2 ) Break; // ~serif_Y
  2133. if( y0 < box1->m2 ) // uppercase V ?
  2134. if( i1-i2 < dx/32+2 ) /* no serif detected */
  2135. if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){
  2136. j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI);
  2137. j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI);
  2138. if (j<i2+1) Break; // ~Y
  2139. if (j<=i2+1) ad=99*ad/100; // ~Y
  2140. }
  2141. ad=99*ad/100; // be careful (remove later)
  2142. if( loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI)
  2143. >loop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100;
  2144. if (num_cross(0,dx-1, dy/2, dy/2,bp,cs)==1
  2145. && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 1) Break; // 5x8 * Jul09
  2146. if (gchar) ad=99*ad/100;
  /* all rules passed: report 'v', or 'V' when hchar is set */
  2147. bc='v';
  2148. if( hchar ) bc='V';
  2149. Setac(box1, bc, ad);
  2150. break;
  2151. }
  2152. return box1->c;
  2153. }
  /*
   * ocr0_rR - rule-based tests for the letters 'r' and 'R'.
   *
   * Reads the candidate box (sdata->box1), the pixel buffer sdata->bp, the
   * threshold sdata->cs, the hchar/gchar flags and sdata->holes.
   * bp is queried with box-relative coordinates (0..dx-1, 0..dy-1), while
   * box1->p is queried with the absolute coordinates x0..x1, y0..y1.
   *
   * Two independent single-pass blocks follow (each for(...) has no
   * increment and ends in an unconditional break): first the 'r' test, then
   * the 'R' test. Both always run, so Setac() may be called for 'r', for
   * 'R', for both or for neither. Inside each block ad starts at 100 and is
   * scaled down by rules that find the shape less typical; Break (macro
   * defined outside this chunk, presumably break plus debug output) rejects.
   *
   * Returns box1->c (whatever it holds after the optional Setac calls).
   */
  2154. static wchar_t ocr0_rR(ocr0_shared_t *sdata){
  2155. struct box *box1=sdata->box1;
  2156. pix *bp=sdata->bp;
  2157. int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  2158. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2159. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2160. ad; /* tmp-vars */
  2161. // --- test r -------
  2162. for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font
  2163. DBG( wchar_t c_ask='r'; )
  /* a hole is only accepted when hole[0] ends in the upper-left quadrant */
  2164. if (sdata->holes.num > 0
  2165. && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left
  2166. || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp
  2167. ) Break; /* tolerant against a tiny hole */
  2168. if( 2*dy<box1->m3-box1->m1) Break;
  2169. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break;
  2170. x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t
  2171. if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t
  2172. if( dx>4 )
  2173. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00
  /* i = last row (exclusive) of the lower scan; the scan must reach it */
  2174. i=dy-(dy+20)/32; // ignore dust on the ground
  2175. for( y=4*dy/8; y<i; y++ ){ // center down v-line
  2176. if( y<dy-2*dy/8 && num_cross(0,dx-1,y,y,bp,cs) !=1 ) break;
  2177. i1= loop(bp,0 ,y,dx,cs,0,RI); if(i1>3*dx/8) break;
  2178. i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break;
  2179. if( (i1+(dx-i2
  2180. -1))/2 >= 4*dx/8 ) break; // mass middle should be left
  2181. }
  2182. if (y<i) Break;
  2183. for( x=4*dx/8; x<dx-dx/8; x++ ){ // right upper h-line
  2184. if( get_bw(x,x,0,(dy+2)/4,bp,cs,1) !=1 ) break; }
  2185. if (x<dx-dx/8) Break;
  2186. if( loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)>5*dx/8 // not a C
  2187. && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break;
  /* NOTE(review): the second operand compares a horizontal run (direction
     LE, limit dx) with 5*dy/8, a fraction of the height; 5*dx/8 may have
     been intended. Left unchanged. */
  2188. if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8
  2189. && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8
  2190. && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c
  2191. if( loop(bp, 0,3*dy/8,dx,cs,0,RI)
  2192. > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) {
  2193. if( loop(bp, 0, dy/8,dx,cs,0,RI)<dx/8 ) Break; // ~z (broken)
  2194. ad=98*ad/100;
  2195. }
  2196. if( loop(bp,0,dy/3,dx,cs,0,RI)>3*dx/4 ) Break; // ~i
  2197. if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I
  2198. && get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break;
  2199. if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1
  2200. && num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3
  2201. // italic t is sometimes not high enough, look for v-like shape
  2202. for(y=3*dy/4;y<dy-1;y++)
  2203. if( num_cross(0,dx-1,y, y ,bp,cs)==2
  2204. && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) break; // ~t
  2205. if(y<dy-1) Break;
  2206. if (loop(bp,dx-1-dx/4,dy-1,dx,cs,0,UP)<dy/4) ad=98*ad/100; // ~f (serif)
  2207. if( num_cross(dx-1,dx-1,0,3*dy/4,bp,cs)>1 ) ad=95*ad/100; // ~f
  2208. if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2
  2209. && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f
  2210. if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */
  2211. if (hchar) ad=96*ad/100;
  2212. if (gchar) ad=97*ad/100;
  2213. Setac(box1,'r',ad);
  2214. break; // not 100% sure!
  2215. }
  2216. // --- test R ---------------------------------------------------
  2217. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  2218. DBG( wchar_t c_ask='R'; )
  2219. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  2220. if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P
  2221. if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C
  2222. if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8
  2223. && loop(bp, dx/2,dy/16,dx,cs,0,RI)<dx/2
  2224. && dy>=16 ) Break;
  /* the loops below use i as a flag: a loop clears (or keeps) it and the
     statement after the closing brace decides about rejection */
  2225. for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line
  2226. if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0;
  2227. } if( !i ) Break;
  2228. for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line
  2229. if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
  2230. } if( !i ) Break;
  /* y collects the largest upward run found from the bottom row over the
     middle columns.
     NOTE(review): i1=x is stored here but i1 is assigned again (left-edge
     measurement further down) before it is ever read. */
  2231. for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
  2232. i=loop(box1->p,x,y1,dy,cs,0,UP);
  2233. /* on small chars bypass possible low left serifs */
  2234. if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
  2235. if (i2>1) i+=i2-1; }
  2236. if (i>y) { y=i; i1=x; }
  2237. } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
  2238. for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){ // vert crossed 2 ???
  2239. if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
  2240. } if( i ) Break;
  2241. for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines
  2242. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  2243. } if( i ) Break;
  2244. for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // middle h line
  2245. if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
  2246. } if( i ) ad=95*ad/100; /* sometimes there is a small gap */
  2247. for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lines
  2248. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  2249. } if( i ) Break;
  2250. if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru
  /* right-edge profile: the run at dy/2 must exceed the one at dy/4, and the
     run near the bottom must be smaller than the largest of the middle ones */
  2251. x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru
  2252. x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc
  2253. x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x;
  2254. x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x;
  2255. x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd
  /* left edge measured at three heights must be nearly collinear */
  2256. i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight
  2257. i2=loop(bp,0, dy/2,dx,cs,0,RI);
  2258. i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break;
  2259. if (dy>15)
  2260. if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE)
  2261. && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff
  2262. if (dy>7)
  2263. if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE)
  2264. >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) {
  2265. ad=98*ad/100;
  2266. if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0
  2267. && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ??
  2268. }
  /* hole bookkeeping: j = holes found for the whole box; when that is not
     exactly 1, i = holes counted by num_hole() in the region y0..y1-dy/3 */
  2269. j=sdata->holes.num;
  2270. if (j != 1) {
  2271. i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL);
  2272. // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
  2273. if (i==0) ad=90*ad/100; /* some times there is a small gap */
  2274. if (j>1 || j>i) Break;
  2275. }
  2276. if (sdata->holes.num < 1) ad=90*ad/100;
  2277. if (sdata->holes.num==1)
  2278. if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha
  2279. if (!hchar) ad=98*ad/100;
  2280. if ( gchar) ad=98*ad/100;
  2281. Setac(box1,'R',ad);
  2282. break;
  2283. }
  2284. return box1->c;
  2285. }
  /*
   * ocr0_m - rule-based test for the letter 'm'.
   *
   * Reads the candidate box (sdata->box1), the pixel buffer sdata->bp
   * (queried with box-relative coordinates 0..dx-1, 0..dy-1), the threshold
   * sdata->cs, the hchar/gchar flags and sdata->holes.num.
   *
   * The for(...) below has no increment and ends in an unconditional break,
   * so it is a single-pass block that failing rules leave early via Break
   * (macro defined outside this chunk; presumably break plus debug output).
   * ad starts at 100 and is scaled down by rules that find the shape less
   * typical. Setac(box1,'m',...) may be called twice: once with 98*ad/100
   * for the italic-looking case and once with the final ad.
   *
   * Returns 'm' directly when the final ad is still >= 100, otherwise
   * box1->c (whatever it holds after the optional Setac calls).
   */
  2286. static wchar_t ocr0_m(ocr0_shared_t *sdata){
  2287. struct box *box1=sdata->box1;
  2288. pix *bp=sdata->bp;
  2289. int i,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
  2290. handwritten=0,
  2291. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2292. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2293. ad; /* tmp-vars */
  2294. // --- test m -------
  2295. for(ad=d=100;dx>4 && dy>3;){
  2296. DBG( wchar_t c_ask='m'; )
  2297. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  2298. if (sdata->holes.num > 0) ad=96*ad/100;
  2299. x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K
  /* i = num_cross on the middle row, or on the row below it when the middle
     row does not report 3 */
  2300. y=dy/2;
  2301. i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3)
  2302. i=num_cross(0,dx-1,y+1,y+1,bp,cs);
  2303. if (num_cross(0,dx-1,dy/2,dy/2,bp,cs)==1) Break; // 5x8 * Jul09
  /* NOTE(review): (i<3 && i>5) can never be true, so this rule never
     rejects. (i<3 || i>5) looks intended, but enabling it would also reject
     shapes with fewer than 3 crossings on the middle rows, which the
     "handwritten" handling below seems meant to accept. Left unchanged. */
  2304. if (i<3 && i>5) Break; // m ru rn, handwritten m
  2305. // im or glued.mm cut to nm
  2306. if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
  2307. if (i>=5) { // melted rw ? (Oct08 JS)
  /* walk along row y from the left, alternating between the two colour
     arguments (0 then 1), to get the widths i1 and i2 compared below */
  2308. x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // 1st v-line
  2309. x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
  2310. x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i1=x-i1; // 2nd v-line
  2311. x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i2=x;// 2nd gap
  2312. x+=loop(bp,x,y,dx-x,cs,0,RI); i2=x-i2; // 3th v-line
  2313. // printf("\nDBG i1,i2 %d %d", i1, i2);
  2314. if (i1>2*i2) Break; // rw
  2315. }
  /* scan upwards from row dy-1-dy/8: first find a row with more than 2
     crossings, then continue while the count stays exactly 3; i5 is the row
     one below where that scan stopped */
  2316. for (i=0,y=dy-1-dy/8;y>dy/2;y--) {
  2317. i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break;
  2318. } if (i>3) Break;
  2319. for ( ;y>dy/2;y--) {
  2320. i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break;
  2321. } if (i>5) Break; y++; i5=y;
  /* NOTE(review): both loops stop at y>=dy/2 and y is then incremented, so
     y>dy/2 appears to hold always and handwritten is at least 10 from here
     on; the two handwritten<10 conditions further down therefore look
     unreachable. Confirm before relying on them. */
  2322. if (y> dy/2) handwritten=10;
  2323. if (y>3*dy/4) handwritten=60;
  2324. /* @@...............
  2325. @@......,........
  2326. @@,...@@@....@@@.
  2327. @@,,.@@@@..@@@@@,
  2328. @@@.@@@@@.@@@@@@,
  2329. @@;@@@@@@@@@;,@@,
  2330. @@@@@,.@@@@,,,@@@ <- i5
  2331. ,@@@...;@@....@@@
  2332. .@;...........,@@
  2333. ...............@@
  2334. i1 i2 i3 i4
  2335. */
  /* same left-to-right walk on row i5; i1..i4 are the x positions marked in
     the sketch above */
  2336. x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
  2337. x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap
  2338. x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line
  2339. x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap
  2340. x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3th v-line
  2341. if (x>=dx) Break; // missing 3th v-line, ~W
  2342. MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);)
  2343. if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn
  2344. if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn
  2345. // the same game for the lower part =>l1 l2 l3 l4 ???
  2346. i =loop(bp,0,5*dy/8,dx,cs,0,RI);
  2347. i =loop(bp,i,5*dy/8,dx,cs,1,RI);
  2348. x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI);
  2349. x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI);
  /* NOTE(review): the 0/1 flag stored in i here is overwritten by the next
     loop (or never read), so the serif check has no effect. */
  2350. if( x > i+1 ) i=1; else i=0; /* looks like serif m, Oct00 */
  /* y = largest upward run from the bottom row over the columns i1..i2-1 */
  2351. for(y=0,x=i1;x<i2;x++) {
  2352. i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
  2353. }
  2354. if(y<dy/4 || y<y1-y0-i5-1-dy/16) Break; // no gap detected
  /* the same for the columns i3..i4-1 */
  2355. for(y=0,x=i3;x<i4;x++) {
  2356. i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
  2357. }
  2358. if(y<dy/4) Break; // no gap detected
  2359. for(x=i1;x<i4;x++) if( loop(bp,x,0,dy,cs,0,DO)>=dy/2 ) break;
  2360. if(x<i4 && handwritten<10) Break; // gap detected
  2361. // glued rn as m ??? hmm seems a balance act
  2362. if(i2-i1>i4-i3+dx/16){
  /* a Break inside this inner loop leaves only the inner loop; the x<i2
     test after it then leaves the outer single-pass block */
  2363. for(y=0,x=(i1+i2)/2;x<i2;x++){
  2364. i=loop(bp,x,0,dy,cs,0,DO);
  2365. i=loop(bp,x,i,dy,cs,1,DO); // measure thickness
  2366. if( i>y ) y=i; if( 2*i<y ) Break;
  2367. }
  2368. if(x <i2) Break; // unusual property for m (see n)
  2369. }
  2370. if(gchar) ad=99*ad/100;
  2371. if(hchar) ad=99*ad/100;
  2372. if( loop(bp,dx-1,dy/16,dx,cs,0,LE)<2
  2373. && loop(bp,dx-1,dy/4 ,dx,cs,0,LE)>3 ) Break; // melted WT
  2374. x=loop(bp,dx-1,dy/2,dx,cs,0,LE);
  2375. if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)<dy/2) Break; // melt toc
  2376. if (loop(bp,(i3+i4)/2,0,dy,cs,0,DO)>dy/2) Break; // N
  2377. // {\it m}
  /* early, slightly reduced Setac for the italic-looking case; the rules
     below may still reject before the final Setac */
  2378. if( loop(bp,1, dy/4,dx,cs,0,RI)
  2379. >loop(bp,0,7*dy/8,dx,cs,0,RI) )
  2380. Setac(box1,'m',98*ad/100);
  2381. if (handwritten<10){
  2382. x =loop(bp,0,dy/4,dx,cs,0,RI);
  2383. x+=loop(bp,x,dy/4,dx,cs,1,RI);
  2384. for( ;x<i4;x++){ // x=i1 ?
  2385. i=loop(bp,x,0,dy,cs,0,DO);
  2386. if (i>=dy/4) ad=99*ad/100;
  2387. if (i>(dy+2)/4) ad=95*ad/100;
  2388. if (3*i>dy) Break;
  2389. }
  2390. if(x<i4) Break; // gap detected
  2391. }
  2392. if (box1->dots) ad=99*ad/100;
  2393. Setac(box1,'m',ad);
  2394. if (ad>=100) return 'm';
  2395. break;
  2396. }
  2397. return box1->c;
  2398. }
  /*
   * ocr0_tT - rule-based tests for the letters 'T' and 't'.
   *
   * Reads the candidate box (sdata->box1), the pixel buffer sdata->bp
   * (queried with box-relative coordinates 0..dx-1, 0..dy-1), the threshold
   * sdata->cs, the hchar/gchar flags and sdata->holes.
   *
   * Two single-pass blocks follow (each for(...) has no increment and ends
   * in an unconditional break): first the 'T' test, then the 't' test. The
   * 't' test is skipped only when the 'T' test returns early. Inside each
   * block ad starts at 100 and is scaled down by rules that find the shape
   * less typical; Break (macro defined outside this chunk, presumably break
   * plus debug output) rejects the candidate.
   *
   * Returns 'T' directly when the 'T' test ends with ad >= 100, otherwise
   * box1->c (whatever it holds after the optional Setac calls).
   */
  2399. static wchar_t ocr0_tT(ocr0_shared_t *sdata){
  2400. struct box *box1=sdata->box1;
  2401. pix *bp=sdata->bp;
  2402. int i,i1,i2,i3,i4,j,d,x,y,yb,hchar=sdata->hchar,gchar=sdata->gchar,
  2403. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2404. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2405. ad; /* tmp-vars */
  2406. // --- test T ---------------------------------------------------
  2407. for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx
  2408. DBG( wchar_t c_ask='T'; )
  2409. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  2410. // upper horizontal line
  /* i1/i2: downward run from the top at columns dx/8 and dx-1-dx/8;
     i3/i4 are measured from there with the other colour argument but are
     not read again in this function */
  2411. i1= loop (bp, dx/8,0,dy,cs,0,DO); // left side
  2412. i2= loop (bp,dx-1-dx/8,0,dy,cs,0,DO); // right side
  2413. i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side
  2414. i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side
  2415. if (i1>dy/4 || i2>dy/4) Break;
  /* every column in between must start within dy/8 of i1 or i2 */
  2416. for (x=dx/8;x<dx-1-dx/8;x++) {
  2417. i= loop (bp,x,0,dy,cs,0,DO);
  2418. if (i>i1+dy/8 && i>i2+dy/8) break;
  2419. if (i<i1-dy/8 && i<i2-dy/8) break;
  2420. } if (x<dx-1-dx/8) Break;
  2421. if( get_bw( 0,dx-1, dy/2, dy/2,bp,cs,1) != 1 ) Break;
  2422. if( get_bw( 0,(dx-1)/8, dy/2,dy-1-dy/8,bp,cs,1) == 1 ) Break;
  2423. if( get_bw( 0,3*dx/16, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
  2424. if( get_bw(dx-1-dx/4,dx-1, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
  2425. // center width
  2426. for( y=dy/4;y<3*dy/4;y++){ // between top and bottom-serif
  2427. i=dx/4+loop(bp,dx/4,y,dx,cs,0,RI); // left side of vertical line
  2428. j= loop(bp, i,y,dx,cs,1,RI); // width of vertical line
  2429. if (2*j>dx+1 || i+j>=dx || i<dx/4-1) break; // ~r?7 Jan08,
  2430. // but allow serif T Jul09
  2431. } if (y<3*dy/4) Break; // Jan07
  2432. // down width
  /* NOTE(review): x in 4*i>3*x is the value left over from the column scan
     of the upper line (at least dx-1-dx/8 when this point is reached), not
     a fresh measurement. */
  2433. for( y=3*dy/4;y<dy;y++){
  2434. i= loop(bp,dx/4,y,dx,cs,0,RI);
  2435. i= loop(bp, i,y,dx,cs,1,RI);if(4*i>3*x) break; //~I
  2436. } if( y<dy ) Break;
  2437. i =dx/4+loop(bp,dx/4,dy/4,dx,cs,0,RI);if(i>3*dx/4) Break; // ~7
  2438. i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break;
  2439. if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1
  2440. && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break;
  /* NOTE(review): both operands below are the same call on row 2*dy/3; a
     neighbouring row was probably intended for the second one. */
  2441. if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1
  2442. && num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break;
  2443. if (box1->m3 && 2*y1>box1->m3+box1->m4
  2444. && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4
  2445. && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) ad=96*ad/100; // ~J
  2446. if (gchar) ad=98*ad/100;
  2447. if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) ad=99*ad/100; // ~J
  2448. i = loop(bp,0,dy/2,dx,cs,0,RI);
  2449. j = loop(bp,i,dy/2,dx,cs,1,RI);
  2450. if( 2*i>=dx || 2*(dx-j-i)<i) ad=95*ad/100; // ~J
  2451. Setac(box1,'T',ad);
  2452. if (ad>=100) return 'T';
  2453. break;
  2454. }
  2455. // --- test t ---------------------------------------------------
  2456. // written t can look like a + or even with missing right side
  2457. // smallest t found in win-screenshot (prop-font) dx=2
  2458. for(ad=d=100;dx>1 && dy>=box1->m3-box1->m2-1;){ // sometimes no hchar!
  2459. DBG( wchar_t c_ask='t'; )
  2460. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  2461. if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection?
  2462. if (num_cross(0,dx-1,0 ,0 ,bp,cs) != 1) { // font4x5 *=x+-
  2463. if (dy<10) Break; ad=98*ad/100;
  2464. }
  2465. if (num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1) { // *=x+-
  2466. if (dy<10) Break; ad=98*ad/100;
  2467. }
  /* search the rows dy/32+3*dy/16 .. 5*dy/8-1 for the cross bar: x keeps the
     widest run found and yb its row. j is reused inside the loop for the
     crossing count of the next row. */
  2468. for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line
  2469. i=loop(bp,0,y,dx,cs,0,RI);
  2470. i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. line
  2471. i=num_cross(0,dx-1,y ,y ,bp,cs);
  2472. j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break;
  2473. if( y<11*dy/16
  2474. && num_cross(0,dx-1,y ,y ,bp,cs) != 1
  2475. && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise
  2476. ) break;
  2477. } if( y<4*dy/8 ) Break;
  2478. if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8)
  2479. if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP)
  2480. <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 )
  2481. if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C
  2482. if (x<dx/2) ad=95*ad/100; // unusual small ?
  2483. if (x>=dx && 9*dx>=8*dy) { ad=99*ad/100; } // +
  /* the top quarter must not show two crossings on two consecutive rows, and
     the right-edge run must not grow by more than 1 from row to row */
  2484. i=loop(bp,dx-1,0,dx,cs,0,LE);
  2485. for(y=0;y<dy/4;y++){
  2486. if( num_cross(0,dx-1,y ,y ,bp,cs) == 2
  2487. && num_cross(0,dx-1,y+1,y+1,bp,cs) == 2 ) break;
  2488. j=loop(bp,dx-1,y,dx,cs,0,LE); if(j-i>1) break; i=j;
  2489. }
  2490. if( y<dy/4 ) Break; // ~f
  2491. i=loop(bp,dx-1,yb,dx,cs,0,LE);
  2492. for(y=dy/8;y<yb;y++)
  2493. if( loop(bp,dx-1,y,dx,cs,0,LE)>i ) break;
  /* NOTE(review): this exit uses the plain break, not the Break macro used
     by the other rejections in this block. */
  2494. if( y==yb ) break;
  /* i = smallest stem width measured on rows dy/2, dy/4 and 3*dy/4; the
     cross bar x must be at least 1.5 times as wide */
  2495. j=loop(bp,0, dy/2,dx,cs,0,RI);
  2496. j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness
  2497. j=loop(bp,0, dy/4,dx,cs,0,RI);
  2498. j=loop(bp,j, dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
  2499. j=loop(bp,0,3*dy/4,dx,cs,0,RI);
  2500. j=loop(bp,j,3*dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
  2501. if( 2*x<3*i ) Break;
  2502. if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-dx/8
  2503. <=loop(bp,dx-1, yb ,dx,cs,0,LE) )
  2504. if( loop(bp,dx-1, yb ,dx,cs,0,LE)-dx/8
  2505. >=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ???
  /* look above row yb for two horizontally adjacent pixels >= cs that sit
     directly below two pixels < cs; j is cleared when such a 2x2 pattern is
     found and the candidate is then rejected */
  2506. j=1;
  2507. for(y=1;j && y<yb; y++) // no @@ pattern
  2508. for(x=0;j && x<dx-2;x++){ // ..
  2509. if( getpixel(bp,x ,y )>=cs && getpixel(bp,x+1,y )>=cs
  2510. && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; }
  2511. } if(!j) Break;
  2512. if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2
  2513. && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er)
  2514. if( dy>= 16
  2515. && loop(bp, 0, 3*dy/4,dx,cs,0,RI)
  2516. >=loop(bp, 0, dy-2,dx,cs,0,RI)
  2517. && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
  2518. <=loop(bp,dx-1, dy-2,dx,cs,0,LE)
  2519. && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16
  2520. <loop(bp,dx-1,3*dy/16,dx,cs,0,LE)
  2521. && ( loop(bp, 0, 1,dx,cs,0,RI)
  2522. >loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16
  2523. || loop(bp,dx-1, 0,dx,cs,0,LE)==0
  2524. || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02
  2525. if(dx<8 && dy>12){ // thin f's could easily confound with t
  2526. x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE);
  2527. if (x)
  2528. if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16
  2529. && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1
  2530. >=loop(bp, 0, dy-2,dx,cs,0,RI)
  2531. && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
  2532. <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break;
  2533. }
  2534. if (dx>7)
  2535. if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1
  2536. && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0
  2537. && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 )
  2538. if (sdata->holes.num > 0)
  2539. if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6
  2540. // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6
  2541. if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2
  2542. && num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){
  2543. ad=99*ad/100; /* italic t ? */
  2544. if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
  2545. if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
  2546. }
  2547. x= loop(bp,dx-1,dy/2,dx,cs,0,LE);
  2548. i= loop(bp,dx-1,dy/8,dx,cs,0,LE);
  2549. if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */
  /* x = smaller left-edge run of rows 0 and 1; the run at dy/4 may exceed it
     by at most 1 */
  2550. x= loop(bp,0, 0,dx,cs,0,RI);
  2551. i= loop(bp,0, 1,dx,cs,0,RI); if (i<x) x=i;
  2552. i= loop(bp,0,dy/4,dx,cs,0,RI);
  2553. if (i-x>1) Break; // l
  2554. // this happens quite often, do not be too strict
  2555. if (!box1->m2) ad=99*ad/100;
  2556. if (box1->m2) {
  2557. if (!hchar) ad=99*ad/100; /* some times t is not long enough */
  2558. if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* too short */
  2559. if( y0>=box1->m2 ) ad=99*ad/100; /* too short */
  2560. }
  2561. if (sdata->holes.num > 0) ad=95*ad/100;
  2562. if (gchar) ad=99*ad/100;
  2563. if (box1->dots) ad=90*ad/100;
  2564. Setac(box1,'t',ad);
  2565. break;
  2566. }
  2567. return box1->c;
  2568. }
  /*
   * ocr0_sS - rule-based test for the letters 's' and 'S' (close to '5').
   *
   * Reads the candidate box (sdata->box1), the pixel buffer sdata->bp
   * (queried with box-relative coordinates 0..dx-1, 0..dy-1), the threshold
   * sdata->cs, the hchar/gchar flags and sdata->holes.
   *
   * The for(...) below has no increment and ends in an unconditional break,
   * so it is a single-pass block that failing rules leave early via Break
   * (macro defined outside this chunk; presumably break plus debug output).
   * ad starts at 100 and is scaled down by rules that find the shape less
   * typical, several of them aimed at telling the shape apart from '5'.
   * If no rule rejects, Setac() is called with 'S' when hchar is set and
   * with 's' otherwise.
   *
   * Returns box1->c (whatever it holds after the optional Setac call).
   */
  2569. static wchar_t ocr0_sS(ocr0_shared_t *sdata){
  2570. struct box *box1=sdata->box1;
  2571. pix *bp=sdata->bp;
  2572. int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  2573. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2574. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2575. ad; /* tmp-vars */
  2576. wchar_t ac;
  2577. // --- test sS near 5 ---------------------------------------------------
  2578. for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font)
  2579. DBG( wchar_t c_ask='s'; )
  2580. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  /* NOTE(review): the second num_cross runs from x=5*dx/8 at the top to
     x=3*dx/8 at the bottom, i.e. not along a single column; presumably a
     deliberately slanted scan line for slanted glyphs - confirm. */
  2581. if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3
  2582. && num_cross(5*dx/8,3*dx/8,0,dy-1,bp,cs)!=3
  2583. && dy>4 ) Break;
  2584. if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1
  2585. && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break;
  2586. // get the upper and lower hole coordinates
  /* upper part: walk row dy/4 from the left; i1 becomes an x position inside
     the third run found on that row */
  2587. y=dy/4;
  2588. x =loop(bp,0,dy/4,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */
  2589. x +=loop(bp,x,dy/4,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */
  2590. i1 =loop(bp,x,dy/4,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x
  /* lower part: the same walk on row 11*dy/16, but from the right edge */
  2591. y=11*dy/16;
  2592. x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break;
  2593. x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break;
  2594. if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})}
  2595. i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // lower center x (measured from the right)
  2596. MSG(fprintf(stderr,"~3 upper right gap (i1=%d,dy/4)",i1);)
  2597. // use i3 temporary for x of left upper border of slanted S
  2598. i3 =loop(bp,0,dy/4,dx,cs,0,RI);
  2599. for( y=dy/4;y<dy/2;y++ ) { // Jul09 ~3 font4x5
  2600. x = loop(bp,0,y,dx,cs,0,RI);
  2601. if (x>i3+dx/8
  2602. && loop(bp,x-1,y,dx,cs,0,UP)>dy/8+1) break; // +1 for s font 5x8 Jul09
  2603. }
  2604. if( y<dy/2 ) Break;
  /* NOTE(review): the value stored in y here is overwritten by the loop
     below without being read; it was only used by the commented-out joined()
     check. */
  2605. y=dy/2-loop(bp,dx-1,dy/2,dy/2,cs,1,UP);
  2606. // if( !joined(bp,i1,dy/4,dx-1,y,cs) ){
  2607. // break; // sometimes thick small fonts have no gap
  2608. // }
  /* some row between dy/4 and dy/2 must have a right-edge run above dx/8 */
  2609. for(y=dy/4;y<dy/2;y++){
  2610. x=loop(bp,dx-1,y,dx,cs,0,LE);if(x>dx/8) break;
  2611. }
  2612. if(y==dy/2) Break; // May00
  2613. y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO);
  2614. if( !joined(bp,0,y,i2,11*dy/16,cs) ) Break;
  2615. if (sdata->holes.num > 0)
  2616. if (sdata->holes.hole[0].y0 > dy/4) Break; // ???
  2617. // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break;
  2618. i1=loop(bp,dx-1,dy-1,dx,cs,0,LE);
  2619. i2=loop(bp,dx-1,dy-2,dx,cs,0,LE);
  2620. if (i2-i1 >= dx/4) Break; // ~{ 5x7font
  2621. i1=loop(bp, 0, 0,dx,cs,0,RI);
  2622. i2=loop(bp, 0, 1,dx,cs,0,RI);
  2623. if (i2-i1 >= dx/4) Break; // ~} 5x7font
  2624. // sS5 \sl z left upper v-bow ?
  2625. i1=loop(bp, 0,dy/2,dx,cs,0,RI);
  2626. i1=loop(bp, i1,dy/2,dx,cs,1,RI);
  2627. if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment
  /* left-edge runs on rows dy/16, 4*dy/16 and 7*dy/16: the middle one must
     be clearly smaller than the average of the outer two; for narrow boxes
     (dx<=9) an exact tie is re-checked after adding the following run */
  2628. i1=loop(bp,0, dy/16,dx,cs,0,RI);
  2629. i2=loop(bp,0,4*dy/16,dx,cs,0,RI);
  2630. i3=loop(bp,0,7*dy/16,dx,cs,0,RI);
  2631. if( 2*i2+dx/32 >= i1+i3 ){
  2632. if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break;
  2633. // very small s?
  2634. i1+=loop(bp,i1, dy/16,dx,cs,1,RI);
  2635. i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI);
  2636. i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI);
  2637. if( 2*i2+dx/32 >= i1+i3 ) Break;
  2638. }
  2639. for(y=7*dy/16;y<5*dy/8;y++){
  2640. if( num_cross( 0,dx-1,y ,y ,bp,cs)==2 )
  2641. if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 )
  2642. if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5
  2643. } if(y<5*dy/8) Break; // v0.2.4a5
  2644. if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE)
  2645. > loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00
  /* all rules passed: report 's', or 'S' when hchar is set; the remaining
     comparisons only lower ad when the shape resembles a '5' */
  2646. ac='s';
  2647. if (gchar) { ad=98*ad/100; MSG(fprintf(stderr,"gchar=bad");) }
  2648. if ( loop(bp, dx-1, 0,dx,cs,1,LE) // ToDo: improve
  2649. > loop(bp, 0,dy-1,dx,cs,1,RI)+dx/8 ) ad=98*ad/100; // ~5 4x5 font
  2650. if( hchar ){ // (slanted) S but 5 is very similar! check it
  2651. ac='S';
  2652. if ( loop(bp,3*dx/4, 0,dy,cs,1,DO) // ToDo: improve
  2653. > loop(bp, dx/4,dy-1,dy,cs,1,UP) ) ad=99*ad/100; // ~5
  2654. if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE)
  2655. > loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5
  2656. if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI)
  2657. > loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5
  2658. }
  2659. Setac(box1,ac,ad);
  2660. break;
  2661. }
  2662. return box1->c;
  2663. }
  2664. static wchar_t ocr0_gG(ocr0_shared_t *sdata){
  2665. struct box *box1=sdata->box1;
  2666. pix *bp=sdata->bp;
  2667. int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  2668. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  2669. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  2670. ad; /* tmp-vars */
  2671. // --- test g ---------------------------------------------------
  2672. /* some g's have crotchet at upper right end, so hchar can be set */
  2673. // ~italic g
  2674. for(ad=d=100;dx>2 && dy>4;){ // min 3x5
  2675. DBG( wchar_t c_ask='g'; )
  2676. if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
  2677. if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break;
  2678. if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p
  2679. if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break;
  2680. if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 )
  2681. if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break;
  2682. if (sdata->holes.num < 1) Break;
  2683. for (i=0;i<sdata->holes.num;i++){
  2684. if (sdata->holes.hole[i].y1 < 5*dy/8+1) break;
  2685. } if (i==sdata->holes.num) Break; // no upper hole found
  2686. // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break;
  2687. for(y=dy/4;y<dy;y++) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
  2688. if( y==dy ) Break; // ~q
  2689. if( get_bw(0,dx/2,7*dy/8,7*dy/8,bp,cs,1) != 1 ) Break; // ~q
  2690. y =loop(bp,dx/16,0,dy,cs,0,DO); if(y<=dy/8)
  2691. y+=loop(bp,dx/16,y,dy,cs,1,DO); if(16*y>=15*dy) Break; // ~B
  2692. if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) {
  2693. ad=98*ad/100; // ~&
  2694. if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100;
  2695. if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100;
  2696. }
  2697. // looking for a gap
  2698. for (x=0,y=dy/4;y<dy-dy/4;y++){
  2699. i=loop(bp,dx-1,y,dy,cs,0,LE); if (i>x) x=i;
  2700. } // in a good font x is greater dx/2
  2701. if (x<dx/2) { // bad font? or %
  2702. if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2
  2703. || num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100;
  2704. if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
  2705. || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
  2706. }
  2707. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o
  2708. /* test for horizontal symmetry ~8 */
  2709. for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
  2710. if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
  2711. if (y==dy) Break; /* ~8 */
  2712. if (box1->m4==0) ad=98*ad/100;
  2713. if ( hchar) ad=96*ad/100;
  2714. if (!gchar) ad=96*ad/100;
  2715. ad=98*ad/100;
  2716. Setac(box1,'g',ad);
  2717. break;
  2718. }
  2719. // --- test rundes G ---------------------------------------------
  2720. for(ad=d=100;dx>3 && dy>4;){ // min 3x4
  2721. DBG( wchar_t c_ask='G'; )
  2722. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  2723. if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
  2724. if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
  2725. if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break;
  2726. if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S
  2727. for( y=y0+dy/4;y<y1-dy/3;y++ )
  2728. if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
  2729. if( y==y1-dy/3 ) Break; // no gap
  2730. if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1
  2731. || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e
  2732. x=x0; y=y1;
  2733. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow?
  2734. if( y<y0+dy/4 ) Break; // filter W
  2735. x=x1; y=y1-dy/3; // upper right offen bow
  2736. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
  2737. if( x<x1-3*dx/8 ) Break;
  2738. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
  2739. if( x<x0+dx/2 ){ // not sure, try again (not best)
  2740. x=x1; y=y1-dy/4;
  2741. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
  2742. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
  2743. if( x<x0+dx/2 ) Break;
  2744. }
  2745. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right midle
  2746. if( x<=x1 ) Break;
  2747. if( y<y0+3*dy/8 ) Break;
  2748. if( y>y1-dy/4 ) Break;
  2749. x=x1-dx/3;y=y1; // follow left C-bow, filter S
  2750. turmite(box1->p,&x,&y,x0,x1,y0+dy/4,y1,cs,LE,UP); // w=LE b=UP
  2751. if( y>y0+dy/4+1 ) Break; /* leave box below for S or on top for CG */
  2752. MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
  2753. /* if (y<y0) y++; else x++; */ /* enter the box again */
  2754. turmite(box1->p,&x,&y,x0,x1,y0 ,y1,cs,RI,UP);
  2755. MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
  2756. if( y>y0 ) Break;
  2757. if (sdata->holes.num > 0) Break;
  2758. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break;
  2759. if( dx>4 && dy>6){ // no (<[
  2760. for(i=1,y=0;i && y<dy/3;y++)
  2761. if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
  2762. if( i ) ad=98*ad/100;
  2763. for(i=1,y=0;i && y<dy/3;y++)
  2764. if( num_cross(0,dx-1,dy-1-y,dy-1-y,bp,cs) == 2 ) i=0;
  2765. if( i ) Break;
  2766. }
  2767. for(i=1,y=dy/2;i && y<dy;y++)
  2768. if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
  2769. if( i ) Break;
  2770. for(i=0,y=3*dy/4;y<dy;y++){
  2771. x=loop(bp,0,y,dx,cs,0,RI); // Kante abfallend <=> Z
  2772. if( x<i-dx/20 ) break;
  2773. if( x>i ) i=x;
  2774. } if( y<dy ) Break;
  2775. // only check the middle!
  2776. for(i=0,i1=y=dy/4;y<dy-dy/4;y++){ // look for horizontal line
  2777. x=loop(bp,dx-1 ,y,dx/4,cs,0,LE);
  2778. x=loop(bp,dx-1-x,y,dx/2,cs,1,LE); if(x>i){ i=x;i1=y; }
  2779. } if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ?
  2780. // check from above for gap and left vertical line (~S)
  2781. x =loop(bp,0,i1,dx ,cs,0,RI);
  2782. x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow
  2783. x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100;
  2784. MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);)
  2785. i=1; // Mar06: adapted to 4x6 font
  2786. for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
  2787. for(y=dy/2;y<dy-1 && i;y++){ // .@
  2788. if( getpixel(bp,x ,y )>=cs
  2789. && getpixel(bp,x+1,y )< cs
  2790. && getpixel(bp,x+1,y-1)< cs
  2791. && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
  2792. }
  2793. if(i) ad=95*ad/100; // ~C
  2794. if(!hchar) ad=98*ad/100;
  2795. if( gchar) ad=98*ad/100;
  2796. Setac(box1,'G',ad);
  2797. break;
  2798. }
  2799. // --- test \it g like 9 ----------------------------------------------
  2800. for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx
  2801. DBG( wchar_t c_ask='g'; )
  2802. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  2803. if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select
  2804. && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break;
  2805. for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // suche kerbe
  2806. j=loop(box1->p,x0,y,dx,cs,0,RI);
  2807. if( j>2 && j>dx/4 && y<y1-3 && j<dx/2 ) // long bow
  2808. j+=loop(box1->p,x0+j-2,y+1,dx,cs,0,RI)-2;
  2809. if( j>x ) { x=j; i=y; }
  2810. }
  2811. if( x<4*dx/8 ) Break;
  2812. if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1
  2813. && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break;
  2814. if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break;
  2815. if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break;
  2816. if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 &&
  2817. loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q
  2818. for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // suche kerbe
  2819. j=loop(box1->p,x1,y,dx,cs,0,LE);
  2820. if( j>x ) { x=j; i=y; }
  2821. } if( x>dx/2 ) Break; // no g
  2822. i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break;
  2823. i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
  2824. i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // konvex
  2825. i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break;
  2826. i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
  2827. for(y=dy/4;y<dy-1-dy/4;y++){
  2828. i2=loop(bp,dx-1,y,dx,cs,0,LE);
  2829. if(i1+i3-2*i2<-1-dx/16) break; // konvex from right ~g ~3
  2830. } if(y<dy-1-dy/4) Break;
  2831. x=loop(bp,dx -1,6*dy/8,dx,cs,0,LE); if(x>0){ x--; // robust
  2832. y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP);
  2833. if(y<dy/8) Break; // ~q (serif!)
  2834. }
  2835. // %
  2836. if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2) ad=90*ad/100;
  2837. if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
  2838. || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
  2839. if (box1->m4==0) ad=98*ad/100;
  2840. if ( hchar) ad=96*ad/100;
  2841. if (!gchar) {
  2842. if (box1->m4 - box1->m3 > 2) ad=96*ad/100;
  2843. else {
  2844. if (y1 > box1->m3) ad=99*ad/100;
  2845. else ad=97*ad/100;
  2846. }
  2847. }
  2848. if (ad>99) ad=99; // never be sure to have a 9
  2849. Setac(box1,'g',ad);
  2850. break;
  2851. }
  2852. return box1->c;
  2853. }
  2854. // rewritten for vector usage v0.41
        /*
         * ocr0_xX: rule based test whether the box in sdata looks like 'x' or 'X'.
         *
         * sdata: shared state; this function reads box1, hchar, gchar, holes.num
         *        and aa (the four line ends of the character).
         *
         * The test starts with ad=100. A failed criterion either leaves the test
         * via Break (no candidate is set) or scales ad down by a percentage.
         * If all criteria pass, Setac(box1,bc,ad) is called with bc='x', or with
         * bc='X' when hchar is set.
         *
         * returns: box1->c
         */
  2855. static wchar_t ocr0_xX(ocr0_shared_t *sdata){
  2856. struct box *box1=sdata->box1;
  2857. // pix *bp=sdata->bp; // obsolete
  2858. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  2859. x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs;
  2860. int dx=x1-x0+1, dy=y1-y0+1, /* size */
  2861. (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
  2862. ad; /* tmp-vars */
  2863. wchar_t bc=UNKNOWN;
  2864. // --- test xX ---------------------------------------------------
  2865. // rewritten for vectors 0.41
  2866. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  2867. int ld, i1, i2, i3, i4; // line deviation, 4 inner edges
  2868. DBG( wchar_t c_ask='x'; )
  2869. if (sdata->holes.num > 0) Break; /* # */
  2870. /* half distance to the center */
  2871. d=2*sq(128/4);
  2872. /* now we check for the 4 ends of the x */
        /* the comparisons below treat aa[0] as the upper left, aa[1] as the
         * lower left, aa[2] as the lower right and aa[3] as the upper right end */
  2873. if (aa[0][2]>d) Break;
  2874. if (aa[1][2]>d) Break;
  2875. if (aa[2][2]>d) Break;
  2876. if (aa[3][2]>d) Break;
  2877. if (aa[3][0]-aa[0][0]<dx/2) Break;
  2878. if (aa[2][0]-aa[1][0]<dx/2) Break;
  2879. if (aa[1][1]-aa[0][1]<dy/2) Break;
  2880. if (aa[2][1]-aa[3][1]<dy/2) Break;
        /* reject only if all four ends are more than dy/16 away from the
         * top (y0) resp. bottom (y1) border */
  2881. if (aa[0][1]-y0 > dy/16
  2882. && aa[3][1]-y0 > dy/16
  2883. && y1-aa[1][1] > dy/16
  2884. && y1-aa[2][1] > dy/16) Break; // 10x10 ~* (X + vert. line)
  2885. /* searching for 4 notches between neighbouring ends */
  2886. /* 0907: false positive on font4x5 '*'='-' | 'x' */
  2887. i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x0+dx, y0+dy/4);
  2888. i3=nearest_frame_vector(box1, aa[0][3], aa[1][3], x0+dx, y1-dy/4);
  2889. i2=nearest_frame_vector(box1, i1, i3, x0, y0+dy/2);
  2890. if (box1->frame_vector[i2][0]<=x0+ dx/8) Break;
  2891. if (box1->frame_vector[i2][0]<=x0+ dx/4) ad=98*ad/100;
  2892. i1=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0 , y0+dy/4);
  2893. i3=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0 , y1-dy/4);
  2894. i2=nearest_frame_vector(box1, i1, i3, x1, y0+dy/2);
  2895. if (box1->frame_vector[i2][0]>=x1- dx/8) Break;
  2896. if (box1->frame_vector[i2][0]>=x1- dx/4) ad=98*ad/100;
  2897. /* only left side */
        /* walk the frame vectors from aa[0][3] up to (not including) aa[1][3],
         * wrapping at num_frame_vectors[0]; j==i afterwards means the loop
         * body never ran */
  2898. for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
  2899. if (box1->frame_vector[i][0]
  2900. >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
  2901. } if (j==i) Break;
  2902. /* calculate the distance to the center */
        /* 4*x resp. 4*y is compared against the sum of the four end coordinates */
  2903. x=box1->frame_vector[j][0];
  2904. y=box1->frame_vector[j][1]; i1=j;
  2905. if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
  2906. if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
  2907. if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
  2908. if ( aa[1][0] >= x ) Break;
  2909. if ( aa[0][0] > x ) Break;
  2910. if ( aa[0][0] >= x ) ad=99*ad/100;
  2911. if (x-x0<dx/8) Break;
  2912. if (x-x0<dx/4) ad=99*ad/100;
  2913. /* check if upper left and center point are joined directly */
  2914. ld=line_deviation(box1, aa[0][3], j);
  2915. MSG(fprintf(stderr," 0-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2916. if (ld >2*sq(1024/4)) Break;
  2917. /* check if lower left and center point are joined directly */
  2918. ld=line_deviation(box1, j, aa[1][3]);
  2919. MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2920. if (ld >2*sq(1024/4)) Break;
  2921. /* only lower side */
  2922. for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
  2923. if (box1->frame_vector[i][1]
  2924. <=box1->frame_vector[j][1]) j=i; /* notice most upper vector */
  2925. } if (j==i) Break;
  2926. /* calculate the distance to the center */
        /* NOTE(review): the value stored in i2 here is not read again in this function */
  2927. x=box1->frame_vector[j][0];
  2928. y=box1->frame_vector[j][1]; i2=j;
  2929. if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
  2930. if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
  2931. if ( aa[1][1]+aa[2][1]-2*y<=0) Break;
  2932. /* check if lower left and center point are joined directly */
  2933. ld=line_deviation(box1, aa[1][3], j);
  2934. MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2935. if (ld >2*sq(1024/4)) Break;
  2936. /* check if lower right and center point are joined directly */
  2937. ld=line_deviation(box1, j, aa[2][3]);
  2938. MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2939. if (ld >2*sq(1024/4)) Break;
  2940. /* only right side */
  2941. for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) {
  2942. if (box1->frame_vector[i][0]
  2943. <=box1->frame_vector[j][0]) j=i; /* notice most left vector */
  2944. } if (j==i) Break;
  2945. /* calculate the distance to the center */
  2946. x=box1->frame_vector[j][0];
  2947. y=box1->frame_vector[j][1]; i3=j;
  2948. if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
  2949. if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
  2950. if ( aa[2][0]+aa[3][0]-2*x<=0) Break;
  2951. if ( aa[3][0] <= x ) Break;
  2952. if ( aa[2][0] < x ) Break;
  2953. if ( aa[2][0] <= x ) ad=99*ad/100;
  2954. if (dx-(x-x0)<dx/8) Break;
  2955. if (dx-(x-x0)<dx/4) ad=99*ad/100;
  2956. /* check if lower right and center point are joined directly */
  2957. ld=line_deviation(box1, aa[2][3], j);
  2958. MSG(fprintf(stderr," 2-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2959. if (ld >2*sq(1024/4)) Break;
  2960. /* check if upper right and center point are joined directly */
  2961. ld=line_deviation(box1, j, aa[3][3]);
  2962. MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2963. if (ld >2*sq(1024/4)) Break;
  2964. /* only upper side */
  2965. for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
  2966. if (box1->frame_vector[i][1]
  2967. >=box1->frame_vector[j][1]) j=i; /* notice lowest vector */
  2968. } if (j==i) Break;
  2969. /* calculate the distance to the center */
        /* NOTE(review): i4 is stored here but never read in this function */
  2970. x=box1->frame_vector[j][0];
  2971. y=box1->frame_vector[j][1]; i4=j;
  2972. if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
  2973. if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
  2974. if ( aa[3][1]+aa[0][1]-2*y>=0) Break;
  2975. /* check if upper right and center point are joined directly */
  2976. ld=line_deviation(box1, aa[3][3], j);
  2977. MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2978. if (ld >2*sq(1024/4)) Break;
  2979. /* check if upper left and center point are joined directly */
  2980. ld=line_deviation(box1, j, aa[0][3]);
  2981. MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
  2982. if (ld >2*sq(1024/4)) Break;
  2983. // center crossing of diagonal lines is small?
        // i1 = notch found on the left side, i3 = notch found on the right side
  2984. if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break;
  2985. if (gchar) ad=99*ad/100;
  2986. bc='x'; if(hchar) bc='X';
  2987. Setac(box1,bc,ad);
  2988. break;
  2989. }
  2990. // --- test \it x ---------------------------------------------------
        // disabled pixel based test; it uses cs, which is only present as a
        // comment in the declarations of this function
  2991. #if 0
  2992. for(ad=d=99;dx>4 && dy>4;){ // min 3x4
  2993. DBG( wchar_t c_ask='x'; )
  2994. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  2995. if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
  2996. if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
  2997. if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break;
  2998. if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3
  2999. && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break;
  3000. if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3
  3001. && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break;
  3002. if( gchar ) ad=97*ad/100;
  3003. if( hchar ) ad=96*ad/100;
  3004. bc='x';
  3005. Setac(box1,bc,ad);
  3006. break;
  3007. }
  3008. #endif
  3009. return box1->c;
  3010. }
  3011. static wchar_t ocr0_yY(ocr0_shared_t *sdata){
        /*
         * ocr0_yY: rule based test whether the box in sdata looks like 'y' or 'Y'.
         *
         * sdata: shared state; this function reads box1, bp, cs, hchar, gchar
         *        and holes.num.
         *
         * Two independent tests are run, first one for italic shapes, then the
         * regular one. Each starts with ad=100; a failed criterion either
         * leaves the test via Break (no candidate is set by that test) or
         * scales ad down by a percentage. A test that passes all criteria calls
         * Setac(box1,bc,ad) with bc='y' or bc='Y'.
         *
         * The points a,b,c,d named in the code (xa/ya .. xd/yd) are the ones
         * sketched in the debug output of each test.
         *
         * returns: box1->c
         */
  3012. struct box *box1=sdata->box1;
  3013. pix *bp=sdata->bp;
  3014. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  3015. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  3016. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3017. ad,xa,ya,xb,yb,xc,yc,xd,yd; /* tmp-vars */
  3018. wchar_t bc=UNKNOWN;
  3019. // --- test italic yY --------------------------------------------
  3020. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  3021. DBG( wchar_t c_ask='y'; )
  3022. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  3023. if (sdata->holes.num > 0) ad=97*ad/100;
  3024. if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2
  3025. && num_cross(0,dx-1, 1, 1,bp,cs) < 2 ) Break;
  3026. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
  3027. && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
  3028. if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
  3029. && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break;
  3030. if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2
  3031. && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break;
  3032. for(yc=y=0,xc=x=dx/4;x<dx-dx/4;x++){ // search deepest point
  3033. i=loop(bp,x,0,dy,cs,0,DO); if(i>y){ yc=y=i;xc=x; }
  3034. } if( y>12*dy/16 || y<3*dy/8 ) Break;
  3035. ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break;
  3036. yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break;
  3037. for(y=dy/8;y<yc-dy/8;y++){
  3038. if( num_cross(xc,dx-1,y,y,bp,cs) != 1 ) break;
  3039. if( num_cross(0 ,xc ,y,y,bp,cs) < 1 ) break;
  3040. } if(y<yc-dy/8) Break;
  3041. yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE);
        /* the backslash of the sketch is written as an escaped backslash so
         * that it is printed literally (a lone backslash followed by e is not
         * an ISO C escape sequence) */
  3042. g_debug(fprintf(stderr," debug_yY: \n"
  3043. " /a b \n"
  3044. " | | \n"
  3045. " -c/ \n"
  3046. " \\e-d \n");)
  3047. g_debug(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
  3048. xa,ya,xb,yb,xc,yc,xd,yd);)
  3049. if(xd>6*dx/8) ad=99*ad/100; // why this???
  3050. if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break;
  3051. // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd);
  3052. if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break;
  3053. // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;
  3054. // printf("ok");
        // 'y' if gchar is set without hchar; 'Y' if hchar is set and either
        // gchar is clear or dy<14; otherwise 'y' with a 2% penalty
  3055. bc='y';
  3056. if(gchar && !hchar) bc='y'; else
  3057. if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ???
  3058. Setac(box1,bc,ad);
  3059. break;
  3060. }
  3061. // --- test yY ---------------------------------------------------
  3062. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  3063. DBG( wchar_t c_ask='y'; )
  3064. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  3065. if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) {
  3066. if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
  3067. } else {
  3068. if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
  3069. }
  3070. if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2
  3071. && num_cross(0,dx-1, 1, 1,bp,cs) != 2 ) Break;
  3072. if( num_cross(dx/2,dx/2,0, 1,bp,cs) != 0 ) Break;
  3073. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
  3074. && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
  3075. if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
  3076. && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1
  3077. && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break;
  3078. if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00
  3079. < loop(bp, 0,dy-1-dy/8,dx,cs,0,RI) ) Break;
  3080. for(y=0,x=dx/4;x<dx-dx/4;x++){ // search lowest point
  3081. i=loop(bp,x,0,dy,cs,0,DO); if(i>y) y=i;
  3082. } if( y>10*dy/16 || y<2*dy/8 ) Break;
        // a and b: rows in the upper quarter with the smallest loop() result
        // from the left resp. right border
  3083. for(xc=xb=xa=dx,yc=yb=ya=y=0;y<dy/4;y++){
  3084. x =loop(bp, 0 , y,dx,cs,0,RI); if(x<xa){ xa=x;ya=y; }
  3085. x =loop(bp,dx-1 , y,dx,cs,0,LE); if(x<xb){ xb=x;yb=y; }
  3086. }
  3087. if(yb>dy/8) Break;
        // c: searched between dy/4 and 3*dy/4 while a row still has at least
        // two crossings; the row with the smallest j wins
  3088. for(i=dx,yc=y=dy/4;y<3*dy/4;y++){
  3089. if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break;
  3090. x =loop(bp,dx-1 ,y,dx,cs,0,LE);
  3091. x+=loop(bp,dx-1-x,y,dx,cs,1,LE);
  3092. j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; }
  3093. } yc+=dy/16+1;
  3094. yc+=loop(bp,xc,yc,i,cs,1,DO)/2;
  3095. xa+= loop(bp,xa ,ya,dx,cs,1,RI)/2;
  3096. xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2;
  3097. yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break;
  3098. /* check for serif at lower end */
  3099. for (i=0,x=dx-1;i<dy/4;i++) {
  3100. j=loop(bp,dx-1,dy-1-i,dx,cs,0,LE);
  3101. if (j>x+dx/16+1) break; /* detect serif */
  3102. if (j<x) x=j;
  3103. } if (i<dy/4) xd-=loop(bp,xd,yd,dx,cs,1,LE)/2;
  3104. MSG( fprintf(stderr," debug_yY: \n"
  3105. " a b \n"
  3106. " \\ / \n"
  3107. " c \n"
  3108. " ed ");)
  3109. MSG(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
  3110. xa,ya,xb,yb,xc,yc,xd,yd);)
  3111. // check upper left line
  3112. if( get_line2(xa,ya,xc ,yc,bp,cs,100)<95
  3113. && get_line2(xa,ya,xc-1,yc,bp,cs,100)<95 ) Break;
  3114. // check upper right line
  3115. if( get_line2(xb,yb,xc ,yc,bp,cs,100)<95
  3116. && get_line2(xb,yb,xc-1,yc,bp,cs,100)<95 ) {
  3117. // Times-Italic y ???
  3118. xb+=loop(bp,xb,yb,dx/4,cs,1,RI)-1;
  3119. yb+=loop(bp,xb,yb,dy/8,cs,1,DO)-1;
  3120. if( get_line2(xb,yb,xc ,yc,bp,cs,100)<95 ) Break;
  3121. }
  3122. if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;
  3123. // decision between V and Y is sometimes very difficult
  3124. // hope that the following code is the ultimate solution
  3125. if( yc>=5*dy/8 && !gchar)
  3126. if( get_line2(xa,ya,xd ,yd,bp,cs,100)>95 )
  3127. if( get_line2(xb,yb,xd ,yd,bp,cs,100)>95 )
  3128. { if (dx>4) { Break; } else ad=ad*98/100; } // ~V
        // xa, xb, xc are reused from here on for the loop() results from the
        // left border at rows dy/8, dy/2 and dy-1
  3129. xa=loop(bp,0,dy/8,dx,cs,0,RI);
  3130. xb=loop(bp,0,dy/2,dx,cs,0,RI);
  3131. xc=loop(bp,0,dy-1,dx,cs,0,RI);
        // the three conditions are nested, so the penalties accumulate
  3132. if( 2*xb< xa+xc ) ad=98*ad/100; // ~V
  3133. if( 2*xb<=xa+xc ) ad=98*ad/100;
  3134. if( 2*xb<=xa+xc+1 ) ad=98*ad/100;
  3135. bc='y';
  3136. if ((!gchar) && (!hchar)) ad=98*ad/100;
        // 'Y' if the box top lies above m2 minus a quarter of (m2-m1)
  3137. if(y0<box1->m2-(box1->m2-box1->m1)/4)
  3138. { bc='Y'; if(gchar) ad=98*ad/100; }
  3139. // SMALL-CAPS ???
  3140. Setac(box1,bc,ad);
  3141. break;
  3142. }
  3143. return box1->c;
  3144. }
  3145. static wchar_t ocr0_zZ(ocr0_shared_t *sdata){
        /*
         * ocr0_zZ: rule based test whether the box in sdata looks like 'z' or 'Z'.
         *
         * sdata: shared state; this function reads box1, hchar, gchar, holes.num
         *        and aa (the four line ends of the character).
         *
         * The test starts with ad=100. A failed criterion either leaves the test
         * via Break (no candidate is set) or scales ad down by a percentage.
         * If all criteria pass, Setac(box1,bc,ad) is called with bc='z', or with
         * bc='Z' when hchar is set.
         *
         * returns: box1->c
         */
  3146. struct box *box1=sdata->box1;
        /* only dbg[0]..dbg[3] are used below */
  3147. int i1,i2,i3,i4,i5,dbg[9],
  3148. d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  3149. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  3150. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3151. (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
  3152. ad; /* tmp-vars */
  3153. wchar_t bc=UNKNOWN;
  3154. // --- test zZ -------
  3155. for(ad=d=100;dx>3 && dy>3;){ // dy>dx
  3156. DBG( wchar_t c_ask='z'; ) /* for debugging purpose */
  3157. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  3158. if (sdata->holes.num > 0) ad=98*ad/100; /* # */
  3159. /* half distance to the center */
  3160. d=2*sq(128/4);
  3161. /* now we check for the 4 edges of the z */
        /* the comparisons below treat aa[0] as the upper left, aa[1] as the
         * lower left, aa[2] as the lower right and aa[3] as the upper right end */
  3162. if (aa[0][2]>d) Break;
  3163. if (aa[1][2]>d) Break;
  3164. if (aa[2][2]>d) Break;
  3165. if (aa[3][2]>d) Break;
  3166. if (aa[3][0]-aa[0][0]<dx/2) Break;
  3167. if (aa[2][0]-aa[1][0]<dx/2) Break;
  3168. if (aa[1][1]-aa[0][1]<dy/2) Break;
  3169. if (aa[2][1]-aa[3][1]<dy/2) Break;
  3170. if (aa[3][0]-aa[0][0]<4-1) Break; /* to small to hold a z */
  3171. if (aa[2][0]-aa[1][0]<4-1) Break; /* to small */
  3172. if (aa[3][1]-y0>dy/8) ad=99*ad/100;
  3173. if (aa[0][1]-y0>dy/8) ad=99*ad/100;
  3174. if (2*dx<dy) ad=99*ad/100;
  3175. MSG( \
  3176. fprintf(stderr,"aa.xy %d %d %d %d %d %d %d %d ad %d", \
  3177. aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
  3178. aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,ad);)
  3179. /* upper and lower horizontal line */
  3180. d=line_deviation(box1, aa[3][3], aa[0][3]); if (d>2*sq(1024/4)) Break;
        /* NOTE(review): assuming sq() squares its argument, any non-negative d
         * that passed the check above makes the integer division below yield 0,
         * so ad stays unchanged here - confirm the intended penalty */
  3181. ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
  3182. d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break;
  3183. /* search uppermost right > from left side */
  3184. i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0);
  3185. x=box1->frame_vector[i1][0];
  3186. y=box1->frame_vector[i1][1];
  3187. if (y-y0 > 5*dy/8) Break;
  3188. if (x-x0 < 3*dx/8) Break;
  3189. if (x-aa[0][0]<=dx/4) Break; // ~lI
  3190. if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI
  3191. if (x-aa[0][0]< dx/2) ad=99*ad/100; // ~lI, dx/2 for fat 8x10 font
  3192. /* search most right > ~2 from left side */
  3193. i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2);
  3194. MSG(fprintf(stderr,"i23xy= %d %d %d %d ad %d",x-x0,y-y0,box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)
  3195. /* dy=7 i3.y=2 for fat screen font (Apr09) */
        /* x,y still hold the coordinates of frame vector i1 here */
  3196. if ( box1->frame_vector[i3][1]-y0> (dy+2)/4
  3197. && box1->frame_vector[i3][0]-x>= 0) Break;
  3198. if ( box1->frame_vector[i3][1]-y> dy/8+1 // +1 needed for 5x7 Z Jul09
  3199. && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100;
  3200. if ( box1->frame_vector[i3][1]-y> dy/8+1
  3201. && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100;
  3202. if ( box1->frame_vector[i3][1]-y> dy/16+1) ad=99*ad/100; // 9x10 ~2
  3203. if (box1->frame_vector[i3][0]-aa[0][0]
  3204. < aa[3][0]-box1->frame_vector[i3][0]) Break; // ~lI
  3205. if (box1->frame_vector[i3][0]-aa[0][0]
  3206. <(aa[3][0]-box1->frame_vector[i3][0]-1)*2) ad=98*ad/100; // ~lI
  3207. /* better test for a bow or peaked angle */
  3208. /* upper part of a 2, on a Z a and b should be at c
  3209. .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25
  3210. ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18
  3211. ..$@@$@@@$@@@@@...
  3212. ..@@@.....$$@@@@..
  3213. ..@@.......@$@@@b.
  3214. ..$.........$@@@@.
  3215. .$$..........$@@@.
  3216. .$...........@@@@.
  3217. .............@@@@.<
  3218. .............$@@$.
  3219. ............$@@@..
  3220. ............@@$...
  3221. ............$@$...
  3222. --- snip ----
  3223. */
        /* i4 and i5 are the frame vectors nearest to two points outside the
         * upper right corner; d is their squared distance */
  3224. i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0);
  3225. i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx);
  3226. d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0])
  3227. +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]);
  3228. if (d>2*sq(dx/8+1)) Break;
  3229. if (d>0 && dx<8 ) ad=99*ad/100; // 7x10 2 ~Z
  3230. if (d>1 && dx<16) ad=98*ad/100; // 9x10 2 ~Z d=2
  3231. MSG( fprintf(stderr,"i45xy %d %d d %d ad %d upper right bow?",i4,i5,d,ad); )
  3232. /* check if upper left and upper right point are joined directly */
  3233. dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break;
  3234. /* check if lower right and upper left point are joined directly */
  3235. dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break;
  3236. /* search lowest left < from right side */
  3237. i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1);
  3238. x=box1->frame_vector[i2][0];
  3239. y=box1->frame_vector[i2][1];
  3240. if (y-y0 < 3*dy/8) Break;
  3241. if (x-x0 > 5*dx/8) Break;
  3242. if (aa[2][0]-x<=dx/4) Break; // ~lI
  3243. if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI
  3244. if (aa[2][0]-x< dx/2) ad=99*ad/100; // ~lI, dx/2 for 9x7 fat3 z
  3245. /* check if upper right and lower left point are joined directly */
  3246. dbg[2]=d=line_deviation(box1,i2, aa[3][3]);
  3247. MSG(fprintf(stderr,"left diag line? d= %d krit= 2..3*%d",d,sq(1024/4));)
  3248. if ( d >3*sq(1024/4)) Break;
  3249. if (dx>9 && d >2*sq(1024/4)) Break; // to strong for 5x7 Z
  3250. /* check if lower left and lower right point are joined directly */
  3251. dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break;
  3252. MSG( fprintf(stderr," i12 %d %d ad %d", i1, i2, ad); )
  3253. /* upper right nick - lower left nick, 0 for fat screen fonts 8x10 */
  3254. if (box1->frame_vector[i1][0]
  3255. -box1->frame_vector[i2][0]<dx/8-1) Break; /* 0 for 9x7 fat3 z */
  3256. /* 16384, 26788 for 8x10 screen font (Apr09) */
  3257. MSG( \
  3258. fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d ad %d",\
  3259. box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
  3260. box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
  3261. dbg[0], dbg[1], dbg[2], dbg[3], sq(1024)/16, ad);)
        /* each of the four stored line deviations lowers ad by
         * dbg[k]/sq(1024/4) percent (integer division) */
  3262. ad=(100-dbg[0]/sq(1024/4))*ad/100;
  3263. ad=(100-dbg[1]/sq(1024/4))*ad/100;
  3264. ad=(100-dbg[2]/sq(1024/4))*ad/100;
  3265. ad=(100-dbg[3]/sq(1024/4))*ad/100;
  3266. if ( gchar) ad=98*ad/100;
  3267. bc='z';
  3268. if( hchar ) bc='Z';
  3269. Setac(box1,bc,ad);
  3270. break;
  3271. }
  3272. return box1->c;
  3273. }
  3274. static wchar_t ocr0_wW(ocr0_shared_t *sdata){
        /*
         * ocr0_wW: rule based test whether the box in sdata looks like 'w' or 'W'.
         *
         * sdata: shared state; this function reads box1, bp, cs, hchar, gchar
         *        and holes.num.
         *
         * Two independent tests are run. Each starts with ad=100; a failed
         * criterion either leaves the test via Break (no candidate is set by
         * that test) or scales ad down by a percentage. A test that passes all
         * criteria calls Setac(box1,ac,ad) with ac='W' when hchar is set and
         * ac='w' otherwise, and then leaves its loop with a plain break.
         *
         * returns: box1->c
         */
  3275. struct box *box1=sdata->box1;
  3276. pix *bp=sdata->bp;
        /* NOTE(review): handwritten is assigned in the second test but not read
         * anywhere in this function */
  3277. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0,
  3278. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  3279. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3280. ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */
  3281. wchar_t ac;
  3282. // ------- test w ~{\it w} ---------------
  3283. for(ad=d=100;dx>3 && dy>3;){ // dy<=dx
  3284. DBG( wchar_t c_ask='w'; )
  3285. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  3286. // xa xe
  3287. // \ xc / <=ya connected xa-xb-xc-xd-xe
  3288. // xb xd <=yb
  3289. // get two lowest points i3,i4,ya
  3290. // out_x(box1);
  3291. // ~ul ~uf
  3292. // out_x(box1);
        // every row from dy/8 up to dy/2 needs at least two crossings
  3293. for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break;
  3294. if(y<dy/2) Break;
  3295. yb=dy-1;
  3296. if (dx>4) { /* 4x6 is to small */
  3297. for(y=dy-1-dy/16;y>3*dy/4;y--)
  3298. if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
  3299. if(y==3*dy/4) Break;
  3300. }
        // NOTE(review): this overwrites the yb=dy-1 assigned above in every
        // case; for dx<=4 y still holds the value left by the first scan loop
  3301. yb=y;
  3302. t1=loop(bp,0 ,dy/4,dx,cs,0,RI);
  3303. t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line?
        // i counts rows with exactly 4 crossings, j rows with 3 or more (but not 4)
  3304. for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++;
  3305. else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++;
  3306. if(i+5<dy/4 && 7*t1<dy) Break; // only for large letters
  3307. if(i+j==0 && (dy>6 || dx>4)) Break;
  3308. if(i+j==0 && dx<=4){
  3309. if (abs(loop(bp, 1,dy-1,dy,cs,0,UP)
  3310. -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N
  3311. if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2
  3312. && loop(bp, 0,dy-1,dy,cs,0,UP)>0)
  3313. || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2
  3314. && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV
  3315. ad=ad*99/100; // 4x6 font
  3316. MSG(fprintf(stderr,"ad=%d",ad);)
  3317. }
  3318. if( num_cross(0,dx-1, 1, 1,bp,cs)< 2
  3319. && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break;
        // xb and xd: points on row yb taken from the left resp. right border
  3320. x =loop(bp,0 ,yb,dx,cs,0,RI);
  3321. xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break;
  3322. x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
  3323. xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
        // xc: column between xb and xd with the largest loop() result upwards
        // from the bottom row
  3324. for(y=0,xc=x=xb+1;x<xd;x++)
  3325. if((i=loop(bp,x,dy-1,dy,cs,0,UP))>y){xc=x;y=i;}
  3326. if(dx>4 && !y) Break;
  3327. ya=dy-1-y; // flat
  3328. y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--;
  3329. if (dy>6 || dx>4) { // ~4x6 font
  3330. if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2
  3331. && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
  3332. if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2
  3333. && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
  3334. }
  3335. ya-=y/2;
        // xa and xe: points on row 1 taken from the left resp. right border
  3336. x =loop(bp,0 ,1 ,dx,cs,0,RI);
  3337. xa=loop(bp,x ,1 ,dx,cs,1,RI);
  3338. if( x+xa>xb ){ // may be, here is a small but thick letter
  3339. // later add some proofs
  3340. xa=x+xa/4;
  3341. } else {
  3342. xa=x+xa/2;
  3343. }
  3344. x =loop(bp,dx-1 ,1 ,dx,cs,0,LE);
  3345. xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2;
  3346. MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d",
  3347. xa,1,xb,yb,xc,ya,xd,yb,xe,1);)
  3348. if (ya<dy/2 && xc<dx/2) ad=95*ad/100; /* ~N */
        // the four strokes a-b, b-c, c-d and d-e are checked with get_line2()
  3349. i= loop(bp,xa ,1 ,dx,cs,1,RI);
  3350. for (x=xa;x<xa+i;x++)
  3351. if( get_line2(x,1,xb,yb,bp,cs,100)>94 ) break;
  3352. if (x==xa+i) Break; // no vert. line found
  3353. if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95
  3354. && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95
  3355. && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break;
  3356. if( get_line2(xc, ya,xd, yb,bp,cs,100)<95
  3357. && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break;
  3358. if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95
  3359. && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w
  3360. && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break;
  3361. // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
  3362. // ~ur
  3363. MSG(fprintf(stderr,"ad=%d",ad);)
  3364. for(i=0,y=5*dy/8;y<dy;y++){
  3365. x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
  3366. if (x<i) ad=98*ad/100;
  3367. } if( y<dy ) Break;
  3368. MSG(fprintf(stderr,"ad=%d",ad);)
  3369. ac=((hchar)?'W':'w');
  3370. if (gchar) ad=98*ad/100;
  3371. Setac(box1,ac,ad);
  3372. break;
  3373. }
  3374. // --- test ~w {\it w} ohmega? also handwritten -------
  3375. // italic
  3376. for(ad=d=100;dx>3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar)
  3377. DBG( wchar_t c_ask='w'; )
  3378. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  3379. // ~ul ~uf
  3380. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break;
  3381. if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40;
  3382. if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80;
  3383. for(i=0,y=0;y<dy-1;y++)
  3384. if( num_cross(0,dx-1,y,y,bp,cs)==3 ) i++;
  3385. if(i<=dy/8) Break; // three legs
  3386. // xa xe
  3387. // \ xc / <=yb connected xa-xb-xc-xd-xe
  3388. // xb xd
        // yb: first row from dy/2 downwards with exactly three crossings
  3389. for(y=dy/2;y<dy-1-dy/8;y++)
  3390. if( num_cross(0,dx-1,y,y,bp,cs)==3 ) break;
  3391. yb=y;
  3392. x =loop(bp,0 ,yb,dx,cs,0,RI);
  3393. x+=loop(bp,x ,yb,dx,cs,1,RI); if(x>dx/2) Break;
  3394. xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break;
  3395. x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
  3396. x+=loop(bp,dx-1-x,yb,dx,cs,1,LE);
  3397. xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
  3398. if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break;
  3399. if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break;
  3400. if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break;
  3401. if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break;
  3402. if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break;
  3403. // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
  3404. if (sdata->holes.num != 0) Break;
  3405. // ~ur
  3406. for(i=0,y=3*dy/4;y<dy;y++){
  3407. x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
  3408. } if( y<dy ) Break; // fail for overlapping neighbouring slanted chars?
  3409. ac=((hchar)?'W':'w');
  3410. if (gchar) ad=98*ad/100;
  3411. Setac(box1,ac,ad);
        // plain break: the candidate has been accepted; Break is the exit used
        // on the rejection paths above
  3412. break;
  3413. }
  3414. return box1->c;
  3415. }
  /*
   * ocr0_aA - rule based tests of one character box against 'A', printed 'a',
   * hand written 'a' and LATIN CAPITAL LETTER A WITH OGONEK.
   *
   * sdata supplies the box (box1), a pixmap bp, the threshold cs, the
   * hchar/gchar flags and the precomputed hole list (holes).
   *
   * Each test is written as a for(;;)-block that is entered only if the box
   * has a minimum size and is always left via Break/break, so it works as a
   * "breakable block". ad starts at 100 and is scaled down by the individual
   * heuristics; if no rule rejects the shape, Setac(box1,<char>,ad) is called.
   * The function returns box1->c.
   *
   * Calls on bp use box relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use the coordinates x0..x1, y0..y1.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), num_hole(), getpixel(),
   * Setac() and the macros Break, DBG, MSG are defined outside this block.
   * From their usage here loop(p,x,y,len,cs,col,dir) presumably returns the
   * run length of pixels of colour col (0=white, 1=black) starting at (x,y)
   * in direction dir, and num_cross() presumably counts the black segments
   * crossed along a line - confirm against their definitions.
   * The "~X" tags in the original comments presumably mark rules that reject
   * shapes which would better be read as X.
   */
  3416. static wchar_t ocr0_aA(ocr0_shared_t *sdata){
  3417. struct box *box1=sdata->box1;
  3418. pix *bp=sdata->bp;
  3419. int i,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
  3420. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  3421. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3422. ad,ya; /* tmp-vars */
  3423. // --- test A ---------------------------------------------------
  3424. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  3425. DBG( wchar_t c_ask='A'; )
  3426. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  3427. // first selection (rough sieve)
  // reject if both middle columns hit black in the lowest eighth
  3428. if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1
  3429. && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B
  3430. ya=0; /* upper end, not 0 for modified A etc. */
  // with a modifier: ya = first row of the upper half without any crossing
  3431. if (box1->modifier)
  3432. for (ya=0;ya<dy/2;ya++)
  3433. if (num_cross(0,dx-1,ya,ya,bp,cs)==0) break;
  3434. if (ya>=dy/2) ya=0; // already subtracted?
  // at least one of four rows near the top must have exactly one crossing
  3435. if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi
  3436. && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1
  3437. && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1
  3438. && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break;
  // at least one of two rows near the bottom must have exactly two crossings
  3439. if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2
  3440. && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break;
  3441. if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R
  // i1 = first row (searched from ya+dy/8 to ya+dy/2) with more than one crossing
  3442. for(y=ya+dy/8;y<ya+dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
  3443. if( y==ya+dy/2 ) Break; i1=y;
  3444. if (dy>20) i1++; /* get around some noise on fat fonts */
  // walk along row i1: white, black, white; i2 = middle of the second white run
  3445. x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break;
  3446. x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x;
  3447. x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2;
  3448. // hole (i2,i1)
  // go down column i2 from row y: black run, then white run; y ends below it
  3449. y+=loop(bp,i2,y,dy,cs,1,DO);
  3450. y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
  3451. if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) }
  3452. if (y>5*dy/6) Break;
  // exactly one hole (two with RING_ABOVE), and hole[0] must end high enough
  3453. if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1)
  3454. || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break;
  3455. // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break;
  3456. // out_x(box1);
  // scan x over the middle third; every line from (i2,y) resp. (i2,y+dy/16)
  // to (x,dy-1) must give 1 or 2 crossings, i3 remembers the last x giving 1
  3457. i3=0;i4=0;
  3458. for(x=dx/3;x<2*dx/3;x++){
  3459. i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
  3460. i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
  3461. if(i4==1) i3=x;
  3462. } if(i4<1 || i4>2 || i3==0){
  3463. // ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);)
  3464. Break;
  3465. }
  // lower right quarter must contain black
  3466. if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break;
  // right edge: distances from the right border at 1/4, 1/2 and 3/4 height;
  // the middle value must lie close to the mean of the outer two
  3467. i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE);
  3468. i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE);
  3469. i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE);
  3470. if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */
  3471. if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 ) Break;
  3472. i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // left vertical line
  3473. i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI);
  3474. i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI);
  3475. if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */
  3476. if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 || i1<i3) Break;
  3477. // lower ends could be round on thick fonts
  // sum of left and right white margins must not grow towards the bottom
  3478. for(i3=dx,y=ya+(dy-ya)/4;y<7*dy/8;y++){ // increasing width
  3479. i1=loop(bp, 0, y,dx,cs,0,RI);
  3480. i2=loop(bp,dx-1, y,dx,cs,0,LE);
  3481. if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
  3482. } if(y<7*dy/8) Break;
  3483. if ( loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)
  3484. -loop(bp, 0,dy/2 ,dx,cs,0,RI)>0) ad=97*ad/100; // italic-a
  3485. if (!hchar) ad=99*ad/100; // italic-a
  3486. Setac(box1,'A',ad);
  3487. break;
  3488. }
  3489. // --- test a -------------------------------------------
  3490. // with a open bow above the circle starting
  3491. // on the right side of the circle
  3492. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  3493. DBG( wchar_t c_ask='a'; )
  3494. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  // rough sieve: a number of horizontal and vertical probe lines must hit black
  3495. if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
  3496. if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
  3497. if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break;
  3498. if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
  3499. if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
  3500. if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break;
  // bottom row, or else the row above it, must hit black
  3501. if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 )
  3502. if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break;
  // left column (or the one at dx/8) must hit black in the lower half
  3503. if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 )
  3504. if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break;
  3505. if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d
  3506. if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an
  3507. && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break;
  // no column of the middle half may have a white run longer than dy/2
  // measured from the top or from the bottom
  3508. for( x=dx/4;x<dx-dx/4;x++ ){ // ar
  3509. i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
  3510. i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
  3511. } if( x<dx-dx/4 ) Break;
  // need at least dx/8+1 columns in the middle half with exactly 3 crossings
  3512. for(i=dx/8+1,x=dx/4;x<=dx-1-dx/4 && i;x++){
  3513. if( num_cross(x,x,0,bp->y-1, bp,cs) == 3 ) i--;
  3514. } if( i ) Break;
  // left edge must have an indentation between dy/8 and 3*dy/4
  3515. i1=loop(bp,0, dy/8,dx,cs,0,RI);
  3516. i3=loop(bp,0,3*dy/4,dx,cs,0,RI);
  3517. for(y=dy/8+1;y<3*dy/4;y++){
  3518. i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break;
  3519. } if(y==3*dy/4) Break; // ~6
  3520. // ~ s (small thick s), look for vertical line piece
  3521. for(x=3*dx/4;x<dx;x++)
  3522. if( loop(bp,x,dy/4,dy/2,cs,1,DO)>dy/4 ) break;
  3523. if( x==dx ) Break;
  // hole count other than 1 only lowers ad; with exactly one hole it must
  // be found in the lower two thirds (the inner "== 1" test is redundant)
  3524. if (sdata->holes.num != 1) ad=96*ad/100; else
  3525. if (sdata->holes.num == 1)
  3526. if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break;
  3527. // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break;
  // a hole within the upper two thirds needs extra checks (fat a versus 8)
  3528. if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){
  3529. i =loop(bp,0,dy/4,dx,cs,0,RI);
  3530. i =loop(bp,i,dy/4,dx,cs,1,RI);
  3531. if(i<dx/4+1) Break; // fat a
  3532. i =loop(bp,0,dy/4,dx,cs,0,RI);
  3533. i+=loop(bp,i,dy/4,dx,cs,1,RI);
  3534. for(y=dy/4;y<dy/2;y++)
  3535. if( num_cross(0,dx-1,y,y, bp,cs) !=2 ) break;
  3536. x =loop(bp,0,y-1,dx,cs,0,RI);
  3537. x+=loop(bp,x,y-1,dx,cs,1,RI);
  3538. if(x>i) Break; // ~ 8
  3539. }
  3540. /* test for horizontal symmetry ~8 */
  // y is set to dy+1 at the first asymmetric pixel pair, so y==dy after the
  // loops means the glyph is perfectly left/right mirror symmetric
  3541. for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
  3542. if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
  3543. if (y==dy) Break; /* ~8 */
  3544. if (hchar
  3545. && loop(bp,dx/4,0,dy,cs,0,DO)>dy/4) Break; // ~ serif d
  3546. if (hchar) ad=96*ad/100;
  3547. if (gchar) ad=96*ad/100;
  3548. Setac(box1,'a',ad);
  3549. break;
  3550. }
  3551. // --- test hand written a ---------------------------------------------------
  3552. // rarely char, without bow above the circle
  3553. for(ad=d=100;dx>3 && dy>3;){ // min 4x4
  3554. DBG( wchar_t c_ask='a'; )
  3555. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  // rough sieve: probe lines through the centre must hit black
  3556. if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  3557. if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  3558. if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break;
  3559. if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  3560. if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  // middle column from the top: white gap, upper stroke, then the inner
  // white run which must be at least dy/4 long
  3561. i = loop(bp,dx/2, 0 ,dy,cs,0,DO); if (i>dy/4) Break;
  3562. i+= loop(bp,dx/2, i ,dy,cs,1,DO); if (i>dy/2) Break;
  3563. i = loop(bp,dx/2, i ,dy,cs,0,DO); if (i<dy/4) Break;
  // lower left corner pixel must not be black
  3564. if( get_bw(x0 , x0 ,y1 , y1 ,box1->p,cs,1) == 1 ) Break;
  3565. if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break;
  3566. if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
  3567. if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
  // middle column from the bottom: i = white gap, y = gap plus lower stroke
  3568. i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break;
  // NOTE(review): i was already limited to dy/3 on the previous line, so the
  // test (i>dy/2) below can never fire; possibly y was meant - confirm
  3569. y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break;
  3570. // normal 'a' has a well separated vertical line right from the circle
  3571. // but fat 'a' is like a 'o', only bigger on the right side
  3572. if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */
  3573. && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */
  3574. && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 )
  3575. { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
  3576. <4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;}
  3577. else ad=98*ad/100;
  3578. }
  3579. if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2
  3580. || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00
  3581. if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 )
  3582. if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break;
  // if the precomputed hole count is not 1, recount without the two
  // rightmost columns
  3583. if (sdata->holes.num != 1)
  3584. if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 )
  3585. // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 )
  3586. Break;
  3587. if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break;
  // left white margin in row 0 must be larger than in row 2
  3588. if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
  3589. loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
  3590. if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4
  3591. && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100;
  // right margins at 1/4 and 3/4 height must not differ by more than dx/4
  3592. x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
  3593. i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break;
  3594. for( x=dx/4;x<dx-dx/4;x++ ){ // ar
  3595. i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
  3596. i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
  3597. } if( x<dx-dx/4 ) Break;
  3598. if( num_cross(x0 , x1, y1, y1,box1->p,cs) == 1 )
  3599. if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 )
  3600. if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4
  3601. && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o
  3602. if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q
  3603. if (hchar) ad=98*ad/100;
  3604. if (gchar) ad=98*ad/100;
  3605. // handwritten-a (alpha)
  3606. Setac(box1,'a',ad);
  3607. break;
  3608. }
  3609. // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font -------------------------
  3610. /* not sure if we should move this to a get_CentralEuropean-function */
  // Same scheme as the 'A' test above but without the modifier offset ya;
  // this block uses plain break instead of the Break macro.
  3611. for(ad=d=100;dx>2 && dy>4;){ // min 3x4
  3612. DBG( wchar_t c_ask='A'; )
  3613. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  3614. // first selection (rough sieve)
  3615. if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B
  3616. if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi
  3617. && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1
  3618. && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1
  3619. && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break;
  // the bottom row must have exactly one crossing here
  3620. if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break;
  3621. if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2
  3622. && num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break;
  3623. if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R
  3624. for(y=dy/8;y<dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
  3625. if( y==dy/2 ) break; i1=y;
  3626. if (dy>20) i1++; /* get around some noise on fat fonts */
  3627. x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break;
  3628. x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x;
  3629. x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2;
  3630. // hole (i2,i1)
  3631. y+=loop(bp,i2,y,dy,cs,1,DO);
  3632. y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
  3633. if (y>5*dy/6) break;
  3634. if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break;
  3635. // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break;
  3636. // out_x(box1);
  3637. i3=0;i4=0;
  3638. for(x=dx/3;x<2*dx/3;x++){
  3639. i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
  3640. i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
  3641. if(i4==1) i3=x;
  3642. } if(i4<1 || i4>2 || i3==0){
  3643. // ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);)
  3644. break;
  3645. }
  3646. if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break;
  3647. /* dy/4 changed to dy/6 because of screenfonts */
  3648. /* there are strange fonts, one has a serif on the upper end of A */
  3649. if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break;
  3650. if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break;
  3651. i1=loop(bp,dx-1, dy/4,dx,cs,0,LE);
  3652. i2=loop(bp,dx-1, dy/2,dx,cs,0,LE);
  3653. i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
  3654. if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 ) break;
  3655. i1=loop(bp,0 , dy/4,dx,cs,0,RI); // left vertical line
  3656. i2=loop(bp,0 , dy/2,dx,cs,0,RI);
  3657. i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI);
  3658. if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 || i1<i3) break;
  3659. // lower ends could be round on thick fonts
  3660. for(i3=dx,y=dy/4;y<6*dy/8;y++){ // increasing width
  3661. i1=loop(bp, 0, y,dx,cs,0,RI);
  3662. i2=loop(bp,dx-1, y,dx,cs,0,LE);
  3663. if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
  3664. } if(y<6*dy/8) break;
  3665. if (!hchar) ad=96*ad/100;
  3666. if (!gchar) ad=98*ad/100;
  3667. Setac(box1,(wchar_t)LATIN_CAPITAL_LETTER_A_WITH_OGONEK,ad);
  3668. break;
  3669. }
  3670. return box1->c;
  3671. }
  /*
   * ocr0_cC - rule based test of one character box against 'c' / 'C'.
   *
   * sdata supplies the box (box1), a pixmap bp, the threshold cs, the
   * hchar/gchar flags and the precomputed hole list (holes).
   *
   * The test is a for(;;)-block that is entered only for boxes of at least
   * 3x3 pixels and is always left via Break/break. ad starts at 100 and is
   * scaled down by the heuristics; if no rule rejects the shape
   * Setac(box1, hchar ? 'C' : 'c', ad) is called. One rule instead registers
   * 'G' with a fixed value of 90. The function returns box1->c.
   *
   * Calls on bp use box relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use the coordinates x0..x1, y0..y1.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), getpixel(), Setac() and the
   * macros Break and DBG are defined outside this block. From their usage
   * loop(p,x,y,len,cs,col,dir) presumably returns the run length of pixels of
   * colour col (0=white, 1=black) starting at (x,y) in direction dir - confirm.
   * The "~X" tags presumably mark rules rejecting shapes better read as X.
   */
  3672. static wchar_t ocr0_cC(ocr0_shared_t *sdata){
  3673. struct box *box1=sdata->box1;
  3674. pix *bp=sdata->bp;
  3675. int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
  3676. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  3677. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3678. ad,t1; /* tmp-vars */
  3679. wchar_t bc=UNKNOWN;
  3680. // --- test c,C ---------------------------------------------------
  3681. for(ad=d=100;dx>2 && dy>2;){ // min 3x4
  3682. DBG( wchar_t c_ask='c'; )
  3683. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  // rough sieve: black on the left at mid height and in the middle column
  // near the bottom and near the top
  3684. if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break;
  3685. if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
  3686. if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
  3687. if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[
  // find the first row between 1/4 and 3/4 height whose right half is white
  3688. for(y=y0+dy/4;y<y0+3*dy/4;y++)
  3689. if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
  3690. if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap
  3691. // measure thickness of line!
  3692. t1=loop(bp, 0,dy/2,dx,cs,0,RI);
  3693. t1=loop(bp,t1,dy/2,dx,cs,1,RI);
  3694. if (t1>dx/2) Break;
  // NOTE(review): the loop variable x is not used inside the body, every
  // iteration probes column x0+dx/2 - possibly x was intended; confirm
  3695. for(y=i1,i2=0,x=x0+dx/2;x<x0+6*dx/8;x++){
  3696. i=y-1+loop(box1->p,x0+dx/2,i1,dy,cs,0,DO);
  3697. if( i>i2 ) { i2=i; }
  3698. } if(i2<y0+5*dy/8-t1/2) Break; // i2: lowest white point above lower bow
  3699. i3=i1+1-loop(box1->p,x0+5*dx/8,i1,dy,cs,0,UP);
  3700. i =i1+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(i<i3) i3=i;
  3701. if(i3>y0+ dy/4+t1/2) Break; // highest, i3: highest point below top-line
  // continue downwards until the right half contains black again
  3702. for(y=i1;y<y1-dy/8;y++)
  3703. if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 1 ) break;
  3704. if( y-i1<dy/6 ) Break; i2=y-1; // i2: lower end of right gap
  3705. // pixelbased num_cross for straight lines could fail on small fonts
  3706. if( num_cross(x1-dx/4,x1-dx/4,i2,y0,box1->p,cs) < 1 ) Break; // ~L
  3707. if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16)
  3708. if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1
  3709. && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 // may fail
  3710. && num_cross(x1 ,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r
  // search the lower right part for the 2x2 pattern drawn in the comments
  // (one white pixel below-left of three black ones); i==0 if found
  3711. i=1;
  3712. for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
  3713. for(y=dy/2;y<dy-1-dy/8 && i;y++){ // .@
  3714. if( getpixel(bp,x ,y )>=cs
  3715. && getpixel(bp,x+1,y )< cs
  3716. && getpixel(bp,x+1,y-1)< cs
  3717. && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
  3718. }
  3719. if(!i) ad=95*ad/100; // ~G
  // no row may start clearly further left than the row at mid height
  3720. i=loop(bp,0,dy/2,dx,cs,0,RI);
  3721. for(y=0;y<dy;y++)if( loop(bp,0,y,dx,cs,0,RI)<i-1-dx/32 ) break;
  3722. if( y<dy ) Break; // ~r
  3723. // out_x(box1);
  // going up from mid height to dy/4: i5 = max right end of the left
  // stroke, i4 = min length of the white run right of it
  3724. for(i5=0,i4=dx,y=dy/2;y>=dy/4;y--){
  3725. x =loop(bp,0,y,dx,cs,0,RI);
  3726. x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x;
  3727. i =loop(bp,x,y,dx,cs,0,RI); if(i<i4) i4=i;
  3728. if( i5<x-dx/32 && i>i4+dx/32 ) break; // unusual for c, more a bad e?
  3729. } if( y>=dy/4 ) Break;
  3730. if( !hchar ){ // test for e where the middle line is partly removed
  3731. x= loop(bp,0,dy/2,dx,cs,0,RI);
  3732. x=x +loop(bp,x,dy/2,dx,cs,1,RI);
  3733. y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1;
  3734. i=x +loop(bp,x,y,dx,cs,1,RI);
  3735. i=i +loop(bp,i,y,dx,cs,0,RI);
  3736. if( num_cross(x ,x ,1,dy/2,bp,cs) > 1
  3737. || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 )
  3738. if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1
  3739. || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e
  3740. }
  3741. if( dy>16 && dy>3*dx && hchar ){ // ~[
  // x = left margin near the top plus left margin near the bottom,
  // i = twice the left margin at mid height. The original wrote "x=+loop",
  // which assigns (unary plus) and silently drops the top margin; the
  // comparison below is against the sum, so accumulate with "+=".
  3742. x= loop(bp,0, dy/16,dx,cs,0,RI);
  3743. x+=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
  3744. i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2;
  // left edge not bowed outwards at mid height and fewer than two
  // crossings at dy/4: reject
  3745. if( i>=x )
  3746. if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break;
  3747. }
  3748. if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2
  3749. && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
  3750. && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
  3751. && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */
  // x, i = left and right white margins at mid height
  3752. x =loop(bp, 0,dy/2,dx,cs,0,RI);
  3753. i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
  3754. if( (i<dx/2 || i<3) && hchar && dy>7 )
  3755. if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
  3756. && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
  3757. && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE)
  3758. > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)
  3759. && loop(bp,dx-1, dy/ 8,dx,cs,0,LE)
  3760. > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~(
  3761. // printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16);
  3762. // ~G without characteristic crotchet
  // in this case 'G' is registered with a fixed value of 90 and the c/C
  // test is left
  3763. if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4)
  3764. if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){
  3765. Setac(box1,'G',90);
  3766. Break;
  3767. }
  3768. if (hchar){
  // same 2x2 pattern search as above, now in the upper right part
  3769. i=1;
  3770. for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
  3771. for(y= 1;y<dy/4 && i;y++){ // .@
  3772. if( getpixel(bp,x ,y )>=cs
  3773. && getpixel(bp,x+1,y )< cs
  3774. && getpixel(bp,x+1,y-1)< cs
  3775. && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
  3776. }
  3777. if (i) ad=98*ad/100; // ~(
  3778. if (dy>2*dx) ad=99*ad/100;
  3779. }
  3780. if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100;
  3781. i= loop(bp,dx-1,dy/16,dx,cs,0,LE);
  3782. j= loop(bp,dx/2,0 ,dy,cs,0,DO);
  3783. if (i>=dx/2 && j>dy/8 && j>2 && j<dy/2) Break; // t
  3784. if (dy>=3*dx && dy>12) ad=99*ad/100; // (
  // right column black at the bottom (resp. top) while the middle column
  // starts with a white run there: lower ad
  3785. i= loop(bp,dx-1,dy-1,dy,cs,0,UP);
  3786. j= loop(bp,dx/2,dy-1,dy,cs,0,UP);
  3787. if (i==0 && j>dy/8) ad=95*ad/100; // <
  3788. i= loop(bp,dx-1, 0,dy,cs,0,DO);
  3789. j= loop(bp,dx/2, 0,dy,cs,0,DO);
  3790. if (i==0 && j>dy/8) ad=95*ad/100; // <
  3791. if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // <
  3792. if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // <
  3793. if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // <
  3794. if (gchar) ad=98*ad/100; // could happen for 5x7 font
  3795. bc=((hchar)?'C':'c');
  3796. Setac(box1,bc,ad);
  3797. break;
  3798. }
  3799. return box1->c;
  3800. }
  /*
   * ocr0_lL - rule based tests of one character box against 'L' and 'l'.
   *
   * sdata supplies the box (box1), a pixmap bp, the threshold cs, the
   * hchar/gchar flags and the precomputed hole list (holes).
   *
   * Each test is a for(;;)-block that is entered only if the box satisfies a
   * size condition and is always left via Break/break. ad starts at 100 and
   * is scaled down by the heuristics; if no rule rejects the shape
   * Setac(box1,<char>,ad) is called. The function returns box1->c.
   *
   * Calls on bp use box relative coordinates (0..dx-1, 0..dy-1), calls on
   * box1->p use the coordinates x0..x1, y0..y1.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), Setac() and the macros Break
   * and DBG are defined outside this block. From their usage
   * loop(p,x,y,len,cs,col,dir) presumably returns the run length of pixels of
   * colour col (0=white, 1=black) starting at (x,y) in direction dir - confirm.
   * The "~X" tags presumably mark rules rejecting shapes better read as X.
   */
  3801. static wchar_t ocr0_lL(ocr0_shared_t *sdata){
  3802. struct box *box1=sdata->box1;
  3803. pix *bp=sdata->bp;
  3804. int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
  3805. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  3806. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  3807. ad; /* tmp-vars */
  3808. // --- test L ---------------------------------------------------
  3809. for(ad=d=100;dx>2 && dy>4;){ // min 3x4
  3810. DBG( wchar_t c_ask='L'; )
  3811. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  // i = white run from the right border at mid height
  3812. i=loop(bp,dx-1,dy/2,dx,cs,0,LE);
  3813. if (i<3 && dy>8) {Break;}
  3814. if (i<dx/2) ad=98*ad/100; // ~G
  3815. if (dx<8 && 3*loop(bp,dx-1,0,dy,cs,0,DO)<=dy) break; // ~G
  // longest black run (after the left margin) within the lowest quarter
  3816. for( i=i1=0,y=y1-dy/4;y<=y1;y++){ // check bottom line (i1)
  3817. j=loop(box1->p,x0 ,y,dx,cs,0,RI);
  3818. j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; }
  3819. } if( i<3*dx/4 ) Break; i1=i; // length of horizontal line
  3820. // line thickness (i2)
  3821. i=loop(box1->p,x0 ,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break;
  3822. j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break; i2=j;
  3823. if (loop(bp,dx-1, 0,dx,cs,0,LE)<dx/8
  3824. && loop(bp,dx-1, dy/4,dx,cs,0,LE)>dx/2
  3825. && loop(bp, 0,5*dy/8,dx,cs,0,RI)<dx/4
  3826. && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<dx/4) Break; // ~G
  // every row of the upper three quarters: limited left margin, and (below
  // the top eighth) a stroke width x close to i2; x must also stay clearly
  // shorter than the bottom line i1
  3827. for( i=1,y=y0;y<=y1-dy/4 && i;y++){ // check vertical line
  3828. j=loop(box1->p,x0 ,y,dx,cs,0,RI);
  3829. if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; }
  3830. x=loop(box1->p,x0+j,y,dx,cs,1,RI);
  3831. if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0;
  3832. } if( !i ) Break;
  // exactly one crossing in rows at 1/2, 1/3 and 1/8 of the height
  3833. if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break;
  3834. if( num_cross(0, dx-1 , dy/3 , dy/3,bp,cs) != 1 ) Break;
  3835. if( num_cross(0, dx-1 , dy/8 , dy/8,bp,cs) != 1 ) Break;
  3836. if (loop(bp,0,dy-1,dx,cs,0,RI)
  3837. -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
  3838. if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4
  3839. if ( gchar) ad=98*ad/100;
  3840. if (!hchar) ad=99*ad/100;
  3841. if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l
  3842. Setac(box1,'L',ad);
  3843. break;
  3844. }
  3845. // --- test l ---------------------------------------------------
  3846. // recognize a "l" is a never ending problem, because there are lots of
  3847. // variants and the char is not very unique (under construction)
  3848. // --- test italic l ---------------------------------------------------
  3849. // --- test l ~italic (set flag-italic) --------------------------------
  3850. // if unsure d should be multiplied by 80..90%
  3851. for(ad=d=100; dy>dx && dy>5;){ // min 3x4
  3852. DBG( wchar_t c_ask='l'; )
  3853. if( box1->dots>0 ) Break;
  3854. if( num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1
  3855. || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1 ) Break;
  3856. // measure thickness
  // i1/i2 = largest/smallest stroke width over the middle half of the rows
  3857. for(i1=0,i2=dx,y=dy/4;y<dy-dy/4;y++){
  3858. j = loop(bp,0,y,dx,cs,0,RI);
  3859. j = loop(bp,j,y,dx,cs,1,RI);
  3860. if( j>i1 ) { i1=j; } // thickest
  3861. if( j<i2 ) { i2=j; } // thinnest
  3862. }
  3863. if ( i1>2*i2 ) Break;
  // lower ad depending on the box position relative to box1->m1..m3
  3864. if(box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100;
  3865. if( box1->m2-box1->m1>1 && y0>=box1->m2 ) ad=94*ad/100;
  // upper quarter: i0 = largest stroke width, i3 = widest left margin seen
  // since the last time i0 grew
  3866. for(i0=0,i3=0,y=0;y<dy/4;y++){
  3867. j = loop(bp,0,y,dx,cs,0,RI);
  3868. if( j>i3 ) { i3=j; } // widest space
  3869. j = loop(bp,j,y,dx,cs,1,RI);
  3870. if( j>i0 ) { i0=j;i3=0; } // thickest
  3871. }
  3872. if ( i0>4*i2 || 3*i3>2*dx)
  3873. if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8
  3874. || loop(bp, 0,dy-1,dx,cs,0,RI)>3*dx/8) Break; // ~7
  3875. // detect serifs
  // NOTE(review): both loop() calls of each pair scan for colour 0; if
  // loop() behaves as assumed in the header the second call starts on the
  // pixel that ended the first scan - check whether colour 1 was intended
  3876. x =loop(bp,0, 0,dx,cs,0,RI);
  3877. i3=loop(bp,x, 0,dx,cs,0,RI);
  3878. x =loop(bp,0, 1,dx,cs,0,RI);
  3879. x =loop(bp,x, 1,dx,cs,0,RI); if(x>i3) i3=x;
  3880. x =loop(bp,0,dy-1,dx,cs,0,RI);
  3881. i4=loop(bp,x,dy-1,dx,cs,0,RI);
  3882. x =loop(bp,0,dy-2,dx,cs,0,RI);
  3883. x =loop(bp,x,dy-2,dx,cs,0,RI); if(x>i4) i4=x;
  3884. if( i3>i1+dx/8+1 && i4>i1+dx/8+1 ) Break; // ~I
  // upper quarter: right margin must not grow by more than 1 per row;
  // j=1 if two consecutive probes show two crossings
  3885. for(i=dx,j=0,y=1;y<dy/4;y++){
  3886. x=loop(bp,dx-1,y,dx,cs,0,LE); if(x>i+1) break; i=x;
  3887. if( num_cross(0,dx-1,y ,y ,bp,cs)==2
  3888. && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1;
  3889. } if ( y<dy/4 ) Break;
  3890. if(j){ // if loop at the upper end, look also on bottom
  3891. for(y=3*dy/4;y<dy;y++){
  3892. if( num_cross(0,dx-1,y ,y ,bp,cs)==2
  3893. && num_cross(0,dx-1,y-1-dy/32,y-1-dy/32,bp,cs)==2 ) break;
  3894. } if ( y==dy ) Break;
  3895. }
  3896. // if( get_bw(x0,x1,y0,y1,p,cs,2) == 0 ) Break; // unsure !I|
  3897. if(dx>3)
  3898. if( get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1 )
  3899. if( get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break;
  3900. if( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 ) // large I ???
  3901. if( get_bw(0 ,dx/8,dy/4,dy/3,bp,cs,1) != 1 )
  3902. if( get_bw(dx-1-dx/8,dx-1,0 ,dy/8,bp,cs,1) == 1 )
  3903. if( get_bw(0 ,dx/8,0 ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100;
  3904. if( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 ) // r ???
  3905. if( get_bw(0 ,dx/8,dy/2,dy-1,bp,cs,1) == 1 )
  3906. if( get_bw(dx-1-dx/8,dx-1,0 ,dy/3,bp,cs,1) == 1 )
  3907. if( get_bw(0 ,dx/8,0 ,dy/3,bp,cs,1) == 1 ) Break;
  // in the upper three quarters no two adjacent rows may both differ from
  // exactly one crossing
  3908. for( y=1;y<12*dy/16;y++ )
  3909. if( num_cross(0, dx-1, y , y ,bp,cs) != 1 // sure ?
  3910. && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break;
  3911. if( y<12*dy/16 ) Break;
  3912. if(dx>3){
  // the middle half of every row in the lower half must contain black
  3913. for( y=dy/2;y<dy-1;y++ )
  3914. if( get_bw(dx/4,dx-1-dx/4,y,y,bp,cs,1) != 1 ) break;
  3915. if( y<dy-1 ) Break;
  3916. }
  3917. // test whether the right edge is straight
  3918. for(x=dx,y=bp->y-1-5*dy/16;y>=dy/5;y--){ // edge falling off to the right / kink?
  3919. i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
  3920. if( i-2-dx/16>=x ) break;
  3921. if( i<x ) x=i;
  3922. }
  3923. if (y>=dy/5 ) Break;
  3924. // test whether the left edge is straight
  3925. for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // same kink test on the left margin
  3926. i=loop(bp,0,y,x1-x0,cs,0,RI);
  3927. if( i+2+dx/16<x ) break;
  3928. if( i>x ) x=i;
  3929. }
  3930. if (y>=dy/5 ) Break;
  // black pixels between the box bottom and box1->m4+dy/8 lower ad
  3931. if (box1->m4 && y1<box1->m4)
  3932. if ( get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 )
  3933. ad=ad*97/100; // unsure !l|
  3934. i=loop(bp,dx-1,dy/16,dx,cs,0,LE);
  3935. j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
  3936. if( i>3 && j>3 )
  3937. if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t
  // look for a row with two crossings in the lower part
  3938. for(y=5*dy/8;y<dy;y++)
  3939. if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) break;
  3940. if( y<dy ){
  3941. i =loop(bp,0,y,dx,cs,0,RI);
  3942. i+=loop(bp,i,y,dx,cs,1,RI);
  3943. i+=loop(bp,i,y,dx,cs,0,RI)/2; // middle of v-gap
  3944. if( num_cross(0,i,5*dy/8,5*dy/8,bp,cs)==0
  3945. && num_cross(i,i,5*dy/8, y,bp,cs)==0 ) Break; // ~J
  3946. }
  3947. if ( dx>8
  3948. && loop(bp, 0,3*dy/4,dx,cs,0,RI)>=dx/4
  3949. && loop(bp, 0,7*dy/8,dx,cs,0,RI)<=dx/8
  3950. && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8
  3951. && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J
  3952. if ( 2*i3>5*i1 ) // hmm \tt l can look very similar to 7
  3953. if ( loop(bp,0,dy/4,dx,cs,0,RI)>dx/2
  3954. && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7
  3955. if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2
  3956. && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) {
  3957. if (loop(bp,0,dy-1,dx,cs,0,RI)<dx/8) ad=99*ad/100; // ~L
  3958. if(5*dx>2*dy) ad=99*ad/100; // ~L
  3959. if(5*dx>3*dy) ad=99*ad/100; // ~L
  3960. }
  3961. if(!hchar){ // right part (bow) of h is never a l
  3962. if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
  3963. && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
  3964. }
  3965. if( dx>3 && dy>3*dx )
  3966. if( loop(bp,dx/4,dy-1 ,dy,cs,0,UP)< dy/4
  3967. && loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)>=dx/2
  3968. && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){
  3969. ad=98*ad/100; // ~]
  3970. if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break;
  3971. }
  // x = first column of the left half that hits black in the upper quarter
  3972. for(x=0;x<dx/2;x++)
  3973. if( get_bw( x, x, 0,dy/4 ,bp,cs,1) == 1 ) break;
  3974. // works only for perpendicular char
  3975. if( get_bw( x,x+dx/16, 0,dy/16,bp,cs,1) == 0
  3976. && get_bw( x,x+dx/16,dy/4 ,dy/2 ,bp,cs,1) == 0
  3977. && get_bw( x,x+dx/16,dy/16,dy/4 ,bp,cs,1) == 1 ){
  // NOTE(review): i keeps its initial value dx inside this loop; whether
  // x>i can ever become true depends on loop() - confirm
  3978. for(i=dx,y=0;y<dy/4;y++){
  3979. x=loop(bp,0,y,dx,cs,0,RI);
  3980. if( x>i ) break;
  3981. }
  3982. if( x>=loop(bp,0,y+1,dx,cs,0,RI) )
  3983. if( loop(bp,0 ,0,dy,cs,0,DO)>1 )
  3984. if( loop(bp,0 ,0,dy,cs,0,DO)
  3985. - loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1 ) Break; // ~1 Jul00,Nov00
  3986. if( num_cross(0,dx/2,y-1,y-1,bp,cs)==2 ) Break; // ~1
  3987. }
  3988. if(dx<8 && dy<12){ // screen font
  3989. i= loop(bp,0,0,dy,cs,0,DO);
  3990. if( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2
  3991. && loop(bp,0,dy/2,dx,cs,0,RI)>=2
  3992. && i>1 && i<dy/2 ) Break; // ~1
  3993. }
  3994. if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
  3995. && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
  3996. && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
  3997. && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~] */
  3998. i=loop(bp,dx-1,dy/2,dx,cs,0,LE);
  3999. if( loop(bp, 0,dy/2,dx,cs,0,RI)>=dx/2
  4000. && (i<dx/2 || i==0) ) ad=98*ad/100; // ~]
  4001. if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2
  4002. && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
  4003. && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
  4004. && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */
  4005. x =loop(bp, 0,dy/2,dx,cs,0,RI); // convex/concave? ~()
  4006. i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
  // both ends further right on the left edge and further left on the right
  // edge than the middle: shape bends like '('
  4007. if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
  4008. && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
  4009. && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
  4010. && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
  4011. if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
  4012. && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
  4013. && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
  4014. && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
  4015. i= loop(bp, 0, 0,dy,cs,0,DO); // horizontal line?
  4016. if(dy>=12 && i>dy/8 && i<dy/2){
  // rows i and i+1 compared with the rows at 3/16 and 8/16 of the height:
  // if both margins are clearly smaller there on both sides, reject
  4017. if( loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
  4018. >loop(bp,dx-1, i,dx,cs,0,LE)
  4019. || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
  4020. >loop(bp,dx-1, i+1,dx,cs,0,LE) )
  4021. if( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
  4022. >loop(bp,dx-1, i,dx,cs,0,LE)
  4023. || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
  4024. >loop(bp,dx-1, i+1,dx,cs,0,LE) )
  4025. if( loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8
  4026. >loop(bp, 0, i,dx,cs,0,RI)
  4027. || loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8
  4028. >loop(bp, 0, i+1,dx,cs,0,RI) )
  4029. if( loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8
  4030. >loop(bp, 0, i,dx,cs,0,RI)
  4031. || loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8
  4032. >loop(bp, 0, i+1,dx,cs,0,RI) ) Break; // ~t
  4033. if( loop(bp, 0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t
  4034. if( loop(bp, 0,8*dy/16,dx,cs,0,RI)>dx/8
  4035. && loop(bp, 0, i,dx,cs,1,RI)>=dx-1
  4036. && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8
  4037. && loop(bp,dx-1, i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t
  4038. }
  4039. // if( vertical_detected && dx>5 )
  4040. if( loop(bp,0, 1,dx,cs,0,RI)>=dx/2
  4041. && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8
  4042. || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 ) )
  4043. if( ( loop(bp,dx-1, 0,dx,cs,0,LE)<=dx/8
  4044. || loop(bp,dx-1, 1,dx,cs,0,LE)<=dx/8 )
  4045. && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) ad=98*ad/100; // ~/
  4046. if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
  4047. if (!hchar || loop(bp,0,dy/4,dx,cs,0,RI)>dx/2){ // ~z
  // j = largest stroke width over the three rows dy/16 .. dy/16+2
  4048. i=loop(bp,0,dy/16 ,dx,cs,0,RI);
  4049. i=loop(bp,i,dy/16 ,dx,cs,1,RI); j=i;
  4050. i=loop(bp,0,dy/16+1,dx,cs,0,RI);
  4051. i=loop(bp,i,dy/16+1,dx,cs,1,RI); if (i>j) j=i;
  4052. i=loop(bp,0,dy/16+2,dx,cs,0,RI);
  4053. i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i;
  4054. if (j*4>=dx*3) ad=98*ad/100; // ~z
  4055. if (j*8>=dx*7) ad=96*ad/100; // ~z
  4056. }
  4057. if( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
  4058. if( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
  4059. if (ad==100) ad--; /* I have to fix that:
  4060. .@@@@.<-
  4061. @@..@@
  4062. ....@@
  4063. ....@@<
  4064. ...@@.
  4065. ..@@@.
  4066. ..@@..
  4067. .@@...
  4068. @@....
  4069. @@@@@@<-
  4070. */
  4071. if(!hchar) ad=ad*99/100;
  4072. if( gchar) ad=ad*99/100;
  4073. Setac(box1,'l',ad);
  4074. // if( i<100 ) Break; ????
  4075. // if( loop(bp,0, 1,dx,cs,0,RI)<=dx/8
  4076. // && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8
  4077. // && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1;
  4078. break;
  4079. }
  4080. return box1->c;
  4081. }
  /*
   * ocr0_oO - rule based test for the letters 'o' and 'O' (and the digit '0').
   *
   * Reads the candidate box sdata->box1, the bitmap sdata->bp, the threshold
   * sdata->cs, the flags sdata->hchar / sdata->gchar and the hole list
   * sdata->holes.  In this function bp is always addressed with coordinates
   * 0..dx-1 / 0..dy-1, box1->p with the coordinates x0..x1 / y0..y1.
   *
   * The certainty ad starts at 100 and is only ever lowered.  A check ending
   * in Break abandons the test without registering anything; if all hard
   * checks pass, the candidates are registered through Setac().
   * Returns box1->c in every case.
   *
   * NOTE(review): loop(), get_bw(), num_cross() and the Break / Setac / MSG /
   * DBG macros are defined outside this block.  From their use here loop()
   * presumably returns the length of a run of white (col 0) or black (col 1)
   * pixels starting at (x,y) in direction RI/LE/UP/DO, get_bw() presumably
   * reports which colours occur in a rectangle and num_cross() presumably
   * counts line crossings - confirm before relying on the remarks below.
   */
  4082. static wchar_t ocr0_oO(ocr0_shared_t *sdata){
  4083. struct box *box1=sdata->box1;
  4084. pix *bp=sdata->bp;
  4085. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  4086. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  4087. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  4088. ad; /* tmp-vars */
  4089. wchar_t bc=UNKNOWN;
  4090. // --- test o,O ---------------------------------------------------
  // The for statement is used as a guarded block: its body ends with an
  // unconditional break, so it is executed at most once.
  4091. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  4092. DBG( wchar_t c_ask='o'; )
  // exactly one hole is required
  4093. if (sdata->holes.num !=1 ) Break;
  // get_bw() must return 1 for the left and right half of the middle row and
  // for the lower and upper half of the middle column, and 0 for the centre
  // part of the middle column (presumably: ink all around, none inside)
  4094. if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  4095. if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  4096. if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break;
  4097. if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  4098. if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/2 , y1-dy/3,box1->p,cs,1) != 0 ) Break;
  // the y-range of the hole has to cover dy/3 .. dy-1-dy/3
  4099. if (sdata->holes.hole[0].y0 > dy/3
  4100. || sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
  // the middle column or its right neighbour must give num_cross()==2
  4101. if( num_cross(x0+dx/2 ,x0+dx/2 ,y0, y1 ,box1->p,cs) != 2
  4102. && num_cross(x0+dx/2+1,x0+dx/2+1,y0, y1 ,box1->p,cs) != 2 ) Break;
  // For each of the four border lines (top, bottom, left, right) the
  // outermost line or the one next to it must give num_cross()==1; the test
  // only fails when both of them differ.
  4103. if( num_cross(x0+dx/3,x1-dx/4,y0 , y0 ,box1->p,cs) != 1 ) // AND
  4104. if( num_cross(x0+dx/3,x1-dx/4,y0+1 , y0+1,box1->p,cs) != 1 ) Break;
  4105. if( num_cross(x0+dx/4,x1-dx/3,y1 , y1 ,box1->p,cs) != 1 ) // against noise ("rauschen")
  4106. if( num_cross(x0+dx/4,x1-dx/3,y1-1 , y1-1,box1->p,cs) != 1 ) Break;
  4107. if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  4108. if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  4109. if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  4110. if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  // the run from the left edge in row 0 must be longer than the one in row 2
  4111. if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
  4112. loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
  // in the lower third the run from the right edge must never get shorter
  // from one row to the next; y<dy after the loop means it did
  4113. x=loop(bp,dx-1,dy-1-dy/3,x1-x0,cs,0,LE); // should be minimum
  4114. for( y=dy-1-dy/3;y<dy;y++ ){
  4115. i=loop(bp,dx-1,y,x1-x0,cs,0,LE);
  4116. if( i<x ) break; x=i;
  4117. }
  4118. if( y<dy ) Break;
  4119. // ~D
  // sum of the left runs near the top and bottom rows must exceed twice the
  // left run of the middle row by more than dx/8
  4120. if( loop(bp,0, dy/16,dx,cs,0,RI)
  4121. + loop(bp,0,dy-1-dy/16,dx,cs,0,RI)
  4122. <= 2*loop(bp,0, dy/2 ,dx,cs,0,RI)+dx/8 ) Break; // not convex
  4123. if( loop(bp,0 , 1+dy/16,dx,cs,0,RI) + dx/4
  4124. <= loop(bp,dx-1, 1+dy/16,dx,cs,0,LE) ) Break; // Dec00
  // a right run > dx/8 combined with a left run < dx/16 in the same row
  // (checked near the top and near the bottom) rejects the box
  4125. if( loop(bp,dx-1, dy/16,dx,cs,0,LE)>dx/8 )
  4126. if( loop(bp,0 , dy/16,dx,cs,0,RI)<dx/16 ) Break;
  4127. if( loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)>dx/8 )
  4128. if( loop(bp,0 ,dy-1-dy/16,dx,cs,0,RI)<dx/16 ) Break;
  // get_bw()==0 in both right corners together with get_bw()==1 in one of
  // the left corners rejects the box
  4129. if( get_bw(x1-dx/32,x1,y0,y0+dy/32,box1->p,cs,1) == 0
  4130. && get_bw(x1-dx/32,x1,y1-dy/32,y1,box1->p,cs,1) == 0
  4131. // && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1
  4132. && ( get_bw(0,dx/32,0,dy/32,bp,cs,1) == 1
  4133. || get_bw(x0,x0+dx/32,y1-dy/32,y1,box1->p,cs,1) == 1 ) ) Break; // ~D
  4134. // search lowest inner white point
  // for every column: i = first loop() run upwards from the bottom row plus
  // the following run of the other colour; y keeps the smallest sum and j
  // the column where it occurred (the last one on ties because of <=)
  4135. for(y=dy,j=x=0;x<dx;x++) {
  4136. i =loop(bp,x,dy-1 ,y1-y0,cs,0,UP);
  4137. i+=loop(bp,x,dy-1-i,y1-y0,cs,1,UP);
  4138. if (i<=y) { y=i; j=x; }
  4139. } i=y;
  // From here on the checks are soft: they only lower ad.
  4140. // italic a
  // ad is lowered once per matching row, so several rows add up
  4141. for(y=dy-1-i;y<dy-1;y++)
  4142. if( num_cross(j,dx-1,y,y,bp,cs) > 1 ) ad=99*ad/100; // ~a \it a
  4143. for(y=0;y<dy-1-i;y++)
  4144. if( num_cross(0,dx-1,y,y,bp,cs) > 2 ) ad=98*ad/100; // ~a \it a
  4145. if (loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)<dx/8) ad=98*ad/100; // \it a
  4146. if (loop(bp,dx-1, 0,x1-x0,cs,0,LE)<dx/8) ad=98*ad/100; // \it a
  4147. if (loop(bp,dx-1,dy-1-dy/8,x1-x0,cs,0,LE)+1+dx/16
  4148. <loop(bp, 0,dy-1-dy/8,x1-x0,cs,0,RI))
  4149. { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // \it a
  4150. if (loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)+1+(dy+3)/8
  4151. <loop(bp, 0,dy-1,y1-y0,cs,0,UP))
  4152. { ad=98*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // \it a
  // runs into the middle column from top and bottom differ by more than
  // dy/8, or more than one crossing in the first or last row
  4153. if (abs(loop(bp,dx/2, 0,dy,cs,0,DO)
  4154. -loop(bp,dx/2,dy-1,dy,cs,0,UP))>dy/8
  4155. || num_cross(0,dx-1, 0, 0,bp,cs) > 1
  4156. || num_cross(0,dx-1,dy-1,dy-1,bp,cs) > 1
  4157. ) ad=98*ad/100; // ~bq
  4158. // corrections for wrong recognized m1,m2 (all chars of same height)
  // i is reused as the upper case flag: set when hchar is set and
  // 2*y0 < m1+m2
  4159. if (hchar && 2*y0<box1->m1+box1->m2) i=1; else i=0;
  4160. if (gchar) ad=99*ad/100;
  4161. bc='o';
  4162. if (i){ bc='O'; }
  4163. if ( bc=='O' && ad>99) ad=99; /* we can never 100% sure, 0O */
  // an 'o' without hchar whose top lies above m2 (y0<m2) also gets the
  // alternatives 'O' and '0' with reduced certainty
  4164. if (bc=='o' && (!hchar) && y0<box1->m2) {
  4165. Setac(box1,'O',98*ad/100);
  4166. Setac(box1,'0',98*ad/100);
  4167. }
  4168. Setac(box1,bc,ad);
  // the digit '0' is always offered as alternative: same certainty for 'O',
  // reduced certainty for 'o'
  4169. if (bc=='O') Setac(box1,'0',ad);
  4170. if (bc=='o') Setac(box1,'0',98*ad/100);
  4171. break;
  4172. }
  4173. return box1->c;
  4174. }
  /*
   * ocr0_pP - rule based test for the letters 'p' and 'P'.
   *
   * Reads the candidate box sdata->box1, the bitmap sdata->bp, the threshold
   * sdata->cs, the flags sdata->hchar / sdata->gchar and sdata->holes.num.
   * In this function bp is addressed with the coordinates 0..dx-1 / 0..dy-1
   * (plus bp->x, bp->y in the serif-F check), box1->p with x0..x1 / y0..y1.
   *
   * The certainty ad starts at 100 and is only ever lowered.  A check ending
   * in Break abandons the test; otherwise exactly one of 'p' / 'P' is
   * registered through Setac().  Returns box1->c in every case.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), num_hole() and the Break /
   * Setac / DBG macros are defined outside this block; loop() presumably
   * returns the length of a run of white (col 0) or black (col 1) pixels
   * starting at (x,y) in the given direction - confirm before relying on the
   * geometric remarks below.
   */
  4175. static wchar_t ocr0_pP(ocr0_shared_t *sdata){
  4176. struct box *box1=sdata->box1;
  4177. pix *bp=sdata->bp;
  4178. int i,j,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
  4179. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  4180. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  4181. ad; /* tmp-vars */
  4182. wchar_t bc=UNKNOWN;
  4183. // --- test pP ---------------------------------------------------
  // The for statement is used as a guarded block: its body ends with an
  // unconditional break, so it is executed at most once.
  4184. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  4185. DBG( wchar_t c_ask='p'; )
  4186. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  // get_bw() checks on the left half at 3/4 and 1/2 height and on the right
  // three quarters at 1/4 height
  4187. if( get_bw(0 , dx/2,3*dy/4,3*dy/4,bp,cs,1) != 1 ) Break;
  4188. if( get_bw(0 , dx/2, dy/2, dy/2,bp,cs,1) < 1 ) Break;
  4189. if( get_bw(dx/4, dx-1, dy/4, dy/4,bp,cs,1) != 1 ) Break;
  // at 3/4 height the run from the right edge must be at least dx/4 long
  4190. i= loop(bp,dx-1,3*dy/4,dx,cs,0,LE); if (i<dx/4) Break;
  // one of three columns must give num_cross()==2 between the top and
  // y1-3*dy/16
  4191. if( num_cross(x1-3*i/4,x1-3*i/4, y0, y1-3*dy/16,box1->p,cs) != 2 )
  4192. if( num_cross(x0+dx/2 ,x0+dx/2 , y0, y1-3*dy/16,box1->p,cs) != 2 )
  4193. if( num_cross(x0+dx/2+1,x0+dx/2+1, y0, y1-3*dy/16,box1->p,cs) != 2 ) Break;
  // row 7*dy/8 or the row above it must give num_cross()==1
  4194. if( num_cross(0,dx-1,7*dy/8 ,7*dy/8 ,bp,cs) != 1 )
  4195. if( num_cross(0,dx-1,7*dy/8-1,7*dy/8-1,bp,cs) != 1 ) Break;
  4196. if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
  4197. if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 3 ) // \it p with nice curve
  4198. if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
  4199. if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break;
  // same crossing test, but starting at i-1 where i (at least 1) is the left
  // run of the middle row
  4200. i= loop(bp,0,dy/2,dx,cs,0,RI); if(i<1) i++;
  4201. if( num_cross(i-1,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
  4202. if( num_cross(i-1,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break;
  4203. i1= loop(bp, 0,3*dy/8,dx,cs,0,RI); if (i1>=dx/2) ad=90*ad/100;
  4204. i2=i1+loop(bp,i1,3*dy/8,dx,cs,1,RI); // upper x-position of v line
  4205. i3= loop(bp, 0,7*dy/8,dx,cs,0,RI);
  4206. i4=i3+loop(bp,i3,7*dy/8,dx,cs,1,RI); // lower x-position of v line
  4207. // out_x(box1);printf(" p:");
  // x interpolates linearly between i2 (reached at y=3*dy/8) and i4 (reached
  // at y=7*dy/8); the left run of no row between dy/8 and 7*dy/8 may exceed
  // it by more than dx/16
  4208. for ( y=dy/8; y<7*dy/8; y++ ){
  4209. x=i2+ (8*y-3*dy)*(i4-i2)/(4*dy); // right limit of line
  4210. i= loop(bp,0,y,dx,cs,0,RI); if(i>x+dx/16) break;
  4211. } if ( y<7*dy/8 ) Break;
  // x = longest run from the right edge found so far, j = its row; the scan
  // stops at the first row where x exceeds dx/2
  4212. for ( x=0,j=y=dy/3; y<dy-dy/8; y++ ){ // search lower edge (also 4x6)
  4213. i=loop(bp,dx-1,y,dx,cs,0,LE);
  4214. if ( i>x ) { x=i; j=y; } if(x>dx/2) break;
  4215. } if ( x<dx/2 || x>=dx) Break;
  // get_bw() must not return 1 for the right quarter from row y downwards
  4216. if( get_bw(3*dx/4,dx-1, y , dy-1,bp,cs,1) == 1 ) Break;
  // i = holes counted by num_hole() in the part y0..y1-dy/5,
  // j = hole count of the whole box taken from sdata
  4217. i=num_hole (x0,x1,y0,y1-dy/5,box1->p,cs,NULL);
  4218. // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
  4219. j=sdata->holes.num;
  4220. if (j!=1 && dx< 8) ad=96*ad/100;
  4221. if (j!=1 && dx>=8) ad=98*ad/100;
  4222. if (i==0 && j==0) ad=90*ad/100; /* some times there is a small gap */
  4223. if (i>1 || j>1 || j>i) Break;
  4224. // check for serif F
  // at row bp->y/4: skip the first run from the right edge and the run that
  // follows it, then measure the run downwards from that column
  4225. i= loop(bp,bp->x-1, bp->y/4, dx ,cs,0,LE);
  4226. i=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE);
  4227. j= loop(bp,bp->x-1-i,bp->y/4,3*dy/4,cs,0,DO);
  4228. if (j>dy/2) ad=80*ad/100; // its an serif-F
  // hchar and gchar both set or both clear lowers ad here and again below
  4229. if( ((!hchar) && (!gchar)) || (hchar && gchar)) ad=95*ad/100;
  // 'p' is the default; 'P' is chosen when hchar is set and either gchar is
  // clear or the box is lower than 14 rows
  4230. bc='p';
  4231. if( hchar && ((!gchar) || dy<14)) bc='P';
  4232. if ( hchar && gchar) ad=98*ad/100; // \ss sz
  4233. if ((!hchar) && !gchar) ad=98*ad/100;
  4234. Setac(box1,bc,ad);
  4235. break;
  4236. }
  4237. return box1->c;
  4238. }
  /*
   * ocr0_qQ - rule based tests for the letters 'Q' and 'q'.
   *
   * Two independent tests are run one after the other, first 'Q', then 'q'.
   * Each of them starts with the certainty ad=100, only ever lowers it and
   * registers its letter through Setac() when no hard check fails, so both
   * letters may be registered for the same box.  Returns box1->c.
   *
   * Reads the candidate box sdata->box1, the bitmap sdata->bp, the threshold
   * sdata->cs, the flags sdata->hchar / sdata->gchar and sdata->holes.num.
   * bp is addressed with the coordinates 0..dx-1 / 0..dy-1, box1->p with
   * x0..x1 / y0..y1.
   *
   * NOTE(review): loop(), get_bw(), num_cross(), num_hole(), turmite() and
   * the Break / Setac / DBG macros are defined outside this block; loop()
   * presumably returns the length of a run of white (col 0) or black (col 1)
   * pixels starting at (x,y) in the given direction.  Several remarks below
   * assume that Break expands to a plain break statement - confirm.
   */
  4239. static wchar_t ocr0_qQ(ocr0_shared_t *sdata){
  4240. struct box *box1=sdata->box1;
  4241. pix *bp=sdata->bp;
  4242. int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
  4243. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  4244. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  4245. ad; /* tmp-vars */
  4246. // --- test Q ---------------------------------------------------
  // The for statement is used as a guarded block: its body ends with an
  // unconditional break, so it is executed at most once.
  // NOTE(review): the comment says 3x4 but the condition requires dy>=5.
  4247. for(ad=d=100;dx>2 && dy>4;){ // min 3x4
  4248. DBG( wchar_t c_ask='Q'; )
  4249. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  // get_bw() must return 1 left and right at 1/3 height and in the lower
  // third and upper quarter of the middle column; it must not return 1 for
  // the centre part of the middle column and for the upper right corner
  4250. if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
  4251. if( get_bw(x1-dx/3,x1 ,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
  4252. if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break;
  4253. if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
  4254. if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break;
  4255. if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha
  4256. if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break;
  // top row / left column: the outermost line or its neighbour must give
  // num_cross()==1
  4257. if( num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND
  4258. if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
  4259. if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  4260. if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  // the same test for the right column is only applied when get_bw()
  // returns 0 for the lowest eighth of the right border column
  4261. if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 )
  4262. if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  4263. if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  4264. // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL);
  // one hole is required; two holes are accepted only if num_hole() finds
  // exactly one of them in the lower half
  4265. i=sdata->holes.num;
  4266. if(!i) Break;
  4267. if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break;
  // two turmite() walks starting at the lower right corner (x1,y1); the
  // x position reached must stay in the right half after the first walk,
  // after the second one a position left of it only lowers ad
  4268. x=x1;y=y1;
  4269. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x1-dx/2 ) Break;
  4270. turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
  4271. if( x<x1-dx/2 ) { if (gchar) ad=98*ad/100; else ad=90*ad/100; }
  // the left run in row 0 must not be shorter than the one in row 2
  4272. if( loop(bp,0 ,0 ,dx,cs,0,RI)
  4273. < loop(bp,0 ,2 ,dx,cs,0,RI) ) Break;
  4274. if( loop(bp,0 ,dy/8+2,dx,cs,0,RI)
  4275. +loop(bp,dx-1,dy/8+2,dx,cs,0,LE) > 5*dx/8 ) Break; // ~4 Okt00
  // NOTE(review): this horizontal scan passes dy, not dx, as length limit
  4276. x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break;
  4277. if( loop(bp,dx-1-x,0 ,dy,cs,0,DO)
  4278. <= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4
  // all four nested conditions have to hold to reject the box as a '4'
  4279. if( loop(bp,dx-1,dy-2,dx,cs,0,LE)
  4280. <= loop(bp,dx-1,dy/2,dx,cs,0,LE) )
  4281. if( loop(bp, 1,dy-1,dy,cs,0,UP)
  4282. <= loop(bp,dx/2,dy-1,dy,cs,0,UP) )
  4283. if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 )
  4284. if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4
  // sum of the left and right runs at 3/4 height smaller than at 1/2 height
  4285. if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)
  4286. + loop(bp, 0,3*dy/4,dx,cs,0,RI)
  4287. < loop(bp,dx-1,2*dy/4,dx,cs,0,LE)
  4288. + loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4
  4289. if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4
  4290. if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break;
  // j = run upwards from the bottom of the middle column; if it is longer
  // than 1 and than dy/8, the left half of the row half way up that run is
  // tested with get_bw()
  4291. j=loop(bp,dx/2,dy-1,dy,cs,0,UP);
  4292. if (j>1 && j>dy/8) {
  4293. if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA
  4294. if (j<5) ad=95*ad/100;
  4295. else Break;
  4296. }
  4297. }
  4298. // italic a
  // i counts the rows of the upper half with num_cross()>2.
  // NOTE(review): the "if(i>dy/8) Break;" written on the same line as the
  // loop body is a separate statement and is executed once after the loop.
  4299. for(i=0,y=0;y<dy/2;y++)
  4300. if( num_cross(0,dx-1,y,y,bp,cs) > 2 ) i++; if(i>dy/8) Break; // ~a \it a
  4301. if (i>0) ad=99*ad/100;
  4302. // ~o look at the lower right side for falling line
  // x = longest run from the right edge seen so far in the lower half,
  // j = largest amount by which a later row fell short of it.
  // NOTE(review): the Break inside this loop belongs to the inner for loop,
  // so (if Break is a plain break) it only ends the scan; the test goes on.
  4303. for(j=x=0,y=dy/2;y<dy;y++){
  4304. i=loop(bp,dx-1,y,dx,cs,0,LE);if(i>x){ x=i; }
  4305. if (x-i>j) j=x-i;
  4306. if( j>dx/16 ) Break; // falling line detected
  4307. }
  4308. if (j==0) Break; // no falling line => no Q
  4309. if (j<=dx/16) ad=98*ad/100;
  4310. if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! rare
  4311. if(!hchar) ad=96*ad/100;
  4312. Setac(box1,'Q',ad);
  4313. break;
  4314. }
  4315. // --- test q ---------------------------------------------------
  // Second guarded block, again executed at most once; ad restarts at 100.
  4316. for(ad=d=100;dx>2 && dy>3;){ // min 3x4
  4317. DBG( wchar_t c_ask='q'; )
  4318. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  // NOTE(review): the Break inside each of the next two scan loops belongs
  // to the inner for loop (if Break is a plain break); the test itself is
  // left by the check that follows the loop when nothing was found.
  // a row with num_cross()==2 must exist in the upper half
  4319. for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring
  4320. if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break;
  4321. } if (2*y>y0+y1) Break; /* < */
  // a row with num_cross()==1 over the full width and 0 over the left half
  // must exist in the lower half
  4322. for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line
  4323. if( num_cross(x0, x1, y, y,box1->p,cs) == 1
  4324. && num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break;
  4325. } if (y>y1) Break; /* O (y==y1 for 4x6font-q) */
  // x = longest run from the left edge found so far, j = its row; the scan
  // stops at the first row where x exceeds dx/2
  4326. for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline
  4327. i=loop(box1->p,x0,y,dx,cs,0,RI);
  4328. if ( i>x ) { x=i; j=y; }
  4329. if ( x>dx/2 ) break;
  4330. } if ( x<dx/2 || x>=dx) Break;
  4331. if (y1-j+1<dy/4) ad=96*ad/100; // ~\it{a}
  4332. if( num_cross(x0+x/2,x0+x/2, j, y1,box1->p,cs) != 0 ) ad=96*ad/100; // ~g
  // a short run upwards from row j near the left border lowers ad and
  // rejects the box when hchar is set or gchar is clear
  4333. if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){
  4334. ad=97*ad/100;
  4335. if (hchar || !gchar) Break; // 4
  4336. }
  // NOTE(review): these horizontal scans pass dy, not dx, as length limit
  4337. if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8
  4338. || loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){
  4339. ad=96*ad/100; // 4
  4340. }
  // get_bw() must return 1 right and left at 1/3 height and must not return
  // 1 for the lower left area
  4341. if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
  4342. if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
  4343. if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break;
  4344. if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100;
  4345. if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break;
  4346. // if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 )
  // a hole count other than 1 (whole box, then the part down to row j) is
  // tolerated with lower certainty for boxes narrower than 16 columns only
  4347. if (sdata->holes.num != 1)
  4348. { if (dx<16) ad=98*ad/100; else Break; }
  4349. if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 )
  4350. { if (dx<16) ad=98*ad/100; else Break; }
  4351. // ~\it g
  4352. if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8
  4353. && get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g
  4354. // what about unsure m1-m4?
  4355. if(!gchar){ ad=ad*99/100; } // ~4
  4356. if( hchar){ ad=ad*99/100; } // ~49
  4357. Setac(box1,'q',ad);
  4358. break;
  4359. }
  4360. return box1->c;
  4361. }
  4362. static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){
  4363. struct box *box1=sdata->box1;
  4364. pix *bp=sdata->bp;
  4365. int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
  4366. ax,ay,bx,by,cx,cy,ex,ey,
  4367. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  4368. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  4369. ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */
  4370. (*aa)[4]=sdata->aa; /* the for line ends, (x,y,dist^2,vector_idx) */
  4371. // --- test i ---------------------------------------------------
  4372. // if(box1->dots==1) // what about \it neighbouring ij
  4373. for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot
  4374. DBG( wchar_t c_ask='i'; )
  4375. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  4376. // ToDo: ':' check that high of dot is smaller than the vert. line!
  4377. /*
  4378. * o <== ya
  4379. * o
  4380. *
  4381. * ooo <== yb
  4382. * o
  4383. * o
  4384. * o
  4385. * ooo
  4386. */
  4387. ya=y0;
  4388. if (box1->dots!=1) ad=98*ad/100;
  4389. while(dy>3*dx && box1->m2){ // test for vertical i without detected dot
  4390. i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP);
  4391. if (dy-1-i<box1->m3-2) break;
  4392. i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP);
  4393. // distance upper end to m2 > (m2-m1)/3
  4394. if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break;
  4395. if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
  4396. if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0
  4397. || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0
  4398. || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 )
  4399. {
  4400. Setac(box1,'i',ad);
  4401. return 'i'; /* beleave me, thats an "i"! */
  4402. } break;
  4403. }
  4404. // if( box1->dots!=1 ) Break;
  4405. if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
  4406. // out_x(box1);
  4407. for (y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
  4408. if (2*y>=ya+y1) Break; // hmm, gap only, no dot?
  4409. ya=y;
  4410. if (box1->m2 && ya>box1->m2+2) Break;
  4411. for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
  4412. if (2*y>=ya+y1) Break; // hmm no gap
  4413. for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
  4414. yb=y;
  4415. if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap
  4416. if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~:
  4417. if (5*yb>=2*ya+3*y1) Break; // huge gap, ~:
  4418. if (loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml)
  4419. ad=95*ad/100;
  4420. // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
  4421. // printf(" dots=%d\n",box1->dots); out_x(box1);
  4422. // \sl ~f. !
  4423. for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
  4424. if (y>(ya+3*y1)/4) Break;
  4425. if (y>(ya+2*y1)/3) ad=96*ad/100;
  4426. y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
  4427. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break;
  4428. for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
  4429. for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
  4430. if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
  4431. y=(y1-yb+1)/2+yb-y0;
  4432. for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
  4433. for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
  4434. if( yf>(y1-yb+1)/4+yb-y0 ) Break;
  4435. if(yd>yc+2){
  4436. xa=loop(bp, 0,yc-1,dx,cs,0,RI);
  4437. xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
  4438. if(
  4439. xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
  4440. > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
  4441. y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
  4442. if(y>0){
  4443. i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
  4444. if( i>0 ) y+=i-1;
  4445. }
  4446. if( yc-1+y < yd-1 ) Break;
  4447. } else {
  4448. y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
  4449. if( yc-1+y < yd-2 ) Break;
  4450. }
  4451. }
  4452. if(yf<ye-2){
  4453. x=loop(bp,0 ,ye+1,dx,cs,0,RI);
  4454. y=loop(bp,x-1,ye+1,dy,cs,0,UP);
  4455. i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
  4456. if( i>0 ) y+=i-1;
  4457. if( ye+1-y > yf+1 ) Break;
  4458. }
  4459. if( 2*y0 <= box1->m1+box1->m2
  4460. && loop(bp,0, 0,dx,cs,0,RI)+1
  4461. < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
  4462. if( gchar ) // i is more often than j, be sure that realy correct Mai00
  4463. if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
  4464. -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break;
  4465. // could be a broken + or similar thing?
  4466. if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100;
  4467. if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2
  4468. && loop(bp,dx-1, dy-1,dx,cs,0,LE)<dx/4 ) Break; // ~d=cl
  4469. // test for &eacute;
  4470. if( dx>5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 )
  4471. ad=95*ad/100;
  4472. Setac(box1,'i',ad);
  4473. break;
  4474. }
  4475. // --- test j ---------------------------------------------------
  4476. // if(box1->dots==1) // what about \it neighbouring ij
  4477. for(ad=d=100;dy>4 && dx>0;){ // min 3x4
  4478. DBG( wchar_t c_ask='j'; )
  4479. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  4480. ya=y0;
  4481. if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
  4482. for(y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
  4483. if(2*y>=ya+y1) Break; // hmm only gap
  4484. ya=y;
  4485. if( box1->m2 && ya>box1->m2+2 ) Break;
  4486. for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
  4487. if(2*y>=ya+y1) Break; // hmm no gap
  4488. for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
  4489. if(2*y>=ya+y1) Break; // hmm very large gap
  4490. yb=y;
  4491. if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml)
  4492. // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
  4493. // printf(" dots=%d\n",box1->dots); out_x(box1);
  4494. // \sl ~f. !
  4495. for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
  4496. if(y<=y1) Break;
  4497. y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
  4498. if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break;
  4499. for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
  4500. for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
  4501. if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
  4502. y=(y1-yb+1)/2+yb-y0;
  4503. for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
  4504. for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
  4505. if( yf>(y1-yb+1)/4+yb-y0 ) Break;
  4506. if(yd>yc+2){
  4507. xa=loop(bp, 0,yc-1,dx,cs,0,RI);
  4508. xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
  4509. if(
  4510. xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
  4511. > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
  4512. y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
  4513. if(y>0){
  4514. i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
  4515. if( i>0 ) y+=i-1;
  4516. }
  4517. if( yc-1+y < yd-1 ) Break;
  4518. } else {
  4519. y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
  4520. if( yc-1+y < yd-2 ) Break;
  4521. }
  4522. }
  4523. if(yf<ye-2){
  4524. x=loop(bp,0 ,ye+1,dx,cs,0,RI);
  4525. y=loop(bp,x-1,ye+1,dy,cs,0,UP);
  4526. i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
  4527. if( i>0 ) y+=i-1;
  4528. if( ye+1-y > yf+1 ) Break;
  4529. }
  4530. if( 2*y0 <= box1->m1+box1->m2
  4531. && loop(bp,0, 0,dx,cs,0,RI)+1
  4532. < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
  4533. if (loop(bp,0,dy-1,dx,cs,0,RI)
  4534. -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
  4535. if( gchar ) // i is more often than j, be sure that realy correct Mai00
  4536. if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
  4537. -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break;
  4538. // could be a broken + or similar thing?
  4539. if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100;
  4540. if (!gchar) ad=96*ad/100;
  4541. if( box1->dots!=1 ) ad=98*ad/100;
  4542. Setac(box1,'j',ad);
  4543. break;
  4544. }
  4545. // --- test I ---------------------------------------------------
  4546. for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4
  4547. DBG( wchar_t c_ask='I'; )
  4548. if( box1->dots==1 ) Break;
  4549. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  4550. x =loop(bp,0, dy/2,dx,cs,0,RI); // konvex? divided Q
  4551. if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break;
  4552. for( y=dy/16;y<dy-1-dy/16;y++ )
  4553. if( num_cross(0, dx-1, y , y ,bp,cs) != 1 )
  4554. if( num_cross(0, dx-1, y+dy/16 , y+dy/16 ,bp,cs) != 1 ) break;
  4555. if( y<dy-1-dy/16 ) Break;
  4556. x =loop(bp,0, dy/2,dx,cs,0,RI);
  4557. i5=loop(bp,x, dy/2,dx,cs,1,RI); // center width
  4558. for(y=dy/4;y<3*dy/4;y++ ){ // same width ?
  4559. x =loop(bp,0, y,dx,cs,0,RI);
  4560. x =loop(bp,x, y,dx,cs,1,RI); // width
  4561. if( abs(x-i5)>1+dx/8 ) break;
  4562. } if( y<3*dy/4 ) Break;
  4563. // out_x(box1);
  4564. // upper max width
  4565. for(i2=i1=0,y=0;y<dy/4;y++ ){
  4566. x =loop(bp,0, y,dx,cs,0,RI);
  4567. x =loop(bp,x, y,dx,cs,1,RI); if(x>i1){ i1=x;i2=y; }
  4568. }
  4569. for(i4=i3=0,y=3*dy/4;y<dy;y++ ){
  4570. x =loop(bp,0, y,dx,cs,0,RI);
  4571. x =loop(bp,x, y,dx,cs,1,RI); if(x>i3){ i3=x;i4=y; }
  4572. }
  4573. if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure!
  4574. if( i1>i5 ){ // look for edges else *80%
  4575. }
  4576. if(i1+1<i5 && !hchar) Break; // Jun00
  4577. // calculate upper and lower mass center
  4578. x =loop(bp,0, dy/8,dx,cs,0,RI); i1=x;
  4579. x+=loop(bp,x, dy/8,dx,cs,1,RI); i1=(i1+x-1)/2;
  4580. x =loop(bp,0,dy-1-dy/8,dx,cs,0,RI); i2=x;
  4581. x+=loop(bp,x,dy-1-dy/8,dx,cs,1,RI); i2=(i2+x-1)/2;
  4582. x =loop(bp,0,dy-2-dy/8,dx,cs,0,RI); i=x;
  4583. x+=loop(bp,x,dy-2-dy/8,dx,cs,1,RI); i=(i+x-1)/2; if( i>i2 ) i2=i;
  4584. // printf(" get_line(%d,%d) %d\n",i1,i2,
  4585. // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100));
  4586. if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break;
  4587. x =(i1-i2+4)/8; i1+=x; i2-=x;
  4588. // upper and lower width (what about serifs?)
  4589. y=dy/8;
  4590. x =loop(bp,i1, y+0,dx,cs,1,LE); i=x;
  4591. x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x;
  4592. x =loop(bp,i1, y+0,dx,cs,1,RI); j=x;
  4593. x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
  4594. x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x;
  4595. x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
  4596. x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x;
  4597. x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
  4598. if(dy>15) // v024a4
  4599. if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE)
  4600. > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thinn)
  4601. for(i=0,y=dy/16;y<15*dy/16 && i<2;y++)
  4602. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
  4603. if( i>1 ) Break;
  4604. if(!hchar){ // right part (bow) of h is never a l
  4605. if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
  4606. && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
  4607. if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4
  4608. && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4
  4609. && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z
  4610. }
  4611. if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
  4612. && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
  4613. && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
  4614. && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */
  4615. if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2
  4616. && loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2
  4617. && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */
  4618. x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~()
  4619. i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
  4620. if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
  4621. && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
  4622. && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
  4623. && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
  4624. if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
  4625. && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
  4626. && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
  4627. && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
  4628. if( loop(bp, 0, dy/8,dx,cs,0,RI)
  4629. -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/
  4630. if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs
  4631. && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2
  4632. && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2
  4633. && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/
  4634. if (box1->m2 && 3*y0>box1->m1+2*box1->m2)
  4635. if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
  4636. Break; // ~i
  4637. if(i1+1<i5 && !hchar){ ad=65*ad/100; MSG({}) } // ~ slanted I
  4638. // be sure only for serif
  4639. i3=loop(bp,dx-1, dy/4,dx,cs,0,LE);
  4640. i4=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
  4641. if (i3<2 || i4<2
  4642. || get_bw(x1-i3/4,x1-i3/4,y0,y0+dy/4,box1->p,cs,1) != 1
  4643. || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 )
  4644. { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it
  4645. if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r
  4646. if (box1->m4 && y1<box1->m4) { // probably lower dot?
  4647. if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
  4648. || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) {
  4649. ad=96*ad/100;
  4650. }
  4651. } // ~!
  4652. // a---b
  4653. // I
  4654. // I
  4655. // c---e
  4656. // check against Z
  4657. for(bx=0,ax=dx,ay=by=y=0;y<dy/4;y++){
  4658. i =loop(bp,dx-1 ,y,dx,cs,0,LE); if (dx-i-1>bx) { bx=dx-1-i; by=y; }
  4659. i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1<ax) { ax=dx-i; ay=y; }
  4660. }
  4661. for(cx=dx,ex=0,ey=cy=y=dy-1;y>dy-1-dy/4;y--){
  4662. i =loop(bp,0,y,dx,cs,0,RI); if (i<cx) { cx=i; cy=y; }
  4663. i+=loop(bp,i,y,dx,cs,1,RI); if (i>ex) { ex=i; ey=y; }
  4664. }
  4665. x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI);
  4666. x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE);
  4667. if (j>0 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100;
  4668. if (j>0 && ( i>2*j || 2*i< j )) ad=97*ad/100;
  4669. i=loop(bp,0,0,dy,cs,0,DO);
  4670. if (i>dy/8 && i<dy/2) ad=99*ad/100; // ~1
  4671. if (loop(bp,dx-1,0,dx,cs,0,LE)
  4672. -loop(bp, 0,0,dx,cs,0,RI)>dx/4) ad=96*ad/100; // ~l 5x7
  4673. if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
  4674. if (gchar) ad=98*ad/100; // J
  4675. if (box1->m3 && 2*y1<=box1->m2+box1->m3) ad=96*ad/100; // '
  4676. Setac(box1,'I',ad);
  4677. break;
  4678. }
  4679. // --- test J --------------------------------------------------- 22Nov06
  4680. for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d',
  4681. // rewritten for vectors 0.42
  4682. int ld, i1, i2, i3, i4, i5, i6, i7; // line derivation + corners
  4683. DBG( wchar_t c_ask='J'; )
  4684. if (sdata->holes.num > 0) Break; /* no hole */
  4685. /* half distance to the center */
  4686. d=2*sq(128/4);
  4687. /* now we check for the upper right end of the J */
  4688. if (aa[3][2]>d) Break; /* [2] = distance */
  4689. /* searching for 4 notches between neighbouring ends */
  4690. /*
  4691. type A B
  4692. 6OOOO 6O5
  4693. 7O5 7O
  4694. O O
  4695. O O
  4696. 2O 1O4 1O4
  4697. OO 2OO
  4698. 3 3
  4699. */
  4700. /* Warning: aa0 can be left upper or left lower point for type B */
  4701. /* get a point on the inner low left side of the J */
  4702. i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0);
  4703. /* failed for slanted J before Jun09 */
  4704. i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx/8,y1-dy/8);
  4705. /* get the most left point on the lower part of the J */
  4706. i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8);
  4707. /* get a point on the middle of the bottom of the J */
  4708. i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1);
  4709. /* get a point on the outer low right side of the J */
  4710. i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3);
  4711. /* get a point on the outer right side below top serif */
  4712. i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0);
  4713. /* get a point on the left side of upper serif */
  4714. i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0);
  4715. /* get a point on the most right left side of upper serif */
  4716. i7=nearest_frame_vector(box1,i6,i1, x1, y0);
  4717. MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
  4718. /* check the highest point on lower left area */
  4719. i =nearest_frame_vector(box1,i1,i3,x0,y0);
  4720. if (box1->frame_vector[i ][1]-y0<dy/4) Break; // U
  4721. if (box1->frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a
  4722. /* check the lowest point on upper left area, serife? */
  4723. j =nearest_frame_vector(box1,i6,i7,x0,y1);
  4724. if (box1->frame_vector[i ][1]
  4725. -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a
  4726. if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not to low
  4727. if (box1->frame_vector[i1][1]
  4728. -box1->frame_vector[i7][1]<dy/2) Break;
  4729. if (box1->frame_vector[i4][1]
  4730. -box1->frame_vector[i5][1]<dy/2) Break;
  4731. if (box1->frame_vector[i7][0]<x0+dx/2) Break;
  4732. if (box1->frame_vector[i1][0]
  4733. -box1->frame_vector[i2][0]<=dx/8) Break; // ~1
  4734. if (box1->frame_vector[i1][0]
  4735. -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1
  4736. if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1
  4737. if (aa[0][2]==0) { // ]?
  4738. ad=99*ad/100;
  4739. if (aa[1][2]==0) ad=98*ad/100;
  4740. if (aa[2][2]<=aa[3][2]) ad=97*ad/100;
  4741. }
  4742. /* check for left bow */
  4743. for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
  4744. if (box1->frame_vector[ i][0] /* [0]=x */
  4745. <box1->frame_vector[i1][0]) break; /* curve? */
  4746. } if (i==i4) Break; // ~I
  4747. /* check for no right bow */
  4748. for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
  4749. if (box1->frame_vector[ i][0] /* [0]=x */
  4750. >box1->frame_vector[i4][0]) break;
  4751. } if (i!=i4) Break; // ~I
  4752. /* check for no right bow */
  4753. for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) {
  4754. if (box1->frame_vector[ i][1] > y0+dy/4) break;
  4755. } if (i!=i6) Break; // ~Y
  4756. /* check if upper left and lower left points are joined directly */
  4757. ld=line_deviation(box1, i7, i1);
  4758. MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));)
  4759. if (ld >2*sq(1024/4)) Break;
  4760. if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
  4761. if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
  4762. if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
  4763. if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
  4764. /* check if lower right and upper right points are joined directly */
  4765. ld=line_deviation(box1, i4, i5);
  4766. MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));)
  4767. if (ld >2*sq(1024/4)) Break;
  4768. if (5*ld >4*2*sq(1024/4)) ad=99*ad/100;
  4769. // J exists as gchar and ~gchar
  4770. if (!hchar){ ad=99*ad/100; }
  4771. if (box1->num_frames>1) {
  4772. ad=98*ad/100; // j
  4773. }
  4774. Setac(box1,'J',ad);
  4775. break;
  4776. }
  4777. return box1->c;
  4778. }
  4779. static wchar_t ocr0_brackets(ocr0_shared_t *sdata){
  4780. struct box *box1=sdata->box1;
  4781. pix *bp=sdata->bp;
  4782. int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar,
  4783. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  4784. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  4785. (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
  4786. ad,r1,r2; /* tmp-vars */
  4787. wchar_t bc=UNKNOWN;
  4788. // --- test > derived from xX ---------------------------------------------------
  4789. // rewritten for vectors v0.41
  4790. for(ad=d=100;dx>1 && dy>2;){ // min 3x2
  4791. // 0 - indizes 0,1,i1,i2 pointing to edges of the char
  4792. // \ .
  4793. // \ .
  4794. // i1,i2
  4795. // /
  4796. // /
  4797. // 1
  4798. DBG( wchar_t c_ask='>'; )
  4799. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  4800. if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */
  4801. /* calculate the half distance to the center */
  4802. d=2*sq(128/4);
  4803. /* now we check for the 2 left ends of the > */
  4804. if (aa[0][2]>d) Break; /* upper left end */
  4805. if (aa[1][2]>d) Break; /* lower left end */
  4806. if (aa[1][1]-aa[0][1]<dy/2) Break;
  4807. /* searching for 4 notches between neighbouring ends */
  4808. /* run along left side from top to bottom */
  4809. for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
  4810. if (box1->frame_vector[i][0]
  4811. >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
  4812. } if (j==i || j==aa[0][3]) Break;
  4813. /* calculate the distance to the center */
  4814. x=box1->frame_vector[j][0];
  4815. y=box1->frame_vector[j][1];
  4816. if (2*x-aa[0][0]-aa[1][0]<dx) ad=99*ad/100;
  4817. if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)) Break;
  4818. if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
  4819. i1=j;
  4820. d=line_deviation(box1, aa[0][3], j) >sq(1024/4);
  4821. /* check if upper left and center point are joined directly */
  4822. MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
  4823. if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
  4824. MSG(fprintf(stderr,"ad=%d", ad);)
  4825. d=line_deviation(box1, j, aa[1][3]);
  4826. /* check if lower left and center point are joined directly */
  4827. MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
  4828. if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
  4829. MSG(fprintf(stderr,"ad=%d", ad);)
  4830. /* run along right side from bottom to top */
  4831. for (j=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
  4832. if (box1->frame_vector[i][0]
  4833. >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
  4834. // MSG(fprintf(stderr,"search right: %d %d %d %d",i,j,aa[1][3],aa[0][3]);)
  4835. } if (j==i || j==aa[1][3]) Break;
  4836. /* calculate the distance to the center */
  4837. x=box1->frame_vector[j][0];
  4838. y=box1->frame_vector[j][1];
  4839. if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break;
  4840. if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break;
  4841. if (aa[0][0]>=x || aa[1][0]>=x) Break;
  4842. i2=j;
  4843. d=line_deviation(box1, j, aa[0][3]);
  4844. /* check if upper left and center point are directly joined directly */
  4845. MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
  4846. if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
  4847. MSG(fprintf(stderr,"ad=%d", ad);)
  4848. d=line_deviation(box1, aa[1][3], j);
  4849. /* check if lower left and center point are directly joined */
  4850. MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
  4851. if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
  4852. MSG(fprintf(stderr,"ad=%d", ad);)
  4853. /*
  4854. ToDo: calculate momentums or max derivations
  4855. along lines to distinguish )]}>
  4856. i1,i2
  4857. */
  4858. if (sdata->gchar) ad=98*ad/100;
  4859. if (sdata->hchar) ad=99*ad/100;
  4860. bc='>';
  4861. Setac(box1,bc,ad);
  4862. break;
  4863. }
  4864. // --- test /\\ ------------------------------------------------
  4865. // if(bc==UNKNOWN)
  4866. // if(!box1->dots)
  4867. for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font
  4868. DBG( wchar_t c_ask='/'; )
  4869. if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
  4870. #if 1
  4871. for(i=y=0;y<dy;y++){
  4872. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
  4873. if( loop(bp, 0,y,dx,cs,0,RI)
  4874. + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
  4875. }
  4876. if( y<dy ) Break;
  4877. if ( i>2 || (i>0 && dy<16)) Break;
  4878. #endif
  4879. /* get the center as exact as possible */
  4880. i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts
  4881. +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE)
  4882. + loop(bp, 0,dy/2 ,dx,cs,0,RI)
  4883. + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI);
  4884. if (abs(i2-2*dx)>1+dx/2) Break;
  4885. if (abs(i2-2*dx)> dx/2) ad=99*ad/100;
  4886. i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // right side
  4887. i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
  4888. i4=loop(bp, 0,0 ,dx,cs,0,RI); // left side
  4889. i6=loop(bp, 0,dy-1 ,dx,cs,0,RI);
  4890. i=(box1->m4+box1->m3)/2-box1->m2;
  4891. //
  4892. // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
  4893. // ~lI
  4894. for(i=i4,y=0;y<dy;y++){
  4895. x=loop(bp,0 ,y,dx,cs,0,RI);if(abs(x-i)>dx/6+1 ) break; i=x;
  4896. } if( y<dy ) Break;
  4897. for(i=i1,y=0;y<dy;y++){
  4898. x=loop(bp,dx-1,y,dx,cs,0,LE);if(abs(x-i)>dx/6+1 ) break; i=x;
  4899. } if( y<dy ) Break;
  4900. if(i1<=dx/8 && i6<=dx/8 && i4-(dx-i3)>dx/4 ) { Setac(box1,(bc='/'),ad);break; }
  4901. if(i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>dx/4 ) { Setac(box1,(bc='\\'),ad);break; }
  4902. Break;
  4903. }
  4904. // --- test ()<> ------------------------------------------------
  4905. // if(bc==UNKNOWN)
  4906. // if(!box1->dots)
  4907. for(ad=d=100;dx>1 && dy>4;){ // min 3x4
  4908. DBG( wchar_t c_ask='('; )
  4909. if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */
  4910. #if 1
  4911. for(i=y=0;y<dy;y++){
  4912. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
  4913. if( loop(bp, 0,y,dx,cs,0,RI)
  4914. + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
  4915. }
  4916. if( y<dy ) {Break;};
  4917. if ( i>2 || (i>0 && dy<16)) {Break;};
  4918. #endif
  4919. /* look for the extrema => r1..r2 */
  4920. for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){
  4921. j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
  4922. j=loop(bp,dx-1,y,dx,cs,0,LE); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
  4923. } y=(r1+r2)/2;
  4924. i1=loop(bp,dx-1, dy/16,dx,cs,0,LE);
  4925. i2=loop(bp,dx-1,y ,dx,cs,0,LE);
  4926. i3=loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
  4927. i4=loop(bp, 0,dy/16 ,dx,cs,0,RI);
  4928. i5=loop(bp, 0,y ,dx,cs,0,RI);
  4929. i6=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI);
  4930. if(dx>dy){
  4931. // from Aug06 vector-version of greater is used
  4932. // if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; }
  4933. if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; }
  4934. }
  4935. if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5)
  4936. ad=98;
  4937. if (dx<8) ad=99*ad/100;
  4938. if (dx<6) ad=96*ad/100;
  4939. if( 2*dx > JOB->res.avX && 4*dx>dy ) ad=98;
  4940. // printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6);
  4941. if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1
  4942. && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) {
  4943. if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16<i4+i5) ad=95*ad/100;
  4944. if (2*loop(bp, 0,dy-1-y/2,dx,cs,0,RI)+1+dx/16<i6+i5) ad=95*ad/100;
  4945. Setac(box1,(bc='<'),ad);{Break;};
  4946. }
  4947. /* obsolete code Aug06, will be removed if new code is stable
  4948. if( i2==0 && i4<=dx/8 && i6<=dx/8
  4949. && i5>=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) {
  4950. if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16<i1+i2) ad=95*ad/100;
  4951. if (2*loop(bp,dx-1,dy-1-y/2,dx,cs,0,LE)+1+dx/16<i3+i2) ad=95*ad/100;
  4952. Setac(box1,(bc='>'),ad);{Break;};
  4953. }
  4954. */
  4955. }
  4956. i1=loop(bp,dx-1,dy/16,dx,cs,0,LE);
  4957. i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
  4958. i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
  4959. i4=loop(bp, 0,0 ,dx,cs,0,RI);
  4960. i5=loop(bp, 0,dy/2,dx,cs,0,RI);
  4961. i6=loop(bp, 0,dy-1,dx,cs,0,RI);
  4962. i=(box1->m4+box1->m3)/2-box1->m2;
  4963. //
  4964. // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
  4965. if(2*i2<i1+i3 && 2*i5>i4+i6 && 2*dx<dy && dy>=i){
  4966. Setac(box1,(bc=')'),98);break; }
  4967. if(2*i2>i1+i3 && 2*i5<i4+i6 && 2*dx<dy && dy>=i){
  4968. if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100;
  4969. if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100;
  4970. for(x=y=0;y<dy/4;y++){
  4971. i=loop(bp,0,y,dx,cs,0,RI);if( i>x ) x=i;
  4972. }
  4973. for(y=0;y<(dy+2)/4;y++){
  4974. i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i<x ) break;
  4975. }
  4976. if( y==(dy+2)/4 ) {Break;}; // ~l (left upper side must be convex) Jul00
  4977. if (loop(bp,0,dy/2+dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
  4978. if (loop(bp,0,dy/2-dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
  4979. Setac(box1,(bc='('),ad); break;
  4980. }
  4981. Break;
  4982. }
  4983. // --------- test [] --------------------------------
  4984. for(ad=d=98;dx>2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font
  4985. DBG( wchar_t c_ask=']'; )
  4986. if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */
  4987. if (!hchar) ad=97*ad/100;
  4988. for(y=0;y<dy;y++){
  4989. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
  4990. } if (y<dy) {Break;};
  4991. if( get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) == 2
  4992. && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;};
  4993. if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2
  4994. && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;};
  4995. if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0
  4996. || get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 )
  4997. if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
  4998. { Setac(box1,(bc='['),ad);break; }
  4999. if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0
  5000. || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 )
  5001. if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
  5002. { Setac(box1,(bc=']'),ad);break; }
  5003. break;
  5004. }
  5005. #if CODE_NOT_COMPLETED
  5006. // --- test ] -------
  5007. for(ad=d=100;dx>2 && dy>3;){
  5008. DBG( wchar_t c_ask=']'; )
  5009. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  5010. if (sdata->holes.num > 0) ad=98*ad/100; /* # */
  5011. /* 1/8 distance to the center */
  5012. d=2*sq(128/16);
  5013. /* now we check for the 4 ends of the x */
  5014. if (aa[0][2]>d) Break;
  5015. if (aa[1][2]>d) Break;
  5016. if (aa[2][2]>d) Break;
  5017. if (aa[3][2]>d) Break;
  5018. if (aa[3][0]-aa[0][0]<7*dx/8) Break;
  5019. if (aa[2][0]-aa[1][0]<7*dx/8) Break;
  5020. if (aa[1][1]-aa[0][1]<7*dy/8) Break;
  5021. if (aa[2][1]-aa[3][1]<7*dy/8) Break;
  5022. if (aa[3][0]-aa[0][0]<2) Break; /* to small */
  5023. if (aa[2][0]-aa[1][0]<2) Break; /* to small */
  5024. MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\
  5025. aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
  5026. aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\
  5027. aa[0][2],aa[1][2],aa[2][2],aa[3][2]);)
  5028. /* left and right vertical line */
  5029. d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
  5030. ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
  5031. d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
  5032. /* search uppermost left ^ */
  5033. i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0);
  5034. x=box1->frame_vector[i1][0];
  5035. y=box1->frame_vector[i1][1];
  5036. if (y-y0 > 5*dy/8) Break;
  5037. if (x-x0 > 5*dx/8) Break;
  5038. /* search uppermost right ^ ~H */
  5039. i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
  5040. if ( box1->frame_vector[i3][0]-x> dx/4
  5041. && box1->frame_vector[i3][1]-y<=dy/8) Break;
  5042. /* check if upper left and lower right point are joined directly */
  5043. dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break;
  5044. /* check if lower left and lower left point are joined directly */
  5045. dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break;
  5046. if (!hchar) ad=99*ad/100;
  5047. if ( gchar) ad=98*ad/100; // \sc N
  5048. ac=(wchar_t) ']';
  5049. Setac(box1,ac,ad);
  5050. if (ad>=100) return ac;
  5051. break;
  5052. }
  5053. #endif
  5054. // --------- test ocr-a-[] --------------------------------
  5055. if(bc==UNKNOWN)
  5056. for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment
  5057. DBG( wchar_t c_ask='['; )
  5058. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  5059. if (!hchar) ad=97*ad/100;
  5060. if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break;
  5061. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
  5062. if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)
  5063. +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O
  5064. for(y=dy/8;y<dy-dy/8;y++){
  5065. if( num_cross(0,dx,y,y,bp,cs) != 2 ) break;
  5066. } if (y<dy-dy/8) break;
  5067. if( get_bw((3*x0+5*x1)/8,x1,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
  5068. { Setac(box1,(bc='['),ad);break; }
  5069. if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
  5070. { Setac(box1,(bc=']'),ad);break; }
  5071. break;
  5072. }
  5073. // --------- test {} --------------------------------
  5074. for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
  5075. DBG( wchar_t c_ask='{'; )
  5076. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  5077. if (!hchar) ad=97*ad/100;
  5078. for(y=0;y<dy;y++){
  5079. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
  5080. } if (y<dy) Break;
  5081. for(x=0;x<dx/2;x++){
  5082. if( num_cross(dx-1-x,dx-1-x,0,dy-1,bp,cs) != 2 ) break;
  5083. } if (y<dx/2) Break;
  5084. if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
  5085. if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
  5086. if ( loop(bp,0,dy-1,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100;
  5087. if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100; // <
  5088. if ( loop(bp,0, 0,dy,cs,0,DO)<dy/2-1 ) ad=98*ad/100;
  5089. if ( loop(bp,0,dy-1,dy,cs,0,UP)<dy/2-2 ) ad=98*ad/100; // (
  5090. if ( loop(bp,dx-1,0,dx,cs,0,LE)
  5091. + loop(bp,dx-1,2,dx,cs,0,LE)
  5092. - 2*loop(bp,dx-1,1,dx,cs,0,LE) >=dx/8 ) ad=98*ad/100; // <
  5093. if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f
  5094. if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1
  5095. || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
  5096. Setac(box1,(bc='{'),ad);Break;
  5097. }
  5098. for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
  5099. DBG( wchar_t c_ask='}'; )
  5100. if (!hchar) ad=97*ad/100;
  5101. for(y=0;y<dy;y++){
  5102. if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
  5103. } if (y<dy) Break;
  5104. for(x=0;x<dx/2;x++){
  5105. if( num_cross(x,x,0,dy-1,bp,cs) != 2 ) break;
  5106. } if (y<dx/2) Break;
  5107. if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
  5108. if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
  5109. if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;}
  5110. if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;} // >
  5111. if ( loop(bp,dx-1, 0,dy,cs,0,DO)<dy/2-1 ) {ad=98*ad/100;}
  5112. if ( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dy/2-2 ) {ad=98*ad/100;} // )
  5113. if ( loop(bp,0,0,dx,cs,0,RI)
  5114. + loop(bp,0,2,dx,cs,0,RI)
  5115. - 2*loop(bp,0,1,dx,cs,0,RI) >=dx/8 ) ad=98*ad/100; // <
  5116. if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ???
  5117. if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1
  5118. || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
  5119. Setac(box1,(bc='}'),ad);Break;
  5120. }
  5121. return box1->c;
  5122. }
  5123. #if 0
  5124. /* ---------- empty prototype function for copy and expand ---------- */
  5125. static wchar_t ocr0_XXX(ocr0_shared_t *sdata){
  5126. struct box *box1=sdata->box1;
  5127. pix *bp=sdata->bp;
  5128. int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
  5129. x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  5130. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  5131. ac,ad; /* tmp-vars */
  5132. // --- test XXX ---------------------------------------------------
  5133. return box1->c;
  5134. }
  5135. #endif
  5136. /* ----------------------- part9 -------------------------------- */
  5137. static wchar_t ocr0p9(ocr0_shared_t *sdata){
  5138. struct box *box1=sdata->box1;
  5139. pix *bp=sdata->bp;
  5140. int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  5141. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  5142. i1,i2,i3,i4; /* tmp-vars */
  5143. int xa,xb, /* used for store significant points of char */
  5144. dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */
  5145. ya,yb,ad,cs=sdata->cs;
  5146. wchar_t ac,bc=UNKNOWN; // bestletter
  5147. int hchar; // char is higher than e
  5148. int gchar; // char has ink lower than m3
  5149. // --- hchar --- gchar -------------------------
  5150. hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  5151. gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
  5152. // if the char is slightly moved down correction can be done
  5153. if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
  5154. if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  5155. /* reserved for the future */
  5156. // --- test beta,\3,sz,"s ---------------------------------------------
  5157. if(bc==UNKNOWN && hchar)
  5158. for(ad=d=100;dx>3 && dy>6;){ // min 4x7
  5159. DBG( wchar_t c_ask='S'; )
  5160. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  5161. /* this part is provisorium, should be changed!
  5162. a-\
  5163. | d
  5164. b| /
  5165. | \
  5166. -c /
  5167. */
  5168. if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2
  5169. && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break;
  5170. for(i=1+dy/16,y=y0+dy/8;y<y1-dy/4 && i>0;y++){
  5171. if( y<y1-6*dy/16 ){ if( num_cross(x0 ,x1 ,y,y,box1->p,cs) != 2 ) i--;}
  5172. else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;}
  5173. if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--;
  5174. if( y<y1-5*dy/16 )
  5175. if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) i--;
  5176. } if( i<=0 ) break;
  5177. // out_x(box1);
  5178. for(y=y0+dy/3;y<y1-dy/3;y++){
  5179. i =loop(box1->p,x1,y,dx,cs,0,LE);
  5180. if( i>=dx/8 ) break;
  5181. i+=loop(box1->p,x1-i,y,dx,cs,1,LE);
  5182. if( i>=dx/2 ) break;
  5183. } if( y>=y1-dy/3 ) break;
  5184. for(y=y0+dy/5;y<y0+dy/3;y++)
  5185. if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
  5186. if( y>=y0+dy/3 ) break;
  5187. for(y=y0+dy/2;y<y1;y++)
  5188. if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
  5189. if( y>=y1 ) break;
  5190. for(y=y1-dy/3;y<y1-dy/8;y++){
  5191. i=loop(box1->p,x1,y,dx,cs,0,LE);
  5192. if( i>dx/4
  5193. && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break;
  5194. } if( y<y1-dy/8 ) break; // ~Q
  5195. if( box1->m3==0 || 2*y1<box1->m3+box1->m4 )
  5196. if( loop(box1->p,x1,y1, dx,cs,0,LE)==0
  5197. && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R
  5198. for(x=x0+dx/4;x<x1-dx/4;x++)
  5199. if( num_cross(x,x,y0,y1,box1->p,cs) == 3 ) break;
  5200. if( x>=x1-dx/4 ) break;
  5201. i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00
  5202. for(x=dx/5;x<dx/2;x++)
  5203. if( loop(bp,x,dy-1,dy,cs,0,UP) > i ) break;
  5204. if( x==dx/2 ) break;
  5205. x=x0+loop(bp,0,dy/4,dx,cs,0,RI);
  5206. for(;x<x1-dx/3;x++)
  5207. if( get_bw(x,x,y0,y0+dy/4,box1->p,cs,1) == 0 ) break;
  5208. if( x<x1-dx/3 ) break;
  5209. if( !gchar )
  5210. // if( num_hole( x0, x1, y0, y1,box1->p,cs,NULL) != 0 ) break;
  5211. if (sdata->holes.num != 0) break;
  5212. bc=LATIN_SMALL_LETTER_SHARP_S;
  5213. Setac(box1,(wchar_t)bc,98);
  5214. break;
  5215. }
  5216. // --- test + ------------------------------------------------
  5217. for(ad=d=100;dx>2 && dy>2;){ // min 3x3
  5218. DBG( wchar_t c_ask='+'; )
  5219. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  5220. xa=(dx+1)/3-1; ya=(dy+1)/3-1; /* size of the 4 gaps = 1/3 * 1/3 */
  5221. xb=(dx+1)/4; yb=(dy+2)/4; /* smaller gap */
  5222. if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // left upper
  5223. if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break; // left lower
  5224. if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // right upper
  5225. if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) { // right lower
  5226. if( get_bw(x1-xa,x1,y1-yb,y1,box1->p,cs,1) == 1 ) Break;
  5227. ad=99*ad/100; // smoothed inner corner? 0907
  5228. }
  5229. for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line
  5230. if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; }
  5231. }
  5232. if (3*dx<2*dy) ad=99*ad/100; // ~t
  5233. if( !i ) Break;
  5234. ac=(wchar_t) '+';
  5235. Setac(box1,ac,ad);
  5236. if (ad>=100) return ac;
  5237. break;
  5238. }
  5239. // --- test $ ------------------------------------------------
  5240. for(ad=d=99;dx>3 && dy>5;){ // min 3x4
  5241. DBG( wchar_t c_ask='$'; )
  5242. if (sdata->holes.num != 2) Break;
  5243. if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
  5244. if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
  5245. if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
  5246. if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
  5247. if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
  5248. if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
  5249. i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1<x0+dx/3 || i1>x1-dx/5 ) Break;
  5250. i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2<x0+dx/5 || i2>i1 ) Break;
  5251. ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100;
  5252. // check upper left and lower right half circle, $ (Oct08: removed)
  5253. // Oct08 JS: check the position of holes (better for tiny fonts)
  5254. // upper hole must be the first!? (hole[].x0 = relative coordinates)
  5255. if ( sdata->holes.hole[0].y0 <
  5256. sdata->holes.hole[1].y0 ) i4=0; else i4=1; /* sort to [idx^i4] */
  5257. if ( sdata->holes.hole[0^i4].y1 >
  5258. sdata->holes.hole[1^i4].y0 ) Break; /* no y-overlap allowed */
  5259. // upper left hole
  5260. // fprintf(stderr,"\nDBG hole[0]=x0=%d %d", sdata->holes.hole[0^i4].x0, sdata->holes.hole[0^i4].x1);
  5261. if ( sdata->holes.hole[0^i4].x0 > (dx+1)/3 ) Break;
  5262. if ( sdata->holes.hole[0^i4].x1 > dx/2+dx/4 ) Break;
  5263. if ( sdata->holes.hole[0^i4].y1 >= dy/2+dy/8 ) Break;
  5264. if ( sdata->holes.hole[0^i4].y0 > dy/2-dy/8 ) Break;
  5265. // lower right hole
  5266. // fprintf(stderr,"\nDBG hole[1]=x0=%d %d", sdata->holes.hole[1^i4].x0, sdata->holes.hole[1^i4].x1);
  5267. if ( sdata->holes.hole[1^i4].x0 <= dx/2-dx/4 ) Break;
  5268. if ( sdata->holes.hole[1^i4].x1 < dx/2+dx/4 ) Break;
  5269. if ( sdata->holes.hole[1^i4].y1 < dy/2+dy/8 ) Break;
  5270. if ( sdata->holes.hole[1^i4].y0 <= dy/2-dy/8 ) Break;
  5271. if (ad<95) Break;
  5272. ac=(wchar_t) '$';
  5273. Setac(box1,ac,ad);
  5274. if (ad>=100) return ac;
  5275. break;
  5276. }
  5277. // --- test & ------------------------------------------------
  5278. for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */
  5279. DBG( wchar_t c_ask='&'; )
  5280. if (sdata->holes.num != 2) Break;
  5281. if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g
  5282. if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break;
  5283. i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break;
  5284. i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (i<i1) i1=i;
  5285. i3=loop(bp,0,dy-dy/4 ,dx,cs,0,RI); if (i3>dx/2) Break;
  5286. i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (i<i3) i3=i;
  5287. if (i3>i1) Break;
  5288. for( i2=0, y=dy/4; y<=dy/2+1; y++ ){
  5289. i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i;
  5290. }
  5291. if(2*i2-i1-i3<1) Break;
  5292. // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break;
  5293. if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break;
  5294. if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break;
  5295. for( x=dx-1; x>=dx/2; x-- ){
  5296. if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break;
  5297. } if( x<=3*dx/4 && x<dx-2) Break;
  5298. if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) > 3 ) { // glued ah
  5299. if (dy>15) { Break; } else ad=96*ad/100;
  5300. }
  5301. if (!hchar) ad=98*ad/100;
  5302. bc=(wchar_t) '&';
  5303. Setac(box1,bc,ad);
  5304. if (ad>=100) return bc;
  5305. break;
  5306. }
  5307. // --- test \it & like \epsilon\tau ------------------------------
  5308. if(bc==UNKNOWN)
  5309. for(ad=d=100;dx>7 && dy>7;){
  5310. DBG( wchar_t c_ask='&'; )
  5311. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  5312. if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break;
  5313. if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break;
  5314. if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break;
  5315. if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break;
  5316. if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break;
  5317. if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break;
  5318. if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break;
  5319. if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break;
  5320. if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
  5321. if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
  5322. if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
  5323. if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break;
  5324. if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
  5325. ac=(wchar_t) '&';
  5326. Setac(box1,ac,ad);
  5327. if (ad>=100) return ac;
  5328. break;
  5329. }
  5330. // --- test ? ---------------------------------------------------
  5331. for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2)
  5332. DBG( wchar_t c_ask='?'; )
  5333. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  5334. if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~?
  5335. if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~?
  5336. for(y=y0;y<y1;y++) // new y1
  5337. if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
  5338. if (2*y<y0+y1) Break;
  5339. i1=y1;
  5340. if (y==y1 && box1->m4) { // probably lower dot not catched in box?
  5341. if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break;
  5342. i1=box1->m4;
  5343. for(;i1>y1;i1--) // new y1
  5344. if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
  5345. }
  5346. y--; i=y-y0+1; // new dy
  5347. for (y=0;y<dy/2;y++)
  5348. if( num_cross(x0, x1, y0+y, y0+y, box1->p, cs) == 2 ) break;
  5349. if (y==dy/2) Break;
  5350. // if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break;
  5351. if (sdata->holes.num > 0) Break;
  5352. for(y=y0+dy/2;y<=i1;y++)
  5353. if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
  5354. if( y==i1 ) Break;
  5355. for( ;y<=i1;y++)
  5356. if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
  5357. if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break;
  5358. if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2
  5359. bc='?';
  5360. Setac(box1,(wchar_t)bc,98);
  5361. return bc;
  5362. }
  5363. // --- test !| ---------------------------------------------------
  5364. for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4
  5365. DBG( wchar_t c_ask='!'; )
  5366. if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
  5367. // measure thickness
  5368. if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break;
  5369. if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break;
  5370. for(y=y0;y<y1;y++) // new y1
  5371. if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
  5372. if (2*y<y0+y1) Break;
  5373. if (y==y1 && y>box1->m3-dy/8) ad=ad*97/100; /* missing dot? */
  5374. i1=y1;
  5375. if (y==y1 && box1->m4) { // probably lower dot not catched in box?
  5376. if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
  5377. || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) {
  5378. i1=box1->m4;
  5379. for(;i1>y1;i1--) // new y1
  5380. if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
  5381. }
  5382. } i2=i1;
  5383. for( i1=0,y=y0;y<=i2;y++){
  5384. i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break;
  5385. if(i==0 && i1==0) i1=y;
  5386. } if(y<=i2 || i1==0 || i1<y0+dy/2) Break;
  5387. if( loop(bp,dx-1,dy/8,dx,cs,0,LE)
  5388. -loop(bp,dx-1, 0,dx,cs,0,LE)>dx/4+1 ) Break; // f
  5389. if (!hchar) ad=96*ad/100;
  5390. Setac(box1,(wchar_t)'!',ad);
  5391. break;
  5392. }
  5393. // --- test * five egdes (jagges? beames?) what is the right english word? ----
  5394. for(ad=d=99;dx>2 && dy>4;){
  5395. DBG( wchar_t c_ask='*'; )
  5396. if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
  5397. if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1
  5398. && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break;
  5399. if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
  5400. && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
  5401. x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded
  5402. /* upwarts from center */
  5403. dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break;
  5404. if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */
  5405. if (get_bw(x,x,0,y,bp,cs,2)==2) Break;
  5406. /* horizontal */
  5407. dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break;
  5408. if (dy<8)
  5409. if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2
  5410. && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break;
  5411. /* down (right) */
  5412. i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100);
  5413. j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j;
  5414. if(i<95) Break;
  5415. /* down (left) */
  5416. dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up
  5417. /* check for lower gap at bottom */
  5418. dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break;
  5419. dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap
  5420. dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap
  5421. MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);)
  5422. Setac(box1,(wchar_t)'*',ad);
  5423. break;
  5424. }
  5425. // --- test * six egdes (jagges? beames?) incl. vert. line (|+X) ----
  5426. for(ad=d=100;dx>4 && dy>4;){
  5427. DBG( wchar_t c_ask='*'; )
  5428. if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
  5429. if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3
  5430. && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break;
  5431. if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break;
  5432. if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break;
  5433. if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break;
  5434. if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break;
  5435. if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break;
  5436. if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break;
  5437. if (dx>5) {
  5438. dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwarts beam
  5439. dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam
  5440. /* check vertical line */
  5441. dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break;
  5442. }
  5443. MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
  5444. Setac(box1,(wchar_t)'*',99);
  5445. break;
  5446. }
  5447. // --- test * six egdes '*'='x'+'-' incl. horizontal line ----
  5448. for(ad=d=100;dx>3 && dy>4;){
  5449. DBG( wchar_t c_ask='*'; )
  5450. if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
  5451. if( num_cross( dx/8, dx/8, 0, dy-1,bp,cs) != 3
  5452. && num_cross(1+dx/8,1+dx/8, 0, dy-1,bp,cs) != 3) Break;
  5453. if( num_cross(dx-1-dx/8,dx-1-dx/8,0,dy-1,bp,cs) != 3
  5454. && num_cross(dx-2-dx/8,dx-2-dx/8,0,dy-1,bp,cs) != 3) Break;
  5455. if( num_cross( 0,dx-1, 0, 0,bp,cs) != 2) Break;
  5456. if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 2) Break;
  5457. if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 1) Break;
  5458. // check upper and lower gap
  5459. if( num_cross(dx/2,dx/2,0,dy/8,bp,cs) != 0) Break;
  5460. if( num_cross(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs) != 0) Break;
  5461. if (dx>5) {
  5462. dbg[0]=i=get_line2(dx-2-dx/8,0,dx/8,dy-1,bp,cs,100); if(i<95) Break; // black upwarts beam
  5463. dbg[1]=i=get_line2(dx/8,0,dx-2-dx/8,dy-1,bp,cs,100); if(i<95) Break; // black downwards beam
  5464. /* check horizontal line */
  5465. dbg[2]=i=get_line2(0,dy/2,dx-1,dy/2,bp,cs,100); if(i<95) Break;
  5466. }
  5467. MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
  5468. Setac(box1,(wchar_t)'*',98);
  5469. break;
  5470. }
  5471. // --- test @ - a popular char should be detectable! added in version v0.2.4a5
  5472. if(bc==UNKNOWN)
  5473. for(ad=d=99;dx>5 && dy>7;){
  5474. DBG( wchar_t c_ask='@'; )
  5475. if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
  5476. // check ~ 7x10 0 with dot in it
  5477. // num_holes==1 + hole.y0<=dy/8,>=y1-dy/8
  5478. if (sdata->holes.num==1
  5479. && sdata->holes.hole[0].y0<=y0+dy/8
  5480. && sdata->holes.hole[0].y1>=y1-dy/8) Break;
  5481. if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break;
  5482. if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break;
  5483. if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break;
  5484. if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break;
  5485. /* ..@@@@..<- 8x10 example
  5486. .@@..@@.
  5487. @@....@@
  5488. @@..@@@@<
  5489. @@.@@.@@
  5490. @@.@@.@@
  5491. @@..@@@.
  5492. @@......
  5493. .@@...@@
  5494. ..@@@@@.<- */
  5495. x=6*dx/16;
  5496. y=dy/2;
  5497. i=num_cross(0,dx-1,y,y,bp,cs);
  5498. if (i<3 || i>4) Break;
  5499. if( i != 4 && dx>8 ) ad=98*ad/100;
  5500. i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break;
  5501. if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs);
  5502. if (abs(4-j)<abs(i-4)) i=j; }
  5503. if (i!=4) { j=num_cross(x+2,x+2,0,dy-1,bp,cs);
  5504. if (abs(4-j)<abs(i-4)) i=j; }
  5505. if (i<3 || i>4) Break;
  5506. if (i!=4) ad=97*ad/100;
  5507. if( num_cross(0, x,y,y,bp,cs) != 2 ) Break;
  5508. if( num_cross(x,dx-1,y,y,bp,cs) != 2 ) Break;
  5509. if( num_cross(x,x,0, y,bp,cs) != 2 ) Break;
  5510. if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break;
  5511. if (dx>7) {
  5512. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
  5513. if (sdata->holes.num != 1) Break;
  5514. if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
  5515. }
  5516. Setac(box1,(wchar_t)'@',ad);
  5517. break;
  5518. }
  5519. // --- test paragraph v0.2.6
  5520. if(bc==UNKNOWN && hchar)
  5521. for(ad=d=100;dx>4 && dy>15;){
  5522. DBG( wchar_t c_ask='$'; )
  5523. if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */
  5524. if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break;
  5525. if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break;
  5526. if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break;
  5527. if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break;
  5528. if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break;
  5529. if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break;
  5530. if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break;
  5531. if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break;
  5532. if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
  5533. Setac(box1,SECTION_SIGN,96);
  5534. break; // paragraph=0xA7=167
  5535. }
  5536. return bc;
  5537. }
  5538. /* ----------------------- partx: special chars, tested at the end -------------------------------- */
  /*
   * ocr0px -- run the rule based tests for "special" glyphs on one box.
   *
   * The function is a fixed sequence of independent tests, in this order:
   * ff ligature, ae, AE, O with stroke, cent sign, euro sign, C with cedilla,
   * '#', bullet, '|', '%' and capital Omega.
   *
   * Every test is written as "for(init; size-guard; ){ ... break; }".
   * The for is not a real loop: the condition is only a minimum-size guard
   * and every path through the body ends in break/Break/return, so the body
   * runs at most once and "break" means "leave this test, go to the next".
   * Break (capital B) is a macro defined elsewhere -- presumably break plus
   * debug output; DBG()/MSG() are debug macros defined elsewhere as well.
   *
   * A test that accepts calls Setac(box1, ac, ad); presumably this registers
   * character ac with certainty ad (percent) on the box -- TODO confirm.
   * ad starts at the value in the for-init and is only ever scaled down
   * (ad = N*ad/100 with N<100), so "if (ad>=100) return ac;" can only fire
   * in the '%' test, the one test here that starts with ad=100.
   *
   * Two coordinate systems are used side by side:
   *   - bp       with box-relative coordinates 0..dx-1 / 0..dy-1
   *   - box1->p  with absolute coordinates   x0..x1   / y0..y1
   *
   * Helpers defined outside this block (semantics assumed from usage here,
   * confirm against their definitions):
   *   num_cross(xa,xb,ya,yb,p,cs)    presumably number of strokes crossed on
   *                                  the line (xa,ya)-(xb,yb)
   *   loop(p,x,y,max,cs,col,dir)     presumably run length of colour col
   *                                  starting at (x,y) in direction dir
   *   get_bw(xa,xb,ya,yb,p,cs,mask)  presumably reports black (1) / white (2)
   *                                  pixels found in the rectangle
   *   num_hole(xa,xb,ya,yb,p,cs,..)  presumably number of holes in rectangle
   *
   * sdata:   shared per-box data; read here: box1, bp, cs, holes.num.
   * returns: UNKNOWN, unless the Omega test accepts (then
   *          GREEK_CAPITAL_LETTER_OMEGA) or a test returns early with ad>=100.
   *          bc is assigned only in the Omega test at the very end, so every
   *          "if( bc==UNKNOWN )" guard before it is always true.
   */
  5539. static wchar_t ocr0px(ocr0_shared_t *sdata){
  5540. struct box *box1=sdata->box1;
  5541. pix *bp=sdata->bp;
  5542. int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  5543. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  5544. i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */
  5545. int ya,ad; /* used for store significant points of char */
  5546. wchar_t ac,bc=UNKNOWN; // bestletter
  5547. int hchar; // char is higher than e
  5548. int gchar; // char has ink lower than m3
  5549. // --- hchar --- gchar -------------------------
  /* hchar: box top y0 is at or above the middle between m1 and m2
   * (written with doubled values to avoid an integer division).
   * gchar: box bottom y1 is at or below the middle between m3 and m4.
   * Smaller y is higher on the page, as the hchar test implies.
   * m1..m4 are line metrics stored in the box -- presumably top of capitals,
   * top of small letters, baseline and descender line; TODO confirm. */
  5550. hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  5551. gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
  5552. // if the char is slightly moved down correction can be done
  /* repeat the hchar test with y0 shifted up by the overshoot (y1-m3) */
  5553. if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
  5554. if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  5555. /* reserved for special chars, to test at the end */
  5556. // --- test 'ff' ---------------------------------------------------
  5557. // ToDo: better check and call test 'f' and 'f' with subboxes
  /* ff ligature: needs dx>4 and dy>6, starts with ad=98, gives up with more
   * than 2 holes.  On success ad is reduced for !hchar (96%) and !gchar (99%)
   * and LATIN_SMALL_LIGATURE_FF is registered. */
  5558. if( bc==UNKNOWN )
  5559. for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H
  5560. DBG( wchar_t c_ask='f'; )
  5561. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  /* two crossings required near the top (dy/4 or 3*dy/16) and near the
   * bottom (3*dy/4 or one row below) */
  5562. if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2
  5563. && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break;
  5564. if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
  5565. && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break;
  5566. if( loop(bp,0 ,dy/8,dx,cs,0,RI)
  5567. + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A
  /* i is used as a flag: it is cleared as soon as the sum of the left and
   * right scan on one row exceeds 10*dx/16; j1 keeps the largest sum seen */
  5568. for( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertical lines
  5569. { j=loop(box1->p,x0 ,y,dx,cs,0,RI)
  5570. +loop(box1->p,x1 ,y,dx,cs,0,LE);
  5571. if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; }
  5572. if( !i ) break;
  /* search columns dx/4..dx/2-1 for one with a tall upward scan from the
   * bottom row; the test is abandoned if none is found (x reaches dx/2) */
  5573. for( x=dx/4; x<dx/2; x++ ){ // lower gap
  5574. y=loop(bp,x ,dy-1,dy,cs,0,UP);
  5575. if ( y > 3*dy/8 ) break;
  5576. if ( 10*y > dy ){ /* italic */
  5577. i=loop(bp,x ,dy-y,dx,cs,0,RI);
  5578. if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break;
  5579. }
  5580. } if( x>=dx/2 ) break;
  /* x is reused here: sum of left+right scan on row y1-dy/8, used as the
   * reference for the "max - min width" comparison below */
  5581. x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI)
  5582. +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE);
  5583. for( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
  5584. { j=loop(bp,0 ,y,dx,cs,0,RI)
  5585. +loop(bp,dx-1,y,dx,cs,0,LE); if( j-x>dx/5 ) i=0; }
  5586. if( !i ) break; // ~K Jul00
  /* i = longest col-1 run found after the leading col-0 run on any row,
   * ya = that row (absolute here, made box-relative after the loop) */
  5587. for( i=0,ya=y=y0+dy/4; y<y1-dy/3; y++ ) // horizontal line
  5588. { j=loop(box1->p,x0 ,y,dx,cs,0,RI);
  5589. j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
  5590. if( i<=dx/2 ) break; ya-=y0;
  5591. if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
  5592. && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */
  5593. for( y=ya; y<dy-dy/4; y++ ) // ~M Dec00
  5594. if( num_cross(0,dx-1,y ,y ,bp,cs) > 2
  5595. && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
  5596. if ( y<dy-dy/4 ) break;
  /* the following five scans use i as a flag that is cleared when the
   * searched property is found; note the first one rejects when the flag
   * was cleared (!i) and the other four reject when it was not (i) */
  5597. for(i=1,x=x0+dx/2;x<=x1-dx/4 && i;x++){
  5598. if( get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
  5599. } if( !i ) break;
  5600. for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  5601. if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
  5602. } if( i ) break;
  5603. for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
  5604. if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
  5605. } if( i ) break;
  5606. for(i=1,y=y0;y<=y0+dy/4 && i;y++){
  5607. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  5608. } if( i ) break;
  5609. for(i=1,y=y1-dy/4;y<=y1 && i;y++){
  5610. if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
  5611. } if( i ) break;
  5612. if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100;
  5613. if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break;
  5614. if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break;
  /* i1,i2,i3: scan from the right edge at rows dy/4, dy/2 and dy-1-dy/4;
   * the three values must stay close to each other and nearly collinear */
  5615. i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break;
  5616. i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) break;
  5617. i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) break;
  5618. if(abs(i1+i3-2*i2)>dx/16+1) break;
  5619. if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break;
  5620. if (!hchar) ad=96*ad/100;
  5621. if (!gchar) ad=99*ad/100;
  5622. ac=LATIN_SMALL_LIGATURE_FF;
  5623. Setac(box1,ac,ad);
  5624. break;
  5625. }
  5626. // --- test ae ---------------------------------------------------
  /* ae: needs dx>4 and dy>6, starts with ad=98, gives up with more than
   * 4 holes.  At the end num_hole must report exactly 1 for the lower left
   * rectangle and exactly 1 for the upper right rectangle. */
  5627. if( bc==UNKNOWN )
  5628. for(ad=98;dx>4 && dy>6;){ // provisional
  5629. DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; )
  5630. if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */
  5631. if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2
  5632. && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
  5633. if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
  /* the next two lines test both diagonals of the box
   * (top-left to bottom-right, then top-right to bottom-left) */
  5634. if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
  5635. if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break;
  5636. if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 )
  5637. if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break;
  5638. if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break;
  /* x = largest scan from the left (then from the right) over the rows
   * dy/4..3*dy/4-1; it must lie within dx/4..3*dx/4.  i2 and i4 record the
   * row but are not read again in this test. */
  5639. for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){
  5640. j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; }
  5641. } if( x<dx/4 || x>3*dx/4 ) Break;
  5642. for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){
  5643. j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
  5644. } if( x<dx/4 || x>3*dx/4 ) Break;
  /* x = longest col-1 run that follows the leading col-0 run from the right */
  5645. for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){
  5646. j=loop(bp,dx-1 ,y,dx,cs,0,LE);
  5647. j=loop(bp,dx-1-j,y,dx,cs,1,LE);
  5648. if(j>x) { i4=y; x=j; }
  5649. } if( x<dx/4 ) Break;
  5650. if( num_hole(x0,x0+3*dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
  5651. if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
  5652. ac=LATIN_SMALL_LETTER_AE;
  5653. Setac(box1,ac,ad);
  5654. if (ad>=100) return ac;
  5655. break;
  5656. }
  5657. // --- test AE ---------------------------------------------------
  /* AE: needs dx>5 and dy>6, starts with ad=98, gives up with more than
   * 2 holes; in the end exactly one hole overall and exactly one in the
   * upper left rectangle are required. */
  5658. if( bc==UNKNOWN )
  5659. for(ad=98;dx>5 && dy>6;){ // provisional
  5660. DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; )
  5661. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  5662. if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break;
  5663. if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
  5664. if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
  5665. if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1
  5666. && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1
  5667. && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break;
  5668. // check for upper horizontal line
  /* rows 0 and 1 are scanned from the right: x = smaller leading col-0 run,
   * j = larger following col-1 run */
  5669. j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j;
  5670. j=loop(bp,dx-1-j,0,dx,cs,1,LE);
  5671. i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (i<x) x=i;
  5672. i=loop(bp,dx-1-i,1,dx,cs,1,LE);
  5673. if (i>j) j=i;
  5674. if (x>dx/8) Break;
  5675. if (j<dx/4) Break;
  /* going down from dy/4 the left scan must never grow (break if j>x);
   * i1/i3 = longest col-1 run seen from the left / from the right.
   * i2 and i4 record rows but are not read again in this test. */
  5676. for( x=dx,i1=i3=0,i2=y=dy/4; y<3*dy/4; y++ ){
  5677. j=loop(bp, 0,y,dx,cs,0,RI); if(j>x) break; x=j;
  5678. j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; }
  5679. j=loop(bp,dx-1 ,y,dx,cs,0,LE);
  5680. j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; }
  5681. } if( y<3*dy/4 || i1<dx/4-1 || i3<dx/4-1) Break;
  /* same right-hand run measurement for the top dy/8 rows (i1) and the
   * bottom dy/8 rows (i3); both must exceed dx/4 */
  5682. for( i1=i3=0,y=0; y<dy/8; y++ ){
  5683. j=loop(bp,dx-1 , y,dx,cs,0,LE);
  5684. j=loop(bp,dx-1-j, y,dx,cs,1,LE); if(j>i1) { i1=j; }
  5685. j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE);
  5686. j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; }
  5687. } if( i1<=dx/4 || i3<=dx/4 ) Break;
  5688. for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E
  5689. if( num_cross(x,x, 0,dy-1,bp,cs) == 3 )
  5690. if( num_cross(x,x, 0,dy/4,bp,cs) == 1 )
  5691. if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 )
  5692. if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break;
  5693. } if (x<=dx/2) Break; // not found
  5694. if (sdata->holes.num != 1) Break;
  5695. if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
  5696. // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break;
  5697. ac=LATIN_CAPITAL_LETTER_AE;
  5698. Setac(box1,ac,ad);
  5699. if (ad>=100) return ac;
  5700. break;
  5701. }
  5702. // --- test /0 /o /O O_WITH_STROKE -----------------------------------------
  /* O with stroke: not guarded by bc==UNKNOWN; needs dx>4 and dy>4, starts
   * with ad=99 and finally requires exactly 2 holes.  The capital letter is
   * chosen when hchar is set and y0 is above the middle of m1 and m2. */
  5703. for(ad=99;dx>4 && dy>4;){ // provisional
  5704. DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; )
  5705. if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
  5706. if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break;
  5707. if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
  /* NOTE(review): the next line scans RI starting at the rightmost column
   * dx-1; the other loop() calls in this function that start at column dx-1
   * scan LE -- confirm that RI is intended here */
  5708. if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break;
  5709. if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break;
  5710. if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
  5711. if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
  5712. if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
  5713. if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
  5714. if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
  5715. if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break;
  5716. if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
  5717. if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break;
  /* i1 becomes the column where the stroke ends in the top row (measured
   * from the right), i2 the column where it starts in the bottom row */
  5718. i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break;
  5719. i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1;
  5720. i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break;
  /* walk along the straight line from (i1,0) to (i2,dy-1), starting each
   * row dx/8 to the left of it; reject if the col-0 run found there is
   * longer than 3*dx/16 on any row */
  5721. for(y=1;y<dy-1;y++){
  5722. x=i1+y*(i2-i1)/dy-dx/8; if(x<0)x=0;
  5723. j=loop(bp,x,y,dx,cs,0,RI); if( j>3*dx/16 ) break;
  5724. } if( y<dy-1 ) Break;
  5725. if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
  5726. if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 1 ) Break;
  5727. if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
  5728. if (sdata->holes.num != 2) Break;
  5729. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break;
  5730. if ( hchar && 2*y0<box1->m1+box1->m2 )
  5731. ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE;
  5732. else ac=LATIN_SMALL_LETTER_O_WITH_STROKE;
  5733. Setac(box1,ac,ad);
  5734. if (ad>=100) return ac;
  5735. break;
  5736. }
  5737. // --- test /c /C C_WITH_STROKE CENT_SIGN --------------------------
  5738. // here only the version with a continuously vertical line (not broken variant)
  /* cent sign: needs dx>4 and dy>4, starts with ad=98 and finally requires
   * exactly 1 hole.  Structure mirrors the O-with-stroke test above. */
  5739. if( bc==UNKNOWN )
  5740. for(ad=98;dx>4 && dy>4;){ // provisional
  5741. DBG( wchar_t c_ask=CENT_SIGN; )
  5742. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  5743. if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break;
  5744. if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break;
  5745. if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
  5746. if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
  5747. if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
  5748. if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
  5749. if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
  5750. if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
  5751. if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break;
  5752. if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
  5753. if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break;
  5754. i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break;
  5755. i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1;
  5756. i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break;
  /* follow the line from (i1,0) to (i2,dy-1); a col-0 run longer than
   * (dx+4)/8 lowers ad (once per such row), one longer than (dx+2)/4
   * rejects the candidate */
  5757. for(y=0;y<dy;y++){
  5758. x=i1+y*(i2-i1)/dy; if(x>dx/16+1) x-=dx/16+1;
  5759. j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j);
  5760. if( j>(dx+4)/8 ) ad=96*ad/100;
  5761. if( j>(dx+2)/4 ) break;
  5762. } if( y<dy ) Break;
  5763. if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
  5764. if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 0 ) Break;
  5765. if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
  5766. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
  5767. if (sdata->holes.num != 1) Break;
  5768. ac=CENT_SIGN;
  5769. Setac(box1,ac,ad);
  5770. if (ad>=100) return ac;
  5771. break;
  5772. }
  5773. // --- test EURO_CURRENCY_SIGN -----------------------------------------
  /* euro sign: needs dx>4 and dy>6, starts with ad=98 and finally requires
   * zero holes.  c_ask is set to '&' in the DBG line although the test is
   * for the euro sign. */
  5774. if( bc==UNKNOWN )
  5775. for(ad=98;dx>4 && dy>6;){ // provisional
  5776. DBG( wchar_t c_ask='&'; )
  5777. if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
  5778. if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break;
  5779. if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break;
  5780. if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
  5781. if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break;
  /* i = longest col-1 run starting at the left stroke; must reach dx/2 */
  5782. for(i=0,y=dy/4;y<dy-dy/4-1;y++){ // check if no gap on left side
  5783. x=loop(bp,0,y,dx,cs,0,RI); if( x>dx/4 ) break;
  5784. j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j;
  5785. } if( y<dy-dy/4-1 || i<dx/2 ) break;
  /* here the inner break means "gap found"; the test is abandoned when the
   * loop runs to its end without finding one */
  5786. for(y=dy/4;y<dy-dy/4-1;y++){ // check for right horizontal gap
  5787. x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>dx/2 ) break;
  5788. } if( y>=dy-dy/4-1 ) break;
  5789. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
  5790. if (sdata->holes.num != 0) break;
  5791. ac=EURO_CURRENCY_SIGN;
  5792. Setac(box1,ac,ad);
  5793. if (ad>=100) return ac;
  5794. break;
  5795. }
  5796. // --- test LETTER_C_WITH_CEDILLA ---------------------------------------------------
  /* C with cedilla: only tried when gchar is set; needs dx>3 and dy>6,
   * starts with ad=98, no holes allowed.  The capital letter is chosen
   * when hchar is set. */
  5797. if (bc==UNKNOWN)
  5798. if (gchar)
  5799. for(ad=98;dx>3 && dy>6;){ // provisional
  5800. DBG( wchar_t c_ask='c'; )
  5801. if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */
  /* NOTE(review): the smaller of the two scans is stored in j, but the
   * comparison two lines below tests x (the second scan only) and j is
   * overwritten later without being read -- confirm which was intended */
  5802. j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE);
  5803. x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (x<j) j=x;
  5804. if (3*x>dx) Break; // ~4 ocr-b
  5805. if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break;
  5806. if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break;
  5807. if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break;
  /* smallest scan from the left over the middle rows must be 0 */
  5808. for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){
  5809. j=loop(bp,0,y,dx,cs,0,RI); if(j<x) { i2=y; x=j; }
  5810. } if( x>0 ) break; i1=x;
  /* largest scan from the right over rows dy/4..5*dy/8-1 must reach dx/2 */
  5811. for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){
  5812. j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
  5813. } if( x<dx/2 ) break; i3=x;
  /* down the middle column: the first col-0 run plus the col-1 run after it
   * must end within the top quarter, and the following col-0 run must be at
   * least dy/2 long */
  5814. j =loop(bp,dx/2,0,dy,cs,0,DO);
  5815. j+=loop(bp,dx/2,j,dy,cs,1,DO); if(j>dy/4) break;
  5816. j =loop(bp,dx/2,j,dy,cs,0,DO); if(j<dy/2) break;
  5817. j =loop(bp,dx-1 ,dy-1-dy/8,dx,cs,0,LE); if(j<dx/4 || 4*j>3*dx) break;
  5818. j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~()
  5819. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
  5820. if (sdata->holes.num) break;
  5821. if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
  5822. else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA;
  5823. Setac(box1,ac,ad);
  5824. if (ad>=100) return ac;
  5825. break;
  5826. }
  5827. // --- test # ---------------------------------------------------
  /* '#': not guarded by bc==UNKNOWN; needs dx>4 and dy>4, starts with ad=99,
   * accepts 1 or 2 holes (2 holes costs 5%), and num_hole on the box shrunk
   * by 1/8 on every side must report exactly 1. */
  5828. for(ad=99;dx>4 && dy>4;){ // never sure?
  5829. DBG( wchar_t c_ask='#'; )
  5830. if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
  5831. if (sdata->holes.num < 1) Break;
  5832. if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break;
  5833. if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break;
  5834. if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break;
  5835. if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break;
  5836. /* fat "#" have only small ends on left and right side, we tolerate this */
  5837. j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
  5838. j=loop(bp, 0,dy/2,dx,cs,0,RI); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
  5839. j=loop(bp,dx-1,dy/2,dx,cs,0,LE); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
  5840. j=loop(bp,dx-1,dy-1,dx,cs,0,LE); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
  /* i1/i3 = longest col-1 run in the upper / mirrored lower rows.  Both
   * early exits set i1=0, which makes the width check after the loop fail. */
  5841. for( i1=i3=0,y=dy/4; y<dy/2; y++ ){
  5842. j=loop(bp,0, y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
  5843. j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; }
  5844. j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
  5845. j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; }
  5846. }
  5847. if (i1<dx-dx/4 || i3<dx-dx/4) Break;
  5848. if (i1<dx-dx/8) ad=97*ad/100;
  5849. if (i3<dx-dx/8) ad=97*ad/100;
  5850. if (sdata->holes.num != 1) {ad=95*ad/100;}
  5851. if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
  5852. // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
  5853. ac=(wchar_t) '#';
  5854. if( gchar ) {ad=99*ad/100;}
  5855. Setac(box1,ac,ad);
  5856. if (ad>=100) return ac;
  5857. break;
  5858. }
  5859. // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156
  /* bullet: needs dx>4, dy>4 and 2*dx>dy, starts with ad=96.  The only
   * shape test is get_bw over the whole box with mask 2 -- presumably
   * "contains a white pixel", cf. the '|' test below.  c_ask is set to '#'
   * in the DBG line although the candidate is BULLET. */
  5860. if (bc==UNKNOWN)
  5861. for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisional
  5862. DBG( wchar_t c_ask='#'; )
  5863. if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break;
  5864. ac=BULLET;
  5865. if (gchar && !hchar) ad=80*ad/100;
  5866. Setac(box1,ac,ad);
  5867. if (ad>=100) return ac;
  5868. break;
  5869. }
  5870. /* --- test | (vertical line, could be a I or l) --- */
  /* '|': needs dy>4 and 2*dx<dy, starts with ad=99.  The inner rectangle
   * must pass the get_bw test; each of the four border strips that fails
   * it only costs 1%.  There is no early return in this test. */
  5871. for(ad=99;dy>4 && 2*dx<dy;){ /* v0.44 */
  5872. DBG( wchar_t c_ask='|'; )
  5873. /* test if everything is filled black */
  5874. if( get_bw(x0+dx/8,x1-dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) break;
  5875. /* more unsure if the borders are not exact */
  5876. if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
  5877. if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
  5878. if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100;
  5879. if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100;
  /* very thin boxes are penalised twice: 3*dx<dy and again for 4*dx<dy */
  5880. if (3*dx<dy) ad=98*ad/100;
  5881. if (4*dx<dy) ad=99*ad/100;
  /* position checks are skipped when m2 is 0: rejected if y1 lies below the
   * middle of m2 and m3, penalised if below the upper third */
  5882. if (box1->m2 && 2*y1> box1->m2+box1->m3) Break;
  5883. if (box1->m2 && 3*y1>2*box1->m2+box1->m3) ad=95*ad/100;
  5884. ac='|';
  5885. if (!hchar) ad=98*ad/100;
  5886. Setac(box1,ac,ad);
  5887. break;
  5888. }
  5889. // --- test % ---------------------------------------------------
  /* '%': not guarded by bc==UNKNOWN; needs dx>5 and dy>7.  This is the only
   * test here starting with ad=100, so it is the only one whose
   * "if (ad>=100) return ac;" can fire.  Several scans deliberately extend
   * to x1+dx/4, i.e. beyond the right edge of the box. */
  5890. for(ad=100;dx>5 && dy>7;){ // provisional
  5891. DBG( wchar_t c_ask='%'; )
  5892. if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
  5893. if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3
  5894. && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break;
  5895. if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3
  5896. && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break;
  /* the diagonal crossing count is only enforced for dx>7 and dy>15 */
  5897. if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4
  5898. && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4
  5899. && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4
  5900. && dx>7 && dy>15) Break;
  5901. if( num_cross(x0,x1, y0, y1,box1->p,cs) !=5 ) ad=99*ad/100;
  /* hole checks only on larger glyphs (dx>7 and dy>12): one hole in the
   * upper part, one in the lower right part, two in total; smaller glyphs
   * skip them and lose 2% instead */
  5902. if (dx>7 && dy>12) {
  5903. if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
  5904. if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
  5905. if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break;
  5906. } else ad=98*ad/100;
  5907. // use box1->p instead of b, because % is a sum of 3 objects
  5908. if ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
  5909. <= loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI) ) ad=96*ad/100; // X
  5910. if ( loop(box1->p,x1,y1 ,dx,cs,0,LE)
  5911. <= loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE) ) ad=96*ad/100; // X
  /* the inner break fires on the first column whose get_bw result differs
   * from 2; if such a column exists (x<dx) the candidate is rejected */
  5912. for (x=0;x<dx;x++) { /* look for a vertical line and break if found */
  5913. if ( get_bw(x0+x,x0+x,y0+dy/8,y1-dy/8,box1->p,cs,2) != 2 ) break;
  5914. } if (x<dx) Break; // ~gluedVI
  5915. if (gchar) ad=98*ad/100;
  5916. ac=(wchar_t) '%';
  5917. Setac(box1,ac,ad);
  5918. if (ad>=100) return ac;
  5919. break;
  5920. }
  5921. // --- test Omega ---------------------------------------------------
  /* capital Omega: not guarded by bc==UNKNOWN; the guard actually coded is
   * dx>7 && dy>7 (the trailing "min 3x4" note does not match it).  Starts
   * with ad=99, no holes allowed, !hchar costs 40%.  This is the only test
   * in this function that assigns bc, i.e. changes the normal return value. */
  5922. for(ad=d=99;dx>7 && dy>7;){ // min 3x4
  5923. DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; )
  5924. if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  5925. if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  5926. if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
  5927. if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break;
  5928. if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break;
  /* the paired ifs below are "AND" tests: the candidate is rejected only if
   * both the edge row/column and its inner neighbour fail */
  5929. if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
  5930. if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
  5931. if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against noise
  5932. if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break;
  5933. if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  5934. if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  5935. if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
  5936. if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
  5937. if (sdata->holes.num) Break;
  5938. // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
  /* note: the scan limit passed to loop() below is x1-x0, i.e. dx-1 */
  5939. if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
  5940. loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
  5941. if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4
  5942. || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break;
  5943. if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)<dx/4
  5944. || loop(bp,dx/2,3*dy/8,x1-x0,cs,0,LE)<dx/4 ) Break;
  /* walk along row dy-1-dy/16 from the left, alternating col 0 and col 1
   * runs; i accumulates the position and each run end must fall into the
   * expected range */
  5945. i=loop(bp,0,dy-1-dy/16,x1-x0,cs,0,RI); if(i>dx/8) Break;
  5946. x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break;
  5947. x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i<dx/2 || i>5*dx/8) Break;
  5948. x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break;
  5949. /* look for a vertical gap at lower end */
  5950. for( x=dx/4;x<3*dx/4;x++ ){
  5951. i=loop(bp,x,dy-1,y1-y0,cs,0,UP);
  5952. if( i>3*dy/4 ) break;
  5953. }
  5954. if( x>=3*dx/4 ) Break;
  5955. if( !hchar ) ad=60*ad/100;
  5956. bc=GREEK_CAPITAL_LETTER_OMEGA;
  5957. Setac(box1,bc,ad);
  5958. break;
  5959. }
  5960. return bc;
  5961. }
  5962. // -------------------- OCR engine ;) ----------------------------
  5963. wchar_t ocr0(struct box *box1, pix *bp, int cs){
  5964. // pix p=*(box1->p);
  5965. int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  5966. int dx=x1-x0+1,dy=y1-y0+1, /* size */
  5967. rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */
  5968. // ad,ac will be used in future
  5969. wchar_t bc = UNKNOWN; // bestletter
  5970. wchar_t um = SPACE; // modifier '"
  5971. int hchar; // char is higher than e
  5972. int gchar; // char has ink lower than m3
  5973. int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */
  5974. ocr0_shared_t sdata; // data used in all subfunctions
  5975. sdata.box1=box1;
  5976. sdata.bp=bp;
  5977. sdata.cs=cs;
  5978. // --- hchar --- gchar -------------------------
  5979. hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1;
  5980. gchar=0;if( y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1;
  5981. // if the char is slightly moved down correction can be done
  5982. if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
  5983. if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  5984. sdata.hchar=hchar;
  5985. sdata.gchar=gchar;
  5986. /* search for nearest points to the 4 courners, typical for xX */
  5987. /* this is faster as calling nearest_frame_vector 4 times */
  5988. aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */
  5989. aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */
  5990. aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */
  5991. aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */
  5992. /* searching for 4 diagonal line ends */
  5993. for (i=0;i<box1->num_frame_vectors[0];i++) {
  5994. x=box1->frame_vector[i][0]; /* take a vector */
  5995. y=box1->frame_vector[i][1];
  5996. /* distance to upper left end, normalized to 128 */
  5997. j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy);
  5998. // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]);
  5999. if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
  6000. /* distance to lower left end */
  6001. j=1; d=sq((x-x0)*128/dx)+sq((y-y1)*128/dy);
  6002. if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
  6003. /* distance to lower right end */
  6004. j=2; d=sq((x-x1)*128/dx)+sq((y-y1)*128/dy);
  6005. if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
  6006. /* distance to upper right end */
  6007. j=3; d=sq((x-x1)*128/dx)+sq((y-y0)*128/dy);
  6008. if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
  6009. }
  6010. for (i=0;i<16;i++) sdata.aa[i/4][i%4]=aa[i/4][i%4];
  6011. /* extract number position and size of holes and store in a table
  6012. * - hole coordinates are relative to box (x-x0,y-y0)
  6013. */
  6014. sdata.holes.num=0;
  6015. if (box1->num_frames>0) // speedup v0.42
  6016. num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once
  6017. // printf(" num_holes=%d\n",sdata.holes.num);
  6018. /*
  6019. after division of two glued chars, boundaries could be wrong,
  6020. check this first (ToDo: only if a flag set?)
  6021. */
  6022. if (2*y0 < box1->m2+box1->m3)
  6023. if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){
  6024. /* could be a "I" from divided "Ij" or "Ig" */
  6025. for(y=(box1->m3+box1->m2)/2;2*y<box1->m3+box1->m4;y++)
  6026. if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break;
  6027. if(2*y<box1->m3+box1->m4)
  6028. if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){
  6029. /* be sure, ~_ */
  6030. if (y>y0) y1=box1->y1=y;
  6031. }
  6032. }
  6033. DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); )
  6034. DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); )
  6035. DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d (4 corners)"
  6036. " d= %d %d %d %d",
  6037. aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,
  6038. aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,
  6039. aa[0][2], aa[1][2], aa[2][2], aa[3][2]);)
  6040. DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);)
  6041. // --- test thin lines - ---------------------------------
  6042. for( ad=100; 2*dy<box1->m3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font)
  6043. DBG( wchar_t c_ask='-'; )
  6044. if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0),
  6045. y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break;
  6046. if( box1->dots ) { Setac(box1,'=',97);break; }
  6047. if (dx<=2*dy) ad=98*ad/100;
  6048. if (dx<=3*dy) ad=99*ad/100;
  6049. if (!box1->m4) ad=96*ad/100;
  6050. else {
  6051. if (y1>=box1->m3) {
  6052. if ( dx<2*dy) ad=98*ad/100;
  6053. if (2*dx<3*dy) ad=98*ad/100;
  6054. Setac(box1,'_',ad);
  6055. break;
  6056. }
  6057. }
  6058. Setac(box1,'-',ad); if (ad>=100) return '-';
  6059. break;
  6060. }
  6061. // --- test thin lines = ---------------------------------
  6062. for( ; dy>2 && dx>2; ){ // min 3x3 (small font)
  6063. DBG( wchar_t c_ask='='; )
  6064. for( y=y0;y<y1;y++) // remove upper empty space
  6065. if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,1)==1 ) break;
  6066. if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break;
  6067. if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break;
  6068. if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break;
  6069. Setac(box1,'=',100);
  6070. return '=';
  6071. }
  6072. // --- test dots : ---------------------------------
  6073. for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font)
  6074. DBG( wchar_t c_ask=':'; )
  6075. // check the gap height
  6076. for( i1=dy/16;i1<dy/2;i1++)
  6077. if( get_bw(x0+dx/8,x1-dx/8,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
  6078. if (i1>=dy/2) Break;
  6079. for( i2=dy/16;i2<dy/2;i2++)
  6080. if( get_bw(x0+dx/8,x1-dx/8,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
  6081. if (i2>=dy/2) Break;
  6082. MSG(fprintf(stderr,"gap y12 %d %d",i1,i2);)
  6083. if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~;
  6084. if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i
  6085. if (gchar) ad=99*ad/100;
  6086. ad=ad-abs(i1-i2)/dy*20;
  6087. if (abs(i1-dx)>dy/4) Break; // round or quadratic dots?
  6088. if (abs(i1-dx)>dy/8) ad=98*ad/100;
  6089. if (abs(i2-dx)>dy/4) Break; // round or quadratic dots?
  6090. if (abs(i2-dx)>dy/8) ad=98*ad/100;
  6091. if (box1->dots!=1) ad=96*ad/100;
  6092. Setac(box1,':',ad); // dx<=3 ad--
  6093. if (ad>=100) return ':';
  6094. break;
  6095. }
  6096. // --- test dots ; ---------------------------------
  6097. if( 2*y0> box1->m2+box1->m1 ) // ~i
  6098. if( 4*y1>=3*box1->m3+box1->m2 ) // ~:
  6099. for( ad=100; dy>5 && dx>1 && dy>2*dx; ){ // max 3x3 (small font)
  6100. DBG( wchar_t c_ask=';'; )
  6101. // it would be better to detect a round pixel cluster on top
  6102. // check height of upper and lower dot
  6103. for( i1=0;i1<dy/2;i1++)
  6104. if( get_bw(x0,x1,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
  6105. if (i1>=dy/2) break;
  6106. for( i2=0;i2<dy/2;i2++)
  6107. if( get_bw(x0,x1,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
  6108. if (i2<i1) break;
  6109. /* test for horizontal symmetry ~i */
  6110. for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
  6111. if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
  6112. if (y==dy) ad=96*ad/100; /* ~i */
  6113. if (i2==i1 && y1<=box1->m3) ad=97*ad/100;
  6114. if (i2-i1<dy/8) ad=99*ad/100;
  6115. Setac(box1,';',ad); // dx<=3 ad--
  6116. if (ad>=100) return ';';
  6117. break;
  6118. }
  6119. // --- first test small dots . ---------------------------------
  6120. if( 3*dy<box1->m4-box1->m1 && abs(dx-dy)<(dx+dy)/4+2
  6121. && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline?
  6122. && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00
  6123. DBG( wchar_t c_ask='.'; )
  6124. d=0; r1=60;r2=140; ad=99;
  6125. for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */
  6126. rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number
  6127. ry=100*(2*y-(y0+y1))/dy;
  6128. if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; }
  6129. if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; }
  6130. // MSG( fprintf(stderr,"x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2); )
  6131. }
  6132. if (loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)> dx/8) { ad=98*ad/100; // ,
  6133. if (loop(box1->p,x0,y1 ,x1-x0,cs,0,RI)<=dx/8) ad=98*ad/100; } // ,
  6134. MSG( fprintf(stderr,"d= %3d ad= %3d", d, ad); )
  6135. if(d==0)
  6136. if( loop(box1->p,x0,y0,x1-x0,cs,0,RI)
  6137. <= loop(box1->p,x0,y1,x1-x0,cs,0,RI)
  6138. || loop(box1->p,x1,y0,x1-x0,cs,0,LE)
  6139. >= loop(box1->p,x1,y1,x1-x0,cs,0,LE) )
  6140. {
  6141. bc='.';
  6142. if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; }
  6143. Setac(box1,bc,ad);
  6144. }
  6145. }
  6146. // --- first test small dots , ---------------------------------
  6147. if( 3*dy<2*(box1->m4-box1->m1)
  6148. && 2*y0> box1->m2+box1->m3
  6149. && (2*dx<3*dy
  6150. || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) ){ // ocr-a-,
  6151. DBG( wchar_t c_ask=','; )
  6152. ad=100; bc=',';
  6153. if (dy==1 && dx==1) ad=98*ad/100;
  6154. if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case
  6155. if (dx>=dy) ad=99*ad/100;
  6156. if( 2*dy >= box1->m4-box1->m1) ad=98*ad/100;
  6157. if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */
  6158. > loop(box1->p,x0,y1,x1-x0,cs,0,RI)
  6159. && loop(box1->p,x1,y0,x1-x0,cs,0,LE)
  6160. < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; }
  6161. else { /* with upper circle */
  6162. if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)<dx/2 ) ad=98*ad/100;
  6163. if( loop(box1->p,x1, y1 ,x1-x0,cs,0,LE)<dx/2 ) ad=98*ad/100;
  6164. if( loop(box1->p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 )
  6165. if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 ) ad=96*ad/100;
  6166. }
  6167. if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; }
  6168. Setac(box1,bc,ad);
  6169. }
  6170. // --- first test small dots '" ---------------------------------
  6171. if( 2*dy < box1->m4 -box1->m1+1
  6172. && 2*y0 < box1->m2 +box1->m3
  6173. && 3*y1 < box1->m2+2*box1->m3+2 ){
  6174. DBG( wchar_t c_ask='\''; )
  6175. ad=100; bc='\'';
  6176. if (2*y1 >= box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } // ~!
  6177. if (3*y1>=2*box1->m2+box1->m3) { ad=96*ad/100; MSG({}) }
  6178. if (get_bw(x0,x1,(box1->m2+box1->m3)/2,box1->m4,box1->p,cs,1)!=0)
  6179. { ad=98*ad/100; MSG({}) }
  6180. if (dx>4
  6181. && num_cross(x0,x1,y1,y1,box1->p,cs) == 2) { // " "
  6182. bc='"';
  6183. // ocr-a-" has no gap!
  6184. if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100;
  6185. } else {
  6186. if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100;
  6187. if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100;
  6188. if (dx>dy) { ad=96*ad/100; MSG({}) }
  6189. }
  6190. if ( num_cross(x1,x1, y0 , y1 ,box1->p,cs)!=1) ad=99*ad/100;
  6191. if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100;
  6192. if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font
  6193. if (2*y0 > box1->m1+box1->m2) ad=99*ad/100;
  6194. Setac(box1,bc,ad);
  6195. if (ad>=100) return bc;
  6196. }
  6197. // --- TILDE ~ ---------------------------------
  6198. if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>3 && dy>1
  6199. && 2*y0< box1->m1+box1->m2
  6200. && 3*y1<2*box1->m2+box1->m3 ){
  6201. if( loop(box1->p,x0,y0,dx,cs,0,RI)
  6202. > loop(box1->p,x0,y1,dx,cs,0,RI)
  6203. && loop(box1->p,x1,y0,dx,cs,0,LE)
  6204. < loop(box1->p,x1,y1,dx,cs,0,LE)
  6205. && num_cross(x0,x1,y0,y0,box1->p,cs) == 2
  6206. && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) {
  6207. DBG( wchar_t c_ask='~'; )
  6208. bc=TILDE;
  6209. Setac(box1,bc,99);
  6210. }
  6211. }
  6212. // --- CIRCUMFLEX, hat ^ ---------------------------------
  6213. if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>2 && dy>1
  6214. && 2*y0< box1->m1+box1->m2
  6215. && 3*y1<2*box1->m2+box1->m3 ){
  6216. DBG( wchar_t c_ask='^'; )
  6217. if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
  6218. > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8
  6219. || loop(box1->p,x0,y0 ,dx,cs,0,RI)
  6220. > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 )
  6221. && ( loop(box1->p,x1,y0 ,dx,cs,0,LE)
  6222. > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8
  6223. || loop(box1->p,x1,y0 ,dx,cs,0,LE)
  6224. > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 )
  6225. && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1
  6226. && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2
  6227. || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) {
  6228. bc='^';
  6229. Setac(box1,bc,99);
  6230. }
  6231. }
  6232. // ------------------------------------------------------
  6233. // if( dots==1 ){ um='\''; }
  6234. #if 0 /* ToDo: change to vectors, call here or in whatletter */
  6235. if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong)
  6236. y=box1->m1;
  6237. for(;y<y0+dy/2;y++)if( get_bw(x0+dx/4,x1,y,y,box1->p,cs,1)==1) break;
  6238. { i1=y;
  6239. if( y<y0+dy/4 )
  6240. for(;y<y0+dy/2;y++)if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) break;
  6241. if( y<y0+dy/2 && 5*(y-i1+1)>box1->m2-box1->m1){
  6242. testumlaut(box1,cs,2,&um); // set modifier + new y0 ???
  6243. }
  6244. }
  6245. }
  6246. #else
  6247. um = box1->modifier;
  6248. #endif
  6249. if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){
  6250. for(y=y1;y>y0;y--)
  6251. if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u
  6252. }
  6253. // --- test numbers 0..9 --- separated for faster compilation
  6254. if( JOB->cfg.only_numbers ) return ocr0n(&sdata);
  6255. // bc=ocr1(box1,bp,cs);
  6256. if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100)
  6257. return bc; // for fast compilable tests
  6258. // ------ separated for faster compilation
  6259. // ToDo: insert ocr0_shared_t here and split into a,b,cC,d,e,f,g9,...
  6260. #define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100)
  6261. IF_NOT_SURE bc=ocr0_eE(&sdata);
  6262. IF_NOT_SURE bc=ocr0_f(&sdata);
  6263. IF_NOT_SURE bc=ocr0_bB(&sdata);
  6264. IF_NOT_SURE bc=ocr0_dD(&sdata);
  6265. IF_NOT_SURE bc=ocr0_F(&sdata);
  6266. IF_NOT_SURE bc=ocr0_uU(&sdata);
  6267. IF_NOT_SURE bc=ocr0_micro(&sdata);
  6268. IF_NOT_SURE bc=ocr0_vV(&sdata);
  6269. IF_NOT_SURE bc=ocr0_rR(&sdata);
  6270. IF_NOT_SURE bc=ocr0_m(&sdata);
  6271. IF_NOT_SURE bc=ocr0_tT(&sdata);
  6272. IF_NOT_SURE bc=ocr0_sS(&sdata);
  6273. IF_NOT_SURE bc=ocr0_gG(&sdata);
  6274. IF_NOT_SURE bc=ocr0_xX(&sdata);
  6275. IF_NOT_SURE bc=ocr0_yY(&sdata);
  6276. IF_NOT_SURE bc=ocr0_zZ(&sdata);
  6277. IF_NOT_SURE bc=ocr0_wW(&sdata);
  6278. IF_NOT_SURE bc=ocr0_aA(&sdata);
  6279. IF_NOT_SURE bc=ocr0_cC(&sdata);
  6280. IF_NOT_SURE bc=ocr0_lL(&sdata);
  6281. IF_NOT_SURE bc=ocr0_oO(&sdata);
  6282. IF_NOT_SURE bc=ocr0_pP(&sdata);
  6283. IF_NOT_SURE bc=ocr0_qQ(&sdata);
  6284. IF_NOT_SURE bc=ocr0_iIjJ(&sdata);
  6285. IF_NOT_SURE bc=ocr0_n(&sdata);
  6286. IF_NOT_SURE bc=ocr0_M(&sdata);
  6287. IF_NOT_SURE bc=ocr0_N(&sdata);
  6288. IF_NOT_SURE bc=ocr0_h(&sdata);
  6289. IF_NOT_SURE bc=ocr0_H(&sdata);
  6290. IF_NOT_SURE bc=ocr0_k(&sdata);
  6291. IF_NOT_SURE bc=ocr0_K(&sdata);
  6292. IF_NOT_SURE bc=ocr0n(&sdata);
  6293. IF_NOT_SURE bc=ocr0_brackets(&sdata);
  6294. IF_NOT_SURE bc=ocr0p9(&sdata);
  6295. IF_NOT_SURE bc=ocr0px(&sdata);
  6296. if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,"<!--ERROR 576-->");
  6297. if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0];
  6298. /* will be removed later, only fix old things */
  6299. for (i=0;i<box1->num_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; }
  6300. return bc;
  6301. }