/TeXmacs-1.0.7.11-src/src/System/Language/hyphenate.cpp
C++ | 137 lines | 101 code | 14 blank | 22 comment | 49 complexity | 666ab960105fa34b525ea2e911e3fb40 MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, MPL-2.0-no-copyleft-exception
1
/******************************************************************************
* MODULE     : hyphenate.cpp
* DESCRIPTION: hyphenation by Liang's algorithm
* COPYRIGHT  : (C) 1999  Joris van der Hoeven
*******************************************************************************
* This software falls under the GNU general public license version 3 or later.
* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
******************************************************************************/
11
12#include "file.hpp"
13#include "hyphenate.hpp"
14#include "analyze.hpp"
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19
// Integer type alias; nothing in this file refers to it.
typedef int SI;

// Exclusive upper bound on the length of the substrings that get_hyphens
// looks up in the pattern table (lengths 1 .. MAX_SEARCH-1 are tried).
#define MAX_SEARCH      10

// Not referenced anywhere in this file.
#define MAX_BUFFER_SIZE 256
23
/*
static bool
my_strncmp (char* s1, char* s2, int len) {
  int i;
  for (i=0; i<len; i++) if (s1[i]!=s2[i]) return false;
  return true;
}
*/
32
33static string
34unpattern (string s) {
35 int i, n= N(s);
36 string r;
37 for (i=0; i<n; ) {
38 while ((i<n) && (s[i]>='0') && (s[i]<='9')) i++;
39 if (i<n) r << s[i++];
40 }
41 return r;
42}
43
44static string
45hyphen_normalize (string s) {
46 int i;
47 string r (0);
48 for (i=0; i<N(s); i++)
49 if ((i+3<N(s)) && (s[i]=='^') && (s[i+1]=='^')) {
50 r << from_hexadecimal (s (i+2, i+4));
51 i+=3;
52 }
53 else r << s[i];
54 return r;
55}
56
57hashmap<string,string>
58load_hyphen_table (string file_name) {
59 string s;
60 file_name= string ("hyphen.") * file_name;
61 load_string (url ("$TEXMACS_PATH/langs/natural/hyphen", file_name), s, true);
62 if (DEBUG_VERBOSE) cout << "TeXmacs] Loading " << file_name << "\n";
63
64 hashmap<string,string> H ("?");
65 bool flag=false;
66 int i=0, n= N(s);
67 while (i<n) {
68 string buffer;
69 while ((i<n) && (s[i]!=' ') && (s[i]!='\t') && (s[i]!='\n')) {
70 if (s[i] != '%') buffer << s[i++];
71 else while ((i<n) && (s[i]!='\n')) i++;
72 }
73 if (i<n) i++;
74 if (buffer == "}") flag=false;
75 string norm= hyphen_normalize (buffer);
76 //cout << norm << "\n";
77 if (flag && (i!=0)) H (unpattern (norm))= norm;
78 if (buffer == "\\patterns{") flag=true;
79 }
80
81 // cout << file_name << " done!\n";
82 return H;
83}
84
85static string
86lower_case (string s) {
87 int i;
88 string r (N(s));
89 for (i=0; i<N(s); i++) {
90 if ((s[i]>='A') && (s[i]<='Z'))
91 r[i]= (char) (((int) s[i])+ ((int) 'a')- ((int) 'A'));
92 else r[i]=s[i];
93 }
94 return r;
95}
96
97array<int>
98get_hyphens (string s, hashmap<string,string> H) {
99 ASSERT (N(s) != 0, "hyphenation of empty string");
100 s= "." * lower_case (s) * ".";
101 // cout << s << "\n";
102 int i, j, k, m, len;
103 array<int> T (N(s)+1);
104 for (i=0; i<N(s)+1; i++) T[i]=0;
105 for (len=1; len < MAX_SEARCH; len++)
106 for (i=0; i<N(s)-len; i++) {
107 string r= H [s (i, i+len)];
108 if (!(r == "?")) {
109 // cout << " " << s (i, i+len) << " => " << r << "\n";
110 for (j=0, k=0; j<=len; j++, k++) {
111 if ((k<N(r)) && (r[k]>='0') && (r[k]<='9')) {
112 m=((int) r[k])-((int) '0');
113 k++;
114 }
115 else m=0;
116 if (m>T[i+j]) T[i+j]=m;
117 }
118 }
119 }
120
121 array<int> penalty (N(s)-3);
122 for (i=2; i<N(s)-1; i++)
123 penalty [i-2]= (((T[i]&1)==1)? HYPH_STD: HYPH_INVALID);
124 if (N(penalty)>0) penalty[0] = penalty[N(penalty)-1] = HYPH_INVALID;
125 if (N(penalty)>1) penalty[1] = penalty[N(penalty)-2] = HYPH_INVALID;
126 if (N(penalty)>2) penalty[N(penalty)-3] = HYPH_INVALID;
127 // cout << " -> " << penalty << "\n";
128 return penalty;
129}
130
131void
132std_hyphenate (string s, int after, string& left, string& right, int penalty) {
133 left = s (0, after+1);
134 right= s (after+1, N(s));
135 if (penalty >= HYPH_INVALID) left << string ("\\");
136 else left << string ("-");
137}