PageRenderTime 35ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/wyszukiwanie/soundex.c

https://github.com/cbart/njp-lab-2009
C | 449 lines | 225 code | 63 blank | 161 comment | 135 complexity | 8e0e96fbcc053f7cf53d2a042afc1953 MD5 | raw file
  1. /*
  2. * v 1.0d TESTED-OK 20060308
  3. * -----------------------
  4. *
  5. * The following SoundEx function is:
  6. *
  7. * (C) Copyright 2002 - 2006, Creativyst, Inc.
  8. * ALL RIGHTS RESERVED
  9. *
  10. * For more information go to:
  11. * http://www.Creativyst.com
  12. * or email:
  13. * Support@Creativyst.com
  14. *
  15. * Redistribution and use in source and binary
  16. * forms, with or without modification, are
  17. * permitted provided that the following conditions
  18. * are met:
  19. *
  20. * 1. Redistributions of source code must
  21. * retain the above copyright notice, this
  22. * list of conditions and the following
  23. * disclaimer.
  24. *
  25. * 2. Redistributions in binary form must
  26. * reproduce the above copyright notice,
  27. * this list of conditions and the
  28. * following disclaimer in the
  29. * documentation and/or other materials
  30. * provided with the distribution.
  31. *
  32. * 3. All advertising materials mentioning
  33. * features or use of this software must
  34. * display the following acknowledgement:
  35. * This product includes software developed
  36. * by Creativyst, Inc.
  37. *
  38. * 4. The name of Creativyst, Inc. may not be
  39. * used to endorse or promote products
  40. * derived from this software without
  41. * specific prior written permission.
  42. *
  43. * THIS SOFTWARE IS PROVIDED BY CREATIVYST CORPORATION
  44. *`AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
  45. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  46. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  47. * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  48. * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  49. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50. * DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  51. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  52. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  53. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  54. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  55. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  56. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  57. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  58. *
  59. *
  60. * ------------------
  61. * ------------------
  62. * FUNCTION NOTES:
  63. * 1. To avoid all possibility of overwrites make
  64. * sure *SoundEx points to a buffer with at least
  65. * 11 bytes of storage.
  66. *
  67. * 2. This function is for 7/8-bit ASCII characters.
  68. * Modifications are required for UTF16/32, or for
  69. * anything other than the first 7-bits of utf-8.
  70. *
  71. * 3. For those embedded guys who will understand this:
  72. * This is a true library-grade (i.e. re-usable) function,
  73. * meaning it has no dependencies on outside functions
  74. * and requires no non-standard libraries be linked in
  75. * order for it to work. In this case, since it doesn't
  76. * even require the standard C library, it is what C99
  77. * (I think) calls a: strictly conforming freestanding
  78. * function.
  79. *
  80. */
  81. int SoundEx(char *SoundEx,
  82. char *WordString,
  83. int LengthOption,
  84. int CensusOption)
  85. {
  86. int InSz = 31;
  87. char WordStr[32]; /* one bigger than InSz */
  88. int SoundExLen, WSLen, i;
  89. char FirstLetter, *p, *p2;
  90. SoundExLen = WSLen = 0;
  91. SoundEx[0] = 0;
  92. if(CensusOption) {
  93. LengthOption = 4;
  94. }
  95. if(LengthOption) {
  96. SoundExLen = LengthOption;
  97. }
  98. if(SoundExLen > 10) {
  99. SoundExLen = 10;
  100. }
  101. if(SoundExLen < 4) {
  102. SoundExLen = 4;
  103. }
  104. if(!WordString) {
  105. return(0);
  106. }
  107. /* Copy WordString to WordStr
  108. * without using funcs from other
  109. * libraries.
  110. */
  111. for(p = WordString,p2 = WordStr,i = 0;(*p);p++,p2++,i++) {
  112. if(i >= InSz) break;
  113. (*p2) = (*p);
  114. }
  115. (*p2) = 0;
  116. /* Convert WordStr to
  117. * upper-case, without using funcs
  118. * from other libraries
  119. */
  120. for(p = WordStr;(*p);p++) {
  121. if( (*p) >= 'a' && (*p) <= 'z' ) {
  122. (*p) -= 0x20;
  123. }
  124. }
  125. /* convert all non-alpha
  126. * chars to spaces
  127. */
  128. for(p = WordStr;(*p);p++) {
  129. if( (*p) < 'A' || (*p) > 'Z' ) {
  130. (*p) = ' ';
  131. }
  132. }
  133. /* Remove leading spaces
  134. */
  135. for(i = 0, p = p2 = WordStr;(*p);p++) {
  136. if(!i) {
  137. if( (*p) != ' ' ) {
  138. (*p2) = (*p);
  139. p2++;
  140. i++;
  141. }
  142. }
  143. else {
  144. (*p2) = (*p);
  145. p2++;
  146. }
  147. }
  148. (*p2) = 0;
  149. /* Get length of WordStr
  150. */
  151. for(i = 0,p = WordStr;(*p);p++) i++;
  152. /* Remove trailing spaces
  153. */
  154. for(;i;i--) {
  155. if(WordStr[i] == ' ') {
  156. WordStr[i] = 0;
  157. }
  158. else {
  159. break;
  160. }
  161. }
  162. /* Get length of WordStr
  163. */
  164. for(WSLen = 0,p = WordStr;(*p);p++) WSLen++;
  165. if(!WSLen) {
  166. return(0);
  167. }
  168. /* Perform our own multi-letter
  169. * improvements
  170. *
  171. * underscore placeholders (_) will be
  172. * removed below.
  173. */
  174. if(!CensusOption) {
  175. if(WordStr[0] == 'P' && WordStr[1] == 'S') {
  176. WordStr[0] = '_';
  177. }
  178. if(WordStr[0] == 'P' && WordStr[1] == 'F') {
  179. WordStr[0] = '_';
  180. }
  181. for(i = 0;i < WSLen;i++) {
  182. if(WordStr[i] == 'D' && WordStr[i+1] == 'G') {
  183. WordStr[i] = '_';
  184. i++;
  185. continue;
  186. }
  187. if(WordStr[i] == 'G' && WordStr[i+1] == 'H') {
  188. WordStr[i] = '_';
  189. i++;
  190. continue;
  191. }
  192. if(WordStr[i] == 'K' && WordStr[i+1] == 'N') {
  193. WordStr[i] = '_';
  194. i++;
  195. continue;
  196. }
  197. if(WordStr[i] == 'G' && WordStr[i+1] == 'N') {
  198. WordStr[i] = '_';
  199. i++;
  200. continue;
  201. }
  202. if(WordStr[i] == 'M' && WordStr[i+1] == 'B') {
  203. WordStr[i+1] = '_';
  204. i++;
  205. continue;
  206. }
  207. if(WordStr[i] == 'P' && WordStr[i+1] == 'H') {
  208. WordStr[i] = 'F';
  209. WordStr[i+1] = '_';
  210. i++;
  211. continue;
  212. }
  213. if(WordStr[i] == 'T' &&
  214. WordStr[i+1] == 'C' &&
  215. WordStr[i+2] == 'H'
  216. ) {
  217. WordStr[i] = '_';
  218. i++; i++;
  219. continue;
  220. }
  221. if(WordStr[i] == 'M' && WordStr[i+1] == 'P'
  222. && (WordStr[i+2] == 'S' ||
  223. WordStr[i+2] == 'T' ||
  224. WordStr[i+2] == 'Z')
  225. ) {
  226. WordStr[i+1] = '_';
  227. i++;
  228. }
  229. }
  230. } /* end if(!CensusOption) */
  231. /* squeeze out underscore characters
  232. * added as a byproduct of above process
  233. * (only needed in c styled replace)
  234. */
  235. for(p = p2 = WordStr;(*p);p++) {
  236. (*p2) = (*p);
  237. if( (*p2) != '_' ) {
  238. p2++;
  239. }
  240. }
  241. (*p2) = 0;
  242. /* This must be done AFTER our
  243. * multi-letter replacements
  244. * since they could change
  245. * the first letter
  246. */
  247. FirstLetter = WordStr[0];
  248. /* In case we're in CensusOption
  249. * 1 and the word starts with
  250. * an 'H' or 'W'
  251. * (v1.0c djr: add test for H or W)
  252. */
  253. if(FirstLetter == 'H' || FirstLetter == 'W') {
  254. WordStr[0] = '-';
  255. }
  256. /* In properly done census
  257. * SoundEx, the H and W will
  258. * be squezed out before
  259. * performing the test
  260. * for adjacent digits
  261. * (this differs from how
  262. * 'real' vowels are handled)
  263. */
  264. if(CensusOption == 1) {
  265. for(p = &(WordStr[1]);(*p);p++) {
  266. if((*p) == 'H' || (*p) == 'W') {
  267. (*p) = '.';
  268. }
  269. }
  270. }
  271. /* Perform classic SoundEx
  272. * replacements.
  273. */
  274. for(p = WordStr;(*p);p++) {
  275. if( (*p) == 'A' ||
  276. (*p) == 'E' ||
  277. (*p) == 'I' ||
  278. (*p) == 'O' ||
  279. (*p) == 'U' ||
  280. (*p) == 'Y' ||
  281. (*p) == 'H' ||
  282. (*p) == 'W'
  283. ){
  284. (*p) = '0'; /* zero */
  285. }
  286. if( (*p) == 'B' ||
  287. (*p) == 'P' ||
  288. (*p) == 'F' ||
  289. (*p) == 'V'
  290. ){
  291. (*p) = '1';
  292. }
  293. if( (*p) == 'C' ||
  294. (*p) == 'S' ||
  295. (*p) == 'G' ||
  296. (*p) == 'J' ||
  297. (*p) == 'K' ||
  298. (*p) == 'Q' ||
  299. (*p) == 'X' ||
  300. (*p) == 'Z'
  301. ){
  302. (*p) = '2';
  303. }
  304. if( (*p) == 'D' ||
  305. (*p) == 'T'
  306. ){
  307. (*p) = '3';
  308. }
  309. if( (*p) == 'L' ) {
  310. (*p) = '4';
  311. }
  312. if( (*p) == 'M' ||
  313. (*p) == 'N'
  314. ){
  315. (*p) = '5';
  316. }
  317. if( (*p) == 'R' ) {
  318. (*p) = '6';
  319. }
  320. }
  321. /* soundex replacement loop done */
  322. /* In properly done census
  323. * SoundEx, the H and W will
  324. * be squezed out before
  325. * performing the test
  326. * for adjacent digits
  327. * (this differs from how
  328. * 'real' vowels are handled)
  329. */
  330. if(CensusOption == 1) {
  331. /* squeeze out dots
  332. */
  333. for(p = p2 = &WordStr[1];(*p);p++) {
  334. (*p2) = (*p);
  335. if( (*p2) != '.' ) {
  336. p2++;
  337. }
  338. }
  339. (*p2) = 0;
  340. }
  341. /* squeeze out extra equal adjacent digits
  342. * (don't include first letter)
  343. * v1.0c djr (now includes first letter)
  344. */
  345. for(p = p2 = &(WordStr[0]);(*p);p++) {
  346. (*p2) = (*p);
  347. if( (*p2) != p[1] ) {
  348. p2++;
  349. }
  350. }
  351. (*p2) = 0;
  352. /* squeeze out spaces and zeros
  353. * Leave the first letter code
  354. * to be replaced below.
  355. * (In case it made a zero)
  356. */
  357. for(p = p2 = &WordStr[1];(*p);p++) {
  358. (*p2) = (*p);
  359. if( (*p2) != ' ' && (*p2) != '0' ) {
  360. p2++;
  361. }
  362. }
  363. (*p2) = 0;
  364. /* Get length of WordStr
  365. */
  366. for(WSLen = 0,p = WordStr;(*p);p++) WSLen++;
  367. /* Right pad with zero characters
  368. */
  369. for(i = WSLen;i < SoundExLen;i++ ) {
  370. WordStr[i] = '0';
  371. }
  372. /* Size to taste
  373. */
  374. WordStr[SoundExLen] = 0;
  375. /* Replace first digit with
  376. * first letter.
  377. */
  378. WordStr[0] = FirstLetter;
  379. /* Copy WordStr to SoundEx
  380. */
  381. for(p2 = SoundEx,p = WordStr;(*p);p++,p2++) {
  382. (*p2) = (*p);
  383. }
  384. (*p2) = 0;
  385. return(SoundExLen);
  386. }