PageRenderTime 445ms CodeModel.GetById 159ms app.highlight 65ms RepoModel.GetById 116ms app.codeStats 14ms

/wyszukiwanie/soundex.c

https://github.com/cbart/njp-lab-2009
C | 449 lines | 225 code | 63 blank | 161 comment | 135 complexity | 8e0e96fbcc053f7cf53d2a042afc1953 MD5 | raw file
  1/*
  2  * v 1.0d  TESTED-OK  20060308
  3  * -----------------------
  4  *
  5  * The following SoundEx function is:
  6  *
  7  *    (C) Copyright 2002 - 2006, Creativyst, Inc.
  8  *               ALL RIGHTS RESERVED
  9  *
 10  * For more information go to:
 11  *           http://www.Creativyst.com
 12  * or email:
 13  *           Support@Creativyst.com
 14  *
 15  * Redistribution and use in source and binary
 16  * forms, with or without modification, are
 17  * permitted provided that the following conditions
 18  * are met:
 19  *
 20  *   1. Redistributions of source code must
 21  *      retain the above copyright notice, this
 22  *      list of conditions and the following
 23  *      disclaimer.
 24  *
 25  *   2. Redistributions in binary form must
 26  *      reproduce the above copyright notice,
 27  *      this list of conditions and the
 28  *      following disclaimer in the
 29  *      documentation and/or other materials
 30  *      provided with the distribution.
 31  *
 32  *   3. All advertising materials mentioning
 33  *      features or use of this software must
 34  *      display the following acknowledgement:
 35  *      This product includes software developed
 36  *      by Creativyst, Inc.
 37  *
 38  *   4. The name of Creativyst, Inc. may not be
 39  *      used to endorse or promote products
 40  *      derived from this software without
 41  *      specific prior written permission.
 42  *
 43  * THIS SOFTWARE IS PROVIDED BY CREATIVYST CORPORATION
 44  *`AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 45  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 46  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 47  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 48  * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 49  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 51  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 52  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 54  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 56  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 57  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 58  *
 59  *
 60  * ------------------
 61  * ------------------
 62  * FUNCTION NOTES:
 63  *  1. To avoid all possibility of overwrites make
 64  *     sure *SoundEx points to a buffer with at least
 65  *     11 bytes of storage.
 66  *
 67  *  2. This function is for 7/8-bit ASCII characters.
 68  *     Modifications are required for UTF16/32, or for
 69  *     anything other than the first 7-bits of utf-8.
 70  *
 71  *  3. For those embedded guys who will understand this:
 72  *     This is a true library-grade (i.e. re-usable) function,
 73  *     meaning it has no dependencies on outside functions
 74  *     and requires no non-standard libraries be linked in
 75  *     order for it to work. In this case, since it doesn't
 76  *     even require the standard C library, it is what C99
 77  *     (I think) calls a: strictly conforming freestanding
 78  *     function.
 79  *
 80 */
 /*
  * sx_strip: delete every occurrence of the characters `a` and `b`
  * from the NUL-terminated string `s`, compacting it in place.
  * Pass the same character twice to strip a single character.
  */
 static void sx_strip(char *s, char a, char b)
 {
     char *out = s;

     for(; (*s); s++) {
         if( (*s) != a && (*s) != b ) {
             (*out) = (*s);
             out++;
         }
     }
     (*out) = 0;
 }

 /*
  * sx_digit: map an upper-case letter to its classic SoundEx digit
  * character ('0' for vowels, Y, H and W; '1'..'6' for the consonant
  * groups). Any other character is returned unchanged.
  */
 static char sx_digit(char c)
 {
     switch(c) {
     case 'A': case 'E': case 'I': case 'O':
     case 'U': case 'Y': case 'H': case 'W':
         return '0';
     case 'B': case 'P': case 'F': case 'V':
         return '1';
     case 'C': case 'S': case 'G': case 'J':
     case 'K': case 'Q': case 'X': case 'Z':
         return '2';
     case 'D': case 'T':
         return '3';
     case 'L':
         return '4';
     case 'M': case 'N':
         return '5';
     case 'R':
         return '6';
     default:
         return c;
     }
 }

 /*
  * SoundEx: compute the SoundEx code of a word.
  *
  *   SoundEx       Output buffer. Receives a NUL-terminated code of
  *                 4 to 10 characters, so it needs at least 11 bytes.
  *                 It is set to "" whenever 0 is returned (unless it
  *                 is a null pointer, in which case nothing is written).
  *   WordString    Input word. Only the first 31 characters are used.
  *                 Lower-case ASCII letters are upper-cased and every
  *                 non-letter is treated as a separator.
  *   LengthOption  Requested code length, clamped to the range 4..10.
  *                 Ignored (forced to 4) when CensusOption is non-zero.
  *   CensusOption  0: apply the extra multi-letter rules (PS, PF, DG,
  *                    GH, KN, GN, MB, PH, TCH, MPS/MPT/MPZ) before
  *                    coding.
  *                 non-zero: skip those rules and force a length of 4.
  *                 exactly 1: additionally drop H and W (after the
  *                    first letter) before adjacent equal digits are
  *                    merged, so letters separated only by H or W are
  *                    coded once.
  *
  * Returns the length of the code written, or 0 when either pointer
  * is null or the word contains no letters.
  *
  * Uses no library functions, like the original implementation.
  */
 int SoundEx(char *SoundEx,
             char *WordString,
             int   LengthOption,
             int   CensusOption)
 {
     enum { IN_MAX = 31 };          /* longest input prefix examined */
     char  Work[IN_MAX + 1];
     char  FirstLetter;
     char *src, *dst;
     int   CodeLen, Len, i;

     /* Guard the output pointer before the first write through it. */
     if(!SoundEx) {
         return(0);
     }
     SoundEx[0] = 0;

     if(CensusOption) {
         LengthOption = 4;
     }
     CodeLen = LengthOption;
     if(CodeLen > 10) {
         CodeLen = 10;
     }
     if(CodeLen < 4) {
         CodeLen = 4;
     }

     if(!WordString) {
         return(0);
     }

     /* Copy at most IN_MAX characters, upper-casing letters, turning
      * everything else into a space and dropping leading spaces.
      */
     dst = Work;
     for(src = WordString, i = 0; (*src) && i < IN_MAX; src++, i++) {
         char c = (*src);

         if(c >= 'a' && c <= 'z') {
             c -= 0x20;
         }
         if(c < 'A' || c > 'Z') {
             c = ' ';
         }
         if(c == ' ' && dst == Work) {
             continue;              /* still in the leading blanks */
         }
         (*dst) = c;
         dst++;
     }
     (*dst) = 0;

     /* Trim trailing spaces. Work[Len] is the terminator, so the
      * character to inspect is Work[Len - 1].
      */
     Len = (int)(dst - Work);
     while(Len > 0 && Work[Len - 1] == ' ') {
         Len--;
         Work[Len] = 0;
     }

     if(!Len) {
         return(0);
     }

     /* Multi-letter improvements (non-census only). Silent letters
      * are overwritten with '_' and removed afterwards. Reading
      * Work[i+1] / Work[i+2] is safe: at worst it hits the terminator,
      * and [i+2] is only evaluated when [i+1] is a letter.
      */
     if(!CensusOption) {
         if(Work[0] == 'P' && (Work[1] == 'S' || Work[1] == 'F')) {
             Work[0] = '_';
         }

         for(i = 0; i < Len; i++) {
             char a = Work[i];
             char b = Work[i + 1];

             if( (a == 'D' && b == 'G') ||
                 (a == 'G' && b == 'H') ||
                 (a == 'K' && b == 'N') ||
                 (a == 'G' && b == 'N') ) {
                 Work[i] = '_';
                 i++;               /* skip the letter that was kept */
             }
             else if(a == 'M' && b == 'B') {
                 Work[i + 1] = '_';
                 i++;
             }
             else if(a == 'P' && b == 'H') {
                 Work[i] = 'F';
                 Work[i + 1] = '_';
                 i++;
             }
             else if(a == 'T' && b == 'C' && Work[i + 2] == 'H') {
                 Work[i] = '_';
                 i += 2;
             }
             else if(a == 'M' && b == 'P' &&
                     (Work[i + 2] == 'S' ||
                      Work[i + 2] == 'T' ||
                      Work[i + 2] == 'Z')) {
                 Work[i + 1] = '_';
                 i++;
             }
         }
     }
     sx_strip(Work, '_', '_');

     /* Taken after the multi-letter rules because they can change
      * the first letter.
      */
     FirstLetter = Work[0];

     /* A leading H or W must not merge with the digit that follows,
      * so give it a placeholder that matches no digit.
      */
     if(FirstLetter == 'H' || FirstLetter == 'W') {
         Work[0] = '-';
     }

     /* Census rule: H and W after the first letter vanish before the
      * adjacent-digit test (unlike real vowels, which separate digits).
      */
     if(CensusOption == 1) {
         sx_strip(&Work[1], 'H', 'W');
     }

     /* Classic letter -> digit replacement. */
     for(src = Work; (*src); src++) {
         (*src) = sx_digit(*src);
     }

     /* Collapse runs of equal adjacent characters, first one included. */
     for(src = dst = Work; (*src); src++) {
         if(src[0] != src[1]) {
             (*dst) = (*src);
             dst++;
         }
     }
     (*dst) = 0;

     /* Drop separators and vowel codes, but keep position 0: it is
      * overwritten with the first letter below.
      */
     sx_strip(&Work[1], ' ', '0');

     for(Len = 0; Work[Len]; Len++) {
         ;
     }

     /* Emit exactly CodeLen characters, right-padding with '0'. */
     for(i = 0; i < CodeLen; i++) {
         SoundEx[i] = (i < Len) ? Work[i] : '0';
     }
     SoundEx[0] = FirstLetter;
     SoundEx[CodeLen] = 0;

     return(CodeLen);
 }
449