/contrib/groff/src/libs/libgroff/unicode.cpp

https://bitbucket.org/freebsd/freebsd-head/ · C++ · 67 lines · 41 code · 3 blank · 23 comment · 25 complexity · 80c56ac3c9af4dfedf8e49f528d31529 MD5 · raw file

  1. // -*- C++ -*-
  2. /* Copyright (C) 2002
  3. Free Software Foundation, Inc.
  4. Written by Werner Lemberg <wl@gnu.org>
  5. This file is part of groff.
  6. groff is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 2, or (at your option) any later
  9. version.
  10. groff is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License along
  15. with groff; see the file COPYING. If not, write to the Free Software
  16. Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  17. #include "lib.h"
  18. #include "cset.h"
  19. #include "stringclass.h"
  20. #include "unicode.h"
  21. const char *check_unicode_name(const char *u)
  22. {
  23. if (*u != 'u')
  24. return 0;
  25. const char *p = ++u;
  26. for (;;) {
  27. int val = 0;
  28. const char *start = p;
  29. for (;;) {
  30. // only uppercase hex digits allowed
  31. if (!csxdigit(*p))
  32. return 0;
  33. if (csdigit(*p))
  34. val = val*0x10 + (*p-'0');
  35. else if (csupper(*p))
  36. val = val*0x10 + (*p-'A'+10);
  37. else
  38. return 0;
  39. // biggest Unicode value is U+10FFFF
  40. if (val > 0x10FFFF)
  41. return 0;
  42. p++;
  43. if (*p == '\0' || *p == '_')
  44. break;
  45. }
  46. // surrogates not allowed
  47. if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
  48. return 0;
  49. if (val > 0xFFFF) {
  50. if (*start == '0') // no leading zeros allowed if > 0xFFFF
  51. return 0;
  52. }
  53. else if (p - start != 4) // otherwise, check for exactly 4 hex digits
  54. return 0;
  55. if (*p == '\0')
  56. break;
  57. p++;
  58. }
  59. return u;
  60. }