PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/src/SixPack/Text/UnicodeSemantics.cs

http://sixpack-library.googlecode.com/
C# | 183 lines | 98 code | 15 blank | 70 comment | 5 complexity | 7265e27930ebed3eb59da2c1c07a2475 MD5 | raw file
Possible License(s): LGPL-2.1
  1. // UnicodeSemantics.cs
  2. //
  3. // Copyright (C) 2008 Fullsix Marketing Interactivo LDA
  4. // Author: Marco Cecconi
  5. //
  6. // This library is free software; you can redistribute it and/or
  7. // modify it under the terms of the GNU Lesser General Public
  8. // License as published by the Free Software Foundation; either
  9. // version 2.1 of the License, or (at your option) any later version.
  10. //
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. // Lesser General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Lesser General Public
  17. // License along with this library; if not, write to the Free Software
  18. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. //
  20. // UnicodeSemantics.cs created with MonoDevelop
  21. // User: marco at 1:10 PM 6/23/2008
  22. //
  23. // To change standard headers go to Edit->Preferences->Coding->Standard Headers
  24. //
  25. using System;
  26. using System.Collections.Generic;
  27. namespace SixPack.Text
  28. {
  /// <summary>
  /// Validates a <see cref="System.UInt32"/> code point against a set of Unicode ranges.
  /// </summary>
  /// <remarks>
  /// Each <see cref="UnicodeRange"/> value is cast to <see cref="System.Int32"/> and used as an
  /// index into two parallel tables holding the inclusive lower and upper bound of a range.
  /// An instance keeps its own pair of bound arrays, ordered by ascending lower bound, so that
  /// lookups can use a binary search.
  /// </remarks>
  [CLSCompliant(false)]
  public class UnicodeSemantics
  {
      /// <summary>
      /// Represents the smallest possible value of a valid argument for <see cref="IsInRange(uint)"/>
      /// </summary>
      /// <exception cref="InvalidOperationException">
      /// The instance was created with an empty set of ranges.
      /// </exception>
      public uint MinValue
      {
          get
          {
              if (validRangesLower.Length == 0)
              {
                  throw new InvalidOperationException("No Unicode ranges are configured.");
              }

              // The bounds are kept sorted, so the first lower bound is the smallest one.
              return validRangesLower[0];
          }
      }

      /// <summary>
      /// Represents the largest possible value of a valid argument for <see cref="IsInRange(uint)"/>
      /// </summary>
      /// <exception cref="InvalidOperationException">
      /// The instance was created with an empty set of ranges.
      /// </exception>
      public uint MaxValue
      {
          get
          {
              if (validRangesHigher.Length == 0)
              {
                  throw new InvalidOperationException("No Unicode ranges are configured.");
              }

              // The table ranges do not overlap, so the range with the largest lower bound
              // also carries the largest upper bound.
              return validRangesHigher[validRangesHigher.Length - 1];
          }
      }

      // Inclusive bounds of the ranges accepted by this instance.
      // Parallel arrays, sorted by ascending lower bound.
      private readonly uint[] validRangesLower;
      private readonly uint[] validRangesHigher;

      /// <summary>
      /// Initializes a new instance of the <see cref="UnicodeSemantics"/> class.
      /// </summary>
      /// <param name="validRanges">
      /// The <see cref="UnicodeRange"/> values that define the ranges of characters we want to
      /// validate. They may be given in any order.
      /// </param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="validRanges"/> is null.
      /// </exception>
      /// <exception cref="IndexOutOfRangeException">
      /// A value in <paramref name="validRanges"/> does not map to an entry of the bounds tables.
      /// </exception>
      public UnicodeSemantics(params UnicodeRange[] validRanges)
      {
          if (validRanges == null)
          {
              throw new ArgumentNullException("validRanges");
          }

          uint[] lower = new uint[validRanges.Length];
          uint[] higher = new uint[validRanges.Length];
          for (int i = 0; i < validRanges.Length; i++)
          {
              int tableIndex = (int)validRanges[i];
              lower[i] = possibleRangesLower[tableIndex];
              higher[i] = possibleRangesHigher[tableIndex];
          }

          // Callers may pass the ranges in any order; MinValue, MaxValue and the binary search
          // in IsInRange all rely on ascending order, so sort both arrays by lower bound.
          Array.Sort(lower, higher);

          validRangesLower = lower;
          validRangesHigher = higher;
      }

      /// <summary>
      /// Initializes a new instance of the <see cref="UnicodeSemantics"/> class that accepts
      /// every range present in the bounds tables.
      /// </summary>
      public UnicodeSemantics()
      {
          // The tables are already sorted and are never modified, so they can be shared
          // between instances instead of being copied.
          validRangesLower = possibleRangesLower;
          validRangesHigher = possibleRangesHigher;
      }

      /// <summary>
      /// Returns true if the specified uint maps to a valid Unicode character in the ranges specified.
      /// </summary>
      /// <param name="value">The value.</param>
      /// <returns>
      /// True if the code is in one of the ranges defined in the constructor, False otherwise
      /// (including when no ranges were defined at all).
      /// </returns>
      public bool IsInRange(UInt32 value)
      {
          int index = Array.BinarySearch(validRangesLower, value);
          if (index < 0)
          {
              // No range starts exactly at "value". The bitwise complement of the result is
              // the index of the first lower bound greater than "value", so the only range
              // that can contain it is the one right before that position.
              index = ~index - 1;
              if (index < 0)
              {
                  // "value" is below every lower bound, or there are no ranges.
                  return false;
              }
          }

          // The lower bound at "index" is <= value; only the upper bound is left to check.
          return value <= validRangesHigher[index];
      }

      /// <summary>
      /// Returns true if the specified uint maps to a valid Unicode character in the ranges specified.
      /// </summary>
      /// <param name="value">The value.</param>
      /// <param name="validRanges">A series of <see cref="UnicodeRange"/> to match the value against.</param>
      /// <returns>
      /// True if the code is in one of the ranges, False otherwise.
      /// </returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="validRanges"/> is null.
      /// </exception>
      public static bool IsInRange(uint value, params UnicodeRange[] validRanges)
      {
          return new UnicodeSemantics(validRanges).IsInRange(value);
      }

      // Inclusive lower bound of every supported range, indexed by (int)UnicodeRange and
      // sorted in ascending order. The commented-out tail holds the bounds above 0xFFFF,
      // which are currently disabled.
      // NOTE(review): if UnicodeRange declares members for the disabled entries, using them
      // indexes past the end of this table - confirm against the enum.
      private static readonly uint[] possibleRangesLower = new uint[] {
          0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400,
          0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x08C0,
          0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00,
          0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380,
          0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780,
          0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00,
          0x1B80, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00, 0x2000, 0x2070, 0x20A0,
          0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300, 0x2400, 0x2440, 0x2460,
          0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, 0x27F0, 0x2800, 0x2900,
          0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, 0x2D00, 0x2D30, 0x2D80,
          0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000, 0x3040, 0x30A0, 0x3100, 0x3130,
          0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200, 0x3300, 0x3400, 0x4DC0, 0x4E00,
          0xA000, 0xA490, 0xA4D0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880,
          0xAC00, 0xD7B0, 0xD800, 0xDB80, 0xDC00, 0xE000, 0xE080, 0xE2E0, 0xE300,
          0xE340, 0xE360, 0xE380, 0xE3B0, 0xE400, 0xE430, 0xE450, 0xE470, 0xE490,
          0xE4C0, 0xE4F0, 0xE500, 0xE520, 0xE550, 0xE580, 0xE5A0, 0xE5C0, 0xE5E0,
          0xE600, 0xE630, 0xE650, 0xE690, 0xE6D0, 0xE740, 0xE770, 0xE780, 0xE800,
          0xE830, 0xEE00, 0xEEB0, 0xF8A0, 0xF8D0, 0xF900, 0xFB00, 0xFB50, 0xFE00,
          0xFE10, 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0/*, 0x10000, 0x10080,
          0x10100, 0x10140, 0x10190, 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400,
          0x10450, 0x10480, 0x10800, 0x10840, 0x10900, 0x10A00, 0x10A60, 0x12000,
          0x12400, 0x12480, 0x1D000, 0x1D100, 0x1D200, 0x1D250, 0x1D300, 0x1D360,
          0x1D380, 0x1D400, 0x1D800, 0x20000, 0x2A6E0, 0x2F800, 0x2FAB0, 0xE0000,
          0xE0080, 0xE0100, 0xE01F0, 0xF0000, 0xF0E70, 0xF16B0, 0x100000*/
      };

      // Inclusive upper bound of every supported range; entry i pairs with
      // possibleRangesLower[i]. The commented-out tail mirrors the disabled entries above.
      private static readonly uint[] possibleRangesHigher = new uint[] {
          0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF,
          0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x08FF,
          0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F,
          0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F,
          0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF,
          0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AFF, 0x1B7F,
          0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF, 0x206F, 0x209F, 0x20CF,
          0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF, 0x243F, 0x245F, 0x24FF,
          0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, 0x27FF, 0x28FF, 0x297F,
          0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, 0x2D2F, 0x2D7F, 0x2DDF,
          0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F, 0x309F, 0x30FF, 0x312F, 0x318F,
          0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF,
          0xA48F, 0xA4CF, 0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xABFF,
          0xD7AF, 0xD7FF, 0xDB7F, 0xDBFF, 0xDFFF, 0xE07F, 0xE0FF, 0xE2FF, 0xE33F,
          0xE35F, 0xE37F, 0xE3AF, 0xE3FF, 0xE42F, 0xE44F, 0xE46F, 0xE48F, 0xE4BF,
          0xE4EF, 0xE4FF, 0xE51F, 0xE54F, 0xE57F, 0xE59F, 0xE5BF, 0xE5DF, 0xE5FF,
          0xE62F, 0xE64F, 0xE68F, 0xE6CF, 0xE6FF, 0xE76F, 0xE77F, 0xE7FF, 0xE82F,
          0xEDFF, 0xEEAF, 0xF89F, 0xF8CF, 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F,
          0xFE1F, 0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF/*, 0x1007F, 0x100FF,
          0x1013F, 0x1018F, 0x102FF, 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F,
          0x1047F, 0x104AF, 0x1083F, 0x108FF, 0x1091F, 0x10A5F, 0x11FFF, 0x123FF,
          0x1247F, 0x1CFFF, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D2FF, 0x1D35F, 0x1D37F,
          0x1D3FF, 0x1D7FF, 0x1FFFF, 0x2A6DF, 0x2F7FF, 0x2FA1F, 0xDFFFF, 0xE007F,
          0xE00FF, 0xE01EF, 0xEFFFF, 0xF0E69, 0xF16AF, 0xFFFFF, 0x10FFFF*/
      };
  }
  168. }