PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/Workspaces/Core/Portable/Shared/Utilities/StringBreaker.cs

https://gitlab.com/sharadag/Roslyn
C# | 327 lines | 228 code | 49 blank | 50 comment | 41 complexity | 6caf382e6259e0e965b29b0ff60ccb89 MD5 | raw file
  1. // Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Diagnostics;
  5. using Microsoft.CodeAnalysis.Text;
  6. namespace Microsoft.CodeAnalysis.Shared.Utilities
  7. {
  /// <summary>
  /// The ordered sequence of <see cref="TextSpan"/>s produced by a
  /// <see cref="StringBreaker"/> routine.
  /// The common case (few, short, nearly adjacent spans) is stored without any
  /// heap allocation: every span becomes a small 'gap' + 'length' bitfield and
  /// all bitfields are packed into one 32-bit value. Inputs that do not fit
  /// that encoding are kept in a <see cref="List{T}"/> instead.
  /// </summary>
  internal struct StringBreaks
  {
      // Tunable knobs; everything else below is derived from these two values.
      // With 1 gap bit and 4 length bits a span costs 5 bits, so a 32-bit value
      // holds up to 6 spans, each at most 15 characters long and at most 1
      // character away from the end of the previous span.
      private const int BitsForGap = 1;
      private const int BitsForLength = 4;

      private const int BitsPerEncodedSpan = BitsForGap + BitsForLength;
      private const int MaxShortSpans = 32 / BitsPerEncodedSpan;
      private const int MaxGap = (1 << BitsForGap) - 1;
      private const int MaxLength = (1 << BitsForLength) - 1;

      // Exactly one representation is in use: when _spans is non-null it holds
      // the spans, otherwise _encodedSpans does.
      private readonly List<TextSpan> _spans;
      private readonly EncodedSpans _encodedSpans;

      /// <summary>
      /// A 32-bit bitmap viewed as <see cref="MaxShortSpans"/> consecutive
      /// fields of <see cref="BitsPerEncodedSpan"/> bits each.
      /// </summary>
      private struct EncodedSpans
      {
          private const uint Mask = (1u << BitsPerEncodedSpan) - 1u;

          private uint _value;

          /// <summary>
          /// Reads or writes the bitfield stored in slot <paramref name="index"/>.
          /// </summary>
          public byte this[int index]
          {
              get
              {
                  Debug.Assert(index >= 0 && index < MaxShortSpans);

                  int shift = index * BitsPerEncodedSpan;
                  uint field = (_value >> shift) & Mask;
                  return (byte)field;
              }

              set
              {
                  Debug.Assert(index >= 0 && index < MaxShortSpans);

                  int shift = index * BitsPerEncodedSpan;

                  // Clear the slot, then OR in the new bits. The incoming value is
                  // not masked here; it is expected to fit in one slot.
                  uint cleared = _value & ~(Mask << shift);
                  _value = cleared | ((uint)value << shift);
              }
          }
      }

      private StringBreaks(EncodedSpans encodedSpans)
      {
          _spans = null;
          _encodedSpans = encodedSpans;
      }

      private StringBreaks(List<TextSpan> spans)
      {
          _spans = spans;
          _encodedSpans = default(EncodedSpans);
      }

      /// <summary>
      /// Breaks <paramref name="text"/> by calling <paramref name="spanGenerator"/>
      /// repeatedly, preferring the compact encoding and using a list only when
      /// the spans cannot be encoded.
      /// </summary>
      /// <param name="text">The string to break.</param>
      /// <param name="spanGenerator">
      /// Given the text and a start position, returns the next span at or after
      /// that position, or an empty span when there are no more.
      /// </param>
      public static StringBreaks Create(string text, Func<string, int, TextSpan> spanGenerator)
      {
          Debug.Assert(text != null);
          Debug.Assert(spanGenerator != null);

          EncodedSpans compact;
          if (TryEncodeSpans(text, spanGenerator, out compact))
          {
              return new StringBreaks(compact);
          }

          return new StringBreaks(CreateFallbackList(text, spanGenerator));
      }

      /// <summary>
      /// Attempts to pack every span of <paramref name="text"/> into
      /// <paramref name="encodedSpans"/>. Returns false as soon as there are too
      /// many spans, a span is too long, or a gap is too wide.
      /// </summary>
      private static bool TryEncodeSpans(string text, Func<string, int, TextSpan> spanGenerator, out EncodedSpans encodedSpans)
      {
          encodedSpans = default(EncodedSpans);

          int position = 0;
          int slot = 0;
          while (position < text.Length)
          {
              TextSpan part = spanGenerator(text, position);
              if (part.IsEmpty)
              {
                  // The generator has nothing more to offer.
                  return true;
              }

              int gap = part.Start - position;
              Debug.Assert(gap >= 0, "Bad generator.");

              bool encodable = slot < MaxShortSpans
                  && part.Length <= MaxLength
                  && gap <= MaxGap;
              if (!encodable)
              {
                  return false;
              }

              encodedSpans[slot] = Encode(gap, part.Length);
              slot++;
              position = part.End;
          }

          return true;
      }

      /// <summary>
      /// Collects every span of <paramref name="text"/> into a list, with no
      /// limit on span count, span length or gap size.
      /// </summary>
      private static List<TextSpan> CreateFallbackList(string text, Func<string, int, TextSpan> spanGenerator)
      {
          var result = new List<TextSpan>();

          int position = 0;
          while (position < text.Length)
          {
              TextSpan part = spanGenerator(text, position);
              if (part.IsEmpty)
              {
                  // The generator has nothing more to offer.
                  return result;
              }

              Debug.Assert(part.Start >= position, "Bad generator.");

              result.Add(part);
              position = part.End;
          }

          return result;
      }

      /// <summary>
      /// Number of spans held.
      /// </summary>
      public int Count
      {
          get
          {
              if (_spans != null)
              {
                  return _spans.Count;
              }

              // An all-zero slot marks the end of the encoded spans; slots that
              // were written always hold a span that was not empty.
              int count = 0;
              while (count < MaxShortSpans && _encodedSpans[count] != 0)
              {
                  count++;
              }

              return count;
          }
      }

      /// <summary>
      /// Gets the span at position <paramref name="index"/>.
      /// </summary>
      public TextSpan this[int index]
      {
          get
          {
              if (index < 0)
              {
                  throw new IndexOutOfRangeException(nameof(index));
              }

              if (_spans != null)
              {
                  return _spans[index];
              }

              // Encoded spans only record their distance from the previous span,
              // so walk from the first slot and accumulate absolute positions.
              int offset = 0;
              int slot = 0;
              while (slot < MaxShortSpans)
              {
                  byte encoded = _encodedSpans[slot];
                  if (encoded == 0)
                  {
                      break;
                  }

                  int spanStart = offset + DecodeGap(encoded);
                  int spanLength = DecodeLength(encoded);
                  if (slot == index)
                  {
                      return new TextSpan(spanStart, spanLength);
                  }

                  offset = spanStart + spanLength;
                  slot++;
              }

              throw new IndexOutOfRangeException(nameof(index));
          }
      }

      /// <summary>
      /// Packs a gap and a length into one bitfield: the gap sits above the
      /// <see cref="BitsForLength"/> low bits that hold the length.
      /// </summary>
      private static byte Encode(int gap, int length)
      {
          Debug.Assert(gap >= 0 && gap <= MaxGap);
          Debug.Assert(length >= 0 && length <= MaxLength);

          int packed = (gap << BitsForLength) | length;
          return unchecked((byte)packed);
      }

      /// <summary>Extracts the length (low bits) from an encoded span.</summary>
      private static int DecodeLength(byte b)
      {
          return b & MaxLength;
      }

      /// <summary>Extracts the gap (bits above the length) from an encoded span.</summary>
      private static int DecodeGap(byte b)
      {
          return b >> BitsForLength;
      }
  }
  /// <summary>
  /// Splits identifiers into parts at punctuation, digit/non-digit boundaries
  /// and casing transitions.
  /// </summary>
  internal static class StringBreaker
  {
      private static readonly Func<string, int, TextSpan> s_characterPartsGenerator =
          (identifier, start) => GenerateSpan(identifier, start, word: false);

      private static readonly Func<string, int, TextSpan> s_wordPartsGenerator =
          (identifier, start) => GenerateSpan(identifier, start, word: true);

      /// <summary>
      /// Breaks an identifier into character parts: every uppercase letter
      /// starts a new part (e.g. "AddMetadata" -> A, dd, M, etadata).
      /// </summary>
      public static StringBreaks BreakIntoCharacterParts(string identifier)
      {
          return StringBreaks.Create(identifier, s_characterPartsGenerator);
      }

      /// <summary>
      /// Breaks an identifier into word parts: runs of uppercase letters stay
      /// together (e.g. "AddMetadata" -> Add, Metadata).
      /// </summary>
      public static StringBreaks BreakIntoWordParts(string identifier)
      {
          return StringBreaks.Create(identifier, s_wordPartsGenerator);
      }

      /// <summary>
      /// Finds the next part of <paramref name="identifier"/> at or after
      /// <paramref name="wordStart"/>.
      /// </summary>
      /// <param name="identifier">The string being broken.</param>
      /// <param name="wordStart">Position to start looking from.</param>
      /// <param name="word">
      /// True to break on words, false to break on characters.
      /// </param>
      /// <returns>
      /// The span of the next part, or <c>default(TextSpan)</c> when only
      /// punctuation (other than '_') remains.
      /// </returns>
      public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
      {
          int partStart = wordStart;
          int end = identifier.Length;

          for (int boundary = partStart + 1; boundary < end; boundary++)
          {
              if (!IsBreakAt(identifier, boundary, partStart, word))
              {
                  continue;
              }

              if (IsAllPunctuation(identifier, partStart, boundary))
              {
                  // Nothing but punctuation so far: skip it and let the part
                  // begin at this boundary instead.
                  partStart = boundary;
                  continue;
              }

              return new TextSpan(partStart, boundary - partStart);
          }

          // No further break was found; whatever is left forms the last part,
          // unless it is punctuation only.
          return IsAllPunctuation(identifier, partStart, end)
              ? default(TextSpan)
              : new TextSpan(partStart, end - partStart);
      }

      /// <summary>
      /// Decides whether a part boundary lies between the characters at
      /// <paramref name="index"/> - 1 and <paramref name="index"/>.
      /// </summary>
      private static bool IsBreakAt(string identifier, int index, int wordStart, bool word)
      {
          char previous = identifier[index - 1];
          char current = identifier[index];

          // Punctuation on either side always separates parts.
          if (char.IsPunctuation(previous) || char.IsPunctuation(current))
          {
              return true;
          }

          // So does switching between digits and non-digits.
          if (char.IsDigit(previous) != char.IsDigit(current))
          {
              return true;
          }

          return TransitionFromLowerToUpper(identifier, word, index)
              || TransitionFromUpperToLower(identifier, word, index, wordStart);
      }

      /// <summary>
      /// Returns true when every character in [<paramref name="start"/>,
      /// <paramref name="end"/>) is punctuation. An empty range yields true.
      /// </summary>
      private static bool IsAllPunctuation(string identifier, int start, int end)
      {
          int position = start;
          while (position < end)
          {
              char c = identifier[position];

              // '_' is deliberately not treated as punctuation, since there may
              // be things with that name.
              bool countsAsPunctuation = c != '_' && char.IsPunctuation(c);
              if (!countsAsPunctuation)
              {
                  return false;
              }

              position++;
          }

          return true;
      }

      /// <summary>
      /// Word-mode only: detects the point where a run of uppercase letters
      /// hands over to a capitalized word, for example:
      ///   IDisposable  -> I, Disposable
      ///   UIElement    -> UI, Element
      ///   HTMLDocument -> HTML, Document
      /// </summary>
      private static bool TransitionFromUpperToLower(string identifier, bool word, int index, int wordStart)
      {
          if (!word)
          {
              return false;
          }

          if (index == wordStart || index + 1 >= identifier.Length)
          {
              return false;
          }

          // The character at 'index' must be uppercase and be followed by a
          // lowercase one.
          if (!char.IsUpper(identifier[index]) || !char.IsLower(identifier[index + 1]))
          {
              return false;
          }

          // Only break when everything before 'index' in this part is uppercase.
          // "Foo" must not become "F, oo", whereas "IFoo" and "UIFoo" become
          // "I, Foo" and "UI, Foo": the last uppercase letter belongs with the
          // lowercase letters that follow it. As a consequence a name such as
          // "HELLOthere" is not split properly, but names like that do not show
          // up in .Net programs.
          for (int i = wordStart; i < index; i++)
          {
              if (!char.IsUpper(identifier[i]))
              {
                  return false;
              }
          }

          return true;
      }

      /// <summary>
      /// Detects a casing change that starts a new part at
      /// <paramref name="index"/>.
      /// </summary>
      private static bool TransitionFromLowerToUpper(string identifier, bool word, int index)
      {
          bool previousIsUpper = char.IsUpper(identifier[index - 1]);
          if (!char.IsUpper(identifier[index]))
          {
              return false;
          }

          // When breaking on characters, any uppercase letter starts a part.
          // When breaking on words, it must additionally follow a character that
          // is not uppercase:
          //
          //   "AddMetadata"  words: Add, Metadata   characters: A, dd, M, etadata
          //   "AM"           words: AM              characters: A, M
          //
          // The search string is broken on characters, while the symbol name is
          // broken on words.
          return !word || !previousIsUpper;
      }
  }
  278. }