/crates/mun_hir/src/line_index.rs

https://github.com/mun-lang/runtime · Rust · 266 lines · 216 code · 27 blank · 23 comment · 19 complexity · 2c0852c2e71778ed4258b44177b7562e MD5 · raw file

  1. use mun_syntax::TextSize;
  2. use rustc_hash::FxHashMap;
  3. #[derive(Clone, Debug, PartialEq, Eq)]
  4. pub struct LineIndex {
  5. /// Offsets from the beginning of each line
  6. newlines: Vec<TextSize>,
  7. /// List of non-ASCII characters on each line
  8. pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
  9. }
  /// A position in a text expressed as a line and a UTF-16 column.
  #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
  pub struct LineCol {
      /// Zero-based index of the line.
      pub line: u32,
      /// Zero-based column within the line, counted in UTF-16 code units.
      pub col_utf16: u32,
  }
  17. #[derive(Clone, Debug, Hash, PartialEq, Eq)]
  18. pub(crate) struct Utf16Char {
  19. /// Start offset of a character inside a line, zero-based
  20. pub(crate) start: TextSize,
  21. /// End offset of a character inside a line, zero-based
  22. pub(crate) end: TextSize,
  23. }
  24. impl Utf16Char {
  25. /// Returns the length in 8-bit UTF-8 code units.
  26. fn len(&self) -> TextSize {
  27. self.end - self.start
  28. }
  29. /// Returns the length in 16-bit UTF-16 code units.
  30. fn len_utf16(&self) -> usize {
  31. if self.len() == TextSize::from(4) {
  32. 2
  33. } else {
  34. 1
  35. }
  36. }
  37. }
  38. impl LineIndex {
  39. pub fn new(text: &str) -> LineIndex {
  40. let mut utf16_lines = FxHashMap::default();
  41. let mut utf16_chars = Vec::new();
  42. // Iterate over all the characters in the text and record all the newlines and UTF16
  43. // characters.
  44. let mut newlines = vec![0.into()];
  45. let mut curr_row = 0.into();
  46. let mut curr_col = 0.into();
  47. let mut line = 0;
  48. for c in text.chars() {
  49. let c_len = TextSize::of(c);
  50. curr_row += c_len;
  51. if c == '\n' {
  52. newlines.push(curr_row);
  53. // Save any utf-16 characters seen in the previous line
  54. if !utf16_chars.is_empty() {
  55. utf16_lines.insert(line, utf16_chars);
  56. utf16_chars = Vec::new();
  57. }
  58. // Prepare for processing the next line
  59. curr_col = 0.into();
  60. line += 1;
  61. continue;
  62. }
  63. if !c.is_ascii() {
  64. utf16_chars.push(Utf16Char {
  65. start: curr_col,
  66. end: curr_col + c_len,
  67. });
  68. }
  69. curr_col += c_len;
  70. }
  71. // Save any utf-16 characters seen in the last line
  72. if !utf16_chars.is_empty() {
  73. utf16_lines.insert(line, utf16_chars);
  74. }
  75. LineIndex {
  76. newlines,
  77. utf16_lines,
  78. }
  79. }
  80. /// Returns the line and column index at the given offset in the text
  81. pub fn line_col(&self, offset: TextSize) -> LineCol {
  82. let line = self
  83. .newlines
  84. .binary_search_by(|x| {
  85. if x <= &offset {
  86. std::cmp::Ordering::Less
  87. } else {
  88. std::cmp::Ordering::Greater
  89. }
  90. })
  91. .unwrap_or_else(|i| i)
  92. - 1;
  93. let line_start_offset = self.newlines[line];
  94. let col = offset - line_start_offset;
  95. LineCol {
  96. line: line as u32,
  97. col_utf16: self.utf8_to_utf16_col(line as u32, col) as u32,
  98. }
  99. }
  100. /// Returns the offset in the text for the given line and column index
  101. pub fn offset(&self, line_col: LineCol) -> TextSize {
  102. let col = self.utf16_to_utf8_col(line_col.line, line_col.col_utf16);
  103. self.newlines[line_col.line as usize] + col
  104. }
  105. /// Retrieves the text between `first_line` and `last_line`, if any.
  106. pub fn text_part<'a>(
  107. &self,
  108. first_line: u32,
  109. last_line: u32,
  110. text: &'a str,
  111. text_len: usize,
  112. ) -> Option<&'a str> {
  113. let start_of_part = (*self.newlines.get(first_line as usize)?).into();
  114. let end_of_part = self
  115. .newlines
  116. .get(last_line as usize + 1)
  117. .map(|u| usize::from(*u) - 1usize)
  118. .unwrap_or(text_len);
  119. Some(&text[start_of_part..end_of_part])
  120. }
  121. /// Retrieves the offset to the line corresponding to `line_index`.
  122. #[inline]
  123. pub fn line_offset(&self, line_index: u32) -> usize {
  124. self.newlines[line_index as usize].into()
  125. }
  126. /// Given a line and column number for utf16 text convert it to the offset in utf8 text.
  127. fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
  128. if let Some(utf16_chars) = self.utf16_lines.get(&line) {
  129. for c in utf16_chars {
  130. if col > u32::from(c.start) {
  131. col += u32::from(c.len()) - c.len_utf16() as u32;
  132. } else {
  133. // From here on, all utf16 characters come *after* the character we are mapping,
  134. // so we don't need to take them into account
  135. break;
  136. }
  137. }
  138. }
  139. col.into()
  140. }
  141. /// Given a line and column number for utf8 text, convert it to the offset in utf16 text.
  142. fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
  143. let mut res: usize = col.into();
  144. if let Some(utf16_chars) = self.utf16_lines.get(&line) {
  145. for c in utf16_chars {
  146. if c.end <= col {
  147. res -= usize::from(c.len()) - c.len_utf16();
  148. } else {
  149. // From here on, all utf16 characters come *after* the character we are mapping,
  150. // so we don't need to take them into account
  151. break;
  152. }
  153. }
  154. }
  155. res
  156. }
  157. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Byte offsets in ASCII-only text map onto the expected line and column.
      #[test]
      fn test_line_index() {
          let index = LineIndex::new("hello\nworld");
          // (byte offset, expected line, expected UTF-16 column)
          let cases = [
              (0u32, 0u32, 0u32),
              (1, 0, 1),
              (5, 0, 5),
              (6, 1, 0),
              (7, 1, 1),
          ];
          for &(offset, line, col_utf16) in cases.iter() {
              assert_eq!(index.line_col(offset.into()), LineCol { line, col_utf16 });
          }
      }

      /// Line ranges of text with multi-byte characters are returned without the trailing
      /// newline.
      #[test]
      fn test_text_part() {
          let text = "ℱ٥ℜ\n†ěṦτ\nℙน尺קő$ع";
          let index = LineIndex::new(text);
          let part = |first: u32, last: u32| index.text_part(first, last, text, text.len());

          assert_eq!(part(0, 0), Some("ℱ٥ℜ"));
          assert_eq!(part(0, 1), Some("ℱ٥ℜ\n†ěṦτ"));
          assert_eq!(part(1, 2), Some("†ěṦτ\nℙน尺קő$ع"));
          assert_eq!(part(0, 2), Some(text));
      }

      /// A line holding a single multi-code-point emoji can be retrieved as a whole.
      #[test]
      fn test_text_part_utf16() {
          let text = "a\n❤️\nb";
          let index = LineIndex::new(text);
          let position = |col_utf16: u32| {
              index.offset(LineCol {
                  line: 1,
                  col_utf16,
              })
          };
          let start = position(0);
          let end = position(1);
          assert_eq!(
              index.text_part(1, 1, text, (end - start).into()),
              Some("❤️")
          );
      }

      /// Every line start is reported as a byte offset from the start of the text.
      #[test]
      fn test_line_offset() {
          let index = LineIndex::new("for\ntest\npurpose");
          // (line index, expected byte offset of the line start)
          let cases = [(0u32, 0usize), (1, 4), (2, 9)];
          for &(line, expected) in cases.iter() {
              assert_eq!(index.line_offset(line), expected);
          }
      }
  }