//! `LineIndex` builds a `line_offsets` table in which each item is the byte offset (starting from 0)
//! of the beginning of a line.
//!
//! For example, for the text `"hello 你好\nworld"`, `line_offsets` will store `[0, 13]`.
//!
//! `line_col` then only needs a binary search over that table to find the line containing a given
//! byte offset.
//!
//! Inspired by rust-analyzer's `LineIndex`:
//! <https://github.com/rust-lang/rust/blob/1.67.0/src/tools/rust-analyzer/crates/ide-db/src/line_index.rs>
| 9 | use alloc::vec::Vec; |
| 10 | |
/// Table of line-start byte offsets for a piece of text.
///
/// Build it once with [`LineIndex::new`], then translate byte positions into
/// one-based `(line, column)` pairs with [`LineIndex::line_col`].
#[derive(Clone)]
pub struct LineIndex {
    /// Byte offset of the beginning of each line, zero-based.
    ///
    /// The first entry is always `0`; every further entry is the offset of
    /// the byte right after a `'\n'`, so the values are strictly increasing.
    line_offsets: Vec<usize>,
}

impl LineIndex {
    /// Scans `text` once and records where every line begins.
    ///
    /// Only `'\n'` is treated as a line terminator. A `'\r'` preceding it is
    /// left as an ordinary character of the line it ends.
    pub fn new(text: &str) -> LineIndex {
        // Line 1 always starts at byte 0, even for empty input.
        let mut line_offsets: Vec<usize> = Vec::from([0]);

        // `'\n'` is a single byte in UTF-8, so the following line starts
        // exactly one byte after each match.
        line_offsets.extend(text.match_indices('\n').map(|(newline_at, _)| newline_at + 1));

        LineIndex { line_offsets }
    }

    /// Returns the one-based `(line, col)` of `pos`.
    ///
    /// `pos` is a byte offset into `input`, starting from 0; e.g. the offset
    /// just past `"ab"` is 2 and just past `"你好"` is 6. The column is
    /// counted in `char`s, not bytes.
    ///
    /// `input` should be the same text this index was built from; the index
    /// itself stores only offsets.
    ///
    /// # Panics
    ///
    /// Panics if `pos` is greater than `input.len()` or does not fall on a
    /// `char` boundary of `input`, because the line prefix is obtained by
    /// slicing `input`.
    pub fn line_col(&self, input: &str, pos: usize) -> (usize, usize) {
        // Count the line starts at or before `pos`. The table always contains
        // 0, so the count is at least 1 and the subtraction cannot underflow.
        let line = self.line_offsets.partition_point(|&start| start <= pos) - 1;
        let line_start = self.line_offsets[line];

        // The column is the number of characters between the start of the
        // line and `pos`, taken from the original input.
        let col = input[line_start..pos].chars().count();

        (line + 1, col + 1)
    }
}
| 47 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `line_col` for the byte offset of every character in a text
    /// mixing 1-byte (ASCII), 3-byte (CJK) and 4-byte (emoji) characters
    /// across two lines.
    // The offsets in the table are zero-padded so the columns line up.
    #[allow(clippy::zero_prefixed_literal)]
    #[test]
    fn test_line_index() {
        let text = "hello 你好 A🎈C\nworld";
        // (byte offset, expected line, expected column, character at offset)
        let table = [
            (00, 1, 1, 'h'),
            (01, 1, 2, 'e'),
            (02, 1, 3, 'l'),
            (03, 1, 4, 'l'),
            (04, 1, 5, 'o'),
            (05, 1, 6, ' '),
            (06, 1, 7, '你'),
            (09, 1, 8, '好'),
            (12, 1, 9, ' '),
            (13, 1, 10, 'A'),
            (14, 1, 11, '🎈'),
            (18, 1, 12, 'C'),
            (19, 1, 13, '\n'),
            (20, 2, 1, 'w'),
            (21, 2, 2, 'o'),
            (22, 2, 3, 'r'),
            (23, 2, 4, 'l'),
            (24, 2, 5, 'd'),
        ];

        let index = LineIndex::new(text);
        for &(offset, line, col, c) in table.iter() {
            let res = index.line_col(text, offset);
            assert_eq!(
                (res.0, res.1),
                (line, col),
                "Expected: ({}, {}, {}, {:?})",
                offset,
                line,
                col,
                c
            );
        }
    }
}
| 92 | |