1 | //! A data structure for tracking source positions in language implementations, inspired by the |
2 | //! [CodeMap type in rustc's libsyntax](https://github.com/rust-lang/rust/blob/master/src/libsyntax/codemap.rs). |
3 | //! |
4 | //! The `CodeMap` tracks all source files and maps positions within them to linear indexes as if all |
5 | //! source files were concatenated. This allows a source position to be represented by a small |
6 | //! 32-bit `Pos` indexing into the `CodeMap`, under the assumption that the total amount of parsed |
7 | //! source code will not exceed 4GiB. The `CodeMap` can look up the source file, line, and column |
8 | //! of a `Pos` or `Span`, as well as provide source code snippets for error reporting. |
9 | //! |
10 | //! # Example |
11 | //! ``` |
12 | //! use codemap::CodeMap; |
13 | //! let mut codemap = CodeMap::new(); |
14 | //! let file = codemap.add_file("test.rs" .to_string(), "fn test(){ \n println!( \"Hello \"); \n} \n" .to_string()); |
15 | //! let string_literal_span = file.span.subspan(24, 31); |
16 | //! |
17 | //! let location = codemap.look_up_span(string_literal_span); |
18 | //! assert_eq!(location.file.name(), "test.rs" ); |
19 | //! assert_eq!(location.begin.line, 1); |
20 | //! assert_eq!(location.begin.column, 13); |
21 | //! assert_eq!(location.end.line, 1); |
22 | //! assert_eq!(location.end.column, 20); |
23 | //! ``` |
24 | |
25 | use std::cmp::{self, Ordering}; |
26 | use std::fmt; |
27 | use std::hash::{Hash, Hasher}; |
28 | use std::ops::{Add, Deref, Sub}; |
29 | use std::sync::Arc; |
30 | |
31 | /// A small, `Copy`, value representing a position in a `CodeMap`'s file. |
32 | #[derive (Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)] |
33 | pub struct Pos(u32); |
34 | |
35 | impl Add<u64> for Pos { |
36 | type Output = Pos; |
37 | fn add(self, other: u64) -> Pos { |
38 | Pos(self.0 + other as u32) |
39 | } |
40 | } |
41 | |
42 | impl Sub<Pos> for Pos { |
43 | type Output = u64; |
44 | fn sub(self, other: Pos) -> u64 { |
45 | (self.0 - other.0) as u64 |
46 | } |
47 | } |
48 | |
49 | /// A range of text within a CodeMap. |
50 | #[derive (Copy, Clone, Hash, Eq, PartialEq, Debug)] |
51 | pub struct Span { |
52 | /// The position in the codemap representing the first byte of the span. |
53 | low: Pos, |
54 | |
55 | /// The position after the last byte of the span. |
56 | high: Pos, |
57 | } |
58 | |
59 | impl Span { |
60 | /// Makes a span from offsets relative to the start of this span. |
61 | /// |
62 | /// # Panics |
63 | /// * If `end < begin` |
64 | /// * If `end` is beyond the length of the span |
65 | pub fn subspan(&self, begin: u64, end: u64) -> Span { |
66 | assert!(end >= begin); |
67 | assert!(self.low + end <= self.high); |
68 | Span { |
69 | low: self.low + begin, |
70 | high: self.low + end, |
71 | } |
72 | } |
73 | |
74 | /// Checks if a span is contained within this span. |
75 | pub fn contains(&self, other: Span) -> bool { |
76 | self.low <= other.low && self.high >= other.high |
77 | } |
78 | |
79 | /// The position in the codemap representing the first byte of the span. |
80 | pub fn low(&self) -> Pos { |
81 | self.low |
82 | } |
83 | |
84 | /// The position after the last byte of the span. |
85 | pub fn high(&self) -> Pos { |
86 | self.high |
87 | } |
88 | |
89 | /// The length in bytes of the text of the span |
90 | pub fn len(&self) -> u64 { |
91 | self.high - self.low |
92 | } |
93 | |
94 | /// Create a span that encloses both `self` and `other`. |
95 | pub fn merge(&self, other: Span) -> Span { |
96 | Span { |
97 | low: cmp::min(self.low, other.low), |
98 | high: cmp::max(self.high, other.high), |
99 | } |
100 | } |
101 | } |
102 | |
103 | /// Associate a Span with a value of arbitrary type (e.g. an AST node). |
104 | #[derive (Clone, PartialEq, Eq, Hash, Debug, Copy)] |
105 | pub struct Spanned<T> { |
106 | pub node: T, |
107 | pub span: Span, |
108 | } |
109 | |
110 | impl<T> Spanned<T> { |
111 | /// Maps a `Spanned<T>` to `Spanned<U>` by applying the function to the node, |
112 | /// leaving the span untouched. |
113 | pub fn map_node<U, F: FnOnce(T) -> U>(self, op: F) -> Spanned<U> { |
114 | Spanned { |
115 | node: op(self.node), |
116 | span: self.span |
117 | } |
118 | } |
119 | } |
120 | |
121 | impl<T> Deref for Spanned<T> { |
122 | type Target = T; |
123 | |
124 | fn deref(&self) -> &T { |
125 | &self.node |
126 | } |
127 | } |
128 | |
129 | /// A data structure recording source code files for position lookup. |
130 | #[derive (Default, Debug)] |
131 | pub struct CodeMap { |
132 | files: Vec<Arc<File>>, |
133 | } |
134 | |
135 | impl CodeMap { |
136 | /// Creates an empty `CodeMap`. |
137 | pub fn new() -> CodeMap { |
138 | Default::default() |
139 | } |
140 | |
141 | /// Adds a file with the given name and contents. |
142 | /// |
143 | /// Use the returned `File` and its `.span` property to create `Spans` |
144 | /// representing substrings of the file. |
145 | pub fn add_file(&mut self, name: String, source: String) -> Arc<File> { |
146 | let low = self.end_pos() + 1; |
147 | let high = low + source.len() as u64; |
148 | let mut lines = vec![low]; |
149 | lines.extend( |
150 | source |
151 | .match_indices(' \n' ) |
152 | .map(|(p, _)| low + (p + 1) as u64), |
153 | ); |
154 | |
155 | let file = Arc::new(File { |
156 | span: Span { low, high }, |
157 | name, |
158 | source, |
159 | lines, |
160 | }); |
161 | |
162 | self.files.push(file.clone()); |
163 | file |
164 | } |
165 | |
166 | fn end_pos(&self) -> Pos { |
167 | self.files.last().map(|x| x.span.high).unwrap_or(Pos(0)) |
168 | } |
169 | |
170 | /// Looks up the `File` that contains the specified position. |
171 | pub fn find_file(&self, pos: Pos) -> &Arc<File> { |
172 | self.files |
173 | .binary_search_by(|file| { |
174 | if file.span.high < pos { |
175 | Ordering::Less |
176 | } else if file.span.low > pos { |
177 | Ordering::Greater |
178 | } else { |
179 | Ordering::Equal |
180 | } |
181 | }) |
182 | .ok() |
183 | .map(|i| &self.files[i]) |
184 | .expect("Mapping unknown source location" ) |
185 | } |
186 | |
187 | /// Gets the file, line, and column represented by a `Pos`. |
188 | pub fn look_up_pos(&self, pos: Pos) -> Loc { |
189 | let file = self.find_file(pos); |
190 | let position = file.find_line_col(pos); |
191 | Loc { |
192 | file: file.clone(), |
193 | position, |
194 | } |
195 | } |
196 | |
197 | /// Gets the file and its line and column ranges represented by a `Span`. |
198 | pub fn look_up_span(&self, span: Span) -> SpanLoc { |
199 | let file = self.find_file(span.low); |
200 | let begin = file.find_line_col(span.low); |
201 | let end = file.find_line_col(span.high); |
202 | SpanLoc { |
203 | file: file.clone(), |
204 | begin, |
205 | end, |
206 | } |
207 | } |
208 | } |
209 | |
210 | /// A `CodeMap`'s record of a source file. |
211 | pub struct File { |
212 | /// The span representing the entire file. |
213 | pub span: Span, |
214 | |
215 | /// The filename as it would be displayed in an error message. |
216 | name: String, |
217 | |
218 | /// Contents of the file. |
219 | source: String, |
220 | |
221 | /// Byte positions of line beginnings. |
222 | lines: Vec<Pos>, |
223 | } |
224 | |
225 | impl File { |
226 | /// Gets the name of the file |
227 | pub fn name(&self) -> &str { |
228 | &self.name |
229 | } |
230 | |
231 | /// Gets the line number of a Pos. |
232 | /// |
233 | /// The lines are 0-indexed (first line is numbered 0) |
234 | /// |
235 | /// # Panics |
236 | /// |
237 | /// * If `pos` is not within this file's span |
238 | pub fn find_line(&self, pos: Pos) -> usize { |
239 | assert!(pos >= self.span.low); |
240 | assert!(pos <= self.span.high); |
241 | match self.lines.binary_search(&pos) { |
242 | Ok(i) => i, |
243 | Err(i) => i - 1, |
244 | } |
245 | } |
246 | |
247 | /// Gets the line and column of a Pos. |
248 | /// |
249 | /// # Panics |
250 | /// |
251 | /// * If `pos` is not with this file's span |
252 | /// * If `pos` points to a byte in the middle of a UTF-8 character |
253 | pub fn find_line_col(&self, pos: Pos) -> LineCol { |
254 | let line = self.find_line(pos); |
255 | let line_span = self.line_span(line); |
256 | let byte_col = pos - line_span.low; |
257 | let column = self.source_slice(line_span)[..byte_col as usize] |
258 | .chars() |
259 | .count(); |
260 | |
261 | LineCol { line, column } |
262 | } |
263 | |
264 | /// Gets the full source text of the file |
265 | pub fn source(&self) -> &str { |
266 | &self.source |
267 | } |
268 | |
269 | /// Gets the source text of a Span. |
270 | /// |
271 | /// # Panics |
272 | /// |
273 | /// * If `span` is not entirely within this file. |
274 | pub fn source_slice(&self, span: Span) -> &str { |
275 | assert!(self.span.contains(span)); |
276 | &self.source[((span.low - self.span.low) as usize)..((span.high - self.span.low) as usize)] |
277 | } |
278 | |
279 | /// Gets the span representing a line by line number. |
280 | /// |
281 | /// The line number is 0-indexed (first line is numbered 0). The returned span includes the |
282 | /// line terminator. |
283 | /// |
284 | /// # Panics |
285 | /// |
286 | /// * If the line number is out of range |
287 | pub fn line_span(&self, line: usize) -> Span { |
288 | assert!(line < self.lines.len()); |
289 | Span { |
290 | low: self.lines[line], |
291 | high: *self.lines.get(line + 1).unwrap_or(&self.span.high), |
292 | } |
293 | } |
294 | |
295 | /// Gets the source text of a line. |
296 | /// |
297 | /// The string returned does not include the terminating \r or \n characters. |
298 | /// |
299 | /// # Panics |
300 | /// |
301 | /// * If the line number is out of range |
302 | pub fn source_line(&self, line: usize) -> &str { |
303 | self.source_slice(self.line_span(line)) |
304 | .trim_end_matches(&[' \n' , ' \r' ][..]) |
305 | } |
306 | |
307 | /// Gets the number of lines in the file |
308 | pub fn num_lines(&self) -> usize { |
309 | self.lines.len() |
310 | } |
311 | } |
312 | |
313 | impl fmt::Debug for File { |
314 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { |
315 | write!(f, "File( {:?})" , self.name) |
316 | } |
317 | } |
318 | |
319 | impl PartialEq for File { |
320 | /// Compares by identity |
321 | fn eq(&self, other: &File) -> bool { |
322 | self as *const _ == other as *const _ |
323 | } |
324 | } |
325 | |
326 | impl Eq for File {} |
327 | |
328 | impl Hash for File { |
329 | fn hash<H: Hasher>(&self, hasher: &mut H) { |
330 | self.span.hash(state:hasher); |
331 | } |
332 | } |
333 | |
334 | /// A line and column. |
335 | #[derive (Copy, Clone, Hash, Eq, PartialEq, Debug)] |
336 | pub struct LineCol { |
337 | /// The line number within the file (0-indexed). |
338 | pub line: usize, |
339 | |
340 | /// The column within the line (0-indexed). |
341 | pub column: usize, |
342 | } |
343 | |
344 | /// A file, and a line and column within it. |
345 | #[derive (Clone, Eq, PartialEq, Debug)] |
346 | pub struct Loc { |
347 | pub file: Arc<File>, |
348 | pub position: LineCol, |
349 | } |
350 | |
351 | impl fmt::Display for Loc { |
352 | /// Formats the location as `filename:line:column`, with a 1-indexed |
353 | /// line and column. |
354 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { |
355 | write!( |
356 | f, |
357 | " {}: {}: {}" , |
358 | self.file.name, |
359 | self.position.line + 1, |
360 | self.position.column + 1 |
361 | ) |
362 | } |
363 | } |
364 | |
365 | /// A file, and a line and column range within it. |
366 | #[derive (Clone, Eq, PartialEq, Debug)] |
367 | pub struct SpanLoc { |
368 | pub file: Arc<File>, |
369 | pub begin: LineCol, |
370 | pub end: LineCol, |
371 | } |
372 | |
373 | impl fmt::Display for SpanLoc { |
374 | /// Formats the span as `filename:start_line:start_column: end_line:end_column`, |
375 | /// or if the span is zero-length, `filename:line:column`, with a 1-indexed line and column. |
376 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { |
377 | if self.begin == self.end { |
378 | write!( |
379 | f, |
380 | " {}: {}: {}" , |
381 | self.file.name, |
382 | self.begin.line + 1, |
383 | self.begin.column + 1 |
384 | ) |
385 | } else { |
386 | write!( |
387 | f, |
388 | " {}: {}: {}: {}: {}" , |
389 | self.file.name, |
390 | self.begin.line + 1, |
391 | self.begin.column + 1, |
392 | self.end.line + 1, |
393 | self.end.column + 1 |
394 | ) |
395 | } |
396 | } |
397 | } |
398 | |
399 | #[test ] |
400 | fn test_codemap() { |
401 | let mut codemap = CodeMap::new(); |
402 | let f1 = codemap.add_file("test1.rs" .to_string(), "abcd \nefghij \nqwerty" .to_string()); |
403 | let f2 = codemap.add_file("test2.rs" .to_string(), "foo \nbar" .to_string()); |
404 | |
405 | assert_eq!(codemap.find_file(f1.span.low()).name(), "test1.rs" ); |
406 | assert_eq!(codemap.find_file(f1.span.high()).name(), "test1.rs" ); |
407 | assert_eq!(codemap.find_file(f2.span.low()).name(), "test2.rs" ); |
408 | assert_eq!(codemap.find_file(f2.span.high()).name(), "test2.rs" ); |
409 | |
410 | let x = f1.span.subspan(5, 10); |
411 | let f = codemap.find_file(x.low); |
412 | assert_eq!(f.name, "test1.rs" ); |
413 | assert_eq!( |
414 | f.find_line_col(f.span.low()), |
415 | LineCol { line: 0, column: 0 } |
416 | ); |
417 | assert_eq!( |
418 | f.find_line_col(f.span.low() + 4), |
419 | LineCol { line: 0, column: 4 } |
420 | ); |
421 | assert_eq!( |
422 | f.find_line_col(f.span.low() + 5), |
423 | LineCol { line: 1, column: 0 } |
424 | ); |
425 | assert_eq!( |
426 | f.find_line_col(f.span.low() + 16), |
427 | LineCol { line: 2, column: 4 } |
428 | ); |
429 | |
430 | let x = f2.span.subspan(4, 7); |
431 | assert_eq!(codemap.find_file(x.low()).name(), "test2.rs" ); |
432 | assert_eq!(codemap.find_file(x.high()).name(), "test2.rs" ); |
433 | } |
434 | |
435 | #[test ] |
436 | fn test_issue2() { |
437 | let mut codemap = CodeMap::new(); |
438 | let content = "a \nxyz \r\n" ; |
439 | let file = codemap.add_file("<test>" .to_owned(), content.to_owned()); |
440 | |
441 | let span = file.span.subspan(2, 3); |
442 | assert_eq!( |
443 | codemap.look_up_span(span), |
444 | SpanLoc { |
445 | file: file.clone(), |
446 | begin: LineCol { line: 0, column: 2 }, |
447 | end: LineCol { line: 1, column: 0 } |
448 | } |
449 | ); |
450 | |
451 | assert_eq!(file.source_line(0), "a " ); |
452 | assert_eq!(file.source_line(1), "xyz" ); |
453 | assert_eq!(file.source_line(2), "" ); |
454 | } |
455 | |
456 | #[test ] |
457 | fn test_multibyte() { |
458 | let mut codemap = CodeMap::new(); |
459 | let content = "65°00′N 18°00′W 汉语 \n🔬" ; |
460 | let file = codemap.add_file("<test>" .to_owned(), content.to_owned()); |
461 | |
462 | assert_eq!( |
463 | codemap.look_up_pos(file.span.low() + 21), |
464 | Loc { |
465 | file: file.clone(), |
466 | position: LineCol { |
467 | line: 0, |
468 | column: 15 |
469 | } |
470 | } |
471 | ); |
472 | assert_eq!( |
473 | codemap.look_up_pos(file.span.low() + 28), |
474 | Loc { |
475 | file: file.clone(), |
476 | position: LineCol { |
477 | line: 0, |
478 | column: 18 |
479 | } |
480 | } |
481 | ); |
482 | assert_eq!( |
483 | codemap.look_up_pos(file.span.low() + 33), |
484 | Loc { |
485 | file: file.clone(), |
486 | position: LineCol { line: 1, column: 1 } |
487 | } |
488 | ); |
489 | } |
490 | |