1//! A data structure for tracking source positions in language implementations, inspired by the
2//! [CodeMap type in rustc's libsyntax](https://github.com/rust-lang/rust/blob/master/src/libsyntax/codemap.rs).
3//!
4//! The `CodeMap` tracks all source files and maps positions within them to linear indexes as if all
5//! source files were concatenated. This allows a source position to be represented by a small
6//! 32-bit `Pos` indexing into the `CodeMap`, under the assumption that the total amount of parsed
7//! source code will not exceed 4GiB. The `CodeMap` can look up the source file, line, and column
8//! of a `Pos` or `Span`, as well as provide source code snippets for error reporting.
9//!
10//! # Example
11//! ```
12//! use codemap::CodeMap;
13//! let mut codemap = CodeMap::new();
14//! let file = codemap.add_file("test.rs".to_string(), "fn test(){\n println!(\"Hello\");\n}\n".to_string());
15//! let string_literal_span = file.span.subspan(24, 31);
16//!
17//! let location = codemap.look_up_span(string_literal_span);
18//! assert_eq!(location.file.name(), "test.rs");
19//! assert_eq!(location.begin.line, 1);
20//! assert_eq!(location.begin.column, 13);
21//! assert_eq!(location.end.line, 1);
22//! assert_eq!(location.end.column, 20);
23//! ```
24
25use std::cmp::{self, Ordering};
26use std::fmt;
27use std::hash::{Hash, Hasher};
28use std::ops::{Add, Deref, Sub};
29use std::sync::Arc;
30
31/// A small, `Copy`, value representing a position in a `CodeMap`'s file.
32#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
33pub struct Pos(u32);
34
35impl Add<u64> for Pos {
36 type Output = Pos;
37 fn add(self, other: u64) -> Pos {
38 Pos(self.0 + other as u32)
39 }
40}
41
42impl Sub<Pos> for Pos {
43 type Output = u64;
44 fn sub(self, other: Pos) -> u64 {
45 (self.0 - other.0) as u64
46 }
47}
48
49/// A range of text within a CodeMap.
50#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
51pub struct Span {
52 /// The position in the codemap representing the first byte of the span.
53 low: Pos,
54
55 /// The position after the last byte of the span.
56 high: Pos,
57}
58
59impl Span {
60 /// Makes a span from offsets relative to the start of this span.
61 ///
62 /// # Panics
63 /// * If `end < begin`
64 /// * If `end` is beyond the length of the span
65 pub fn subspan(&self, begin: u64, end: u64) -> Span {
66 assert!(end >= begin);
67 assert!(self.low + end <= self.high);
68 Span {
69 low: self.low + begin,
70 high: self.low + end,
71 }
72 }
73
74 /// Checks if a span is contained within this span.
75 pub fn contains(&self, other: Span) -> bool {
76 self.low <= other.low && self.high >= other.high
77 }
78
79 /// The position in the codemap representing the first byte of the span.
80 pub fn low(&self) -> Pos {
81 self.low
82 }
83
84 /// The position after the last byte of the span.
85 pub fn high(&self) -> Pos {
86 self.high
87 }
88
89 /// The length in bytes of the text of the span
90 pub fn len(&self) -> u64 {
91 self.high - self.low
92 }
93
94 /// Create a span that encloses both `self` and `other`.
95 pub fn merge(&self, other: Span) -> Span {
96 Span {
97 low: cmp::min(self.low, other.low),
98 high: cmp::max(self.high, other.high),
99 }
100 }
101}
102
103/// Associate a Span with a value of arbitrary type (e.g. an AST node).
104#[derive(Clone, PartialEq, Eq, Hash, Debug, Copy)]
105pub struct Spanned<T> {
106 pub node: T,
107 pub span: Span,
108}
109
110impl<T> Spanned<T> {
111 /// Maps a `Spanned<T>` to `Spanned<U>` by applying the function to the node,
112 /// leaving the span untouched.
113 pub fn map_node<U, F: FnOnce(T) -> U>(self, op: F) -> Spanned<U> {
114 Spanned {
115 node: op(self.node),
116 span: self.span
117 }
118 }
119}
120
121impl<T> Deref for Spanned<T> {
122 type Target = T;
123
124 fn deref(&self) -> &T {
125 &self.node
126 }
127}
128
129/// A data structure recording source code files for position lookup.
130#[derive(Default, Debug)]
131pub struct CodeMap {
132 files: Vec<Arc<File>>,
133}
134
135impl CodeMap {
136 /// Creates an empty `CodeMap`.
137 pub fn new() -> CodeMap {
138 Default::default()
139 }
140
141 /// Adds a file with the given name and contents.
142 ///
143 /// Use the returned `File` and its `.span` property to create `Spans`
144 /// representing substrings of the file.
145 pub fn add_file(&mut self, name: String, source: String) -> Arc<File> {
146 let low = self.end_pos() + 1;
147 let high = low + source.len() as u64;
148 let mut lines = vec![low];
149 lines.extend(
150 source
151 .match_indices('\n')
152 .map(|(p, _)| low + (p + 1) as u64),
153 );
154
155 let file = Arc::new(File {
156 span: Span { low, high },
157 name,
158 source,
159 lines,
160 });
161
162 self.files.push(file.clone());
163 file
164 }
165
166 fn end_pos(&self) -> Pos {
167 self.files.last().map(|x| x.span.high).unwrap_or(Pos(0))
168 }
169
170 /// Looks up the `File` that contains the specified position.
171 pub fn find_file(&self, pos: Pos) -> &Arc<File> {
172 self.files
173 .binary_search_by(|file| {
174 if file.span.high < pos {
175 Ordering::Less
176 } else if file.span.low > pos {
177 Ordering::Greater
178 } else {
179 Ordering::Equal
180 }
181 })
182 .ok()
183 .map(|i| &self.files[i])
184 .expect("Mapping unknown source location")
185 }
186
187 /// Gets the file, line, and column represented by a `Pos`.
188 pub fn look_up_pos(&self, pos: Pos) -> Loc {
189 let file = self.find_file(pos);
190 let position = file.find_line_col(pos);
191 Loc {
192 file: file.clone(),
193 position,
194 }
195 }
196
197 /// Gets the file and its line and column ranges represented by a `Span`.
198 pub fn look_up_span(&self, span: Span) -> SpanLoc {
199 let file = self.find_file(span.low);
200 let begin = file.find_line_col(span.low);
201 let end = file.find_line_col(span.high);
202 SpanLoc {
203 file: file.clone(),
204 begin,
205 end,
206 }
207 }
208}
209
210/// A `CodeMap`'s record of a source file.
211pub struct File {
212 /// The span representing the entire file.
213 pub span: Span,
214
215 /// The filename as it would be displayed in an error message.
216 name: String,
217
218 /// Contents of the file.
219 source: String,
220
221 /// Byte positions of line beginnings.
222 lines: Vec<Pos>,
223}
224
225impl File {
226 /// Gets the name of the file
227 pub fn name(&self) -> &str {
228 &self.name
229 }
230
231 /// Gets the line number of a Pos.
232 ///
233 /// The lines are 0-indexed (first line is numbered 0)
234 ///
235 /// # Panics
236 ///
237 /// * If `pos` is not within this file's span
238 pub fn find_line(&self, pos: Pos) -> usize {
239 assert!(pos >= self.span.low);
240 assert!(pos <= self.span.high);
241 match self.lines.binary_search(&pos) {
242 Ok(i) => i,
243 Err(i) => i - 1,
244 }
245 }
246
247 /// Gets the line and column of a Pos.
248 ///
249 /// # Panics
250 ///
251 /// * If `pos` is not with this file's span
252 /// * If `pos` points to a byte in the middle of a UTF-8 character
253 pub fn find_line_col(&self, pos: Pos) -> LineCol {
254 let line = self.find_line(pos);
255 let line_span = self.line_span(line);
256 let byte_col = pos - line_span.low;
257 let column = self.source_slice(line_span)[..byte_col as usize]
258 .chars()
259 .count();
260
261 LineCol { line, column }
262 }
263
264 /// Gets the full source text of the file
265 pub fn source(&self) -> &str {
266 &self.source
267 }
268
269 /// Gets the source text of a Span.
270 ///
271 /// # Panics
272 ///
273 /// * If `span` is not entirely within this file.
274 pub fn source_slice(&self, span: Span) -> &str {
275 assert!(self.span.contains(span));
276 &self.source[((span.low - self.span.low) as usize)..((span.high - self.span.low) as usize)]
277 }
278
279 /// Gets the span representing a line by line number.
280 ///
281 /// The line number is 0-indexed (first line is numbered 0). The returned span includes the
282 /// line terminator.
283 ///
284 /// # Panics
285 ///
286 /// * If the line number is out of range
287 pub fn line_span(&self, line: usize) -> Span {
288 assert!(line < self.lines.len());
289 Span {
290 low: self.lines[line],
291 high: *self.lines.get(line + 1).unwrap_or(&self.span.high),
292 }
293 }
294
295 /// Gets the source text of a line.
296 ///
297 /// The string returned does not include the terminating \r or \n characters.
298 ///
299 /// # Panics
300 ///
301 /// * If the line number is out of range
302 pub fn source_line(&self, line: usize) -> &str {
303 self.source_slice(self.line_span(line))
304 .trim_end_matches(&['\n', '\r'][..])
305 }
306
307 /// Gets the number of lines in the file
308 pub fn num_lines(&self) -> usize {
309 self.lines.len()
310 }
311}
312
313impl fmt::Debug for File {
314 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
315 write!(f, "File({:?})", self.name)
316 }
317}
318
319impl PartialEq for File {
320 /// Compares by identity
321 fn eq(&self, other: &File) -> bool {
322 self as *const _ == other as *const _
323 }
324}
325
326impl Eq for File {}
327
328impl Hash for File {
329 fn hash<H: Hasher>(&self, hasher: &mut H) {
330 self.span.hash(state:hasher);
331 }
332}
333
334/// A line and column.
335#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
336pub struct LineCol {
337 /// The line number within the file (0-indexed).
338 pub line: usize,
339
340 /// The column within the line (0-indexed).
341 pub column: usize,
342}
343
344/// A file, and a line and column within it.
345#[derive(Clone, Eq, PartialEq, Debug)]
346pub struct Loc {
347 pub file: Arc<File>,
348 pub position: LineCol,
349}
350
351impl fmt::Display for Loc {
352 /// Formats the location as `filename:line:column`, with a 1-indexed
353 /// line and column.
354 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
355 write!(
356 f,
357 "{}:{}:{}",
358 self.file.name,
359 self.position.line + 1,
360 self.position.column + 1
361 )
362 }
363}
364
365/// A file, and a line and column range within it.
366#[derive(Clone, Eq, PartialEq, Debug)]
367pub struct SpanLoc {
368 pub file: Arc<File>,
369 pub begin: LineCol,
370 pub end: LineCol,
371}
372
373impl fmt::Display for SpanLoc {
374 /// Formats the span as `filename:start_line:start_column: end_line:end_column`,
375 /// or if the span is zero-length, `filename:line:column`, with a 1-indexed line and column.
376 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
377 if self.begin == self.end {
378 write!(
379 f,
380 "{}:{}:{}",
381 self.file.name,
382 self.begin.line + 1,
383 self.begin.column + 1
384 )
385 } else {
386 write!(
387 f,
388 "{}:{}:{}: {}:{}",
389 self.file.name,
390 self.begin.line + 1,
391 self.begin.column + 1,
392 self.end.line + 1,
393 self.end.column + 1
394 )
395 }
396 }
397}
398
399#[test]
400fn test_codemap() {
401 let mut codemap = CodeMap::new();
402 let f1 = codemap.add_file("test1.rs".to_string(), "abcd\nefghij\nqwerty".to_string());
403 let f2 = codemap.add_file("test2.rs".to_string(), "foo\nbar".to_string());
404
405 assert_eq!(codemap.find_file(f1.span.low()).name(), "test1.rs");
406 assert_eq!(codemap.find_file(f1.span.high()).name(), "test1.rs");
407 assert_eq!(codemap.find_file(f2.span.low()).name(), "test2.rs");
408 assert_eq!(codemap.find_file(f2.span.high()).name(), "test2.rs");
409
410 let x = f1.span.subspan(5, 10);
411 let f = codemap.find_file(x.low);
412 assert_eq!(f.name, "test1.rs");
413 assert_eq!(
414 f.find_line_col(f.span.low()),
415 LineCol { line: 0, column: 0 }
416 );
417 assert_eq!(
418 f.find_line_col(f.span.low() + 4),
419 LineCol { line: 0, column: 4 }
420 );
421 assert_eq!(
422 f.find_line_col(f.span.low() + 5),
423 LineCol { line: 1, column: 0 }
424 );
425 assert_eq!(
426 f.find_line_col(f.span.low() + 16),
427 LineCol { line: 2, column: 4 }
428 );
429
430 let x = f2.span.subspan(4, 7);
431 assert_eq!(codemap.find_file(x.low()).name(), "test2.rs");
432 assert_eq!(codemap.find_file(x.high()).name(), "test2.rs");
433}
434
435#[test]
436fn test_issue2() {
437 let mut codemap = CodeMap::new();
438 let content = "a \nxyz\r\n";
439 let file = codemap.add_file("<test>".to_owned(), content.to_owned());
440
441 let span = file.span.subspan(2, 3);
442 assert_eq!(
443 codemap.look_up_span(span),
444 SpanLoc {
445 file: file.clone(),
446 begin: LineCol { line: 0, column: 2 },
447 end: LineCol { line: 1, column: 0 }
448 }
449 );
450
451 assert_eq!(file.source_line(0), "a ");
452 assert_eq!(file.source_line(1), "xyz");
453 assert_eq!(file.source_line(2), "");
454}
455
456#[test]
457fn test_multibyte() {
458 let mut codemap = CodeMap::new();
459 let content = "65°00′N 18°00′W 汉语\n🔬";
460 let file = codemap.add_file("<test>".to_owned(), content.to_owned());
461
462 assert_eq!(
463 codemap.look_up_pos(file.span.low() + 21),
464 Loc {
465 file: file.clone(),
466 position: LineCol {
467 line: 0,
468 column: 15
469 }
470 }
471 );
472 assert_eq!(
473 codemap.look_up_pos(file.span.low() + 28),
474 Loc {
475 file: file.clone(),
476 position: LineCol {
477 line: 0,
478 column: 18
479 }
480 }
481 );
482 assert_eq!(
483 codemap.look_up_pos(file.span.low() + 33),
484 Loc {
485 file: file.clone(),
486 position: LineCol { line: 1, column: 1 }
487 }
488 );
489}
490