1 | //===- RewriteRope.h - Rope specialized for rewriter ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the RewriteRope class, which is a powerful string class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H |
14 | #define LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H |
15 | |
16 | #include "llvm/ADT/IntrusiveRefCntPtr.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include <cassert> |
19 | #include <cstddef> |
20 | #include <iterator> |
21 | #include <utility> |
22 | |
23 | namespace clang { |
24 | |
25 | //===--------------------------------------------------------------------===// |
26 | // RopeRefCountString Class |
27 | //===--------------------------------------------------------------------===// |
28 | |
29 | /// RopeRefCountString - This struct is allocated with 'new char[]' from the |
30 | /// heap, and represents a reference counted chunk of string data. When its |
31 | /// ref count drops to zero, it is delete[]'d. This is primarily managed |
32 | /// through the RopePiece class below. |
33 | struct RopeRefCountString { |
34 | unsigned RefCount; |
35 | char Data[1]; // Variable sized. |
36 | |
37 | void Retain() { ++RefCount; } |
38 | |
39 | void Release() { |
40 | assert(RefCount > 0 && "Reference count is already zero." ); |
41 | if (--RefCount == 0) |
42 | delete [] (char*)this; |
43 | } |
44 | }; |
45 | |
46 | //===--------------------------------------------------------------------===// |
47 | // RopePiece Class |
48 | //===--------------------------------------------------------------------===// |
49 | |
50 | /// RopePiece - This class represents a view into a RopeRefCountString object. |
51 | /// This allows references to string data to be efficiently chopped up and |
52 | /// moved around without having to push around the string data itself. |
53 | /// |
54 | /// For example, we could have a 1M RopePiece and want to insert something |
55 | /// into the middle of it. To do this, we split it into two RopePiece objects |
56 | /// that both refer to the same underlying RopeRefCountString (just with |
57 | /// different offsets) which is a nice constant time operation. |
58 | struct RopePiece { |
59 | llvm::IntrusiveRefCntPtr<RopeRefCountString> StrData; |
60 | unsigned StartOffs = 0; |
61 | unsigned EndOffs = 0; |
62 | |
63 | RopePiece() = default; |
64 | RopePiece(llvm::IntrusiveRefCntPtr<RopeRefCountString> Str, unsigned Start, |
65 | unsigned End) |
66 | : StrData(std::move(Str)), StartOffs(Start), EndOffs(End) {} |
67 | |
68 | const char &operator[](unsigned Offset) const { |
69 | return StrData->Data[Offset+StartOffs]; |
70 | } |
71 | char &operator[](unsigned Offset) { |
72 | return StrData->Data[Offset+StartOffs]; |
73 | } |
74 | |
75 | unsigned size() const { return EndOffs-StartOffs; } |
76 | }; |
77 | |
78 | //===--------------------------------------------------------------------===// |
79 | // RopePieceBTreeIterator Class |
80 | //===--------------------------------------------------------------------===// |
81 | |
82 | /// RopePieceBTreeIterator - This class provides read-only forward iteration |
83 | /// over bytes that are in a RopePieceBTree. This first iterates over bytes |
84 | /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf, |
85 | /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree. |
86 | class RopePieceBTreeIterator { |
87 | /// CurNode - The current B+Tree node that we are inspecting. |
88 | const void /*RopePieceBTreeLeaf*/ *CurNode = nullptr; |
89 | |
90 | /// CurPiece - The current RopePiece in the B+Tree node that we're |
91 | /// inspecting. |
92 | const RopePiece *CurPiece = nullptr; |
93 | |
94 | /// CurChar - The current byte in the RopePiece we are pointing to. |
95 | unsigned CurChar = 0; |
96 | |
97 | public: |
98 | using iterator_category = std::forward_iterator_tag; |
99 | using value_type = const char; |
100 | using difference_type = std::ptrdiff_t; |
101 | using pointer = value_type *; |
102 | using reference = value_type &; |
103 | |
104 | RopePieceBTreeIterator() = default; |
105 | RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N); |
106 | |
107 | char operator*() const { |
108 | return (*CurPiece)[CurChar]; |
109 | } |
110 | |
111 | bool operator==(const RopePieceBTreeIterator &RHS) const { |
112 | return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar; |
113 | } |
114 | bool operator!=(const RopePieceBTreeIterator &RHS) const { |
115 | return !operator==(RHS); |
116 | } |
117 | |
118 | RopePieceBTreeIterator& operator++() { // Preincrement |
119 | if (CurChar+1 < CurPiece->size()) |
120 | ++CurChar; |
121 | else |
122 | MoveToNextPiece(); |
123 | return *this; |
124 | } |
125 | |
126 | RopePieceBTreeIterator operator++(int) { // Postincrement |
127 | RopePieceBTreeIterator tmp = *this; ++*this; return tmp; |
128 | } |
129 | |
130 | llvm::StringRef piece() const { |
131 | return llvm::StringRef(&(*CurPiece)[0], CurPiece->size()); |
132 | } |
133 | |
134 | void MoveToNextPiece(); |
135 | }; |
136 | |
137 | //===--------------------------------------------------------------------===// |
138 | // RopePieceBTree Class |
139 | //===--------------------------------------------------------------------===// |
140 | |
141 | class RopePieceBTree { |
142 | void /*RopePieceBTreeNode*/ *Root; |
143 | |
144 | public: |
145 | RopePieceBTree(); |
146 | RopePieceBTree(const RopePieceBTree &RHS); |
147 | RopePieceBTree &operator=(const RopePieceBTree &) = delete; |
148 | ~RopePieceBTree(); |
149 | |
150 | using iterator = RopePieceBTreeIterator; |
151 | |
152 | iterator begin() const { return iterator(Root); } |
153 | iterator end() const { return iterator(); } |
154 | unsigned size() const; |
155 | unsigned empty() const { return size() == 0; } |
156 | |
157 | void clear(); |
158 | |
159 | void insert(unsigned Offset, const RopePiece &R); |
160 | |
161 | void erase(unsigned Offset, unsigned NumBytes); |
162 | }; |
163 | |
164 | //===--------------------------------------------------------------------===// |
165 | // RewriteRope Class |
166 | //===--------------------------------------------------------------------===// |
167 | |
168 | /// RewriteRope - A powerful string class. This class supports extremely |
169 | /// efficient insertions and deletions into the middle of it, even for |
170 | /// ridiculously long strings. |
171 | class RewriteRope { |
172 | RopePieceBTree Chunks; |
173 | |
174 | /// We allocate space for string data out of a buffer of size AllocChunkSize. |
175 | /// This keeps track of how much space is left. |
176 | llvm::IntrusiveRefCntPtr<RopeRefCountString> AllocBuffer; |
177 | enum { AllocChunkSize = 4080 }; |
178 | unsigned AllocOffs = AllocChunkSize; |
179 | |
180 | public: |
181 | RewriteRope() = default; |
182 | RewriteRope(const RewriteRope &RHS) : Chunks(RHS.Chunks) {} |
183 | |
184 | // The copy assignment operator is defined as deleted pending further |
185 | // motivation. |
186 | RewriteRope &operator=(const RewriteRope &) = delete; |
187 | |
188 | using iterator = RopePieceBTree::iterator; |
189 | using const_iterator = RopePieceBTree::iterator; |
190 | |
191 | iterator begin() const { return Chunks.begin(); } |
192 | iterator end() const { return Chunks.end(); } |
193 | unsigned size() const { return Chunks.size(); } |
194 | |
195 | void clear() { |
196 | Chunks.clear(); |
197 | } |
198 | |
199 | void assign(const char *Start, const char *End) { |
200 | clear(); |
201 | if (Start != End) |
202 | Chunks.insert(Offset: 0, R: MakeRopeString(Start, End)); |
203 | } |
204 | |
205 | void insert(unsigned Offset, const char *Start, const char *End) { |
206 | assert(Offset <= size() && "Invalid position to insert!" ); |
207 | if (Start == End) return; |
208 | Chunks.insert(Offset, R: MakeRopeString(Start, End)); |
209 | } |
210 | |
211 | void erase(unsigned Offset, unsigned NumBytes) { |
212 | assert(Offset+NumBytes <= size() && "Invalid region to erase!" ); |
213 | if (NumBytes == 0) return; |
214 | Chunks.erase(Offset, NumBytes); |
215 | } |
216 | |
217 | private: |
218 | RopePiece MakeRopeString(const char *Start, const char *End); |
219 | }; |
220 | |
221 | } // namespace clang |
222 | |
223 | #endif // LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H |
224 | |