Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===----------------------------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
10 | #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
11 | |
12 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
13 | # pragma GCC system_header |
14 | #endif |
15 | |
16 | #include <__algorithm/fill_n.h> |
17 | #include <__config> |
18 | #include <__functional/hash.h> |
19 | #include <__functional/operations.h> |
20 | #include <__iterator/distance.h> |
21 | #include <__iterator/iterator_traits.h> |
22 | #include <__memory/shared_ptr.h> |
23 | #include <__type_traits/make_unsigned.h> |
24 | #include <__utility/pair.h> |
25 | #include <__vector/vector.h> |
26 | #include <array> |
27 | #include <limits> |
28 | #include <unordered_map> |
29 | |
30 | #if _LIBCPP_STD_VER >= 17 |
31 | |
32 | _LIBCPP_PUSH_MACROS |
33 | # include <__undef_macros> |
34 | |
35 | _LIBCPP_BEGIN_NAMESPACE_STD |
36 | |
37 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> |
38 | class _BMSkipTable; |
39 | |
40 | // General case for BM data searching; use a map |
41 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
42 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { |
43 | private: |
44 | using value_type = _Value; |
45 | using key_type = _Key; |
46 | |
47 | const value_type __default_value_; |
48 | unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; |
49 | |
50 | public: |
51 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( |
52 | size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) |
53 | : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} |
54 | |
55 | _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } |
56 | |
57 | _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { |
58 | auto __it = __table_.find(__key); |
59 | return __it == __table_.end() ? __default_value_ : __it->second; |
60 | } |
61 | }; |
62 | |
63 | // Special case small numeric values; use an array |
64 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
65 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { |
66 | private: |
67 | using value_type = _Value; |
68 | using key_type = _Key; |
69 | |
70 | using unsigned_key_type = make_unsigned_t<key_type>; |
71 | std::array<value_type, 256> __table_; |
72 | static_assert(numeric_limits<unsigned_key_type>::max() < 256); |
73 | |
74 | public: |
75 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { |
76 | std::fill_n(__table_.data(), __table_.size(), __default_value); |
77 | } |
78 | |
79 | _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { |
80 | __table_[static_cast<unsigned_key_type>(__key)] = __val; |
81 | } |
82 | |
83 | _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { |
84 | return __table_[static_cast<unsigned_key_type>(__key)]; |
85 | } |
86 | }; |
87 | |
88 | template <class _RandomAccessIterator1, |
89 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
90 | class _BinaryPredicate = equal_to<>> |
91 | class boyer_moore_searcher { |
92 | private: |
93 | using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; |
94 | using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; |
95 | using __skip_table_type _LIBCPP_NODEBUG = |
96 | _BMSkipTable<value_type, |
97 | difference_type, |
98 | _Hash, |
99 | _BinaryPredicate, |
100 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
101 | is_same_v<_BinaryPredicate, equal_to<>>>; |
102 | |
103 | public: |
104 | _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( |
105 | _RandomAccessIterator1 __first, |
106 | _RandomAccessIterator1 __last, |
107 | _Hash __hash = _Hash(), |
108 | _BinaryPredicate __pred = _BinaryPredicate()) |
109 | : __first_(__first), |
110 | __last_(__last), |
111 | __pred_(__pred), |
112 | __pattern_length_(__last - __first), |
113 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), |
114 | __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( |
115 | allocator<difference_type>(), __pattern_length_ + 1)) { |
116 | difference_type __i = 0; |
117 | while (__first != __last) { |
118 | __skip_table_->insert(*__first, __i); |
119 | ++__first; |
120 | ++__i; |
121 | } |
122 | __build_suffix_table(__first_, __last_, __pred_); |
123 | } |
124 | |
125 | template <class _RandomAccessIterator2> |
126 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
127 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
128 | static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, |
129 | typename iterator_traits<_RandomAccessIterator2>::value_type>::value, |
130 | "Corpus and Pattern iterators must point to the same type"); |
131 | if (__first == __last) |
132 | return std::make_pair(__last, __last); |
133 | if (__first_ == __last_) |
134 | return std::make_pair(__first, __first); |
135 | |
136 | if (__pattern_length_ > (__last - __first)) |
137 | return std::make_pair(__last, __last); |
138 | return __search(__first, __last); |
139 | } |
140 | |
141 | private: |
142 | _RandomAccessIterator1 __first_; |
143 | _RandomAccessIterator1 __last_; |
144 | _BinaryPredicate __pred_; |
145 | difference_type __pattern_length_; |
146 | shared_ptr<__skip_table_type> __skip_table_; |
147 | shared_ptr<difference_type[]> __suffix_; |
148 | |
149 | template <class _RandomAccessIterator2> |
150 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
151 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
152 | _RandomAccessIterator2 __current = __f; |
153 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
154 | const __skip_table_type& __skip_table = *__skip_table_; |
155 | |
156 | while (__current <= __last) { |
157 | difference_type __j = __pattern_length_; |
158 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
159 | --__j; |
160 | if (__j == 0) |
161 | return std::make_pair(__current, __current + __pattern_length_); |
162 | } |
163 | |
164 | difference_type __k = __skip_table[__current[__j - 1]]; |
165 | difference_type __m = __j - __k - 1; |
166 | if (__k < __j && __m > __suffix_[__j]) |
167 | __current += __m; |
168 | else |
169 | __current += __suffix_[__j]; |
170 | } |
171 | return std::make_pair(__l, __l); |
172 | } |
173 | |
174 | template <class _Iterator, class _Container> |
175 | _LIBCPP_HIDE_FROM_ABI void |
176 | __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { |
177 | const size_t __count = __last - __first; |
178 | |
179 | __prefix[0] = 0; |
180 | size_t __k = 0; |
181 | |
182 | for (size_t __i = 1; __i != __count; ++__i) { |
183 | while (__k > 0 && !__pred(__first[__k], __first[__i])) |
184 | __k = __prefix[__k - 1]; |
185 | |
186 | if (__pred(__first[__k], __first[__i])) |
187 | ++__k; |
188 | __prefix[__i] = __k; |
189 | } |
190 | } |
191 | |
192 | _LIBCPP_HIDE_FROM_ABI void |
193 | __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { |
194 | const size_t __count = __last - __first; |
195 | |
196 | if (__count == 0) |
197 | return; |
198 | |
199 | vector<difference_type> __scratch(__count); |
200 | |
201 | __compute_bm_prefix(__first, __last, __pred, __scratch); |
202 | for (size_t __i = 0; __i <= __count; ++__i) |
203 | __suffix_[__i] = __count - __scratch[__count - 1]; |
204 | |
205 | using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; |
206 | __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); |
207 | |
208 | for (size_t __i = 0; __i != __count; ++__i) { |
209 | const size_t __j = __count - __scratch[__i]; |
210 | const difference_type __k = __i - __scratch[__i] + 1; |
211 | |
212 | if (__suffix_[__j] > __k) |
213 | __suffix_[__j] = __k; |
214 | } |
215 | } |
216 | }; |
217 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); |
218 | |
219 | template <class _RandomAccessIterator1, |
220 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
221 | class _BinaryPredicate = equal_to<>> |
222 | class boyer_moore_horspool_searcher { |
223 | private: |
224 | using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; |
225 | using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; |
226 | using __skip_table_type _LIBCPP_NODEBUG = |
227 | _BMSkipTable<value_type, |
228 | difference_type, |
229 | _Hash, |
230 | _BinaryPredicate, |
231 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
232 | is_same_v<_BinaryPredicate, equal_to<>>>; |
233 | |
234 | public: |
235 | _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( |
236 | _RandomAccessIterator1 __first, |
237 | _RandomAccessIterator1 __last, |
238 | _Hash __hash = _Hash(), |
239 | _BinaryPredicate __pred = _BinaryPredicate()) |
240 | : __first_(__first), |
241 | __last_(__last), |
242 | __pred_(__pred), |
243 | __pattern_length_(__last - __first), |
244 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { |
245 | if (__first == __last) |
246 | return; |
247 | --__last; |
248 | difference_type __i = 0; |
249 | while (__first != __last) { |
250 | __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); |
251 | ++__first; |
252 | ++__i; |
253 | } |
254 | } |
255 | |
256 | template <class _RandomAccessIterator2> |
257 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
258 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
259 | static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, |
260 | typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, |
261 | "Corpus and Pattern iterators must point to the same type"); |
262 | if (__first == __last) |
263 | return std::make_pair(__last, __last); |
264 | if (__first_ == __last_) |
265 | return std::make_pair(__first, __first); |
266 | |
267 | if (__pattern_length_ > __last - __first) |
268 | return std::make_pair(__last, __last); |
269 | |
270 | return __search(__first, __last); |
271 | } |
272 | |
273 | private: |
274 | _RandomAccessIterator1 __first_; |
275 | _RandomAccessIterator1 __last_; |
276 | _BinaryPredicate __pred_; |
277 | difference_type __pattern_length_; |
278 | shared_ptr<__skip_table_type> __skip_table_; |
279 | |
280 | template <class _RandomAccessIterator2> |
281 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
282 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
283 | _RandomAccessIterator2 __current = __f; |
284 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
285 | const __skip_table_type& __skip_table = *__skip_table_; |
286 | |
287 | while (__current <= __last) { |
288 | difference_type __j = __pattern_length_; |
289 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
290 | --__j; |
291 | if (__j == 0) |
292 | return std::make_pair(__current, __current + __pattern_length_); |
293 | } |
294 | __current += __skip_table[__current[__pattern_length_ - 1]]; |
295 | } |
296 | return std::make_pair(__l, __l); |
297 | } |
298 | }; |
299 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); |
300 | |
301 | _LIBCPP_END_NAMESPACE_STD |
302 | |
303 | _LIBCPP_POP_MACROS |
304 | |
305 | #endif // _LIBCPP_STD_VER >= 17 |
306 | |
307 | #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
308 |
Warning: This file is not a C or C++ file. It does not have highlighting.