Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | //===----------------------------------------------------------------------===// |
|---|---|
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 10 | #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 11 | |
| 12 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 13 | # pragma GCC system_header |
| 14 | #endif |
| 15 | |
| 16 | #include <__algorithm/fill_n.h> |
| 17 | #include <__config> |
| 18 | #include <__functional/hash.h> |
| 19 | #include <__functional/operations.h> |
| 20 | #include <__iterator/distance.h> |
| 21 | #include <__iterator/iterator_traits.h> |
| 22 | #include <__memory/shared_ptr.h> |
| 23 | #include <__type_traits/make_unsigned.h> |
| 24 | #include <__utility/pair.h> |
| 25 | #include <__vector/vector.h> |
| 26 | #include <array> |
| 27 | #include <limits> |
| 28 | #include <unordered_map> |
| 29 | |
| 30 | #if _LIBCPP_STD_VER >= 17 |
| 31 | |
| 32 | _LIBCPP_PUSH_MACROS |
| 33 | # include <__undef_macros> |
| 34 | |
| 35 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 36 | |
| 37 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> |
| 38 | class _BMSkipTable; |
| 39 | |
| 40 | // General case for BM data searching; use a map |
| 41 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
| 42 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { |
| 43 | private: |
| 44 | using value_type = _Value; |
| 45 | using key_type = _Key; |
| 46 | |
| 47 | const value_type __default_value_; |
| 48 | unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; |
| 49 | |
| 50 | public: |
| 51 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( |
| 52 | size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) |
| 53 | : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} |
| 54 | |
| 55 | _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } |
| 56 | |
| 57 | _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { |
| 58 | auto __it = __table_.find(__key); |
| 59 | return __it == __table_.end() ? __default_value_ : __it->second; |
| 60 | } |
| 61 | }; |
| 62 | |
| 63 | // Special case small numeric values; use an array |
| 64 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
| 65 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { |
| 66 | private: |
| 67 | using value_type = _Value; |
| 68 | using key_type = _Key; |
| 69 | |
| 70 | using unsigned_key_type = make_unsigned_t<key_type>; |
| 71 | std::array<value_type, 256> __table_; |
| 72 | static_assert(numeric_limits<unsigned_key_type>::max() < 256); |
| 73 | |
| 74 | public: |
| 75 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { |
| 76 | std::fill_n(__table_.data(), __table_.size(), __default_value); |
| 77 | } |
| 78 | |
| 79 | _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { |
| 80 | __table_[static_cast<unsigned_key_type>(__key)] = __val; |
| 81 | } |
| 82 | |
| 83 | _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { |
| 84 | return __table_[static_cast<unsigned_key_type>(__key)]; |
| 85 | } |
| 86 | }; |
| 87 | |
| 88 | template <class _RandomAccessIterator1, |
| 89 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
| 90 | class _BinaryPredicate = equal_to<>> |
| 91 | class boyer_moore_searcher { |
| 92 | private: |
| 93 | using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; |
| 94 | using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; |
| 95 | using __skip_table_type _LIBCPP_NODEBUG = |
| 96 | _BMSkipTable<value_type, |
| 97 | difference_type, |
| 98 | _Hash, |
| 99 | _BinaryPredicate, |
| 100 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
| 101 | is_same_v<_BinaryPredicate, equal_to<>>>; |
| 102 | |
| 103 | public: |
| 104 | _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( |
| 105 | _RandomAccessIterator1 __first, |
| 106 | _RandomAccessIterator1 __last, |
| 107 | _Hash __hash = _Hash(), |
| 108 | _BinaryPredicate __pred = _BinaryPredicate()) |
| 109 | : __first_(__first), |
| 110 | __last_(__last), |
| 111 | __pred_(__pred), |
| 112 | __pattern_length_(__last - __first), |
| 113 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), |
| 114 | __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( |
| 115 | allocator<difference_type>(), __pattern_length_ + 1)) { |
| 116 | difference_type __i = 0; |
| 117 | while (__first != __last) { |
| 118 | __skip_table_->insert(*__first, __i); |
| 119 | ++__first; |
| 120 | ++__i; |
| 121 | } |
| 122 | __build_suffix_table(__first_, __last_, __pred_); |
| 123 | } |
| 124 | |
| 125 | template <class _RandomAccessIterator2> |
| 126 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 127 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
| 128 | static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, |
| 129 | typename iterator_traits<_RandomAccessIterator2>::value_type>::value, |
| 130 | "Corpus and Pattern iterators must point to the same type"); |
| 131 | if (__first == __last) |
| 132 | return std::make_pair(__last, __last); |
| 133 | if (__first_ == __last_) |
| 134 | return std::make_pair(__first, __first); |
| 135 | |
| 136 | if (__pattern_length_ > (__last - __first)) |
| 137 | return std::make_pair(__last, __last); |
| 138 | return __search(__first, __last); |
| 139 | } |
| 140 | |
| 141 | private: |
| 142 | _RandomAccessIterator1 __first_; |
| 143 | _RandomAccessIterator1 __last_; |
| 144 | _BinaryPredicate __pred_; |
| 145 | difference_type __pattern_length_; |
| 146 | shared_ptr<__skip_table_type> __skip_table_; |
| 147 | shared_ptr<difference_type[]> __suffix_; |
| 148 | |
| 149 | template <class _RandomAccessIterator2> |
| 150 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 151 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
| 152 | _RandomAccessIterator2 __current = __f; |
| 153 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
| 154 | const __skip_table_type& __skip_table = *__skip_table_; |
| 155 | |
| 156 | while (__current <= __last) { |
| 157 | difference_type __j = __pattern_length_; |
| 158 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
| 159 | --__j; |
| 160 | if (__j == 0) |
| 161 | return std::make_pair(__current, __current + __pattern_length_); |
| 162 | } |
| 163 | |
| 164 | difference_type __k = __skip_table[__current[__j - 1]]; |
| 165 | difference_type __m = __j - __k - 1; |
| 166 | if (__k < __j && __m > __suffix_[__j]) |
| 167 | __current += __m; |
| 168 | else |
| 169 | __current += __suffix_[__j]; |
| 170 | } |
| 171 | return std::make_pair(__l, __l); |
| 172 | } |
| 173 | |
| 174 | template <class _Iterator, class _Container> |
| 175 | _LIBCPP_HIDE_FROM_ABI void |
| 176 | __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { |
| 177 | const size_t __count = __last - __first; |
| 178 | |
| 179 | __prefix[0] = 0; |
| 180 | size_t __k = 0; |
| 181 | |
| 182 | for (size_t __i = 1; __i != __count; ++__i) { |
| 183 | while (__k > 0 && !__pred(__first[__k], __first[__i])) |
| 184 | __k = __prefix[__k - 1]; |
| 185 | |
| 186 | if (__pred(__first[__k], __first[__i])) |
| 187 | ++__k; |
| 188 | __prefix[__i] = __k; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | _LIBCPP_HIDE_FROM_ABI void |
| 193 | __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { |
| 194 | const size_t __count = __last - __first; |
| 195 | |
| 196 | if (__count == 0) |
| 197 | return; |
| 198 | |
| 199 | vector<difference_type> __scratch(__count); |
| 200 | |
| 201 | __compute_bm_prefix(__first, __last, __pred, __scratch); |
| 202 | for (size_t __i = 0; __i <= __count; ++__i) |
| 203 | __suffix_[__i] = __count - __scratch[__count - 1]; |
| 204 | |
| 205 | using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; |
| 206 | __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); |
| 207 | |
| 208 | for (size_t __i = 0; __i != __count; ++__i) { |
| 209 | const size_t __j = __count - __scratch[__i]; |
| 210 | const difference_type __k = __i - __scratch[__i] + 1; |
| 211 | |
| 212 | if (__suffix_[__j] > __k) |
| 213 | __suffix_[__j] = __k; |
| 214 | } |
| 215 | } |
| 216 | }; |
| 217 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); |
| 218 | |
| 219 | template <class _RandomAccessIterator1, |
| 220 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
| 221 | class _BinaryPredicate = equal_to<>> |
| 222 | class boyer_moore_horspool_searcher { |
| 223 | private: |
| 224 | using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; |
| 225 | using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; |
| 226 | using __skip_table_type _LIBCPP_NODEBUG = |
| 227 | _BMSkipTable<value_type, |
| 228 | difference_type, |
| 229 | _Hash, |
| 230 | _BinaryPredicate, |
| 231 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
| 232 | is_same_v<_BinaryPredicate, equal_to<>>>; |
| 233 | |
| 234 | public: |
| 235 | _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( |
| 236 | _RandomAccessIterator1 __first, |
| 237 | _RandomAccessIterator1 __last, |
| 238 | _Hash __hash = _Hash(), |
| 239 | _BinaryPredicate __pred = _BinaryPredicate()) |
| 240 | : __first_(__first), |
| 241 | __last_(__last), |
| 242 | __pred_(__pred), |
| 243 | __pattern_length_(__last - __first), |
| 244 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { |
| 245 | if (__first == __last) |
| 246 | return; |
| 247 | --__last; |
| 248 | difference_type __i = 0; |
| 249 | while (__first != __last) { |
| 250 | __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); |
| 251 | ++__first; |
| 252 | ++__i; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | template <class _RandomAccessIterator2> |
| 257 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 258 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
| 259 | static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, |
| 260 | typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, |
| 261 | "Corpus and Pattern iterators must point to the same type"); |
| 262 | if (__first == __last) |
| 263 | return std::make_pair(__last, __last); |
| 264 | if (__first_ == __last_) |
| 265 | return std::make_pair(__first, __first); |
| 266 | |
| 267 | if (__pattern_length_ > __last - __first) |
| 268 | return std::make_pair(__last, __last); |
| 269 | |
| 270 | return __search(__first, __last); |
| 271 | } |
| 272 | |
| 273 | private: |
| 274 | _RandomAccessIterator1 __first_; |
| 275 | _RandomAccessIterator1 __last_; |
| 276 | _BinaryPredicate __pred_; |
| 277 | difference_type __pattern_length_; |
| 278 | shared_ptr<__skip_table_type> __skip_table_; |
| 279 | |
| 280 | template <class _RandomAccessIterator2> |
| 281 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 282 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
| 283 | _RandomAccessIterator2 __current = __f; |
| 284 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
| 285 | const __skip_table_type& __skip_table = *__skip_table_; |
| 286 | |
| 287 | while (__current <= __last) { |
| 288 | difference_type __j = __pattern_length_; |
| 289 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
| 290 | --__j; |
| 291 | if (__j == 0) |
| 292 | return std::make_pair(__current, __current + __pattern_length_); |
| 293 | } |
| 294 | __current += __skip_table[__current[__pattern_length_ - 1]]; |
| 295 | } |
| 296 | return std::make_pair(__l, __l); |
| 297 | } |
| 298 | }; |
| 299 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); |
| 300 | |
| 301 | _LIBCPP_END_NAMESPACE_STD |
| 302 | |
| 303 | _LIBCPP_POP_MACROS |
| 304 | |
| 305 | #endif // _LIBCPP_STD_VER >= 17 |
| 306 | |
| 307 | #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 308 |
Warning: This file is not a C or C++ file. It does not have highlighting.
