1/*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8*/
9
10#ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
11#define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
12
13#include <iterator> // for std::iterator_traits
14
15#include <boost/assert.hpp>
16#include <boost/static_assert.hpp>
17
18#include <boost/range/begin.hpp>
19#include <boost/range/end.hpp>
20
21#include <boost/utility/enable_if.hpp>
22#include <boost/type_traits/is_same.hpp>
23
24#include <boost/algorithm/searching/detail/bm_traits.hpp>
25#include <boost/algorithm/searching/detail/debugging.hpp>
26
27// #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
28
29namespace boost { namespace algorithm {
30
31/*
32 A templated version of the boyer-moore-horspool searching algorithm.
33
34 Requirements:
35 * Random access iterators
36 * The two iterator types (patIter and corpusIter) must
37 "point to" the same underlying type.
    * Additional requirements may be imposed by the skip table, such as:
39 ** Numeric type (array-based skip table)
40 ** Hashable type (map-based skip table)
41
42http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
43
44*/
45
46 template <typename patIter, typename traits = detail::BM_traits<patIter> >
47 class boyer_moore_horspool {
48 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
49 public:
50 boyer_moore_horspool ( patIter first, patIter last )
51 : pat_first ( first ), pat_last ( last ),
52 k_pattern_length ( std::distance ( pat_first, pat_last )),
53 skip_ ( k_pattern_length, k_pattern_length ) {
54
55 // Build the skip table
56 std::size_t i = 0;
57 if ( first != last ) // empty pattern?
58 for ( patIter iter = first; iter != last-1; ++iter, ++i )
59 skip_.insert ( *iter, k_pattern_length - 1 - i );
60#ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
61 skip_.PrintSkipTable ();
62#endif
63 }
64
65 ~boyer_moore_horspool () {}
66
67 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
68 /// \brief Searches the corpus for the pattern that was passed into the constructor
69 ///
70 /// \param corpus_first The start of the data to search (Random Access Iterator)
71 /// \param corpus_last One past the end of the data to search
72 /// \param p A predicate used for the search comparisons.
73 ///
74 template <typename corpusIter>
75 corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
76 BOOST_STATIC_ASSERT (( boost::is_same<
77 typename std::iterator_traits<patIter>::value_type,
78 typename std::iterator_traits<corpusIter>::value_type>::value ));
79
80 if ( corpus_first == corpus_last ) return corpus_last; // if nothing to search, we didn't find it!
81 if ( pat_first == pat_last ) return corpus_first; // empty pattern matches at start
82
83 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
84 // If the pattern is larger than the corpus, we can't find it!
85 if ( k_corpus_length < k_pattern_length )
86 return corpus_last;
87
88 // Do the search
89 return this->do_search ( corpus_first, corpus_last );
90 }
91
92 template <typename Range>
93 typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
94 return (*this) (boost::begin(r), boost::end(r));
95 }
96
97 private:
98/// \cond DOXYGEN_HIDE
99 patIter pat_first, pat_last;
100 const difference_type k_pattern_length;
101 typename traits::skip_table_t skip_;
102
103 /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
104 /// \brief Searches the corpus for the pattern that was passed into the constructor
105 ///
106 /// \param corpus_first The start of the data to search (Random Access Iterator)
107 /// \param corpus_last One past the end of the data to search
108 /// \param k_corpus_length The length of the corpus to search
109 ///
110 template <typename corpusIter>
111 corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
112 corpusIter curPos = corpus_first;
113 const corpusIter lastPos = corpus_last - k_pattern_length;
114 while ( curPos <= lastPos ) {
115 // Do we match right where we are?
116 std::size_t j = k_pattern_length - 1;
117 while ( pat_first [j] == curPos [j] ) {
118 // We matched - we're done!
119 if ( j == 0 )
120 return curPos;
121 j--;
122 }
123
124 curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
125 }
126
127 return corpus_last;
128 }
129// \endcond
130 };
131
132/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
133 Use a bit of TMP to disambiguate the 3-argument templates */
134
135/// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
136/// patIter pat_first, patIter pat_last )
137/// \brief Searches the corpus for the pattern.
138///
139/// \param corpus_first The start of the data to search (Random Access Iterator)
140/// \param corpus_last One past the end of the data to search
141/// \param pat_first The start of the pattern to search for (Random Access Iterator)
142/// \param pat_last One past the end of the data to search for
143///
144 template <typename patIter, typename corpusIter>
145 corpusIter boyer_moore_horspool_search (
146 corpusIter corpus_first, corpusIter corpus_last,
147 patIter pat_first, patIter pat_last )
148 {
149 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
150 return bmh ( corpus_first, corpus_last );
151 }
152
153 template <typename PatternRange, typename corpusIter>
154 corpusIter boyer_moore_horspool_search (
155 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
156 {
157 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
158 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
159 return bmh ( corpus_first, corpus_last );
160 }
161
162 template <typename patIter, typename CorpusRange>
163 typename boost::lazy_disable_if_c<
164 boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
165 ::type
166 boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
167 {
168 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
169 return bm (boost::begin (corpus), boost::end (corpus));
170 }
171
172 template <typename PatternRange, typename CorpusRange>
173 typename boost::range_iterator<CorpusRange>::type
174 boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern )
175 {
176 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
177 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
178 return bmh (boost::begin (corpus), boost::end (corpus));
179 }
180
181
182 // Creator functions -- take a pattern range, return an object
183 template <typename Range>
184 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type>
185 make_boyer_moore_horspool ( const Range &r ) {
186 return boost::algorithm::boyer_moore_horspool
187 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
188 }
189
190 template <typename Range>
191 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type>
192 make_boyer_moore_horspool ( Range &r ) {
193 return boost::algorithm::boyer_moore_horspool
194 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
195 }
196
197}}
198
199#endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
200

source code of boost/boost/algorithm/searching/boyer_moore_horspool.hpp