1 | /* |
2 | Copyright (c) Marshall Clow 2010-2012. |
3 | |
4 | Distributed under the Boost Software License, Version 1.0. (See accompanying |
5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
6 | |
7 | For more information, see http://www.boost.org |
8 | */ |
9 | |
10 | #include <boost/algorithm/searching/boyer_moore.hpp> |
11 | #include <boost/algorithm/searching/boyer_moore_horspool.hpp> |
12 | #include <boost/algorithm/searching/knuth_morris_pratt.hpp> |
13 | |
14 | #define BOOST_TEST_MAIN |
15 | #include <boost/test/unit_test.hpp> |
16 | |
17 | #include <iostream> |
18 | #include <string> |
19 | #include <vector> |
20 | |
21 | |
22 | namespace ba = boost::algorithm; |
23 | |
24 | template <typename Iter> |
25 | std::string make_str ( Iter first, std::size_t len ) { |
26 | std::string retVal ( len + 2, '\'' ); |
27 | std::copy ( first, first+len, retVal.begin () + 1); |
28 | return retVal; |
29 | } |
30 | |
31 | namespace { |
32 | |
33 | // Check using iterators |
34 | template<typename Container> |
35 | void check_one_iter ( const Container &haystack, const std::string &needle, int expected ) { |
36 | typedef typename Container::const_iterator iter_type; |
37 | typedef std::string::const_iterator pattern_type; |
38 | |
39 | iter_type hBeg = haystack.begin (); |
40 | iter_type hEnd = haystack.end (); |
41 | pattern_type nBeg = needle.begin (); |
42 | pattern_type nEnd = needle.end (); |
43 | |
44 | iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd); |
45 | iter_type it1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd); |
46 | iter_type it1r = ba::boyer_moore_search (haystack, nBeg, nEnd); |
47 | iter_type it2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd); |
48 | iter_type it3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd); |
49 | const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 ); |
50 | |
51 | std::cout << "(Iterators) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl; |
52 | try { |
53 | if ( it0 != it1 ) { |
54 | throw std::runtime_error ( |
55 | std::string ( "results mismatch between std::search and boyer-moore search" )); |
56 | } |
57 | |
58 | if ( it1 != it1r ) { |
59 | throw std::runtime_error ( |
60 | std::string ( "results mismatch between iterator and range boyer_moore search" )); |
61 | } |
62 | |
63 | if ( it1 != it2 ) { |
64 | throw std::runtime_error ( |
65 | std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" )); |
66 | } |
67 | |
68 | if ( it1 != it3 ) |
69 | throw std::runtime_error ( |
70 | std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" )); |
71 | |
72 | } |
73 | |
74 | catch ( ... ) { |
75 | std::cout << "Searching for: " << needle << std::endl; |
76 | std::cout << "Expected: " << expected << "\n" ; |
77 | std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n" ; |
78 | std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n" ; |
79 | std::cout << " bm(r): " << std::distance ( hBeg, it1r ) << "\n" ; |
80 | std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n" ; |
81 | std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n" ; |
82 | std::cout << std::flush; |
83 | throw ; |
84 | } |
85 | |
86 | BOOST_CHECK_EQUAL ( dist, expected ); |
87 | } |
88 | |
89 | // Check using pointers |
90 | // We're assuming that the container implements contiguous storage here. |
91 | template<typename Container> |
92 | void check_one_pointer ( const Container &haystack, const std::string &needle, int expected ) { |
93 | typedef const typename Container::value_type *ptr_type; |
94 | ptr_type hBeg = haystack.size () == 0 ? NULL : &*haystack.begin (); |
95 | ptr_type hEnd = hBeg + haystack.size (); |
96 | ptr_type nBeg = needle.size () == 0 ? NULL : &*needle.begin (); |
97 | ptr_type nEnd = nBeg + needle.size (); |
98 | |
99 | ptr_type it0 = std::search (hBeg, hEnd, nBeg, nEnd); |
100 | ptr_type it1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd); |
101 | ptr_type it2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd); |
102 | ptr_type it3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd); |
103 | const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 ); |
104 | |
105 | std::cout << "(Pointers) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl; |
106 | try { |
107 | if ( it0 != it1 ) { |
108 | throw std::runtime_error ( |
109 | std::string ( "results mismatch between std::search and boyer-moore search" )); |
110 | } |
111 | |
112 | if ( it1 != it2 ) { |
113 | throw std::runtime_error ( |
114 | std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" )); |
115 | } |
116 | |
117 | if ( it1 != it3 ) |
118 | throw std::runtime_error ( |
119 | std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" )); |
120 | |
121 | } |
122 | |
123 | catch ( ... ) { |
124 | std::cout << "Searching for: " << needle << std::endl; |
125 | std::cout << "Expected: " << expected << "\n" ; |
126 | std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n" ; |
127 | std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n" ; |
128 | std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n" ; |
129 | std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n" ; |
130 | std::cout << std::flush; |
131 | throw ; |
132 | } |
133 | |
134 | BOOST_CHECK_EQUAL ( dist, expected ); |
135 | } |
136 | |
137 | // Check using objects |
138 | template<typename Container> |
139 | void check_one_object ( const Container &haystack, const std::string &needle, int expected ) { |
140 | typedef typename Container::const_iterator iter_type; |
141 | typedef std::string::const_iterator pattern_type; |
142 | |
143 | iter_type hBeg = haystack.begin (); |
144 | iter_type hEnd = haystack.end (); |
145 | pattern_type nBeg = needle.begin (); |
146 | pattern_type nEnd = needle.end (); |
147 | |
148 | ba::boyer_moore<pattern_type> bm_r = ba::make_boyer_moore ( r: needle ); |
149 | ba::boyer_moore<pattern_type> bm ( nBeg, nEnd ); |
150 | ba::boyer_moore_horspool<pattern_type> bmh ( nBeg, nEnd ); |
151 | ba::knuth_morris_pratt<pattern_type> kmp ( nBeg, nEnd ); |
152 | |
153 | iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd); |
154 | iter_type it1 = bm (hBeg, hEnd); |
155 | iter_type it1r = bm (haystack); |
156 | iter_type rt1 = bm_r (hBeg, hEnd); |
157 | iter_type rt1r = bm_r (haystack); |
158 | iter_type it2 = bmh (hBeg, hEnd); |
159 | iter_type it3 = kmp (hBeg, hEnd); |
160 | const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 ); |
161 | |
162 | std::cout << "(Objects) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl; |
163 | try { |
164 | if ( it0 != it1 ) { |
165 | throw std::runtime_error ( |
166 | std::string ( "results mismatch between std::search and boyer-moore search" )); |
167 | } |
168 | |
169 | if ( it1 != it1r ) { |
170 | throw std::runtime_error ( |
171 | std::string ( "results mismatch between iterator and range boyer_moore search(1)" )); |
172 | } |
173 | |
174 | if ( it1 != rt1 ) { |
175 | throw std::runtime_error ( |
176 | std::string ( "results mismatch between iterator and range boyer_moore search(2)" )); |
177 | } |
178 | |
179 | if ( rt1 != rt1r ) { |
180 | throw std::runtime_error ( |
181 | std::string ( "results mismatch between iterator and range boyer_moore search(3)" )); |
182 | } |
183 | |
184 | if ( it1 != it2 ) { |
185 | throw std::runtime_error ( |
186 | std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" )); |
187 | } |
188 | |
189 | if ( it1 != it3 ) |
190 | throw std::runtime_error ( |
191 | std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" )); |
192 | |
193 | } |
194 | |
195 | catch ( ... ) { |
196 | std::cout << "Searching for: " << needle << std::endl; |
197 | std::cout << "Expected: " << expected << "\n" ; |
198 | std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n" ; |
199 | std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n" ; |
200 | std::cout << " bm(r1): " << std::distance ( hBeg, it1r ) << "\n" ; |
201 | std::cout << " bm(r2): " << std::distance ( hBeg, rt1 ) << "\n" ; |
202 | std::cout << " bm(r3): " << std::distance ( hBeg, rt1r ) << "\n" ; |
203 | std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n" ; |
204 | std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n" ; |
205 | std::cout << std::flush; |
206 | throw ; |
207 | } |
208 | |
209 | BOOST_CHECK_EQUAL ( dist, expected ); |
210 | } |
211 | |
212 | |
213 | template<typename Container> |
214 | void check_one ( const Container &haystack, const std::string &needle, int expected ) { |
215 | check_one_iter ( haystack, needle, expected ); |
216 | check_one_pointer ( haystack, needle, expected ); |
217 | check_one_object ( haystack, needle, expected ); |
218 | } |
219 | } |
220 | |
221 | |
222 | BOOST_AUTO_TEST_CASE( test_main ) |
223 | { |
224 | std::string haystack1 ( "NOW AN FOWE\220ER ANNMAN THE ANPANMANEND" ); |
225 | std::string needle1 ( "ANPANMAN" ); |
226 | std::string needle2 ( "MAN THE" ); |
227 | std::string needle3 ( "WE\220ER" ); |
228 | std::string needle4 ( "NOW " ); // At the beginning |
229 | std::string needle5 ( "NEND" ); // At the end |
230 | std::string needle6 ( "NOT FOUND" ); // Nowhere |
231 | std::string needle7 ( "NOT FO\340ND" ); // Nowhere |
232 | |
233 | std::string haystack2 ( "ABC ABCDAB ABCDABCDABDE" ); |
234 | std::string needle11 ( "ABCDABD" ); |
235 | |
236 | std::string haystack3 ( "abra abracad abracadabra" ); |
237 | std::string needle12 ( "abracadabra" ); |
238 | |
239 | std::string needle13 ( "" ); |
240 | std::string haystack4 ( "" ); |
241 | |
242 | check_one ( haystack: haystack1, needle: needle1, expected: 26 ); |
243 | check_one ( haystack: haystack1, needle: needle2, expected: 18 ); |
244 | check_one ( haystack: haystack1, needle: needle3, expected: 9 ); |
245 | check_one ( haystack: haystack1, needle: needle4, expected: 0 ); |
246 | check_one ( haystack: haystack1, needle: needle5, expected: 33 ); |
247 | check_one ( haystack: haystack1, needle: needle6, expected: -1 ); |
248 | check_one ( haystack: haystack1, needle: needle7, expected: -1 ); |
249 | |
250 | check_one ( haystack: needle1, needle: haystack1, expected: -1 ); // cant find long pattern in short corpus |
251 | check_one ( haystack: haystack1, needle: haystack1, expected: 0 ); // find something in itself |
252 | check_one ( haystack: haystack2, needle: haystack2, expected: 0 ); // find something in itself |
253 | |
254 | check_one ( haystack: haystack2, needle: needle11, expected: 15 ); |
255 | check_one ( haystack: haystack3, needle: needle12, expected: 13 ); |
256 | |
257 | check_one ( haystack: haystack1, needle: needle13, expected: 0 ); // find the empty string |
258 | check_one ( haystack: haystack4, needle: needle1, expected: -1 ); // can't find in an empty haystack |
259 | |
260 | // Mikhail Levin <svarneticist@gmail.com> found a problem, and this was the test |
261 | // that triggered it. |
262 | |
263 | const std::string mikhail_pattern = |
264 | "GATACACCTACCTTCACCAGTTACTCTATGCACTAGGTGCGCCAGGCCCATGCACAAGGGCTTGAGTGGATGGGAAGGA" |
265 | "TGTGCCCTAGTGATGGCAGCATAAGCTACGCAGAGAAGTTCCAGGGCAGAGTCACCATGACCAGGGACACATCCACGAG" |
266 | "CACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCCATGTATTACTGTGGGAGAGATGTCTGGAGTGGT" |
267 | "TATTATTGCCCCGGTAATATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACG" |
268 | ; |
269 | const std::string mikhail_corpus = std::string (8, 'a') + mikhail_pattern; |
270 | |
271 | check_one ( haystack: mikhail_corpus, needle: mikhail_pattern, expected: 8 ); |
272 | } |
273 | |