Claw  1.7.3
kmp.tpp
Go to the documentation of this file.
1 /*
2  CLAW - a C++ Library Absolutely Wonderful
3 
4  CLAW is a free library without any particular aim but being useful to
5  anyone.
6 
7  Copyright (C) 2005-2011 Julien Jorge
8 
9  This library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU Lesser General Public
11  License as published by the Free Software Foundation; either
12  version 2.1 of the License, or (at your option) any later version.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public
20  License along with this library; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 
23  contact: julien.jorge@gamned.org
24 */
30 #include <map>
31 #include <assert.h>
32 #include <claw/it_index.hpp>
33 
34 /*---------------------------------------------------------------------------*/
43 template<class RandomIterator>
44 unsigned int
46 common_prefix_length( const RandomIterator begin_1,
47  const RandomIterator begin_2,
48  const RandomIterator end_1, const RandomIterator end_2
49  ) const
50 {
51  unsigned int count = 0;
52  RandomIterator it_1 = begin_1, it_2 = begin_2;
53  bool quit = false;
54 
55  while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
56  if ( *it_1 == *it_2 )
57  {
58  ++it_1;
59  ++it_2;
60  ++count;
61  }
62  else
63  quit = true;
64 
65  return count;
66 } // common_prefix_length()
67 
68 /*---------------------------------------------------------------------------*/
76 template<class RandomIterator>
77 void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
78  const RandomIterator end,
79  std::map<unsigned int,unsigned int>& out) const
80 {
81  // right and left bounds of the current item's Z-box.
83  claw::it_index<RandomIterator> it_l(begin);
84  claw::it_index<RandomIterator> it_k(begin); // increment on the items
85  unsigned int z; // le Zi of the current position
86 
87  for (++it_k; it_k!=end; ++it_k)
88  {
89  if (it_k > it_r)
90  {
91  z = common_prefix_length(begin, it_k, end, end);
92 
93  if ( z > 0 ) // set the Z-box
94  {
95  out[it_k] = z;
96  it_l = it_k;
97  it_r = it_k.operator+(z) - 1;
98  }
99  }
100  else /* k <= r */
101  {
102  unsigned int k_bis = it_k - it_l;
103  claw::it_index<RandomIterator> it_b(it_r - it_k);
104 
105  if ( out.find(k_bis) == out.end() )
106  z = 0;
107  else
108  z = out[k_bis];
109 
110  if ( z <= (unsigned int)it_b )
111  {
112  if ( z > 0 )
113  out[it_k] = z;
114  }
115  else
116  {
117  claw::it_index<RandomIterator> it_q = it_r + 1;
118 
119  it_q += common_prefix_length(it_q, it_b+1, end, end);
120 
121  out[it_k] = it_q - it_k;
122  it_r = it_q - 1;
123  it_l = it_k;
124  }
125  }
126  }
127 } // z_boxes()
128 
129 /*---------------------------------------------------------------------------*/
137 template<class RandomIterator>
138 void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
139  const RandomIterator end,
140  std::map<unsigned int, unsigned int>& out) const
141 {
142  std::map<unsigned int, unsigned int> z; // pattern's Z-boxes.
143  unsigned int j; // loop index.
144 
145  // set Z-boxes
146  z_boxes(begin, end, z);
147 
148  // calculates spi' (from end to begining)
149  j=end-begin;
150 
151  do
152  {
153  --j;
154  if (z.find(j) != z.end())
155  out[j + z[j] - 1] = z[j];
156  }
157  while (j!=0);
158 } // spi_prime()
159 
160 /*---------------------------------------------------------------------------*/
171 template<class RandomIterator>
172 template<class UnaryPredicate>
174  (const RandomIterator pattern_begin, const RandomIterator pattern_end,
175  const RandomIterator text_begin, const RandomIterator text_end,
176  UnaryPredicate& action ) const
177 {
178  std::map<unsigned int, unsigned int> spi; // pattern's spi'
179  claw::it_index<RandomIterator> it_p(pattern_begin,1);
180  claw::it_index<RandomIterator> it_t(text_begin,1);
181  bool stop = false; // result of the last call to the predicate action
182 
183  const int pattern_length = pattern_end - pattern_begin;
184  const int text_length = text_end - text_begin;
185 
186  assert(pattern_begin != pattern_end);
187 
188  spi_prime(pattern_begin, pattern_end, spi);
189 
190  unsigned int i = 0;
191 
192  while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
193  {
194  unsigned int common;
195 
196  common = common_prefix_length(it_p, it_t, pattern_end, text_end);
197  i += common;
198  it_p += common;
199  it_t += common;
200 
201  if (it_p == 1)
202  ++it_t;
203  else
204  {
205  if ( (int)it_p == pattern_length+1 )
206  stop = !action( (int)it_t - pattern_length-1 );
207 
208  i = spi[i-1];
209  it_p.set(pattern_begin+i, i+1);
210  }
211  }
212 } // operator()