首先,我知道互联网上有大量关于KMP的信息,但是很多例子只是代码,没有任何解释。所以我决定自己实现它。
其次,我将使用这个算法来搜索一个数字字符串中的模式。
我把所有的东西都打包在KMP课上。
#ifndef KMP_H
#define KMP_H
#include <cstddef>
#include <vector>
class KMP
{
protected :
std::vector< size_t > found_positions;
public :
KMP() :
found_positions( 0 )
{ }
void build_failure_vector( const std::vector< float >& pattern,
std::vector< int >& failure_vector );
void search_for_pattern( const std::vector< float >& pattern,
const std::vector< float >& sample );
typedef typename std::vector< size_t >::const_iterator position_iterator;
inline position_iterator cbegin() const noexcept { return found_positions.cbegin(); }
inline position_iterator cend() const noexcept { return found_positions.cend(); }
};
#endif#include "KMP.h"
void KMP::build_failure_vector( const std::vector< float >& pattern,
std::vector< int >& failure_vector )
{
//extra space for the case when all occurencies are required
failure_vector.resize( pattern.size() + 1 );
//set -1 as an indicator of the beginning of a pattern
failure_vector[ 0 ] = -1;
//in the case of a mismatch we don't shift text index back
//but instead we check if we could proceed from a proper suffix of the pattern
//it is the pattern index which indicates the position of a potential match
int continue_from;
for( size_t i = 1; i < failure_vector.size(); i++ )
{
//if a mismatch was at index i
//we check if the beginning of a pattern has been reached or
//the previous pattern character (at i-1) matches
//some failure character
//start from the failure at the previous pattern index
continue_from = failure_vector[ i-1 ];
while( (continue_from >= 0) &&
(pattern[ continue_from ] != pattern[ i-1 ] ) )
{
continue_from = failure_vector[ continue_from ];
}
//if in the above while loop we found that
//pattern[ i-1 ] == pattern[ continue_from ] for some continue_from index
//then we should check if the current main text character mathes pattern[ continue_from + 1 ]
failure_vector[ i ] = continue_from + 1;
}
}
void KMP::search_for_pattern( const std::vector< float >& pattern,
const std::vector< float >& sample )
{
//prepare failure function
std::vector< int > failure_vector;
build_failure_vector( pattern, failure_vector );
//position in the main text
size_t sample_pos = 0;
//must be of type int because may be equal to -1
int pattern_pos = 0;
while( sample_pos < sample.size() )
{
//check next character in the main text
//only if the next potential match is the beginning of the pattern
//or there is a match
if( (pattern_pos < 0) ||
(sample[ sample_pos ] == pattern[ pattern_pos ]) )
{
sample_pos++;
//if pattern_pos was -1 then it becomes 0
//i.e. we start from the beginning
pattern_pos++;
if( pattern_pos == pattern.size() )
{
//complete match occured
found_positions.push_back( sample_pos - pattern_pos );
//the last position in failure_vector
//says what we should continue from after complete match
pattern_pos = failure_vector[ pattern_pos ];
}
}
//if there was a mismatch
else
{
//what next character of the pattern is a potential match
pattern_pos = failure_vector[ pattern_pos ];
}
}
}使用非常简单。例如,
样本向量:3,1,3,1,3,2,0,1,3,3,模式向量:3,1,3,故障向量:-1,001,模式找到位置向量:0,2,8,
发布于 2019-04-25 13:11:55
让编译器进行优化
C++编译器已经非常擅长于优化,特别是当它们被编译为-O3时。编译器将自动处理优化(如内联函数),不需要在头文件中对前面的函数进行inline规范。看这个堆栈溢出问题。
在代码中使用的函数cbegin()和cend()并不明显。
包含不必要的头文件
代码在不包括cstddef的情况下正确编译。包含不必要的头文件可能会导致符号冲突,也会减慢编译时间,因为包含文件中的代码也需要编译。
过度评论
很明显,注释解决方案是主要目标之一,这是一个很好的目标,但是,在注释代码方面也存在这样的问题。随着代码的老化,将对其进行修改以修复bug或进行优化,而注释需要额外的工作来确保它们继续表示代码。最好使用自文档化的变量和函数名称,以使代码更加清晰。如果算法需要注释,最好将块注释放在函数的顶部。
https://codereview.stackexchange.com/questions/219095
复制相似问题