在这个存储库中,我编写了一个仅头文件(header-only)的字符串分割器,允许将字符和字符串字面量用作分隔符。
(小)库严格地说是C++17。
我想征求你的意见。
我也希望代码尽可能简短,因此也非常欢迎这方面的任何意见。
由于至少包含3行代码似乎是强制性的,下面是代码:
// MIT License
//
// Copyright (c) 2019 degski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
// Stream manipulator that writes a newline without flushing (unlike std::endl).
// Declared inline because this is a header: a non-inline definition would be emitted by every
// translation unit that includes it and break the link with multiple-definition errors.
inline std::ostream & nl ( std::ostream & out_ ) { return out_ << '\n'; }
// Wide-character counterpart of nl.
inline std::wostream & nl ( std::wostream & out_ ) { return out_ << L'\n'; }
namespace sax::detail {

// Building blocks for sax::string_split.
//
// A delimiter is a single CharT, a NUL-terminated const CharT *, or a std::basic_string_view<CharT>.
// An empty delimiter never matches anything: it is ignored by every helper below, which is what
// guarantees that the "repeat until nothing was removed" loops terminate.

// Returns x unchanged.
template<typename CharT>
[[ nodiscard ]] constexpr std::basic_string_view<CharT> make_string_view ( std::basic_string_view<CharT> x ) noexcept {
    return x;
}

// Returns a one-character view of x.
// x is taken by reference so that the view refers to the caller's character; taking it by value
// would hand back a view of a parameter that no longer exists once the function has returned.
// The view is only valid while the referenced character is alive.
template<typename CharT>
[[ nodiscard ]] constexpr std::basic_string_view<CharT> make_string_view ( const CharT & x ) noexcept {
    return std::basic_string_view<CharT> ( std::addressof ( x ), 1 );
}

// Returns a view of the NUL-terminated string x (the terminator is not part of the view).
template<typename CharT>
[[ nodiscard ]] constexpr std::basic_string_view<CharT> make_string_view ( const CharT * x ) noexcept {
    return std::basic_string_view<CharT> ( x );
}

// Removes one leading occurrence of x from s and sets removed to true if it did so.
// removed is never cleared here, so it accumulates over a sequence of calls.
template<typename CharT>
constexpr void remove_prefix ( std::basic_string_view<CharT> & s, bool & removed, std::basic_string_view<CharT> x ) noexcept {
    // C++20 spells the last two conditions s.starts_with ( x ).
    if ( not x.empty ( ) and s.size ( ) >= x.size ( ) and s.compare ( 0, x.size ( ), x ) == 0 ) {
        s.remove_prefix ( x.size ( ) );
        removed = true;
    }
}

// Single-character overload of the above.
template<typename CharT>
constexpr void remove_prefix ( std::basic_string_view<CharT> & s, bool & removed, CharT x ) noexcept {
    if ( not s.empty ( ) and s.front ( ) == x ) {
        s.remove_prefix ( 1 );
        removed = true;
    }
}

// NUL-terminated string overload; forwards to the string_view overload.
template<typename CharT>
constexpr void remove_prefix ( std::basic_string_view<CharT> & s, bool & removed, const CharT * x ) noexcept {
    remove_prefix ( s, removed, std::basic_string_view<CharT> ( x ) );
}

// Strips leading delimiters from s_ until s_ starts with none of args_.
// Within one pass the delimiters are tried in the order given.
template<typename CharT, typename ... Args>
constexpr void remove_prefix ( std::basic_string_view<CharT> & s_, Args ... args_ ) noexcept {
    bool removed = false;
    do {
        removed = false;
        // args_ are reused on every pass, so they are passed as lvalues rather than forwarded.
        ( remove_prefix ( s_, removed, args_ ), ... );
    } while ( removed );
}

// Removes one trailing occurrence of x from s and sets removed to true if it did so.
// removed is never cleared here, so it accumulates over a sequence of calls.
template<typename CharT>
constexpr void remove_suffix ( std::basic_string_view<CharT> & s, bool & removed, std::basic_string_view<CharT> x ) noexcept {
    // C++20 spells the last two conditions s.ends_with ( x ).
    if ( not x.empty ( ) and s.size ( ) >= x.size ( ) and s.compare ( s.size ( ) - x.size ( ), std::basic_string_view<CharT>::npos, x ) == 0 ) {
        s.remove_suffix ( x.size ( ) );
        removed = true;
    }
}

// Single-character overload of the above.
template<typename CharT>
constexpr void remove_suffix ( std::basic_string_view<CharT> & s, bool & removed, CharT x ) noexcept {
    if ( not s.empty ( ) and s.back ( ) == x ) {
        s.remove_suffix ( 1 );
        removed = true;
    }
}

// NUL-terminated string overload; forwards to the string_view overload.
template<typename CharT>
constexpr void remove_suffix ( std::basic_string_view<CharT> & s, bool & removed, const CharT * x ) noexcept {
    remove_suffix ( s, removed, std::basic_string_view<CharT> ( x ) );
}

// Strips trailing delimiters from s_ until s_ ends with none of args_.
// Within one pass the delimiters are tried in the order given.
template<typename CharT, typename ... Args>
constexpr void remove_suffix ( std::basic_string_view<CharT> & s_, Args ... args_ ) noexcept {
    bool removed = false;
    do {
        removed = false;
        // args_ are reused on every pass, so they are passed as lvalues rather than forwarded.
        ( remove_suffix ( s_, removed, args_ ), ... );
    } while ( removed );
}

// Accumulator step: lowers f_ to the position of the first occurrence of x_ in s, if that is smaller.
// f_ is left untouched when x_ does not occur or is empty.
// The overload only participates when SizeT is the view's size_type; otherwise a call such as
// find ( view, first_delimiter, second_delimiter ) with an lvalue first delimiter would bind here
// (treating the delimiter as the accumulator) instead of reaching the variadic overload below.
template<typename CharT, typename SizeT, typename StringyThing,
         typename = std::enable_if_t<std::is_same_v<SizeT, typename std::basic_string_view<CharT>::size_type>>>
constexpr void find ( std::basic_string_view<CharT> & s, SizeT & f_, StringyThing x_ ) noexcept {
    // x_ is a named parameter of this function, so the view made from it stays valid for the search.
    const std::basic_string_view<CharT> needle = make_string_view<CharT> ( x_ );
    if ( not needle.empty ( ) )
        f_ = std::min ( s.find ( needle ), f_ );
}

// Returns the smallest position in s_ at which any of args_ occurs, or npos if none occurs.
template<typename CharT, typename ... Args>
[[ nodiscard ]] constexpr auto find ( std::basic_string_view<CharT> & s_, Args ... args_ ) noexcept {
    auto found = std::basic_string_view<CharT>::npos;
    ( find ( s_, found, args_ ), ... );
    return found;
}

}
namespace sax {
// Splits string_ into the pieces found between delimiters.
//
// Trailing delimiters are stripped once up front. Then, repeatedly: leading delimiters are stripped,
// the earliest occurrence of any delimiter is located, and the text in front of it becomes a token.
// Runs of delimiters therefore never produce empty tokens in the middle of the result; the only way
// to get an empty token is an input that is empty or consists of delimiters only, which yields a
// vector holding a single empty view.
//
// The returned views point into string_, so string_ must outlive the returned vector.
template<typename CharT, typename ... Delimiters>
[[ nodiscard ]] std::vector<std::basic_string_view<CharT>> string_split ( const std::basic_string<CharT> & string_, Delimiters ... delimiters_ ) {
    using view_type = std::basic_string_view<CharT>;
    std::vector<view_type> tokens;
    tokens.reserve ( 4 ); // Skip the first few small reallocations.
    view_type rest ( string_ );
    // The delimiters are handed over as rvalues on purpose: an lvalue first delimiter could bind to
    // the accumulator parameter of detail::find's three-argument overload in a two-delimiter call.
    detail::remove_suffix ( rest, std::forward<Delimiters> ( delimiters_ ) ... );
    for ( ; ; ) {
        detail::remove_prefix ( rest, std::forward<Delimiters> ( delimiters_ ) ... );
        const typename view_type::size_type cut = detail::find ( rest, std::forward<Delimiters> ( delimiters_ ) ... );
        if ( view_type::npos == cut )
            break;
        tokens.push_back ( rest.substr ( 0, cut ) );
        rest.remove_prefix ( cut );
    }
    // Whatever is left after the last delimiter is the final token.
    tokens.push_back ( rest );
    return tokens;
}
}
使用方式如下:
// MIT License
//
// Copyright (c) 2019 degski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <array>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <random>
#include <string>
#include <type_traits>
#include <vector>
namespace fs = std::filesystem;
#include <string_split.hpp>
// Writes every element of s_ to out_ wrapped in double quotes, with a single space between
// elements, and returns out_.
// The separator is written only between elements, so nothing has to be "erased" afterwards with a
// backspace character (which would end up as a literal control byte in files and pipes), and an
// empty container writes nothing. Not noexcept: stream insertion may throw.
template<typename Stream, typename Container>
Stream & operator << ( Stream & out_, const Container & s_ ) {
    bool first = true;
    for ( const auto & v : s_ ) {
        if ( not first )
            out_ << ' ';
        first = false;
        out_ << '\"' << v << '\"';
    }
    return out_;
}
// Demo: splits a sample sentence and prints the resulting tokens on one line.
int main ( ) {
// Sample input with leading, trailing and repeated delimiters.
std::string s ( " , \t the quick brown ,, ,fox jumps underover \t , the lazy dog ," );
// Delimiters can be mixed freely: string literals (" ", "\t", "under") and a character literal (',').
std::cout << sax::string_split ( s, " ", ',', "\t", "under" ) << nl;
return EXIT_SUCCESS;
}输出:
"the" "quick" "brown" "fox" "jumps" "over" "the" "lazy" "dog"代码所做的(应该做的):从字符串中移除传入的任何分隔符,从左到右执行分隔符,从左到右应用分隔符,并将现在分离的位作为string _view的向量返回到原始字符串上。后者意味着字符串必须超过字符串视图的向量。
以上所述意味着,根据您放置的分隔符类型(因为它们可以是字符串,可以相互交互),分隔符的顺序具有重要意义。
nl:这段代码最初位于一个更大的库的另一个头文件中,但由于这里要求所有内容都必须自成一体,所以就直接把它放了进来。
removed = removed or true可以/应该是removed = true。
发布于 2019-03-01 18:25:57
我自己并不总是遵循这个建议,但是为您正在实现的函数/算法编写一个规范是个好主意。(有关更多信息,请参见Eric的这篇帖子)。这也会让您更容易设计测试用例,并推理各种边界情况(例如,如果我尝试做split("aaba", "a", "ab"),行为是什么?)
在本例中,规范可以这样写:给定一个string_view和一组"字符串"分隔符(参数列表),返回一个由string_view组成的std::vector,表示分隔符之间的文本(使用第一个匹配的分隔符)。如果没有找到分隔符,则返回只包含输入字符串这一个元素的向量。
我之所以提到这一点,是因为目前很难看到代码背后的总体想法。我并不完全清楚它在做什么,我可以看到它正在删除后缀和前缀,但是很难验证主循环总是会终止。我认为,如果您事先编写了一个非正式的规范,那么您可能最终会得到一个更结构化和更容易理解的主循环。
下面是我在阅读代码时注意到的一些其他事情(有些来自我的评论):
- make_string_view( CharT x )返回一个指向临时对象的指针。您可以通过这种方式从参数生成一个string_view,但是需要通过引用传递参数。
- removed = removed or true应该是removed = true
- nl不属于那个头文件
- ……private/internal/detail命名空间中
- ……[[nodiscard]],如果不使用返回值是一个很大的错误(我认为make_string_view不符合条件)
- ……&&,以便std::forward发挥它的作用
- ……string_views。

考虑到以上所有内容,我想您可以像下面这样实现该函数(用伪代码):
// Pseudo-code sketch (not compilable C++ as written): scans str once and cuts it at every delimiter match.
// make_delim_array and any_matches are not defined here; presumably any_matches returns the length of the
// delimiter that matches at position i, or 0 when none matches — confirm against the real helpers.
vector<string_view<CharT>> split(string_view<CharT> str, Delimiters&& delims...) {
if (str.empty()) return { str }; // Get special case out of the way
vector<string_view<CharT>> res;
size_t last_index = 0; // start of the token currently being scanned
const auto delim_array = make_delim_array(std::forward<Delimiters>(delims)...);
for (size_t i = 0; i < str.length();) {
if (auto match_length = any_matches(str, i, delim_array)) {
// A delimiter starts at i: emit the text since the previous cut (possibly empty), then skip the delimiter.
res.push_back(str.substr(last_index, i - last_index));
i += match_length;
last_index = i;
} else {
++i;
}
}
res.push_back(str.substr(last_index)); // text after the last delimiter
return res;
}https://codereview.stackexchange.com/questions/214544
复制相似问题