C++でsplit
間違って消したので復旧
競技プログラミングでは基本的にboostは使えないのでsplitを作った
〜mysplit〜 istringstreamとback_inserter利用
ホワイトスペースでsplit
// Splits `str` on whitespace using an istringstream and std::back_inserter.
//
// Tokens are extracted with operator>>, so leading/trailing whitespace is
// ignored and runs of consecutive whitespace never produce empty tokens.
//
// str:     text to split.
// returns: the whitespace-separated tokens in order (empty if `str` holds
//          no non-whitespace characters).
std::vector< std::string > mysplit(const std::string &str)
{
    std::istringstream iss(str);
    std::vector< std::string > res;
    std::copy(
        std::istream_iterator< std::string >(iss),
        std::istream_iterator< std::string >(),   // default-constructed == end-of-stream iterator
        std::back_inserter(res)
    );
    return res;
}
〜mysplit1〜 find利用
文字列デリミタ使用可(delim全体が一致した位置で分割)
// Splits `str` at every occurrence of the delimiter string `delim`, using
// std::string::find.
//
// The whole of `delim` has to match for a split to happen. Empty fields are
// kept: a leading, trailing or doubled delimiter yields "" elements, and the
// result always has at least one element.
//
// str:     text to split.
// delim:   delimiter string (may be longer than one character). If it is
//          empty, `str` is returned as the single element.
// returns: the fields in order.
std::vector< std::string > mysplit1(const std::string &str, const std::string &delim)
{
    std::vector< std::string > res;

    // An empty needle matches at `current` without ever advancing it, so the
    // search loop below would never terminate. Treat it as "no delimiter".
    if( delim.empty() ){
        res.push_back( str );
        return res;
    }

    const std::string::size_type delimlen = delim.size();
    std::string::size_type current = 0;
    std::string::size_type found;
    for( ; ( found = str.find(delim, current) ) != std::string::npos; current = found + delimlen )
        res.push_back( str.substr( current, found - current ) );

    // Whatever follows the last delimiter (possibly empty).
    res.push_back( str.substr( current ) );
    return res;
}
〜mysplit2〜 find_first_of利用
複数文字のデリミタ使用可(delimに含まれるいずれか1文字で分割)
mysplit1とは違い、delim全体を1つの区切り文字列としては扱わない
std::vector< std::string > mysplit2(const std::string &str, const std::string &delim) { std::vector< std::string > res; size_t current = 0, found; for( current = 0; ( found = str.find_first_of(delim, current) ) != std::string::npos; current = found + 1 ) res.push_back( str.substr( current, found - current ) ); res.push_back( str.substr( current ) ); return res; }
〜mysplit3〜 istringstream利用
デリミタは1文字
// Splits `str` on the single character `delim` by reading it through an
// istringstream with std::getline.
//
// Empty fields between delimiters are kept, but a delimiter at the very end
// of `str` does not produce a trailing "" element, and an empty `str` gives
// an empty result.
//
// str:     text to split.
// delim:   the delimiter character.
// returns: the fields in order.
std::vector< std::string > mysplit3(const std::string &str, const char delim)
{
    std::istringstream input( str );
    std::vector< std::string > fields;

    // getline fails once nothing is left to extract, which ends the loop.
    for( std::string piece; std::getline( input, piece, delim ); )
        fields.push_back( piece );

    return fields;
}
〜c_style_split〜 strtok(string tokenize)を利用
破壊的(入力文字列中のデリミタが'\0'で上書きされる)
デリミタは複数文字指定可(文字列としてではなく、いずれか1文字で分割)
static変数を持つ->マルチスレッドに非対応->リエントラント版のstrtok_rがある(非標準ライブラリ)
空要素(連続するデリミタの間)は読み飛ばされる(下の結果を参照)
// Splits `str` in place with strtok and stores a pointer to each token in `res`.
//
// Destructive: strtok overwrites delimiter characters in `str` with '\0', and
// the pointers written to `res` point into `str` itself. It might be better
// to take `const char *str` and run strtok on a copy instead.
// strtok keeps its position in internal state, so this function must not be
// used from several threads or interleaved with another strtok scan.
//
// str:     writable, NUL-terminated buffer to tokenize.
// delim:   NUL-terminated set of delimiter characters; each one separates
//          tokens, and empty tokens are skipped.
// res:     output array; the caller must provide room for every token, no
//          bounds check is done here.
// returns: number of tokens stored in `res` (0 if any argument is NULL).
int c_style_split(char *str, const char delim[], char *res[])
{
    // A NULL `str` would make strtok continue some earlier, unrelated scan.
    if( str == NULL || delim == NULL || res == NULL )
        return 0;

    int len = 0;
    char *segment = strtok( str, delim );
    while( segment != NULL ){
        res[len++] = segment;
        segment = strtok( NULL, delim );   // NULL: continue scanning the same buffer
    }
    return len;
}
string_split.cpp
int main(){ string original = " \tABC DEFG HIJKL MNOPQ RSTU \t VWXYZ \t"; char original_c_style[] = " \tABC DEFG HIJKL MNOPQ RSTU \t VWXYZ \t"; vector<string> words = mysplit(original); cout << "mysplit \t→"; for(size_t i = 0; i < words.size(); i++) cout << "[" << words[i] << "]"; cout<<endl; vector<string> words1 = mysplit1(original, " \t"); cout << "mysplit1\t→"; for(size_t i = 0; i < words1.size(); i++) cout << "[" << words1[i] << "]"; cout<<endl; vector<string> words2 = mysplit2(original, " \t"); cout << "mysplit2\t→"; for(size_t i = 0; i < words2.size(); i++) cout << "[" << words2[i] << "]"; cout<<endl; vector<string> words3 = mysplit3(original, ' '); cout << "mysplit3\t→"; for(size_t i = 0; i < words3.size(); i++) cout << "[" << words3[i] << "]"; cout<<endl; char* words_c_style[100]; int len = c_style_split(original_c_style, " \t", words_c_style); cout << "c_style_split\t→"; for(int i=0; i < len; i++) cout << "[" << words_c_style[i] << "]"; cout<<endl<<"broken string = [" << original_c_style << "]" << endl; return 0; }
結果
mysplit →[ABC][DEFG][HIJKL][MNOPQ][RSTU][VWXYZ] mysplit1 →[][ABC DEFG HIJKL MNOPQ RSTU][ VWXYZ][] mysplit2 →[][][ABC][DEFG][HIJKL][MNOPQ][RSTU][][][VWXYZ][][] mysplit3 →[][ ABC][DEFG][HIJKL][MNOPQ][RSTU][ ][VWXYZ][ ] c_style_split →[ABC][DEFG][HIJKL][MNOPQ][RSTU][VWXYZ] broken string = [ ABC]