C++字符串分割

2017-06-01

AfxExtractSubString

表头:

BOOL AFXAPI AfxExtractSubString(CString& rString, LPCTSTR lpszFullString, int iSubString, TCHAR chSep = '\n');

参数

rString：对 CString 对象的引用，用于接收提取出的单个子字符串。

lpszFullString：包含待提取子字符串的完整字符串。

iSubString：要从 lpszFullString 中提取的子字符串的索引（从零开始）。

chSep：用于分隔子字符串的分隔符，默认为 '\n'。

返回值

如果函数成功提取了指定索引处的子字符串，则返回 TRUE；否则返回 FALSE。

例：

// 使用AfxExtractSubString分割字符串
void CSplitString::SplitString1()
{
    std::vector<long> arrPic;

    CString strContent = _T("1,2,3,4,5");
    CString strTemp;
    int iPos = 0;

    while (AfxExtractSubString(strTemp, strContent, iPos, ','))
    {
        iPos++;
        arrPic.push_back(_wtol(strTemp));
    }
}

STL find_first_of

// Hand-written split built on std::string::find_first_of.
void CSplitString::SplitString2()
{
    const std::string text("1,2,3,4,5;6;7;8;9");
    const std::string separators(",;"); // any one of these characters ends a token
    std::vector<std::string> tokens;

    // `start` is the first character of the token being collected; `hit` is the
    // position of the separator that terminates it.
    std::string::size_type start = 0;
    for (std::string::size_type hit = text.find_first_of(separators);
         hit != std::string::npos;
         hit = text.find_first_of(separators, start))
    {
        tokens.push_back(text.substr(start, hit - start));
        start = hit + 1;
    }

    // Whatever follows the last separator is the final token; nothing is added
    // when the text ends exactly on a separator.
    if (start != text.length())
    {
        tokens.push_back(text.substr(start));
    }
}

_tcstok_s

// 使用C的_tcstok分割字符串
void CSplitString::SplitString3()
{
    CString str = _T("a,b*c,d");
    TCHAR seps[] = _T(",*");//可按多个字符来分割
    TCHAR *next_token1 = NULL;
    TCHAR* token = _tcstok_s((LPTSTR)(LPCTSTR)str, seps,&next_token1);
    while (token != NULL)
    {
        TRACE("\r\nstr=%s  token=%s\r\n", str, token);
        token = _tcstok_s(NULL, seps, &next_token1);
    }
}

http://www.qiezichaodan.com/mfc_cstring_split/

http://blog.csdn.net/xjw532881071/article/details/49154911

http://www.cnblogs.com/happykoukou/p/5427268.html

词汇分割器库 Boost.Tokenizer

#include <boost/tokenizer.hpp> 
#include <string> 
#include <iostream> 

int main() 
{ 
  typedef boost::tokenizer<boost::char_separator<char> > tokenizer; 
  std::string s = "Boost C++ libraries"; 
  boost::char_separator<char> sep(" ", "+", boost::keep_empty_tokens); 
  tokenizer tok(s, sep); 
  for (tokenizer::iterator it = tok.begin(); it != tok.end(); ++it) 
    std::cout << *it << std::endl; 
}

分词Boost.regex

void TestToken()
{
    using namespace std;
    using namespace boost;
    string str("tengxun@qq.com, aa@tt.com, bb@qq.com");
    regex reg("\\w+");
    sregex_token_iterator pos(str.begin(), str.end(), reg);
    while (pos != sregex_token_iterator())
    {
        cout << "[" << *pos << "]";
        ++pos;
    }

    cout << endl;
    //如果最后一个参数args为-1，则把匹配到的字符串视为分隔符
    regex split_reg(",");
    pos = sregex_token_iterator(str.begin(), str.end(), split_reg, -1);
    while (pos != sregex_token_iterator())
    {
        cout << "[" << *pos << "]";
        ++pos;
    }
    cout << endl;
    //如果最后一个参数args为正数，则返回匹配结果的第args个子串
    regex split_sub_reg("(\\w*)@(\\w*).(\\w*)");
    pos = sregex_token_iterator(str.begin(), str.end(), split_sub_reg, 1);
    while (pos != sregex_token_iterator())
    {
        cout << "[" << *pos << "]";
        ++pos;
    }
    cout << endl;
    //匹配并指定输出顺序
    //从下面字符串中提取日期，并转换成 年月日 的顺序输出
    std::string input("01/02/2003 blahblah 04/23/1999 blahblah 11/13/1981");
    regex re("(\\d{2})/(\\d{2})/(\\d{4})"); // find a date
    int const sub_matches[] = { 3, 1, 2 }; // year，month， day
    sregex_token_iterator begin(input.begin(), input.end(), re, sub_matches), end;
    // write all the words to std::cout
    std::ostream_iterator< std::string > out_iter(std::cout, "\n");
    std::copy(begin, end, out_iter);
}

C++ 11正则表达式

#include <cstring>
#include <cwchar>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>
#include <vector>

// std::wstring version.
// Splits `in` into the pieces that lie between matches of the regular
// expression `delim` and returns them in order.
std::vector<std::wstring> ws_split(const std::wstring& in, const std::wstring& delim) {
    const std::wregex separator(delim);
    const std::wsregex_token_iterator stop;

    std::vector<std::wstring> pieces;
    // Sub-match index -1 selects the text between matches instead of the matches.
    for (std::wsregex_token_iterator cur(in.begin(), in.end(), separator, -1); cur != stop; ++cur) {
        pieces.push_back(cur->str());
    }
    return pieces;
}
// C-string (const char*) version.
// Splits the NUL-terminated string `in` into the pieces that lie between
// matches of the regular expression `delim`.
// Returns an empty vector when either pointer is null.
std::vector<std::string> c_split(const char* in, const char* delim) {
    // Both strlen and the regex constructor require a valid pointer.
    if (in == nullptr || delim == nullptr) {
        return {};
    }
    const std::regex re(delim);
    // Parentheses select the iterator-range constructor explicitly; sub-match
    // index -1 yields the text between matches.
    return std::vector<std::string>(
        std::cregex_token_iterator(in, in + std::strlen(in), re, -1),
        std::cregex_token_iterator());
}
// Wide C-string (const wchar_t*) version.
// Splits the NUL-terminated wide string `in` into the pieces that lie between
// matches of the regular expression `delim`.
// Returns an empty vector when either pointer is null.
std::vector<std::wstring> wc_split(const wchar_t* in, const wchar_t* delim) {
    // Both wcslen and the regex constructor require a valid pointer.
    if (in == nullptr || delim == nullptr) {
        return {};
    }
    const std::wregex re(delim);
    // Parentheses select the iterator-range constructor explicitly; sub-match
    // index -1 yields the text between matches.
    return std::vector<std::wstring>(
        std::wcregex_token_iterator(in, in + std::wcslen(in), re, -1),
        std::wcregex_token_iterator());
}
// Generic version: one template covering every std::basic_string
// specialization (std::string, std::wstring, ...).
// Splits `in` into the pieces that lie between matches of the regular
// expression `delim` and returns them in order.
template<typename E,
    typename TR = std::char_traits<E>,
    typename AL = std::allocator<E>,
    typename _str_type = std::basic_string<E, TR, AL>>
std::vector<_str_type> bs_split(const std::basic_string<E, TR, AL>& in, const std::basic_string<E, TR, AL>& delim) {
    // The iterator type is taken from the actual input string type so that it
    // always matches in.begin()/in.end().
    using token_iterator =
        std::regex_token_iterator<typename std::basic_string<E, TR, AL>::const_iterator>;

    const std::basic_regex<E> re(delim);
    std::vector<_str_type> tokens;
    // Sub-match index -1 yields the text between matches. Each token is built
    // from its [first, second) character range, which also works when TR or AL
    // differ from the defaults (a sub_match only converts implicitly to the
    // default std::basic_string<E>).
    for (token_iterator it(in.begin(), in.end(), re, -1), last; it != last; ++it) {
        tokens.emplace_back(it->first, it->second);
    }
    return tokens;
}

参考：

http://www.cplusplus.com/faq/sequences/strings/split/