URL加解密 URLEncoder

2018-11-16

url

urlencode编码的基本规则

字符“a”-“z”、“A”-“Z”、“0”-“9”、“.”、“-”、“*”和“_”都不被编码，维持原值；
空格“ ”被转换为加号“+”。
其他字符先按UTF-8编码为字节序列，其中每个字节都被表示成“%XY”格式的由3个字符组成的字符串，XY为该字节的十六进制值(特别需要注意：这里是大写形式的hexchar)。

urlencode编码

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>

/* Upper-case hexadecimal digits used to build the "%XY" escape sequences. */
static const char hexchars[] = "0123456789ABCDEF";

/**
 * @brief URLEncode : percent-encode (application/x-www-form-urlencoded style)
 *                    the first strsz bytes of "str"
 *
 * 'A'-'Z', 'a'-'z', '0'-'9', '.', '-', '*' and '_' are copied unchanged,
 * a space becomes '+', and every other byte becomes "%XY" where XY is the
 * byte value in upper-case hexadecimal.
 *
 * @param str:  the bytes to encode (need not be NUL terminated)
 * @param strsz:  the number of bytes to read from str
 * @param result:  the result buffer
 * @param resultsz: the number of bytes that may be written to result
 *
 * @return: >=0 the number of bytes written to result
 *              -1 strsz or resultsz is negative
 *              -2 result is too small to hold the whole encoded string
 *
 * Note:
 * 1) the output is NOT NUL terminated; use the return value as its length
 *
 * 2) an input byte expands to at most 3 output bytes, so resultsz = 3*strsz
 *    is always sufficient
 */
int URLEncode(const char *str, const int strsz, char *result, const int resultsz)
{
    int i, j;

    if(strsz < 0 || resultsz < 0)
        return -1;

    for(i = 0, j = 0; i < strsz; i++)
    {
        /* Work on the unsigned value so bytes >= 0x80 index hexchars correctly. */
        const unsigned char ch = (unsigned char)str[i];
        const int unreserved =
            (ch >= 'A' && ch <= 'Z') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9') ||
            ch == '.' || ch == '-' || ch == '*' || ch == '_';

        if(unreserved || ch == ' ')
        {
            /* One output byte is needed; report the shortage even when no
             * input byte has been consumed yet (e.g. resultsz == 0). */
            if(resultsz - j < 1)
                return -2;
            result[j++] = (ch == ' ') ? '+' : (char)ch;
        }
        else
        {
            /* An escape is written completely or not at all. */
            if(resultsz - j < 3)
                return -2;
            result[j++] = '%';
            result[j++] = hexchars[ch >> 4];
            result[j++] = hexchars[ch & 0xF];
        }
    }

    return j;
}



/*
 * Reads the file named by the single command-line argument in 1024-byte
 * chunks, URL-encodes each chunk and writes the encoded bytes to stdout.
 *
 * Returns 0 on success and a negative value on failure (-1 wrong usage,
 * -2 file cannot be opened, otherwise the failing read/URLEncode result).
 */
int main(int argc, char *argv[])
{
    char input[1024];
    char encoded[1024 * 3];   /* worst case: every input byte becomes "%XY" */
    int status;
    int fd;

    if(argc != 2)
    {
        printf("please input the encoding filename\n");
        return -1;
    }

    fd = open(argv[1], O_RDONLY);
    if(fd == -1)
    {
        printf("open file %s failure\n", argv[1]);
        return -2;
    }

    for(;;)
    {
        int k;

        status = read(fd, input, sizeof(input));
        if(status <= 0)            /* 0: end of file, <0: read error */
            break;

        status = URLEncode(input, status, encoded, sizeof(encoded));
        if(status < 0)
            break;

        for(k = 0; k < status; k++)
            putchar(encoded[k]);
    }

    if(status < 0)
        printf("encode data failure\n");

    close(fd);
    return status;
}

urldecode解码

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>

static unsigned char hexchars[] = "0123456789ABCDEF";

/* Map one hexadecimal digit (upper or lower case) to 0..15; -1 if ch is not a hex digit. */
static int hex_value(char ch)
{
    if(ch >= '0' && ch <= '9')
        return ch - '0';
    if(ch >= 'A' && ch <= 'F')
        return ch - 'A' + 10;
    if(ch >= 'a' && ch <= 'f')
        return ch - 'a' + 10;
    return -1;
}

/**
 * @brief URLDecode : decode a urlencoded byte sequence
 *
 * '+' becomes a space, "%XY" (hex digits in either case) becomes the byte
 * 0xXY, and 'A'-'Z', 'a'-'z', '0'-'9', '.', '-', '*', '_' are copied
 * unchanged. Any other input byte is treated as an error.
 *
 * @param str:  the urlencoded bytes (need not be NUL terminated)
 * @param strsz:  the number of bytes to read from str
 * @param result:  the result buffer
 * @param resultsz: the number of bytes that may be written to result
 * @param last_pos: if not NULL, receives the position of the first input
 *                  byte that was NOT consumed. It equals str + strsz when
 *                  everything was decoded; it points at a trailing
 *                  incomplete "%X" escape, or at the unread remainder when
 *                  result is full. On an error return it is set to str.
 *
 * @return: >=0 the number of bytes written to result
 *              -1 strsz or resultsz is negative
 *              -2 the input contains an invalid byte or escape
 *
 * Note:
 * 1) decoding never grows the data, so resultsz = strsz is always sufficient
 *
 * 2) the output is NOT NUL terminated; use the return value as its length
 */
int URLDecode(const char *str, const int strsz, char *result, const int resultsz, const char **last_pos)
{
    int i = 0, j = 0;

    if(last_pos)
        *last_pos = str;
    if(strsz < 0 || resultsz < 0)
        return -1;

    while(i < strsz && j < resultsz)
    {
        const char ch = str[i];

        if(ch == '+')
        {
            result[j++] = ' ';
            i += 1;
        }
        else if(ch == '%')
        {
            int hi, lo;

            /* The escape is cut off by the end of the input: stop here so
             * the caller can retry once more data is available. */
            if(i + 3 > strsz)
                break;

            hi = hex_value(str[i + 1]);
            lo = hex_value(str[i + 2]);
            if(hi < 0 || lo < 0)
                return -2;

            /* Assemble the byte in an int so the shift cannot overflow a char. */
            result[j++] = (char)((hi << 4) | lo);
            i += 3;
        }
        else if((ch >= 'A' && ch <= 'Z') ||
                (ch >= 'a' && ch <= 'z') ||
                (ch >= '0' && ch <= '9') ||
                ch == '.' || ch == '-' || ch == '*' || ch == '_')
        {
            result[j++] = ch;
            i += 1;
        }
        else
        {
            return -2;
        }
    }

    if(last_pos)
        *last_pos = str + i;
    return j;
}



/*
 * Reads the file named by the single command-line argument, URL-decodes it
 * and writes the decoded bytes to stdout.
 *
 * A "%XY" escape may be split across two read() calls. URLDecode reports the
 * first unconsumed byte through last_pos; those pending bytes are moved to
 * the front of buf and the next read() appends after them.
 *
 * Returns 0 on success and a negative value on failure:
 *   -1 wrong usage or read error, -2 file cannot be opened,
 *   -3 the file ends inside an incomplete escape, -4 invalid encoded data.
 */
int main(int argc, char *argv[])
{
    int fd = -1;
    char buf[4096], result[4096];
    char *start_pos;
    const char *last_pos;
    int ret, sz;
    int i = 0;

    if(argc != 2)
    {
        printf("please input the encoding filename\n");
        return -1;
    }

    if((fd = open(argv[1], O_RDONLY)) == -1)
    {
        printf("open file %s failure\n", argv[1]);
        return -2;
    }

    start_pos = buf;
    last_pos = NULL;

    while((ret = read(fd, start_pos, buf + 4096 - start_pos)) >= 0)
    {
        if(ret == 0)
        {
            if(start_pos == buf)
                break;
            else
            {
                /* End of file with an unfinished escape still pending. */
                ret = -3;
                break;
            }
        }
        sz = URLDecode(buf, start_pos - buf + ret, result, 4096, &last_pos);
        if(sz < 0)
        {
            ret = -4;
            break;
        }

        if(last_pos != start_pos + ret)
        {
            /* Source and destination both lie inside buf and may overlap
             * (or even coincide), so memmove is required; memcpy would be
             * undefined behaviour here. */
            size_t pending = (size_t)(start_pos + ret - last_pos);

            memmove(buf, last_pos, pending);
            start_pos = buf + pending;
        }
        else
        {
            start_pos = buf;
        }

        for(i = 0; i < sz; i++)
            printf("%c", result[i]);

    }

    if(ret < 0)
    {
        printf("decode data failure\n");
    }
    close(fd);
    return ret;
}

(C++)UrlEncode的标准实现

// Converts a value to its upper-case hexadecimal digit character:
// 0..9 map to '0'..'9' and 10..15 map to 'A'..'F' (ASCII arithmetic).
unsigned char ToHex(unsigned char x)
{
    if (x > 9)
    {
        return x + 55;   // 55 == 'A' - 10
    }
    return x + 48;       // 48 == '0'
}

// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
// its numeric value 0..15.
//
// Any other character is a programming error and trips the assertion. When
// assertions are compiled out (NDEBUG) the function returns 0 rather than an
// uninitialised value.
unsigned char FromHex(unsigned char x)
{
    if (x >= '0' && x <= '9') return x - '0';
    // Only A-F / a-f are hexadecimal digits; letters G-Z must be rejected.
    if (x >= 'A' && x <= 'F') return x - 'A' + 10;
    if (x >= 'a' && x <= 'f') return x - 'a' + 10;
    assert(0);
    return 0;
}

// Percent-encodes str byte by byte: alphanumerics and '-', '_', '.', '~'
// are kept, a space becomes '+', and every other byte becomes "%XY" with
// the two hexadecimal digits produced by ToHex.
std::string UrlEncode(const std::string &str)
{
    std::string encoded;
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const char c = *it;
        // <cctype> functions and the shifts below need the unsigned value.
        const unsigned char byte = (unsigned char)c;

        if (isalnum(byte) || c == '-' || c == '_' || c == '.' || c == '~')
        {
            encoded += c;
            continue;
        }
        if (c == ' ')
        {
            encoded += "+";
            continue;
        }

        encoded += '%';
        encoded += ToHex(byte >> 4);    // high nibble
        encoded += ToHex(byte % 16);    // low nibble
    }
    return encoded;
}

// Reverses UrlEncode: '+' becomes a space, "%XY" becomes the byte whose
// hexadecimal digits are X and Y (converted by FromHex), and every other
// character is copied unchanged.
//
// An escape that is not followed by two more characters trips the assertion.
std::string UrlDecode(const std::string &str)
{
    std::string decoded;
    const size_t total = str.length();
    size_t pos = 0;

    while (pos < total)
    {
        const char c = str[pos];
        if (c == '+')
        {
            decoded += ' ';
        }
        else if (c == '%')
        {
            assert(pos + 2 < total);
            const unsigned char high = FromHex((unsigned char)str[pos + 1]);
            const unsigned char low = FromHex((unsigned char)str[pos + 2]);
            decoded += high * 16 + low;
            pos += 2;               // skip the two hex digits just consumed
        }
        else
        {
            decoded += c;
        }
        ++pos;
    }
    return decoded;
}

URL编码

#include <stdio.h>
#include <ctype.h>

/* Translation tables indexed by byte value: a non-zero entry is the
 * character to emit as-is, a zero entry means "percent-encode this byte". */
char rfc3986[256] = {0};
char html5[256] = {0};

/*
 * Percent-encodes the NUL-terminated string s into enc using table tb
 * (256 entries, see above). enc is always NUL terminated, including for
 * empty input.
 *
 * caller responsible for memory: enc must hold 3 * strlen(s) + 1 bytes.
 */
void encode(const char *s, char *enc, char *tb)
{
    static const char digits[] = "0123456789ABCDEF";

    for (; *s; s++) {
        /* Index with the unsigned byte value: a plain (signed) char would
         * be negative for bytes >= 0x80 and read outside the table. */
        const unsigned char c = (unsigned char)*s;

        if (tb[c]) {
            *enc++ = tb[c];
        } else {
            *enc++ = '%';
            *enc++ = digits[c >> 4];
            *enc++ = digits[c & 0x0F];
        }
    }
    *enc = '\0';
}

/* Builds the RFC 3986 and HTML5 translation tables, then prints the
 * RFC 3986 percent-encoding of a sample URL. */
int main(void)
{
    const char url[] = "http://foo bar/";
    /* sizeof url includes the terminating NUL, so (sizeof url - 1) is the
     * string length. Every byte may expand to "%XY", plus one byte for the
     * terminator. Using sizeof keeps this a fixed-size array and needs no
     * strlen() declaration. */
    char enc[(sizeof url - 1) * 3 + 1];

    int i;
    for (i = 0; i < 256; i++) {
        /* RFC 3986 unreserved characters pass through unchanged. */
        rfc3986[i] = isalnum(i)||i == '~'||i == '-'||i == '.'||i == '_'
            ? i : 0;
        /* HTML5 form encoding keeps '*' instead of '~' and maps ' ' to '+'. */
        html5[i] = isalnum(i)||i == '*'||i == '-'||i == '.'||i == '_'
            ? i : (i == ' ') ? '+' : 0;
    }

    encode(url, enc, rfc3986);
    puts(enc);

    return 0;
}

base64编码/解码

// The 64-character base64 alphabet: the position of a character in this
// string is the 6-bit value it represents ('A' = 0 ... '/' = 63).
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// Returns true when c belongs to the base64 alphabet (letters, digits,
// '+' or '/'). The padding character '=' is not part of the alphabet.
static inline bool is_base64(unsigned char c)
{
    if (c == '+' || c == '/')
    {
        return true;
    }
    return isalnum(c) != 0;
}

// Splits three input bytes into the four 6-bit values that base64 encodes.
static inline void base64_split_triple(const unsigned char triple[3], unsigned char sextets[4])
{
    sextets[0] = (triple[0] & 0xfc) >> 2;
    sextets[1] = ((triple[0] & 0x03) << 4) + ((triple[1] & 0xf0) >> 4);
    sextets[2] = ((triple[1] & 0x0f) << 2) + ((triple[2] & 0xc0) >> 6);
    sextets[3] = triple[2] & 0x3f;
}

// Base64-encodes in_len bytes from bytes_to_encode into encoded_buffer.
//
// Every complete group of 3 input bytes yields 4 output characters; a final
// group of 1 or 2 bytes is zero-padded, yields 2 or 3 characters and is
// completed with '=' so the output length is a multiple of 4.
//
// The output is not NUL terminated and its capacity is not checked: the
// caller must provide at least 4 * ((in_len + 2) / 3) bytes.
//
// out_len receives the number of characters written; the same value is returned.
unsigned int base64_encode(const unsigned char* bytes_to_encode, unsigned int in_len, unsigned char* encoded_buffer, unsigned int& out_len)
{
    unsigned char triple[3] = { 0, 0, 0 };
    unsigned char sextets[4] = { 0, 0, 0, 0 };
    int filled = 0;   // number of bytes currently collected in triple

    out_len = 0;
    for (unsigned int consumed = 0; consumed < in_len; ++consumed)
    {
        triple[filled++] = bytes_to_encode[consumed];
        if (filled < 3)
        {
            continue;
        }

        base64_split_triple(triple, sextets);
        for (int k = 0; k < 4; ++k)
        {
            encoded_buffer[out_len++] = base64_chars[sextets[k]];
        }
        filled = 0;
    }

    if (filled != 0)
    {
        // Zero the missing bytes of the last, incomplete group.
        for (int k = filled; k < 3; ++k)
        {
            triple[k] = '\0';
        }
        base64_split_triple(triple, sextets);

        // n leftover bytes carry data in the first n + 1 sextets only.
        for (int k = 0; k <= filled; ++k)
        {
            encoded_buffer[out_len++] = base64_chars[sextets[k]];
        }
        for (int pad = filled; pad < 3; ++pad)
        {
            encoded_buffer[out_len++] = '=';
        }
    }

    return out_len;
}

// Decodes base64 text from encoded_string into decoded_buffer.
//
// Reading stops at the first of: in_len characters consumed, a '='
// padding character, or a character outside the base64 alphabet.
// Every complete group of 4 characters yields 3 bytes; a trailing group of
// 2 or 3 characters yields 1 or 2 bytes (a lone trailing character yields
// nothing).
//
// The capacity of decoded_buffer is not checked by this function.
// out_len receives the number of bytes written; the same value is returned.
unsigned int base64_decode(const unsigned char* encoded_string, unsigned int in_len, unsigned char* decoded_buffer, unsigned int& out_len)
{
    size_t i = 0;   // number of characters collected in char_array_4
    size_t j = 0;
    int in_ = 0;    // read position in encoded_string
    unsigned char char_array_3[3] = { 0, 0, 0 };
    unsigned char char_array_4[4] = { 0, 0, 0, 0 };

    out_len = 0;
    while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
    {
        char_array_4[i++] = encoded_string[in_]; in_++;
        if (i == 4)
        {
            // Replace each character by its 6-bit value (its index in the alphabet).
            for (i = 0; i < 4; i++)
            {
                char_array_4[i] = static_cast<unsigned char>(base64_chars.find(char_array_4[i]));
            }

            // Repack four 6-bit values into three bytes.
            char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
            char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
            char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

            for (i = 0; i < 3; i++)
            {
                decoded_buffer[out_len++] = char_array_3[i];
            }
            i = 0;
        }
    }

    // Handle a final, incomplete group of i (1..3) characters.
    if (i)
    {
        for (j = i; j < 4; j++)
        {
            char_array_4[j] = 0;
        }

        // NOTE: the zero-filled slots are not in the alphabet, so find()
        // returns npos and the cast stores its low byte. This is harmless
        // because only the first i - 1 output bytes are emitted below and
        // those depend solely on the i real characters.
        for (j = 0; j < 4; j++)
        {
            char_array_4[j] = static_cast<unsigned char>(base64_chars.find(char_array_4[j]));
        }

        char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
        char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

        // i characters carry i - 1 complete bytes.
        for (j = 0; (j < i - 1); j++)
        {
            decoded_buffer[out_len++] = char_array_3[j];
        }
    }
    return out_len;
}

C++

#include <QByteArray>
#include <iostream>

// Percent-encodes a sample URL with Qt's QByteArray and prints the result.
int main()
{
   QByteArray text("http://foo bar/");
   QByteArray encoded = text.toPercentEncoding();

   std::cout << encoded.data() << '\n';
   return 0;
}

using System;

namespace URLEncode
{
    /// <summary>
    /// Console sample that prints the escaped form of a URL.
    /// </summary>
    internal class Program
    {
        /// <summary>Entry point: encodes a sample URL and writes it to the console.</summary>
        private static void Main(string[] args)
        {
            string escaped = Encode("http://foo bar/");
            Console.WriteLine(escaped);
        }

        /// <summary>Escapes <paramref name="uri"/> with <c>Uri.EscapeDataString</c>.</summary>
        private static string Encode(string uri) => Uri.EscapeDataString(uri);
    }
}

package main

import (
    "fmt"
    "net/url"
)

// main prints the query-escaped form of a sample URL.
func main() {
    escaped := url.QueryEscape("http://foo bar/")
    fmt.Println(escaped)
}

Java

The built-in URLEncoder in Java converts the space “ “ into a plus-sign “+” instead of “%20”:

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

/** Prints the URL-encoded form of a sample URL using {@link URLEncoder}. */
public class Main
{
    /**
     * Encodes the sample string as UTF-8 form data and prints it.
     *
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static void main(String[] args) throws UnsupportedEncodingException
    {
        final String normal = "http://foo bar/";
        System.out.println(URLEncoder.encode(normal, "utf-8"));
    }
}

Python

# quote() lives in urllib.parse on Python 3 and directly in urllib on
# Python 2; import it from whichever location exists.
try:
    from urllib.parse import quote  # Python 3
except ImportError:
    from urllib import quote  # Python 2

s = 'http://foo/bar/'
# '/' is in quote()'s default safe set, so only ':' is escaped here.
s = quote(s)

There is also urllib.quote_plus() (urllib.parse.quote_plus() in Python 3), which additionally encodes spaces as “+” signs

展开全文 >>

C++字符转换

2018-09-27

UTF_8与GBK

字符集编码(GBK，BIG5，UNICODE)与C++的string/wstring

C++11与Unicode及使用标准库进行UTF-8、UTF-16、UCS2、UCS4/UTF-32编码转换

C++11新特性转换GB2312(ANSI)、UTF8、Unicode编码文件

在windows平台下sizeof(wchar_t)为2，而在linux平台下sizeof(wchar_t)为4；
在windows平台下宽字符(或字符串)字面量使用UTF-16编码，linux平台下使用UTF-32编码。

MultiByteToWideChar、WideCharToMultiByte

// Converts a UTF-8 string to the system ANSI code page (CP_ACP), going
// through UTF-16 as the Win32 API requires.
// NOTE(review): the result is GBK only when the system ANSI code page is
// 936 (Simplified Chinese) - confirm for the target machines.
//
// Returns an empty string when either conversion step fails.
std::string UTF8ToGBK(const std::string& strUTF8)
{
    // First call with a NULL buffer asks for the required size in wide
    // characters, terminating NUL included (input length is -1).
    int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
    if (len <= 0)
        return std::string();

    // Owning containers replace new[]/delete[] so nothing leaks if a later
    // allocation throws.
    std::wstring wide(len, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, &wide[0], len) <= 0)
        return std::string();

    len = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return std::string();

    std::string strTemp(len, '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &strTemp[0], len, NULL, NULL) <= 0)
        return std::string();

    // len counted the terminating NUL written by the API; drop it.
    strTemp.resize(len - 1);
    return strTemp;
}

// Converts a string in the system ANSI code page (CP_ACP) to UTF-8, going
// through UTF-16 as the Win32 API requires.
// NOTE(review): the input is interpreted as GBK only when the system ANSI
// code page is 936 (Simplified Chinese) - confirm for the target machines.
//
// Returns an empty string when either conversion step fails.
std::string GBKToUTF8(const std::string& strGBK)
{
    // First call with a NULL buffer asks for the required size in wide
    // characters, terminating NUL included (input length is -1).
    int n = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
    if (n <= 0)
        return std::string();

    // Owning containers replace new[]/delete[] so nothing leaks if a later
    // allocation throws.
    std::wstring wide(n, L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], n) <= 0)
        return std::string();

    n = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (n <= 0)
        return std::string();

    std::string strOutUTF8(n, '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &strOutUTF8[0], n, NULL, NULL) <= 0)
        return std::string();

    // n counted the terminating NUL written by the API; drop it.
    strOutUTF8.resize(n - 1);
    return strOutUTF8;
}

MultiByteToWideChar、WideCharToMultiByte可以改变多字节与宽字节

CodePage参数控制字符集：

CodePage——指定要转换成的字符集代码页，它可以是任何已经安装的或系统自带的字符集，可选择以下代码页：

CP_ACP   //当前系统ANSI代码页 
CP_MACCP   //当前系统Macintosh代码页 
CP_OEMCP   //当前系统OEM代码页，一种原始设备制造商硬件扫描码 
CP_SYMBOL //Symbol代码页，用于Windows 2000及以后版本
CP_THREAD_ACP //当前线程ANSI代码页，用于Windows 2000及以后版本
CP_UTF7 //UTF-7，设置此值时lpDefaultChar和lpUsedDefaultChar都必须为NULL 
CP_UTF8 //UTF-8，设置此值时lpDefaultChar和lpUsedDefaultChar都必须为NULL

用 GetLocaleInfo 函数获取当前系统的代码页，936: 简体中文, 950: 繁体中文，949：韩文

Code Page Identifiers

ATL封装`_bstr_`

#include <comutil.h>  
#pragma comment(lib, "comsuppw.lib")

// Converts a wide string to a narrow string by letting _bstr_t perform the
// conversion: the wrapper is built from the wide text and then read back
// through its char* conversion operator.
string ws2s(const wstring& ws)
{
    _bstr_t bridge(ws.c_str());
    // The narrow buffer belongs to bridge, so copy it before bridge goes away.
    const char* narrow = (char*)bridge;
    return string(narrow);
}

// Converts a narrow string to a wide string by letting _bstr_t perform the
// conversion: the wrapper is built from the narrow text and then read back
// through its wchar_t* conversion operator.
wstring s2ws(const string& s)
{
    _bstr_t bridge(s.c_str());
    // The wide buffer belongs to bridge, so copy it before bridge goes away.
    const wchar_t* wide = (wchar_t*)bridge;
    return wstring(wide);
}

iconv

/*
 * Converts inlen bytes of inbuf from from_charset to to_charset with iconv
 * and stores the result, always NUL terminated, in outbuf.
 *
 * outlen is the total capacity of outbuf in bytes; one byte is reserved for
 * the terminating NUL, so the converted text may use at most outlen - 1.
 *
 * Returns 0 on success and -1 on failure (unknown charset, invalid input,
 * or output buffer too small).
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,
        char *outbuf, size_t outlen) {
    iconv_t cd;
    char *in = inbuf;
    char *out = outbuf;
    size_t outleft;
    size_t rc;

    if (outbuf == NULL || outlen == 0)
        return -1;

    /* iconv_open reports failure with (iconv_t)-1, not 0. */
    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    memset(outbuf, 0, outlen);
    outleft = outlen - 1;   /* keep the last byte for the terminator */

    /* iconv advances in/out and decrements the remaining counts. */
    rc = iconv(cd, &in, &inlen, &out, &outleft);

    /* Release the descriptor on every path, including conversion errors. */
    iconv_close(cd);

    if (rc == (size_t)-1)
        return -1;

    /* Terminate right after the last converted byte. */
    *out = '\0';

    return 0;
}

/* Converts UTF-8 text to GB2312 by delegating to code_convert; returns
 * whatever code_convert returns. */
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    char source_charset[] = "utf-8";
    char target_charset[] = "gb2312";

    return code_convert(source_charset, target_charset, inbuf, inlen, outbuf, outlen);
}

/* Converts GB2312 text to UTF-8 by delegating to code_convert; returns
 * whatever code_convert returns. */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    char source_charset[] = "gb2312";
    char target_charset[] = "utf-8";

    return code_convert(source_charset, target_charset, inbuf, inlen, outbuf, outlen);
}

使用C++11标准库进行编码转换

下面是一段测试代码，print_code_unit_sequence函数模板用于输出字符串的码元序列。

#include <string>
#include <iostream>
#include <iomanip>
#include <type_traits>

// Prints every code unit of str to std::cout as "0x" followed by
// zero-padded upper-case hexadecimal digits and a space. The number of
// digits is 2 for char, 4 for char16_t and 8 for char32_t strings; any
// other character type is rejected at compile time.
template<typename tStringType, typename tTraits = typename tStringType::traits_type>
void print_code_unit_sequence(tStringType str)
{
  typedef typename tTraits::char_type unit_t;
  static_assert(std::is_same<unit_t, char>::value || std::is_same<unit_t, char16_t>::value || std::is_same<unit_t, char32_t>::value, "error");
  typedef typename std::make_unsigned<unit_t>::type unsigned_unit_t;
  typedef typename std::make_unsigned<typename tTraits::int_type>::type printable_t;

  int digits = 8;
  if (std::is_same<char, unit_t>::value) {
    digits = 2;
  } else if (std::is_same<char16_t, unit_t>::value) {
    digits = 4;
  }

  for (typename tStringType::const_iterator it = str.begin(); it != str.end(); ++it) {
    // Go through the unsigned code-unit type first so a negative char is
    // not sign-extended, then widen to an integer type that prints as a number.
    const unsigned_unit_t raw = static_cast<unsigned_unit_t>(*it);
    const printable_t value = static_cast<printable_t>(raw);
    std::cout << "0x" << std::hex << std::uppercase << std::setw(digits) << std::setfill('0') << value << ' ';
  }
}

// Prints the code units of the same character encoded as UTF-8, UTF-16 and
// UCS4, one encoding per output line.
int main()
{
  std::cout << "utf-8: ";
  std::string u8_str = u8"𪚥"; // utf-8
  print_code_unit_sequence(u8_str);
  std::cout << std::endl;

  std::cout << "utf-16: ";
  std::u16string u16_str = u"𪚥"; // utf-16
  print_code_unit_sequence(u16_str);
  std::cout << std::endl;

  std::cout << "ucs4: ";
  std::u32string u32_str = U"𪚥"; // ucs4
  print_code_unit_sequence(u32_str);
  std::cout << std::endl;
}

C++11标准库在头文件<codecvt>中定义了3个Unicode编码转换的Facet

std::codecvt_utf8 封装了UTF-8与UCS2及UTF-8与UCS4的编码转换
std::codecvt_utf16 封装了UTF-16与UCS2及UTF-16与UCS4的编码转换
std::codecvt_utf8_utf16 封装了UTF-8与UTF-16的编码转换

当要转换的字符串为std::basic_string时，使用头文件<locale>中定义的std::wstring_convert类模板会带来极大的方便。

// Declaration of the standard class template std::wstring_convert, shown
// here for reference:
//   Codecvt    - the conversion facet that performs the actual conversion
//   Elem       - the wide character type (wchar_t by default)
//   Wide_alloc - allocator used for the wide string
//   Byte_alloc - allocator used for the byte string
template<class Codecvt,
  class Elem = wchar_t,
  class Wide_alloc = std::allocator<Elem>,
  class Byte_alloc = std::allocator<char>>
class wstring_convert;

UTF-8与UTF-16编码转换

UTF-8与UTF-16的编码转换使用std::codecvt_utf8_utf16类模板，其中Elem用于存储UTF-16码元，可以是char16_t、char32_t或wchar_t（这些类型都至少能够存储16bit）。

// Converter between UTF-8 byte strings and UTF-16 strings stored in char16_t.
using utf8_utf16_facet = std::codecvt_utf8_utf16<char16_t>;
std::wstring_convert<utf8_utf16_facet, char16_t> cvt;

// utf-8 to utf-16
std::u16string u16_cvt_str = cvt.from_bytes(u8_source_str);
// utf-16 to utf-8
std::string u8_cvt_str = cvt.to_bytes(u16_cvt_str);

UTF-8与UCS2编码转换及UTF-8与UCS4编码转换

UTF-8与UCS2或UCS4编码转换使用std::codecvt_utf8类模板，当Elem为char16_t时转换为UTF-8与UCS2，当Elem为char32_t时转换为UTF-8与UCS4，当Elem为wchar_t时转换取决于实现。

// UTF-8 <-> UCS2: the wide side uses char16_t.
using utf8_ucs2_facet = std::codecvt_utf8<char16_t>;
std::wstring_convert<utf8_ucs2_facet, char16_t> utf8_ucs2_cvt;
// utf-8 to ucs2
std::u16string ucs2_cvt_str = utf8_ucs2_cvt.from_bytes(u8_source_str);
// ucs2 to utf-8
std::string u8_str_from_ucs2 = utf8_ucs2_cvt.to_bytes(ucs2_cvt_str);

// UTF-8 <-> UCS4: the wide side uses char32_t.
using utf8_ucs4_facet = std::codecvt_utf8<char32_t>;
std::wstring_convert<utf8_ucs4_facet, char32_t> utf8_ucs4_cvt;
// utf-8 to ucs4
std::u32string ucs4_cvt_str = utf8_ucs4_cvt.from_bytes(u8_source_str);
// ucs4 to utf-8
std::string u8_str_from_ucs4 = utf8_ucs4_cvt.to_bytes(ucs4_cvt_str);

UTF-16与UCS2编码转换及UTF-16与UCS4编码转换

UTF-16转UCS2或UCS4使用std::codecvt_utf16类模板

这里以UTF-16与UCS4的转换为例Elem为char32_t，UTF-16与UCS2的转换类似只是Elem需为char16_t。
由于std::wstring_convert是用于在byte string和wide string之间转换，使用std::codecvt_utf16时UTF-16字符串作为byte string，因此使用这个转换时就需要考虑字节序的问题。std::codecvt_utf16类模板的第3个模板参数Mode类型为std::codecvt_mode

// Bit flags accepted by the Mode template parameter of the <codecvt>
// facets (reproduced from the standard library for reference). The values
// are distinct bits, so they can be combined with '|'.
enum codecvt_mode {
  consume_header = 4,   // process a BOM at the start of the input byte string
  generate_header = 2,  // write a BOM at the start of the output byte string
  little_endian = 1     // treat the byte string as little endian (default: big endian)
};

consume_header 告诉codecvt需要处理输入的byte string中的BOM(Byte Order Mark)
generate_header 告诉codecvt在输出的byte string中添加BOM
little_endian 告诉codecvt将byte string的字节序视为小端(Little Endian)，默认为大端(Big Endian)

// std::codecvt_utf16 is a class template, so its arguments must be spelled
// out: <Elem, Maxcode, Mode>. The byte order is selected through Mode;
// 0x10ffff is the library default for Maxcode.
std::wstring_convert<std::codecvt_utf16<char32_t, 0x10ffff, std::little_endian>, char32_t> utf16le_cvt; // little endian
std::wstring_convert<std::codecvt_utf16<char32_t>, char32_t> utf16be_cvt; // default big endian
std::u32string u32_str_from_le = utf16le_cvt.from_bytes(u16le_byte_str); // utf-16 to ucs4
std::u32string u32_str_from_be = utf16be_cvt.from_bytes(u16be_byte_str); // utf-16 to ucs4

BOM(Byte Order Mark)

字节序标记是插入到以UTF-8、UTF-16或UTF-32编码Unicode文件开头的特殊标记，用于标识文本编码及字节序。

UTF-8 0xEF 0xBB 0xBF
UTF-16 BE 0xFE 0xFF
UTF-16 LE 0xFF 0xFE
UTF-32 BE 0x00 0x00 0xFE 0xFF
UTF-32 LE 0xFF 0xFE 0x00 0x00

// std::codecvt_utf16 is a class template, so its arguments must be spelled
// out: <Elem, Maxcode, Mode>. std::consume_header makes the facet process a
// BOM at the start of the input byte string.
// NOTE(review): the template arguments were lost from the original
// listing; consume_header is assumed from the surrounding BOM discussion -
// confirm against the source article.
std::wstring_convert<std::codecvt_utf16<char32_t, 0x10ffff, static_cast<std::codecvt_mode>(std::consume_header | std::little_endian)>, char32_t> utf16le_cvt; // little endian
std::wstring_convert<std::codecvt_utf16<char32_t, 0x10ffff, std::consume_header>, char32_t> utf16be_cvt; // default big endian
std::u32string u32_str_from_le = utf16le_cvt.from_bytes(u16le_byte_str); // utf-16 to ucs4
std::u32string u32_str_from_be = utf16be_cvt.from_bytes(u16be_byte_str); // utf-16 to ucs4

当UCS4转UTF-16时输出为byte string，可以通过设置Mode为std::generate_header来使输出带BOM，下面的代码通过UCS4转UTF-16 LE和UTF-16 BE演示了std::generate_header的使用

// Conversion modes: both write a BOM, the first one also selects little endian.
constexpr std::codecvt_mode utf16le_bom_mode = static_cast<std::codecvt_mode>(std::generate_header | std::little_endian);
constexpr std::codecvt_mode utf16be_bom_mode = static_cast<std::codecvt_mode>(std::generate_header);

// little endian
std::wstring_convert<std::codecvt_utf16<char32_t, 0x10ffff, utf16le_bom_mode>, char32_t> utf16le_cvt;
// default big endian
std::wstring_convert<std::codecvt_utf16<char32_t, 0x10ffff, utf16be_bom_mode>, char32_t> utf16be_cvt;

// ucs4 to utf-16 little endian with BOM
std::string u16le_byte_str = utf16le_cvt.to_bytes(u32_str);
// ucs4 to utf-16 big endian with BOM
std::string u16be_byte_str = utf16be_cvt.to_bytes(u32_str);

使用std::wstring_convert进行字符编码转换

C++11新增的std::wstring_convert可以很方便地在std::string和std::wstring之间进行转换。例如，把一个std::wstring转换成以UTF-8编码的std::string：

// Converter whose byte side is UTF-8 and whose wide side is wchar_t.
typedef std::codecvt_utf8<wchar_t> utf8_wide_facet;
std::wstring_convert<utf8_wide_facet> converter;
// Wide string literal -> UTF-8 encoded std::string.
std::string string = converter.to_bytes(L"这是一个宽字符串");

反过来，把一个以UTF-8编码的std::string转换成std::wstring：

// Converter whose byte side is UTF-8 and whose wide side is wchar_t.
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
// The literal must end with a plain ASCII double quote; the bytes are the
// UTF-8 encoding of the three characters U+5B57 U+7B26 U+4E32 ("字符串").
std::wstring wide_string = converter.from_bytes("\xe5\xad\x97\xe7\xac\xa6\xe4\xb8\xb2");

std::wstring_convert使用模板参数中指定的codecvt进行实际的转换工作，也就是说，std::string使用哪种字符编码由这个codecvt来决定。上面的例子用的是std::codecvt_utf8，即UTF-8编码。理论上，指定不同的codecvt，即可支持各种字符编码。但是，如何得到合适codecvt则是不小的问题。

为了获取其它字符编码的codecvt，需要使用std::codecvt_byname，这个类可以通过字符编码的名称来创建一个codecvt。这看起来挺不错，但遗憾的是，字符编码的名称并没有统一的标准，各个平台的支持情况都不一样。例如，在Windows下可以使用“chs”来创建简体中文编码的codecvt，在Mac OS X下则要使用“zh_cn.gb2312”；甚至在Mac OS X下，即使成功创建了这个codecvt，它也不能正常地转换。

下面以Windows为例，说明如何将std::codecvt_byname用于std::wstring_convert。由于历史原因，std::codecvt_byname的析构函数是protected的，std::wstring_convert不能对它调用delete，所以首先要自行定义一个类来继承std::codecvt_byname：

// std::codecvt_byname for the locale named "chs", wrapped in a derived
// class so that the destructor is public and std::wstring_convert can
// delete the facet.
class chs_codecvt : public std::codecvt_byname<wchar_t, char, std::mbstate_t> {
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> base_facet;

public:
    chs_codecvt()
        : base_facet("chs")
    {
    }
};

chs_codecvt的默认析构函数是public的，从而让std::wstring_convert可以删除它。为方便起见，在chs_codecvt的构造函数中，直接把“chs”传给了std::codecvt_byname。

接下来的用法跟本文开头的例子基本一致：

// Converter that uses the chs_codecvt facet for the byte side.
std::wstring_convert<chs_codecvt> converter;
std::string string = converter.to_bytes(L"你好");
// The literal must end with a plain ASCII double quote; per the article the
// four bytes are the encoded form of "你好".
std::wstring wide_string = converter.from_bytes("\xc4\xe3\xba\xc3");

综上所述，只有UTF编码的转换是完全符合C++标准并且真正能够跨平台的，使用这些国际化的字符编码能够减少很多不必要的麻烦。

ICU

Boost的正则表达式库可以使用ICU

多语言处理 –> UNICODE –> IBM的ICU类库

最近写的一个C++程序里需要处理不同语言编码之间的相互转换问题，主要是GBK、BIG5、UTF-8和UTF-16这几种编码，程序虽小，但转换操作相对复杂一些。起初只想用C++本身提供的功能——包括C++标准里的wchar_t类型，C语言运行库中的locale函数集、mbstowcs和wcstombs函数，以及标准C++库中的wstring、wfstream等类——解决问题。

在MFC中，直接用CString或TCHAR数组存储字符串，用WIN32的MultiByteToWideChar和WideCharToMultiByte函数处理编码转换

http://site.icu-project.org/

https://github.com/unicode-org/icu

使用 International Component for Unicode (ICU) 升级 MFC —— 开发全球化更好的软件产品

使用 ICU4C 进行国际化应用开发

ICU是一组成熟的，广泛使用的C / C ++和Java库，为软件应用程序提供Unicode和全球化支持。ICU具有广泛的可移植性，可以在所有平台上以及C / C ++和Java软件之间为应用程序提供相同的结果。

在各种字符串类型之间进行转换

字符串类型包括char *， wchar_t*， _bstr_t， CComBSTR， CString， basic_string，和System.String。

从 char * 转换

// convert_from_char.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"

using namespace std;
using namespace System;

int main()
{
    // Create and display a C style string, and then use it
    // to create different kinds of strings.
    char *orig = "Hello, World!";
    cout << orig << " (char *)" << endl;

    // newsize describes the length of the
    // wchar_t string called wcstring in terms of the number
    // of wide characters, not the number of bytes.
    size_t newsize = strlen(orig) + 1;

    // The following creates a buffer large enough to contain
    // the exact number of characters in the original string
    // in the new format. If you want to add more characters
    // to the end of the string, increase the value of newsize
    // to increase the size of the buffer.
    wchar_t * wcstring = new wchar_t[newsize];

    // Convert char* string to a wchar_t* string.
    size_t convertedChars = 0;
    mbstowcs_s(&convertedChars, wcstring, newsize, orig, _TRUNCATE);
    // Display the result and indicate the type of string that it is.
    wcout << wcstring << _T(" (wchar_t *)") << endl;

    // Convert the C style string to a _bstr_t string.
    _bstr_t bstrt(orig);
    // Append the type of string to the new string
    // and then display the result.
    bstrt += " (_bstr_t)";
    cout << bstrt << endl;

    // Convert the C style string to a CComBSTR string.
    CComBSTR ccombstr(orig);
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        CW2A printstr(ccombstr);
        cout << printstr << endl;
    }

    // Convert the C style string to a CstringA and display it.
    CStringA cstringa(orig);
    cstringa += " (CStringA)";
    cout << cstringa << endl;

    // Convert the C style string to a CStringW and display it.
    CStringW cstring(orig);
    cstring += " (CStringW)";
    // To display a CStringW correctly, use wcout and cast cstring
    // to (LPCTSTR).
    wcout << (LPCTSTR)cstring << endl;

    // Convert the C style string to a basic_string and display it.
    string basicstring(orig);
    basicstring += " (basic_string)";
    cout << basicstring << endl;

    // Convert the C style string to a System::String and display it.
    String ^systemstring = gcnew String(orig);
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;
}

从 wchar_t * 转换

// convert_from_wchar_t.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"

using namespace std;
using namespace System;

int main()
{
    // Create a string of wide characters, display it, and then
    // use this string to create other types of strings.
    wchar_t *orig = _T("Hello, World!");
    wcout << orig << _T(" (wchar_t *)") << endl;

    // Convert the wchar_t string to a char* string. Record
    //.the length of the original string and add 1 to it to
    //.account for the terminating null character.
    size_t origsize = wcslen(orig) + 1;
    size_t convertedChars = 0;

    // Use a multibyte string to append the type of string
    // to the new string before displaying the result.
    char strConcat[] = " (char *)";
    size_t strConcatsize = (strlen( strConcat ) + 1)*2;

    // Allocate two bytes in the multibyte output string for every wide
    // character in the input string (including a wide character
    // null). Because a multibyte character can be one or two bytes,
    // you should allot two bytes for each character. Having extra
    // space for the new string is not an error, but having
    // insufficient space is a potential security problem.
    const size_t newsize = origsize*2;
    // The new string will contain a converted copy of the original
    // string plus the type of string appended to it.
    char *nstring = new char[newsize+strConcatsize];

    // Put a copy of the converted string into nstring
    wcstombs_s(&convertedChars, nstring, newsize, orig, _TRUNCATE);
    // append the type of string to the new string.
    _mbscat_s((unsigned char*)nstring, newsize+strConcatsize, (unsigned char*)strConcat);
    // Display the result.
    cout << nstring << endl;

    // Convert a wchar_t to a _bstr_t string and display it.
    _bstr_t bstrt(orig);
    bstrt += " (_bstr_t)";
    cout << bstrt << endl;

    // Convert the wchar_t string to a BSTR wide character string
    // by using the ATL CComBSTR wrapper class for BSTR strings.
    // Then display the result.

    CComBSTR ccombstr(orig);
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        // CW2A converts the string in ccombstr to a multibyte
        // string in printstr, used here for display output.
        CW2A printstr(ccombstr);
        cout << printstr << endl;
        // The following line of code is an easier way to
        // display wide character strings:
        // wcout << (LPCTSTR) ccombstr << endl;
    }

    // Convert a wide wchar_t string to a multibyte CStringA,
    // append the type of string to it, and display the result.
    CStringA cstringa(orig);
    cstringa += " (CStringA)";
    cout << cstringa << endl;

    // Convert a wide character wchar_t string to a wide
    // character CStringW string and append the type of string to it
    CStringW cstring(orig);
    cstring += " (CStringW)";
    // To display a CStringW correctly, use wcout and cast cstring
    // to (LPCTSTR).
    wcout << (LPCTSTR)cstring << endl;

    // Convert the wide character wchar_t string to a
    // basic_string, append the type of string to it, and
    // display the result.
    wstring basicstring(orig);
    basicstring += _T(" (basic_string)");
    wcout << basicstring << endl;

    // Convert a wide character wchar_t string to a
    // System::String string, append the type of string to it,
    // and display the result.
    String ^systemstring = gcnew String(orig);
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;
}

从 `_bstr_t` 转换

// convert_from_bstr_t.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"

using namespace std;
using namespace System;

int main()
{
    // Create a _bstr_t string, display the result, and indicate the
    // type of string that it is.
    _bstr_t orig("Hello, World!");
    wcout << orig << " (_bstr_t)" << endl;

    // Convert the wide character _bstr_t string to a C style
    // string. To be safe, allocate two bytes for each character
    // in the char* string, including the terminating null.
    const size_t newsize = (orig.length()+1)*2;
    char *nstring = new char[newsize];

    // Uses the _bstr_t operator (char *) to obtain a null
    // terminated string from the _bstr_t object for
    // nstring.
    strcpy_s(nstring, newsize, (char *)orig);
    strcat_s(nstring, newsize, " (char *)");
    cout << nstring << endl;

    // Prepare the type of string to append to the result.
    wchar_t strConcat[] = _T(" (wchar_t *)");
    size_t strConcatLen = wcslen(strConcat) + 1;

    // Convert a _bstr_t to a wchar_t* string.
    const size_t widesize = orig.length()+ strConcatLen;
    wchar_t *wcstring = new wchar_t[newsize];
    wcscpy_s(wcstring, widesize, (wchar_t *)orig);
    wcscat_s(wcstring, widesize, strConcat);
    wcout << wcstring << endl;

    // Convert a _bstr_t string to a CComBSTR string.
    CComBSTR ccombstr((char *)orig);
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        CW2A printstr(ccombstr);
        cout << printstr << endl;
    }

    // Convert a _bstr_t to a CStringA string.
    CStringA cstringa(orig.GetBSTR());
    cstringa += " (CStringA)";
    cout << cstringa << endl;

    // Convert a _bstr_t to a CStringW string.
    CStringW cstring(orig.GetBSTR());
    cstring += " (CStringW)";
    // To display a cstring correctly, use wcout and
    // "cast" the cstring to (LPCTSTR).
    wcout << (LPCTSTR)cstring << endl;

    // Convert the _bstr_t to a basic_string.
    string basicstring((char *)orig);
    basicstring += " (basic_string)";
    cout << basicstring << endl;

    // Convert the _bstr_t to a System::String.
    String ^systemstring = gcnew String((char *)orig);
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;
}

从 CComBSTR 转换

// convert_from_ccombstr.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"
#include "vcclr.h"

using namespace std;
using namespace System;
using namespace System::Runtime::InteropServices;

// Demonstrates converting a CComBSTR into each of the other string types
// (char*, wchar_t*, _bstr_t, CStringA, CStringW, basic_string and
// System::String) and prints every result tagged with its type.
int main()
{
    // Create and initialize a BSTR string by using a CComBSTR object.
    CComBSTR orig("Hello, World!");
    // Convert the BSTR into a multibyte string, display the result,
    // and indicate the type of string that it is.
    CW2A printstr(orig);
    cout << printstr << " (CComBSTR)" << endl;

    // Convert a wide character CComBSTR string to a
    // regular multibyte char* string. Allocate enough space
    // in the new string for the largest possible result,
    // including space for a terminating null.
    const size_t newsize = (orig.Length()+1)*2;
    char *nstring = new char[newsize];

    // Create a string conversion object, copy the result to
    // the new char* string, and display the result.
    CW2A tmpstr1(orig);
    strcpy_s(nstring, newsize, tmpstr1);
    cout << nstring << " (char *)" << endl;
    // The buffer is not used after printing; release it so it is not leaked.
    delete [] nstring;

    // Prepare the type of string to append to the result.
    wchar_t strConcat[] = _T(" (wchar_t *)");
    size_t strConcatLen = wcslen(strConcat) + 1;

    // Convert a wide character CComBSTR string to a wchar_t*.
    // The code first determines the length of the converted string
    // plus the length of the appended type of string, then
    // prepares the final wchar_t string for display.
    const size_t widesize = orig.Length()+ strConcatLen;
    wchar_t *wcstring = new wchar_t[widesize];
    wcscpy_s(wcstring, widesize, orig);
    wcscat_s(wcstring, widesize, strConcat);

    // Display the result. Unlike CStringW, a wchar_t does not need
    // a cast to (LPCTSTR) with wcout.
    wcout << wcstring << endl;
    // Release the wide buffer now that it has been displayed.
    delete [] wcstring;

    // Convert a wide character CComBSTR to a wide character _bstr_t,
    // append the type of string to it, and display the result.
    _bstr_t bstrt(orig);
    bstrt += " (_bstr_t)";
    cout << bstrt << endl;

    // Convert a wide character CComBSTR to a multibyte CStringA,
    // append the type of string to it, and display the result.
    CStringA cstringa(orig);
    cstringa += " (CStringA)";
    cout << cstringa << endl;

    // Convert a wide character CComBSTR to a wide character CStringW.
    CStringW cstring(orig);
    cstring += " (CStringW)";
    // To display a cstring correctly, use wcout and cast cstring
    // to (LPCTSTR).
    wcout << (LPCTSTR)cstring << endl;

    // Convert a wide character CComBSTR to a wide character
    // basic_string.
    wstring basicstring(orig);
    basicstring += _T(" (basic_string)");
    wcout << basicstring << endl;

    // Convert a wide character CComBSTR to a System::String.
    String ^systemstring = gcnew String(orig);
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;
}

从 CString 转换

// convert_from_cstring.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"

using namespace std;
using namespace System;

// Demonstrates converting CStringA / CStringW values into char*, wchar_t*,
// _bstr_t, CComBSTR, basic_string and System::String, printing every
// result tagged with its type.
int main()
{
    // Set up a multibyte CStringA string.
    CStringA origa("Hello, World!");
    cout << origa << " (CStringA)" << endl;

    // Set up a wide character CStringW string.
    CStringW origw("Hello, World!");
    wcout << (LPCTSTR)origw << _T(" (CStringW)") << endl;

    // Convert to a char* string from CStringA string
    // and display the result.
    const size_t newsizea = (origa.GetLength() + 1);
    char *nstringa = new char[newsizea];
    strcpy_s(nstringa, newsizea, origa);
    cout << nstringa << " (char *)" << endl;
    // Release each temporary buffer as soon as it has been printed.
    delete [] nstringa;

    // Convert to a char* string from a wide character
    // CStringW string. To be safe, we allocate two bytes for each
    // character in the original string, including the terminating
    // null.
    const size_t newsizew = (origw.GetLength() + 1)*2;
    char *nstringw = new char[newsizew];
    size_t convertedCharsw = 0;
    wcstombs_s(&convertedCharsw, nstringw, newsizew, origw, _TRUNCATE );
    cout << nstringw << " (char *)" << endl;
    delete [] nstringw;

    // Convert to a wchar_t* from CStringA
    size_t convertedCharsa = 0;
    wchar_t *wcstring = new wchar_t[newsizea];
    mbstowcs_s(&convertedCharsa, wcstring, newsizea, origa, _TRUNCATE);
    wcout << wcstring << _T(" (wchar_t *)") << endl;
    delete [] wcstring;

    // Convert to a wide character wchar_t* string from
    // a wide character CStringW string.
    wchar_t *n2stringw = new wchar_t[newsizew];
    wcscpy_s( n2stringw, newsizew, origw );
    wcout << n2stringw << _T(" (wchar_t *)") << endl;
    delete [] n2stringw;

    // Convert to a wide character _bstr_t string from
    // a multibyte CStringA string.
    _bstr_t bstrt(origa);
    bstrt += _T(" (_bstr_t)");
    wcout << bstrt << endl;

    // Convert to a wide character _bstr_t string from
    // a wide character CStringW string.
    bstr_t bstrtw(origw);
    bstrtw += " (_bstr_t)";
    wcout << bstrtw << endl;

    // Convert to a wide character CComBSTR string from
    // a multibyte character CStringA string.
    CComBSTR ccombstr(origa);
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        // Convert the wide character string to multibyte
        // for printing.
        CW2A printstr(ccombstr);
        cout << printstr << endl;
    }

    // Convert to a wide character CComBSTR string from
    // a wide character CStringW string.
    CComBSTR ccombstrw(origw);
    // Append the type of string to it, and display the result.

    if (ccombstrw.Append(_T(" (CComBSTR)")) == S_OK)
    {
        CW2A printstrw(ccombstrw);
        wcout << printstrw << endl;
    }

    // Convert a multibyte character CStringA to a
    // multibyte version of a basic_string string.
    string basicstring(origa);
    basicstring += " (basic_string)";
    cout << basicstring << endl;

    // Convert a wide character CStringW to a
    // wide character version of a basic_string
    // string.
    wstring basicstringw(origw);
    basicstringw += _T(" (basic_string)");
    wcout << basicstringw << endl;

    // Convert a multibyte character CStringA to a
    // System::String.
    String ^systemstring = gcnew String(origa);
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;

    // Convert a wide character CStringW to a
    // System::String.
    String ^systemstringw = gcnew String(origw);
    systemstringw += " (System::String)";
    Console::WriteLine("{0}", systemstringw);
    delete systemstringw;
}

从 basic_string 转换

// convert_from_basic_string.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"

using namespace std;
using namespace System;

// Demonstrates converting a multibyte std::string into char*, wchar_t*,
// _bstr_t, CComBSTR, CStringA, CStringW and System::String, printing
// every result tagged with its type.
int main()
{
    // Set up a basic_string string.
    string orig("Hello, World!");
    cout << orig << " (basic_string)" << endl;

    // Copy the multibyte basic_string into a char* string.
    // The buffer is sized at two bytes per character of the
    // original string, including the terminating null.
    const size_t newsize = (strlen(orig.c_str()) + 1)*2;
    char *nstring = new char[newsize];
    strcpy_s(nstring, newsize, orig.c_str());
    cout << nstring << " (char *)" << endl;
    // The buffer is not used after printing; release it so it is not leaked.
    delete [] nstring;

    // Convert a basic_string string to a wide character
    // wchar_t* string. You must first convert to a char*
    // for this to work.
    const size_t newsizew = strlen(orig.c_str()) + 1;
    size_t convertedChars = 0;
    wchar_t *wcstring = new wchar_t[newsizew];
    mbstowcs_s(&convertedChars, wcstring, newsizew, orig.c_str(), _TRUNCATE);
    wcout << wcstring << _T(" (wchar_t *)") << endl;
    // Release the wide buffer now that it has been displayed.
    delete [] wcstring;

    // Convert a basic_string string to a wide character
    // _bstr_t string.
    _bstr_t bstrt(orig.c_str());
    bstrt += _T(" (_bstr_t)");
    wcout << bstrt << endl;

    // Convert a basic_string string to a wide character
    // CComBSTR string.
    CComBSTR ccombstr(orig.c_str());
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        // Make a multibyte version of the CComBSTR string
        // and display the result.
        CW2A printstr(ccombstr);
        cout << printstr << endl;
    }

    // Convert a basic_string string into a multibyte
    // CStringA string.
    CStringA cstring(orig.c_str());
    cstring += " (CStringA)";
    cout << cstring << endl;

    // Convert a basic_string string into a wide
    // character CStringW string.
    CStringW cstringw(orig.c_str());
    cstringw += _T(" (CStringW)");
    wcout << (LPCTSTR)cstringw << endl;

    // Convert a basic_string string to a System::String
    String ^systemstring = gcnew String(orig.c_str());
    systemstring += " (System::String)";
    Console::WriteLine("{0}", systemstring);
    delete systemstring;
}

从 System::String 转换

// convert_from_system_string.cpp
// compile with: /clr /link comsuppw.lib

#include <iostream>
#include <stdlib.h>
#include <string>

#include "atlbase.h"
#include "atlstr.h"
#include "comutil.h"
#include "vcclr.h"

using namespace std;
using namespace System;
using namespace System::Runtime::InteropServices;

// Demonstrates converting a System::String into char*, wchar_t*, _bstr_t,
// CComBSTR, CStringA, CStringW and basic_string, printing every result
// tagged with its type.
int main()
{
    // Set up a System::String and display the result.
    String ^orig = gcnew String("Hello, World!");
    Console::WriteLine("{0} (System::String)", orig);

    // Obtain a pointer to the System::String in order to
    // first lock memory into place, so that the
    // Garbage Collector (GC) cannot move that object
    // while we call native functions.
    pin_ptr<const wchar_t> wch = PtrToStringChars(orig);

    // Make a copy of the system string as a multibyte
    // char* string. Allocate two bytes in the multibyte
    // output string for every wide character in the input
    // string, including space for a terminating null.
    size_t origsize = wcslen(wch) + 1;
    const size_t newsize = origsize*2;
    size_t convertedChars = 0;
    char *nstring = new char[newsize];
    wcstombs_s(&convertedChars, nstring, newsize, wch, _TRUNCATE);
    cout << nstring << " (char *)" << endl;
    // The buffer is not used after printing; release it so it is not leaked.
    delete [] nstring;

    // Convert a wide character system string to a
    // wide character wchar_t* string.
    const size_t newsizew = origsize;
    wchar_t *wcstring = new wchar_t[newsizew];
    wcscpy_s(wcstring, newsizew, wch);
    wcout << wcstring << _T(" (wchar_t *)") << endl;
    // Release the wide buffer now that it has been displayed.
    delete [] wcstring;

    // Convert a wide character system string to a
    // wide character _bstr_t string.
    _bstr_t bstrt(wch);
    bstrt += " (_bstr_t)";
    cout << bstrt << endl;

    // Convert a wide character system string
    // to a wide character CComBSTR string.
    CComBSTR ccombstr(wch);
    if (ccombstr.Append(_T(" (CComBSTR)")) == S_OK)
    {
        // Make a multibyte copy of the CComBSTR string
        // and display the result.
        CW2A printstr(ccombstr);
        cout << printstr << endl;
    }

    // Convert a wide character System::String to
    // a multibyte CStringA string.
    CStringA cstring(wch);
    cstring += " (CStringA)";
    cout << cstring << endl;

    // Convert a wide character System::String to
    // a wide character CStringW string.
    CStringW cstringw(wch);
    cstringw += " (CStringW)";
    wcout << (LPCTSTR)cstringw << endl;

    // Convert a wide character System::String to
    // a wide character basic_string.
    wstring basicstring(wch);
    basicstring += _T(" (basic_string)");
    wcout << basicstring << endl;

    delete orig;
}

字符串Format实现

_vscprintf，_vscprintf_l，_vscwprintf，_vscwprintf_l

vsprintf, _vsprintf_l, vswprintf, _vswprintf_l, __vswprintf_l

void PrintWide ( const TCHAR * format, ... )
{
    TCHAR buffer[256];
    va_list args;
    va_start ( args, format );
    _vstprintf ( buffer, 256, format, args );
    va_end ( args );
}

// Formats the printf-style arguments into a heap buffer sized exactly for
// the result, then frees it. Shows the "measure first, then format" pattern.
//
// format: printf-style format string.
// ...:    arguments consumed by the format string.
void test( TCHAR * format, ... )
{
    va_list args;
    int     len;
    TCHAR    *buffer;

    // retrieve the variable arguments
    va_start( args, format );

    // _vsctprintf returns the number of characters needed, not counting the
    // terminating '\0', or a negative value when the format is invalid.
    len = _vsctprintf( format, args );
    if ( len >= 0 )
    {
        len += 1; // terminating '\0'

        buffer = (TCHAR*)malloc( len * sizeof(TCHAR) );
        if ( buffer != NULL )
        {
            // Bounded variant: the buffer size is passed explicitly.
            // NOTE(review): args is reused after _vsctprintf without va_copy,
            // as in the original; this relies on the MSVC va_list behavior.
            _vstprintf_s( buffer, len, format, args );
            free( buffer );
        }
    }

    va_end( args );
}

wstring 与 string

几种C++ std::string和std::wstring相互转换的转换方法

调用WideCharToMultiByte()和MultiByteToWideChar()

#include <string>
#include <windows.h>
using namespace std;
// Converts a NUL-terminated wide-character string to a string in the
// system ANSI code page (CP_ACP).
//
// pwszSrc: NUL-terminated wide string to convert.
// Returns the converted string, or an empty string when the conversion
// fails.
std::string WChar2Ansi(LPCWSTR pwszSrc)
{
    // First call: query the required size in bytes. Because the source
    // length is -1, the size includes the terminating NUL.
    const int nLen = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, NULL, 0, NULL, NULL);
    if (nLen <= 0)
        return std::string();

    // A std::string owns the scratch buffer, so nothing leaks if a later
    // allocation throws, and the buffer is zero-filled before use.
    std::string buffer(nLen, '\0');
    const int nWritten = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, &buffer[0], nLen, NULL, NULL);
    if (nWritten <= 0)
        return std::string();

    // Rebuild from the C string so the result ends at the terminating NUL
    // rather than containing it.
    return std::string(buffer.c_str());
}

// Converts a std::wstring to an ANSI std::string by handing its
// NUL-terminated character data to WChar2Ansi.
string ws2s(wstring& inputws)
{
    const wchar_t* wideText = inputws.c_str();
    return WChar2Ansi(wideText);
}

// Converts a string in the system ANSI code page (CP_ACP) to a wide
// string.
//
// pszSrc: source bytes.
// nLen:   number of bytes to convert, passed straight to
//         MultiByteToWideChar.
// Returns the converted text up to the first NUL, with one leading
// U+FEFF (byte order mark) removed; an empty string when the conversion
// fails.
std::wstring Ansi2WChar(LPCSTR pszSrc, int nLen)
{
    // First call: query how many wide characters the conversion produces.
    const int nSize = MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, NULL, 0);
    if (nSize <= 0)
        return std::wstring();   // "return NULL" would build a wstring from a null pointer

    // One extra zero-initialized element guarantees NUL termination. The
    // std::wstring owns the buffer, so no delete/delete[] mismatch is possible.
    std::wstring buffer(static_cast<size_t>(nSize) + 1, L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, &buffer[0], nSize) <= 0)
        return std::wstring();

    // Skip a leading 0xFEFF byte order mark if present.
    const wchar_t* start = buffer.c_str();
    if (*start == 0xFEFF)
        ++start;

    return std::wstring(start);
}

// Converts an ANSI std::string to a std::wstring by passing its character
// data and byte count to Ansi2WChar.
std::wstring s2ws(const string& s)
{
    const char* narrowText = s.c_str();
    return Ansi2WChar(narrowText, s.size());
}

采用ATL封装_bstr_t

#include <string>
#include <comutil.h>
using namespace std;
#pragma comment(lib, "comsuppw.lib")

string ws2s(const wstring& ws);
wstring s2ws(const string& s);

string ws2s(const wstring& ws)
{
_bstr_t t = ws.c_str();
char* pchar = (char*)t;
string result = pchar;
return result;
}

wstring s2ws(const string& s)
{
_bstr_t t = s.c_str();
wchar_t* pwchar = (wchar_t*)t;
wstring result = pwchar;
return result;
}

使用CRT库的mbstowcs()函数和wcstombs()函数

#include <string>
#include <locale.h>
using namespace std;
// Converts a wide string to a multibyte string with the CRT wcstombs(),
// temporarily switching the process locale to "chs".
//
// ws: wide string to convert.
// Returns the converted text up to the first NUL. If wcstombs stops on a
// character it cannot convert, the result holds whatever was written
// before that point.
// The locale that was active on entry is restored before returning.
string ws2s(const wstring& ws)
{
    // setlocale(LC_ALL, NULL) only queries the current locale name; keep a
    // copy because later setlocale calls may overwrite the returned buffer.
    const string savedLocale = setlocale(LC_ALL, NULL);

    setlocale(LC_ALL, "chs");

    // Two bytes per wide character plus the terminator. The string is
    // zero-filled and owns the memory, so there is no leak on exception and
    // c_str() is NUL-terminated even if wcstombs fills every byte.
    const size_t capacity = 2 * ws.size() + 1;
    string buffer(capacity, '\0');
    wcstombs(&buffer[0], ws.c_str(), capacity);
    string result = buffer.c_str();

    setlocale(LC_ALL, savedLocale.c_str());

    return result;
}

// Converts a multibyte string to a wide string with the CRT mbstowcs(),
// temporarily switching the process locale to "chs".
//
// s: multibyte string to convert.
// Returns the converted text up to the first NUL. If mbstowcs stops on an
// invalid sequence, the result holds whatever was written before that point.
// The locale that was active on entry is restored before returning, rather
// than being forced to "C".
wstring s2ws(const string& s)
{
    // setlocale(LC_ALL, NULL) only queries the current locale name; keep a
    // copy because later setlocale calls may overwrite the returned buffer.
    const string savedLocale = setlocale(LC_ALL, NULL);

    setlocale(LC_ALL, "chs");

    // One wide character per input byte plus the terminator. The wstring is
    // zero-filled and owns the memory, so there is no leak on exception.
    const size_t capacity = s.size() + 1;
    wstring buffer(capacity, L'\0');
    mbstowcs(&buffer[0], s.c_str(), capacity);
    wstring result = buffer.c_str();

    setlocale(LC_ALL, savedLocale.c_str());

    return result;
}

string与wstring转换

std::copy

// Widens a narrow string element by element: every char is converted to
// a wchar_t on its own. No code page or multibyte decoding takes place.
std::wstring StringToWString(const std::string &str)
{
    return std::wstring(str.begin(), str.end());
}

 // Narrows a wide string element by element: every wchar_t is converted
 // to a char on its own, so only its low byte ends up in the result.
 std::string WStringToString(const std::wstring &wstr)
 {
     std::string narrowed;
     narrowed.reserve(wstr.length());
     for (std::wstring::const_iterator it = wstr.begin(); it != wstr.end(); ++it)
         narrowed.push_back(static_cast<char>(*it));
     return narrowed;
 }

展开全文 >>

Linux从入门到精通

2018-09-01

桌面：Gnome和KDE

windows下常用刻录软件Nero

Linux默认使用的操作系统引导加载器Grub,可重装Grub

用户可执行不带参数的su命令将自己提升为root权限，另一个命令行工具是sudo，它可以临时使用root身份运行一个程序，运行完毕后返回普通用户状态

Ubuntu附带编辑器gedit

Window Maker,FVWM,IceWM,Sawfish等是目前比较常见的窗口管理器。

通过命令行运行startx命令启动X服务器。

桌面环境的引导脚本是一段Linux命令组成的脚本程序Xsession

KDE是用C++编写的，基于Qt库。

Gnome是C语言写成的，基于GTK+widget库。

Shell命令行：pwd,cd,ls,dir,vdir

查看文本文件：cat,more,head,tail

更好的文本阅读工具：less

查找文件内容：grep

找文件：find

更快速定位文件：locate

查找特定程序：whereis

用户以及版本信息：who,whoami,uname -a,uname -r

帮助：man

获取命令简介：whatis,apropos

改变文件所有权：chown,chgrp

改变文件权限：chmod

建立链接：ln

重定向与管道：<,>,|

SUSE,Red Hat,Fedora等发行版本使用RPM,而Debian,Ubuntu使用.deb格式的软件包。

对于Debian，Ubuntu:sudo dpkg -i

对于openSUSE和其他使用RPM软件包：rpm -ivh

高级软件包管理工具APT,yum可自动搜寻依赖关系并执行安装。

管理.deb软件包：dpkg

管理RPM软件包：rpm

高级软件包工具:APT:apt-get update,apt-get install,apt-get upgrade,apt-get remove,apt-get source,apt-get clean,apt-cache search,apt-cache depends

MPlayer开源多媒体播放软件，支持Linux,Windows,Mac OS.

从源代码编译软件：

下载解压：tar

configure依据用户提供的参数生成对应的makefile文件：./configure --prefix=

编译源代码：make

安装：sudo make install

swap交换分区，分区容量应该大于物理内存大小，但目前不能超过2GB

挂载文件系统：mount,umount

查看磁盘使用情况:df

检查和修复文件系统:fsck

在磁盘上建立文件系统：mkfs

列出当前内核发现的USB设备：lsusb

压缩工具：gzip,bzip2,RAR for linux

存档工具:tar,dd

建立分区表工具：fdisk

建立ext3fs文件系统：mkfs

创建并激活交换分区：mkswap,swapon

高级硬盘管理工具：RAID,LVM

备份工作与系统工具：dump,restore

定时自动完成：cron

添加用户useradd,groupadd

记录用户操作:history

直接编辑passwd,shadow

管理用户：usermod

受限的特权：sudo,配置/etc/sudoers指定用户可以执行的特权命令。修改sudoers文件应该使用visudo命令

监视进程：ps aux,ps lax

即时跟踪进程信息：top

查看占用文件进程：lsof

向进程发送信号：kill

调整进程的谦让度：nice,renice

/PROC文件系统：目录下存放内核系统状态的各种有意义信息。

使用ifconfig配置网络接口

使用route配置静态路由

netstat -r命令看当前系统中的路由信息

使用wvdial建立PPP连接

浏览器：firefox,opera

基于文本的浏览器：lynx

Gnome下的邮件客户端：Evolution,Thunderbird

KDE邮件客户端：Kmail

Linux间的网络硬盘：NFS,使用mount挂载

与windows协作：Samba

查看当前可使用的Samba资源：smbtree,nmblookup

Linux下的Samba客户端：smbclient

挂载共享目录：mount.cifs，将windows共享目录挂载到本地某目录下

使用FTP:FileZilla

ftp命令：get,mget,prompt off,put,lcd

基于SSH的文件传输：sftp,scp

远程登录：

OpenSSH是linux下最常用的SSH服务器与客户端软件

VNC可用于图形化的登录：vnc4server

安装：sudo apt-get install vnc4-common vnc4server

SUSE防火墙，允许DHCPv4服务器和dnsmasq

SSH：ssh -l login_name hostname

SSH默认开启22号端口，可通过-p选项指定要连接的端口。

登录X窗口系统：图形化的VNC

SSH连接远程主机，运行vncserver生成配置文件，使用vncviewer跨平台的VNC客户端工具，执行vncviewer ip-address:1

从windows登录Linux，window上有几种不同的SSH客户端，其中开源的PuTTY是使用最广泛的。window版vncviewer.

从Linux登录到windows：1.windows安装VNC Server软件。2.通过RDP协议连接到windows,下载rdesktop

xine是linux中最著名的播放软件之一。另一款播放引擎叫做gstreamer.

播放器：Rhythmbox,amarok,MPlayer

比较流行的音乐文件格式：MP3,WMA,MIDI

linux上使用的Ogg，完全免费开源。

使用Konqueror和Nautilus，GIMP,Shotwell查看图片。

LibreOffice绘图工具。

打印机语言：PDL

PostScript,PCL5,PCL6和PDF是如今最知名的PDL.

CUPS打印系统

设置当前用户的默认打印机：lpoptions

测试打印机：lpr

管理CUPS服务器：

CUPS的配置文件cupsd.conf

显示当前CUPS的状态：lpstat -t

办公软件：LibreOffice,Xpdf，Adobe Reader

光盘刻录：Gnome下Brasero，KDE下K3b

Linux编程：

编辑器：Vim,Emacs

Gnome下的编辑器gedit

C和C++的编译器：gcc,同时支持C,C++,Objective C,Chill,Fortran和Java

gcc:-c,-S,-E,-g

优化选项:-O1,-O2,-O3

gcc -O2

编译C++程序：g++

调试gdb:gdb -g

list命令可指定代码行号

gdb提供search命令搜索特定内容

设置断点：break , info break, clear

运行程序和单步执行：run,next,continue

监视变量：print,watch

临时修改变量：set var

查看堆栈情况：bt

退出gdb:quit

版本控制系统：Subversion

svnadmin create,svn import,svn checkout,svn co,svn status,svn diff,svn commit,svn log,svn update,svn resolved,svn merge

Shell编程

GNU/Linux中有两套库可用于正则表达式，POSIX和PCRE

图形化的gedit和kate支持Shell脚本语法加亮。

Shell编程工具：source,export,for,$[],expr,let,if,case,;;,test,文件测试，数字比较，复合表达式，while,until,read,exit,trap,命令表的表示形式，cut,diff,sort,uniq,tr,wc,substr,seq,printenv,mypr,alias

个性化设置：修改.bashrc文件

立刻生效：source .bashrc

服务器配置：

inetd和xinetd管理守护进程。

HTTP服务器-Apache

设置Apache服务器：配置文件httpd.conf

日志文件：

cgi公共网关接口，定义了Web服务器和外接程序交互的接口，是网站上实现动态页面的最简单和常用方法。httpd.conf文件，可以找到下边命令：ScriptAlias /cgi-bin/ /usr/local/apache2/cgi-bin/

使用PHP+MySQL

LAMP:Linux,Apache,MySQL,PHP

FTP服务器-vsftpd

除了standalone方式运行FTP服务器，还可以配置以xinetd来管理vsftpd。

vsftpd匿名用户，本地用户，虚拟用户。

在非匿名应用方面，有基于SSH的sftp.

Samba服务：sudo apt-get install samba-common samba

从源代码安装：tar,./autogen.sh,./configure,make,sudo make install

Samba配置：smb.conf全局设置和共享定义。

create mask设置用户在共享目录中创建文件所使用的权限。

可孤立用户的共享目录

SWAT管理工具，为Samba服务器提供图形管理。

网络硬盘- NFS

安装NFS服务器：sudo apt-get install nfs-common nfs-kernel-server

配置文件/etc/exports,在末尾添加：/srv/nfs_share *(rw)

NFS使用RPC作为自己的传输协议

监视NFS状态：nfsstat

任务计划：cron

配置/etc/crontab文件

sudo /etc/init.d/cron restart

可放到/etc/cron.d目录，还有/etc/cron.hourly,/etc/cron.daily,/etc/cron.weekly,/etc/cron.monthly

普通用户在获得管理员批准后也可以定制自己的任务计划，每个用户的cron配置文件保存在/var/spool/cron目录下，SUSE在/var/spool/cron/tabs目录下

简单定时：at命令

使用atq命令可以看到当前已经设置的任务

atrm 删除任务

Linux防火墙 IP Tables

iptables命令最常用的5个选项：-F,-P,-A,-D,-L

扫描网络端口：nmap

找出不安全的口令：John the Ripper

对于那些包含主机访问控制hosts_access功能服务（典型的有xinetd,sshd），Linux提供了除防火墙之外另一种来源控制方案，在/etc目录下有两个文件hosts.allow和hosts.deny.

掩盖入侵痕迹：rootkits

linux下防病毒软件：ClamAV

ClamAV也提供图形化工具：sudo apt-get install clamtk

附录：Linux常用指令

展开全文 >>

网络编程

2018-08-14

参考

网络通信编程

Windows网络模型

网络协议

book

《TCP/IP网络编程》

《Windows网络编程》

展开全文 >>

个人喜欢的工具

2018-08-08

Zeal

Zeal是一款面向软件开发人员的离线文档浏览器。

https://zealdocs.org/download.html#windows

Visual Studio Code插件

安装Zeal后，转到Tools⟶ Docsets浏览并下载docsets。

展开全文 >>

MFC

2018-08-05

MFC创建选项

复合文档支持的各选项功能如下：

◆ 无复合文档支持：即无OLE（Object Linking and Embedding，对象连接与嵌入）支持。 
◆ 容器支持：将对象嵌入或链接至文档，以便用户编辑。
◆ 袖珍服务器：表示应用程序可创建和管理复合文档对象。不能独立运行并且仅支持嵌入项。 
◆ 完全服务器：表示应用程序可创建和管理复合文档对象。能够独立运行，并且既支持链接项也支持嵌入项。 
◆ 容器/完全服务器：表示应用程序可以既是容器又是服务器。容器是可将嵌入项或者链接项并入自己的文档中的应用程序。服务器是可创建供容器应用程序使用的自动化项的应用程序。 其他支持包括： 
◆ 自动化：允许应用程序向脚本工具和其他应用程序公开对象。 
◆ ActiveX控件：应用程序能够将ActiveX控件包含在内。

MAPI支持：可以从应用程序直接调用MAPI函数。

MAPI”表示“消息传递应用程序接口”。它是电子邮件等应用程序的消息传递结构和客户接口组件。如果双方应用程序都启用“MAPI”，就可以相互共享邮件信息，为多个应用程序提供一致接口。

◆ Windows套接字：启用适当的头文件、库和MFC Windows Socket规范，使应用程序支持TCP/IP网络协议。

说你对Windows消息机制的理解

Windows系统是一个消息驱动的操作系统。什么是消息呢？下面从不同的几个方面进行讲解：

1）消息的组成：一个消息由一个消息名称（UINT）和两个参数（WPARAM，LPARAM）组成。当用户进行了输入或是窗口的状态发生改变时系统都会发送消息到某一个窗口。例如，当菜单选择之后会有WM_COMMAND消息发送，WPARAM的低位字（LOWORD(wParam)）是命令的ID号，对菜单来讲就是菜单ID。当然，用户也可以定义自己的消息名称，也可以利用自定义消息来发送通知和传送数据。 
2）谁将收到消息：一个消息必须有一个窗口接收。在窗口过程（WNDPROC）中可以对消息进行分析，对自己感兴趣的消息进行处理。例如，希望对菜单选择进行处理，那么可以定义对WM_COMMAND进行处理的代码，如果希望在窗口中进行图形输出，则就必须对WM_PAINT进行处理。 
3）未处理的消息到哪里去了：Microsoft为窗口编写了默认的窗口过程，这个窗口过程将负责处理那些没有被处理的消息。正因为有了这个默认窗口过程，我们才可以利用Windows的窗口进行开发而不必过多关注窗口各种消息的处理。例如，窗口在被拖动时会有很多消息发送，而我们都可以不予理睬而让系统自己去处理。 
4）窗口句柄：说到消息就不能不说窗口句柄，系统通过窗口句柄在整个系统中唯一标识一个窗口，发送一个消息时必须指定一个窗口句柄表明该消息由哪个窗口接收。而每个窗口都会有自己的窗口句柄，所以用户的输入就会被正确地处理。例如，有两个窗口共用一个窗口过程代码，在窗口1上按下鼠标时消息就会通过窗口1的句柄被发送到窗口1而不是窗口2。

系统将会维护一个或多个消息队列，所有产生的消息都会被放入或插入队列中。系统会在队列中取出每一条消息，根据消息的接收句柄而将该消息发送给拥有该窗口程序的消息循环。每一个运行的程序都有自己的消息循环，在循环中得到属于自己的消息并根据接收窗口的句柄调用相应的窗口过程。而在没有消息时消息循环就将控制权交给系统，所以Windows可以同时进行多个任务。

数据库编程基础

ODBC数据库链接：可以学到在VC 6.0中使用ODBC进行数据库链接。
ADO数据库链接：可以学到在VC 6.0中使用ADO进行数据库链接。

OLE DB

OLE DB模板是活动模板库(ATL)的一部分，它们通过提供实现许多常用OLE DB接口的类来使得高性能OLE DB数据库技术使用起来很简单。模板库中附带有创建OLE DB起始应用程序的向导支持。模板库包含两部分：

OLE DB使用者模板：用于实现OLE DB客户（使用者）应用程序。
OLE DB提供程序模板：用于实现OLE DB服务器（提供程序）应用程序。

OLE DB数据使用者模板是由一些模板组成的。下面对一些常用类做一些介绍。
（1）CDataSource类 CDataSource类与OLE DB的数据源对象相对应。这个类代表了OLE DB数据提供程序和数据源之间的连接，即主要负责数据源对象。只有当数据源的连接被建立之后，才能产生会话对象，可以调用Open()函数来打开数据源的连接。
（2）CSession类 CSession所创建的对象代表了一个单独的数据库访问的会话，该类负责管理数据源和应用程序进程的会话。一个用CDataSource类产生的数据源对象可以创建一个或者多个会话，要在数据源对象上产生一个会话对象，需要调用Open()函数来打开。同时，会话对象还可用于创建事务的操作。
（3）CEnumeratorAccessor类CEnumeratorAccessor类是用来访问枚举器查询后所产生的行集中可用数据提供程序的信息的访问器，可提供当前可用的数据提供程序和可见的访问器。

访问器类用于管理与访问相关的操作。常用的访问器类如下。

（1）CAccessor类 CAccessor类代表访问器的类型。当用户知道数据库的类型和结构时，可以使用此类。它支持对一个行集采用多个访问器，并且存放数据的缓冲区是由用户分配的。
（2）CDynamicAccessor类 CDynamicAccessor类用来在程序运行时动态地创建访问器。当系统运行时，可以动态地从行集中获得列的信息，可根据此信息动态地创建访问器。
（3）CManualAccessor类 CManualAccessor类用来在程序运行时将列与变量绑定或者将参数与变量绑定。

行集类用于管理以行为单位的数据集。常用的行集类如下。

（1）CRowSet类 CRowSet类封装了行集对象和相应的接口，并且提供了一些方法用于查询、设置数据等。可以用Move()等函数进行记录移动，用GetData()函数读取数据，用Insert()、Delete()、SetData()来更新数据。
（2）CBulkRowset类 CBulkRowset类用于在一次调用中取回多个行句柄或者对多个行进行操作。
（3）CArrayRowset类 CArrayRowset类提供用数组下标进行数据访问。

命令类包括以下两类。

（1）CTable类 CTable类用于对数据库的简单访问，用数据源的名称得到行集，从而得到数据。
（2）CCommand类 CCommand类用于支持命令的数据源。

可以用Open()函数来执行SQL命令，也可以用Prepare()函数先对命令进行准备，对于支持命令的数据源，可以提高程序的灵活性和健壮性。注意　如果使用OLE DB进行数据库程序设计，则应该使用OLE DB类，在使用OLE DB类的过程中，也可以和其他数据库连接方式的类兼容。

展开全文 >>

COM编程

2018-08-02

COM线程模型

要注意单线程与多线程的问题

https://blog.csdn.net/zj510/article/category/2510453

通常我们提到COM的线程模型，其实指的是两方面：一个是客户程序的线程模式，一个是组件所支持的线程模式。客户程序的线程模式只有两种，单线程公寓(STA)和多线程公寓(MTA)。组件所支持的线程模式有四种：Single(单线程)、Apartment(STA)、Free(MTA)、Both(STA+MTA)。

注意，公寓和套间是同一个概念，这只是翻译而已，都是指apartment

STA客户调用STA COM组件

STA对象在客户创建的STA套间线程里面运行；
STA客户直接调用STA COM对象指针；

MTA客户调用STA COM组件

STA对象在default STA里面运行，如果有多个STA对象，它们统统在同一个default sta线程里面运行。

MTA客户调用STA COM对象的代理。

谈谈COM的线程模型。然后讨论进程内/外组件的差别

中文翻译为“ 公寓 ” ，有时候为 “ 套间 ” ，这里就翻译为 “ 公寓 ” 吧，都一个意思，指的就是COM的线程模式，这个概念很抽象，理解起来比较困难。因为公寓不像 Windows内核对象那样有个 “ 句柄 ” ，并且跟公寓相关的Windows API很少，只有5个：CoInitialize，CoUninitialize，CoInitializeEx，OleInitialize和 OleUninitialize，大家都很熟悉了，5个关于COM初始化和反初始化的函数。如何来理解公寓呢？可以这样：1、线程住在公寓中；2、对象住在公寓中。有时候，对象和创建它的线程住在同一个公寓中，有时候不是。这样还是很难理解对吧，但没事，这个先记下来，后面会明白的。
COM只有两种公寓，一种叫单线程公寓（Single-Thread Apartment），简称STA，一种叫多线程公寓（Multi-Thread Apartment），简称MTA，顾名思义，一种只能容纳一个线程，另一种能容纳多个线程。在一个进程中，MTA只有一个，而STA可以有很多。

我们在使用COM之前，都应该先初始化COM，怎么初始化？当然是前文提到的那几个函数了，CoInitialize，CoInitializeEx和OleInitialize，那我们是每个程序（进程）初始化一次还是每个线程初始化一次？答案是线程，每个线程初始化一次，这么个初始化，就相当于把这个线程安置在某个公寓中。具体这样的：CoInitialize或OleInitialize把线程放置入STA；CoInitializeEx允许你把线程放置入MTA。从公寓中移除线程的方法是CoUninitialize和OleUninitialize。

我们都知道，对象是线程创建的，那对象什么时候跟创建它的线程同一个公寓，什么时候不是同一个公寓呢？前面说了线程所在的公寓类型是由那几个初始化函数所决定，那对象所在什么公寓是由什么决定的呢？这个稍微复杂一点，答案是：由创建它的线程的类型及对象本身的线程属性所决定。线程类型大家都知道啦，就前面提到的由那几个初始化函数决定，那么对象本身线程属性怎么来定呢？答案：注册表里的信息来定。

打开注册表编辑器，按照这路径： “ HKEY_CLASSES_ROOT\CLSID\{00000010-0000-0010-8000-00AA006D2EA4}\InprocServer32 ” ，（这个 GUID很奇怪吧，明显不是用工具生成的，微软可有手动填写GUID的特权哦）看看 “ ThreadingModel ” 的值，嗯，没错，是 “ Apartment ” ，这个 “ Apartment ” 就是刚才我所提到的 “ 对象本身线程属性 ” ，对象本身线程属性一共有四种：Apartment、Both、Free和Single。下面我列个表，一目了然。

组件线程属性 | STA线程 | MTA线程

备注：第一个以 COINIT_APARTMENTTHREADED调用CoInitializeEx()的线程被称作是主STA。

例如一个STA线程创建了一个本身线程属性为 Free的对象，那该对象存在于MTA中，这个STA线程访问它就得通过代理，当然了，这对程序员来说是透明的，因为这个功能是靠COM的 remoting层来实现的。要说和直接访问有什么能体现出来的不同，可能就是通过代理访问会慢一些，毕竟消息需要Marshalling，但这几毫秒的时间差你们地球人是很难感觉出来的( J )。

那么很明显了，只要我们把组件类型设置为 Apartment，就不会有任何线程访问冲突的问题。《Win32 多线程》侯捷

进程内/外组件的差别是COM组件的一种表现形式：.exe文件是进程外组件，dll是进程内组件。

关于COM组件线程模型的实验

单线程模型(Single)

在注册表中删除上述ThreadingModel键值，则COM组件被配置为使用单线程模型。使用单线程模型的组件只能存在于主STA，也就是进程中的第一个STA中。对于具有图形界面的Windows程序，第一个STA通常由主线程，也就是界面线程创建。具有图形界面的COM组件，比如说，ActiveX控件，常常使用单线程模型。

套间线程模型(Apartment)

设置上述ThreadingModel键值为Apartment，则COM组件被配置为使用套间线程模型。使用套间线程模型的组件只能存在于STA中。套间线程模型是在Visual Studio中使用ATL开发COM组件时默认的线程模型。

单线程模型和套间线程模型的共同点是在任何时刻只有一个线程可以直接访问组件，这个线程就是创建组件所在的STA的线程(不一定是调用CoCreateInstance创建组件的线程)。其他线程对组件的调用都是通过这个线程间接进行的：COM基础设施为STA创建一个隐藏的窗口，将其他线程对STA中组件的调用请求转化为发送给这个窗口的消息，然后由套间中唯一的线程处理消息，返回调用结果。所以，使用单线程模型和套间线程模型的组件要求消息队列，其他线程对组件的调用都是间接地通过消息队列进行的。这一点很重要。本文后面将通过代码验证这一点。

自由线程模型(Free)

设置上述ThreadingModel键值为Free，则COM组件被配置为使用自由线程模型。使用自由线程模型的组件只能存在于MTA中，可以被处于MTA中的多个线程“自由”地调用。不在MTA中的线程调用MTA组件时，COM基础设施随机选择RPC线程池中的某个RPC线程代为间接处理(RPC线程池是COM基础设施的组成部分)。由于COM基础设施没有提供任何同步方面的帮助，多个线程可以并发地调用组件的方法，所以需要编写代码对组件实施必要的保护，就像多线程编程中需要对共享资源实施保护一样。

双线程模型(Both)

设置上述ThreadingModel键值为Both，则COM组件被配置为使用双线程模型。此时组件与创建组件的线程存在于相同的套间中：既可能是STA，也可能是MTA。因为组件可能存在于MTA中，被多个线程并发访问，所以需要编写代码对组件实施必要的保护。

自由线程模型和双线程模型有一个重要的差别：采用自由线程模型的组件可以创建能够直接调用组件的工作线程；而采用双线程模型的组件不能。因为采用双线程模型的组件可能位于STA中，如果组件创建的工作线程可以直接访问组件，则工作线程也必须位于STA中(套间之外的线程对组件的调用不能直接进行)，这就违反了STA中只能有一个线程的规则，破坏了COM线程模型的同步机制。

线程中立模型(Neutral)

设置上述ThreadingModel键值为Neutral，则COM组件被配置为使用线程中立模型。使用线程中立模型的组件位于TNA中，可以被任何线程自由地、直接地访问。调用线程访问这种类型的组件时将暂时离开所属的STA或者MTA，进入TNA，直接对组件进行方法调用，调用完成后返回STA或者MTA。与采用自由线程模型和双线程模型的组件一样，必须编写代码对组件实施必要的保护，以防止多线程并发访问可能出现的问题。线程中立模型是运行在组件服务中的，不需要用户界面的组件的最优选择。

什么是COM

《Visual C++从入门到精通（视频实战版）》

COM是Component Object Model（组件对象模型）的缩写。不管用户需要什么样的产品，以下两个条件是必须要保证的：

高效：使用软件产品可以做的工作，一般而言，使用手工劳动一样能达到同样效果，只是软件可以快速处理相同工作。在众多软件的选择中，高效的软件具备无可比拟的优势。
健壮：如果一个软件系统出现很小的问题就会异常终止，导致整个系统工作无法进行，并且需要系统管理员不厌其烦地重启服务器，这样的软件还会有人购买吗？因此，健壮性也是软件产品的一个必要条件。面向对象的程序设计可以说是一次革命性的变革，因为面向对象的程序设计可以将要解决的问题和对象抽象成为数据对象，将功能和动作封装起来，并提供一些接口。但当今的计算机时代是“后OO时代”（后面向对象时代），即面向组件的时代。 COM是一种跨应用和语言共享二进制代码的方法。
跨应用：指不同的应用程序可以使用或共享COM组件。
跨语言：指不同的语言可以调用同一个COM组件。

COM提倡二进制级别的重用，而ATL则在代码重用方面不如COM，因为ATL只是在源代码级别上的重用好，但只能用于C++语言，如果不使用名称空间，还可能引起名字冲突的问题。同时，这样的重用很可能会导致工程膨胀和源代码臃肿。 Windows使用DLLs在二进制级共享代码。所以Windows程序运行经常会调用到诸如kernel32.dll、user32.dll等。但DLLs是针对C接口（Win32 SDK）而写的，它们只能被C或理解C调用规范的语言（如C++）使用。由编程语言来负责实现共享代码，这样会导致DLLs的使用受到限制。 MFC引入了另外一种MFC扩展DLLs二进制共享机制。但它的使用仍受限制，因为其只能在MFC程序中使用。 COM通过定义二进制标准解决了这些问题，即COM明确指出二进制模块（DLLs和EXEs）必须被编译成与指定的结构匹配。实际上，COM在这里充当了中间层的作用。这个标准也确切规定了在内存中如何组织COM对象。COM定义的二进制标准还必须独立于任何编程语言，这样才能对任何语言独立平等地进行对待。满足这些条件之后，就可以很方便地从其他语言中使用这些模块中的功能。由编译器负责所产生的二进制代码与标准兼容。这样使后来的人就能更容易地使用这些二进制代码。大多数COM的代码都是用C++编写的，但并非就表明编写代码一定要用C++，编写COM组件是与开发语言没有关系的，而且二进制代码也可以为所有语言所用。注意　在操作系统层次中，COM也不是Win32特有的。从理论上讲，它可以被移植到UNIX或其他操作系统中。

COM接口

COM的接口都必须从IUnknown继承，犹如在MFC中，CObject类的地位。IUnknown有三个重要的函数： 1）QueryInterface()函数。该函数的原型如下： HRESULT __stdcall QueryInterface(const IID& iid, void** ppv); 该函数的参数的含义是：

iid：标志客户所需的接口，是一个接口标志符“结构（IID）”。 
ppv：QueryInterface用来存放所请求接口的地址。

该函数的返回值可以是S_OK或E_NOINTERFACE，并且应该用SUCCEEDED或者FAILED宏验证是否调用成功。

该函数的使用方法是：假如知道一个指向IUnknown接口的指针pI，传给它一个接口标志符即可。使用例子可以如以下代码所示：

// Asks the component behind pI for its IX interface and, when the component
// supports it, calls IX::FX() once.
//
// pI: any interface pointer on the component; a NULL pointer is ignored.
void Function( IUnknown *pI )
{
    if ( pI == NULL )
    {
        return;
    }

    IX *pIX = NULL;

    // QueryInterface writes the requested interface pointer through its
    // second argument, so it must receive the ADDRESS of pIX, not its value.
    HRESULT hr = pI->QueryInterface( IID_IX, (void **) &pIX );
    if ( SUCCEEDED( hr ) )
    {
        pIX->FX();

        // A successful QueryInterface adds a reference on the returned
        // interface; release it once we are done with the pointer.
        pIX->Release();
    }
}

AddRef()函数：用于增加引用计数。
Release()函数：用于释放引用计数。

这三个函数的顺序是不能变化的。QueryInterface用于查询组件实现的其他接口，也就是看看这个组件的父类中还有哪些接口类，AddRef用于增加引用计数，Release用于减少引用计数。一般在以下两种情况下必须调用Release：

调用QueryInterface后。调用了任何得到一个接口指针的函数后。 IDispatch接口把每一个函数每一个属性编上号，Client要调用这些函数、属性的时候就把这个编号传给IDispatch接口。开发组件时一般有两种方式：

ATL：使用ATL对于用C++创建COM组件和节省空间都是一种快速简便的方法。如果不需要MFC自动提供的所有内置功能，使用ATL创建控件。
MFC：MFC允许创建具有完整功能的应用程序、ActiveX控件和活动文档。如果已经使用MFC创建了控件，可能需要继续使用MFC进行开发。当创建新控件时，如果不需要MFC的所有内置功能，可考虑使用ATL。ATL与传统的C++模板库的区别：ATL通常只作为源代码提供，并且在本质上没有固有的分层结构或没有必要有分层结构；不是从派生类得到所需的功能，而是从模板中实例化类。

COM组件、ActiveX、OCX区别

COM 组件就是一组接口的集合，实现了 IDispatch 接口的 COM 组件叫做自动化组件。在自动化组件的基础上，再实现规定的接口就称为 ActiveX 组件，Ocx 是 ActiveX 的文件载体。
一般来讲，一个 ActiveX 对应一个 Ocx 文件，如果愿意，一个Ocx 文件也可以包含多个 ActiveX 控件。

COM是microsoft制定的一个组件软件标准，跟unix上的CORBA一样。凡是遵循COM标准开发出来的组件称为COM组件。简单的说就是要实现在二进制方式的重用。
在windows平台上，COM的实现形式有DLL(进程内组件)和EXE(进程外组件)2种。
ActiveX是Microsoft提出的一组使用COM（Component Object Model，部件对象模型）使得软件部件在网络环境中进行交互的技术集。它与具体的编程语言无关。作为针对Internet应用开发的技术，ActiveX被广泛应用于WEB服务器以及客户端的各个方面。同时，ActiveX技术也被用于方便地创建普通的桌面应用程序，此外ActiveX一般具有界面。

展开全文 >>

UML

2018-07-16

UML软件

PowerDesigner

StarUML

UML类关系

在UML类图中，常见的有以下几种关系:泛化（Generalization）, 实现（Realization）,关联（Association）,聚合（Aggregation）,组合(Composition)，依赖(Dependency)

在UML类图中，类之间的关系可以分成：关联(association)、聚合(aggregation)、组合(composition)、依赖(dependency)、泛化(generalization)/继承(inheritance)和实现(realization)。这六种关系如下图所描绘：

泛化(generalization)/继承(inheritance)

【泛化关系】：是一种继承关系,它指定了子类如何特化父类的所有特征和行为

【箭头指向】：带三角箭头的实线，箭头指向父类

实现（Realization)

【实现关系】：是一种类与接口的关系，表示类是接口所有特征和行为的实现

【箭头指向】：带三角箭头的虚线，箭头指向接口

关联（Association）

【关联关系】：是一种拥有的关系,它使一个类知道另一个类的属性和方法；

关联可以是双向的，也可以是单向的。双向的关联可以有两个箭头或者没有箭头，单向的关联有一个箭头。

【代码体现】：成员变量

【箭头及指向】：带普通箭头的实心线，指向被拥有者

聚合（Aggregation）

【聚合关系】：是整体与部分的关系

聚合关系是关联关系的一种，是强的关联关系；关联和聚合在语法上无法区分，必须考察具体的逻辑关系。

【代码体现】：成员变量

【箭头及指向】：带空心菱形的实心线，菱形指向整体

组合(Composition)

【组合关系】：是整体与部分的关系，例如没有公司就不存在部门。组合关系是关联关系的一种，是比聚合关系还要强的关系，它要求普通的聚合关系中代表整体的对象负责代表部分的对象的生命周期。

【代码体现】：成员变量

【箭头及指向】：带实心菱形的实线，菱形指向整体

依赖(Dependency)

【依赖关系】：是一种使用的关系，即一个类的实现需要另一个类的协助，所以要尽量不使用双向的互相依赖。

【代码表现】：局部变量、方法的参数或者对静态方法的调用

【箭头及指向】：带箭头的虚线，指向被使用者

各种关系的强弱顺序：

泛化= 实现> 组合> 聚合> 关联> 依赖

参考

https://blog.csdn.net/tianhai110/article/details/6339565

https://www.jianshu.com/p/4cd95d4ddb59

展开全文 >>

算法图解

2018-06-25

算法图解

【美】Aditya Bhargava

关于本书

www.manning.com/books/grokking-algorithms

https://github.com/egonschiele/grokking_algorithms

1.2.1 更佳的查找方式

一般而言，对于包含n个元素的列表，用二分查找最多需要$\log_2 n$步，而简单查找最多需要n步。

1.3.4 一些常见的大O运行时间

下面按从快到慢的顺序列出了你经常会遇到的5种大O运行时间。
O(log n)，也叫对数时间，这样的算法包括二分查找。
O(n)，也叫线性时间，这样的算法包括简单查找。
O(n log n)，这样的算法包括第4章将介绍的快速排序——一种速度较快的排序算法。
O(n^2)，这样的算法包括第2章将介绍的选择排序——一种速度较慢的排序算法。
O(n!)，这样的算法包括接下来将介绍的旅行商问题的解决方案——一种非常慢的算法。

3.1 递归

Leigh Caldwell在Stack Overflow上说的一句话：“如果使用循环，程序的性能可能更高；如果使用递归，程序可能更容易理解。如何选择要看什么对你来说更重要。”

3.3.2 递归调用栈

使用栈虽然很方便，但是也要付出代价：存储详尽的信息可能占用大量的内存。每个函数调用都要占用一定的内存，如果栈很高，就意味着计算机存储了大量函数调用的信息。在这种情况下，你有两种选择。
重新编写代码，转而使用循环。
使用尾递归

第4章快速排序

学习快速排序——一种常用的优雅的排序算法。快速排序使用分而治之的策略。

4.2 快速排序

语言标准库中的函数qsort实现的就是快速排序

4.3 再谈大O表示法

选择排序，其运行时间为O(n^2)，速度非常慢。
还有一种名为合并排序（merge sort）的排序算法，其运行时间为O(n log n)，比选择排序快得多！快速排序的情况比较棘手，在最糟情况下，其运行时间为O(n^2)。
与选择排序一样慢！但这是最糟情况。在平均情况下，快速排序的运行时间为O(n logn)。

5.3 冲突

理想情况下，散列函数总是将不同的键映射到数组的不同位置；但实际上几乎不可能编写出这样的散列函数，不同的键可能被映射到同一个位置，这就是冲突。

处理冲突的方式很多，最简单的办法如下：如果两个键映射到了同一个位置，就在这个位置存储一个链表。

5.4 性能

避免冲突，需要有：
较低的填装因子；
良好的散列函数。

第6章广度优先搜索

广度优先搜索让你能够找出两样东西之间的最短距离

6.3.2 队列

队列是一种先进先出（First In First Out，FIFO）的数据结构，而栈是一种后进先出（Last In First Out，LIFO）的数据结构。

第7章狄克斯特拉算法

介绍狄克斯特拉算法，让你能够找出加权图中前往X的最短路径。

介绍图中的环，它导致狄克斯特拉算法不管用。

7.2 术语

狄克斯特拉算法用于每条边都有关联数字的图，这些数字称为权重（weight）。

要计算非加权图中的最短路径，可使用广度优先搜索。要计算加权图中的最短路径，可使用狄克斯特拉算法

7.4 负权边

如果有负权边，就不能使用狄克斯特拉算法

在包含负权边的图中，要找出最短路径，可使用另一种算法——贝尔曼-福德算法（Bellman-Ford algorithm）。

7.5 实现

节点的所有邻居都存储在散列表中。

一个散列表来存储每个节点的开销。

节点的开销指的是从起点出发前往该节点需要多长时间。

还需要一个存储父节点的散列表

需要一个数组，用于记录处理过的节点，因为对于同一个节点，你不用处理多次。

第8章贪婪算法

学习贪婪策略

8.1 教室调度问题

贪婪算法很简单：每步都采取最优的做法

第9章动态规划

学习动态规划，这是一种解决棘手问题的方法，它将问题分成小问题，并先着手解决这些小问题。

9.1.2 动态规划

动态规划先解决子问题，再逐步解决大问题。

9.4 小结

需要在给定约束条件下优化某种指标时，动态规划很有用。
问题可分解为离散子问题时，可使用动态规划来解决。
每种动态规划解决方案都涉及网格。
单元格中的值通常就是你要优化的值。
每个单元格都是一个子问题，因此你需要考虑如何将问题分解为子问题。
没有放之四海皆准的计算动态规划解决方案的公式。

第10章 K最近邻算法

K最近邻算法创建分类系统

10.2.1 特征抽取

要计算两点的距离，可使用毕达哥拉斯公式。

10.2.2 回归

使用KNN来做两项基本工作——分类和回归：
分类就是编组；
回归就是预测结果（如一个数字）。

余弦相似度
前面计算两位用户的距离时，使用的都是距离公式。

余弦相似度不计算两个矢量的距离，而比较它们的角度，因此更适合处理前面所说的情况。本书不讨论余弦相似度，但如果你要使用KNN，就一定要研究研究它！

10.3.2 创建垃圾邮件过滤器

垃圾邮件过滤器使用一种简单算法——朴素贝叶斯分类器（Naive Bayes classifier）

10.4 小结

KNN用于分类和回归，需要考虑最近的邻居。
分类就是编组。
回归就是预测结果（如数字）。
特征抽取意味着将物品（如水果或用户）转换为一系列可比较的数字。
能否挑选合适的特征事关KNN算法的成败。

如果考虑的邻居太少，结果很可能存在偏差。一个不错的经验规则是：如果有N位用户，应考虑sqrt(N)个邻居。

11.1 树

二叉查找树

对于其中的每个节点，左子节点的值都比它小，而右子节点的值都比它大。

也有一些处于平衡状态的特殊二叉查找树，如红黑树。

B树是一种特殊的二叉树，数据库常用它来存储数据。

请研究如下数据结构：B树，红黑树，堆，伸展树。

11.3 傅里叶变换

绝妙、优雅且应用广泛的算法少之又少，傅里叶变换算是一个。

傅里叶变换非常适合用于处理信号，可使用它来压缩音乐。

11.4 并行算法

并行算法

在最佳情况下，排序算法的速度大致为O(n logn)。众所周知，对数组进行排序时，除非使用并行算法，否则运行时间不可能为O(n)！对数组进行排序时，快速排序的并行版本所需的时间为O(n)。

11.5 MapReduce

有一种特殊的并行算法正越来越流行，它就是分布式算法。在并行算法只需两到四个内核时，完全可以在笔记本电脑上运行它，但如果需要数百个内核呢？在这种情况下，可让算法在多台计算机上运行。MapReduce是一种流行的分布式算法，你可通过流行的开源工具Apache Hadoop来使用它。

11.5.1 分布式算法为何很有用

分布式算法非常适合用于在短时间内完成海量工作，其中的MapReduce基于两个简单的理念：映射（map）函数和归并（reduce）函数。

11.5.3 归并函数

映射是将一个数组转换为另一个数组。

而归并是将一个数组转换为一个元素。

11.6 布隆过滤器和HyperLogLog

布隆过滤器和HyperLogLog

11.6.1 布隆过滤器

布隆过滤器提供了解决之道。布隆过滤器是一种概率型数据结构，它提供的答案有可能不对，但很可能是正确的

11.6.2 HyperLogLog

HyperLogLog是一种类似于布隆过滤器的算法

面临海量数据且只要求答案八九不离十时，可考虑使用概率型算法！

11.7 SHA算法

　SHA算法

11.7.1 比较文件

另一种散列函数是安全散列算法（secure hash algorithm，SHA）函数。给定一个字符串，SHA返回其散列值。

11.7.2 检查密码

当前，最安全的密码散列函数是bcrypt，

11.8 局部敏感的散列算法

希望散列函数是局部敏感的。在这种情况下，可使用Simhash

如果你对字符串做细微的修改，Simhash生成的散列值也只存在细微的差别。这让你能够通过比较散列值来判断两个字符串的相似程度，这很有用！

11.9 Diffie-Hellman密钥交换

这里有必要提一提Diffie-Hellman算法，它以优雅的方式解决了一个古老的问题：如何对消息进行加密，以便只有收件人才能看懂呢？

Diffie-Hellman使用两个密钥：公钥和私钥

Diffie-Hellman算法及其替代者RSA依然被广泛使用

11.10 线性规划

线性规划用于在给定约束条件下最大限度地改善指定的指标

线性规划使用Simplex算法

参考

http://www.cnblogs.com/OctoptusLian/p/9026319.html?dt_platform=other&dt_dapp=1

展开全文 >>

啊哈算法-读书笔记

2018-06-22

本书涉及的数据结构有栈、队列、树、并查集、堆和图等；算法有各种排序、枚举、深度和广度优先搜索、图上的遍历，当然还有图论中不可缺少的四种最短路径算法、两种最小生成树算法、割点与割边算法、二分图的最大匹配算法等。

排序

桶排序

时间复杂度O(N+M)

冒泡排序

冒泡排序的基本思想是：每次比较两个相邻的元素，如果它们的顺序错误就把它们交换过来。

从大到小排序。

冒泡排序的核心部分是双重嵌套循环。时间复杂度是O(N^2)。

快速排序

首先找一个数作为基准数，设置两个哨兵变量，指向序列最左边与最右边，找到右边小于基准数的数与左边大于基准数的数，两数交换，两哨兵变量相遇后，交换基准数与哨兵变量所指的数，然后对两哨兵变量左边与右边做相同的操作。

快速排序的每一轮处理其实就是将这一轮的基准数归位，直到所有的基准数都归位。

快速排序的最差时间复杂度和冒泡排序一样都是O(N^2),他的平均时间复杂度为O(NlogN)。

快速排序基于“二分”法。

排序算法还有选择排序、计数排序、基数排序、插入排序、归并排序、堆排序。堆排序是基于二叉树的排序。

栈、队列、链表

可使用两个数组模拟链表。第一个整数数组data是用来存放序列中的具体数字，另外一个整数数组right是用来存放当前序列中的每一个元素右边的元素在数组data中位置。

枚举

穷举法

万能的搜索

深度优先搜索

栈

广度优先搜索

队列

图的遍历

也是用深度或广度优先搜索

可使用二维数组来存储一个图。二维数组沿主对角线对称，说明是无向图。

广度优先搜索可找到两点之间经过最少点的路径。

最短路径

求有权图中两点最短路径，可使用深度优先搜索、广度优先搜索、Floyd、Bellman-Ford、Dijkstra等。

	Floyd	Dijkstra	Bellman-Ford	队列优化的Bellman-Ford
空间复杂度	O(N^2)	O(M)	O(M)	O(M)
时间复杂度	O(N^3)	O((M+N)logN)	O(NM)	最坏是O(NM)
适用情况	1.稠密图2.和顶点关系密切	1.稠密图2.和顶点关系密切	1.稀疏图2.和边关系密切	1.稀疏图2.和边关系密切
负权	可以解决负权	不能解决负权	可以解决负权	可以解决负权

注：其中N表示点数，M表示边数

Floyd算法虽然总体时间复杂度高，但是可以解决负权边(不能解决负权环，实际上这几种都无法解决负权回路，因为一直循环下去总能找到更小的路径)，并且均摊到每一点对上，在所有的算法中还是比较好的。Floyd算法代码复杂度小也是一大优势。Dijkstra算法最大的弊端就是无法适应有负权边的图，但Dijkstra具有很好的可扩展性；另外，Dijkstra算法在从尚未确定最短路径的顶点集合中选择最小值时可以使用堆优化，这样算法的时间复杂度可以达到O((M+N)logN)。当图中含有负权边时，使用Bellman-Ford或者队列优化的Bellman-Ford算法。

Floyd-Warshall

每个顶点都有可能使另外两个顶点之间的路程变短。

核心代码：

/*
 * Floyd-Warshall core: e[i][j] holds the current shortest known distance
 * from vertex i to vertex j (vertices are numbered 1..n).
 * After the iteration for a given k, e[i][j] is the shortest path from i to j
 * that only passes through intermediate vertices numbered 1..k.
 */
for(k=1;k<=n;k++)
{
    for(i=1;i<=n;i++)
    {
        /* j must cover 1..n like i and k, otherwise column n is never relaxed */
        for(j=1;j<=n;j++)
        {
            /* going i -> k -> j is shorter than the best i -> j known so far */
            if(e[i][j]>e[i][k]+e[k][j])
            {
                e[i][j]=e[i][k]+e[k][j];
            }
        }
    }
}

从i号顶点到j号顶点只经过前k号点的最短路径。(动态规划)

可求出任意两个点之间的最短路径。不能解决带有“负权回路”或者叫“负权环”，因为带有“负权回路”的图没有最短路径。

Dijkstra算法–单源最短路径

边数M少于N^2的叫稀疏图，M相对较大的为稠密图。

使用堆优化并用邻接表存图，可使时间复杂度优化到O((M+N)logN)。

贪心策略的算法。

不能计算有负权边的图。

Bellman-Ford–解决负权边

/*
 * Bellman-Ford core: run n-1 rounds, and in every round try to relax each of
 * the m edges. Edge i goes from u[i] to v[i] with weight w[i]; dis[x] is the
 * current shortest known distance from the source to vertex x.
 */
for(k=1;k<=n-1;k++)
{
    for(i=1;i<=m;i++)
    {
        /* reaching v[i] through edge i is shorter than the best known distance */
        if(dis[v[i]]>dis[u[i]]+w[i])
        {
            dis[v[i]]=dis[u[i]]+w[i];
        }
    }
}

外循环n-1次（n为顶点数），内循环m次（m为边的个数）。dis数组存储源点到其他点的最短距离。u、v、w三个数组记录边的信息。例如第i条边存储在u[i]、v[i]和w[i]中，表示从顶点u[i]到顶点v[i]这条边（u[i]->v[i]）权值为w[i]。

外循环n-1次，因为在一个含有n个顶点的图中，任意两点之间的最短路径最多包含n-1条边。

如果在n-1轮松弛后最短路径仍然会发生变化，则该图必然存在负权回路。

优化

可以添加一个变量标记数组dis在本轮松弛中是否发生了变化，如果没有变化，则可以提前跳出循环。

每次仅对最短路径估计值发生变化了的顶点的所有出边执行松弛操作：Bellman-Ford队列优化

Bellman-Ford队列优化

队列广度优先搜索，

每次选取队首顶点u,对顶点u的所有出边进行松弛操作。例如有一条u->v的边,如果通过u->v这条边使得源点到顶点v的最短路程变短(dis[u]+e[u][v]<dis[v]),且顶点v不在当前的队列中,就将顶点v放入队尾。需要注意的是,同一个顶点同时在队列中出现多次是毫无意义的,所以我们需要一个数组来判重(判断哪些点已经在队列中)。在对顶点u的所有出边松弛完毕后,就将顶点u出队。接下来不断从队列中取出新的队首顶点再进行如上操作,直至队列空为止。

如果某个点进入队列的次数超过n次，则存在负环。

还有最短路径SPFA快速算法，也是基于队列优化的Bellman-Ford算法。

神奇的树

树其实就是不含回路的无向图

二叉树的特点是每个节点最多两个子节点。

如果二叉树中每个内部节点都有两个子节点，叫做满二叉树。深度为h且有2^h-1个节点。

完全二叉树：除h层外，其他层的节点个数达到最大数。

堆–神奇的优先队列

堆是一种特殊的完全二叉树

所有父节点都比子节点小，最小堆。反之，最大堆。

优先队列：支持插入元素和寻找最大（小）值元素的数据结构。

如果使用普通队列来实现这两个功能，那么寻找最大元素需要枚举整个队列，这样的时间复杂度比较高。如果使用已排序好的数组，那么插入一个元素则需要移动很多元素，时间复杂度依旧很高。而堆就是一种优先队列的实现，可以很好地解决这两种操作。

堆还经常被用来求一个数列中第K大的数。只需要建立一个大小为K的最小堆，堆顶就是第K大的数。如果求一个数列中第K小的数，只需要建立一个大小为K的最大堆，堆顶就是第K小的数，这种方法的时间复杂度是O(NlogK)。当然你也可以用堆来求前K大的数和前K小的数。

并查集是一种树型的数据结构，用于处理一些不相交集合（Disjoint Sets）的合并及查询问题。常常在使用中以森林来表示。

堆排序的时间复杂度也是O(NlogN)与快速排序一样。

树还有很多神奇的用法：线段树、树状数组、Trie树（字典树）、二叉搜索树、红黑树（一种平衡二叉搜索树）。

还能更好吗——微软亚洲研究院面试

主元素问题

声明一个变量count = 0，声明一个常量size等于数组大小。
假设该数组的第一个元素a(1)为主元素，让其与a(2)进行比较，若相同，则使变量count+1，若不同，则count-1。然后继续比较a(3)。以此类推。

当与a(n)比较后，count = -1时，将count重新归为0，并重新假设a(n+1)为主元素，并继续与a(n+2)作比较。

当count>=(size-m)/2时，此时假设的主元素a(m)即为实际的主元素。
或遍历完整个数组后，当前假设的主元素为实际主元素。

book

艾伦·图灵传：如谜的解密者
图灵的秘密
编程之美
算法导论
思考的乐趣
数学之美
具体数学
算法帝国

参考

《啊哈！算法》读书笔记

展开全文 >>

url

UTF_8与GBK

MultiByteToWideChar、WideCharToMultiByte

ATL封装_bstr_

iconv

使用C++11标准库进行编码转换

ICU

在各种字符串类型之间进行转换

从 char 转换 *

从 wchar_t 转换 *

从 _bstr_t 转换

从 CComBSTR 转换

从 CString 转换

从 basic_string 转换

从 system:: string 转换

字符串Format实现

wstring 与 string

参考

book

Zeal

MFC创建选项

说你对Windows消息机制的理解

数据库编程基础

OLE DB

COM线程模型

组件线程属性 | STA线程 | MTA线程

单线程模型(Single)

套间线程模型(Apartment)

自由线程模型(Free)

双线程模型(Both)

线程中立模型(Neutral)

什么是COM

COM接口

UML软件

UML类关系

泛化(generalization)/继承(inheritance)

实现（Realization)

关联（Association）

聚合（Aggregation）

组合(Composition)

依赖(Dependency)

各种关系的强弱顺序：

参考

参考

排序

桶排序

冒泡排序

快速排序

栈、队列、链表

枚举

万能的搜索

深度优先搜索

广度优先搜索

图的遍历

最短路径

Floyd-Warshall

Dijkstra算法–单源最短路径

Bellman-Ford–解决负权边

优化

Bellman-Ford队列优化

神奇的树

堆–神奇的优先队列

更多精彩算法

还能更好吗——微软亚洲研究院面试

book

参考

ATL封装`_bstr_`

从 `_bstr_t` 转换