#include <string.h>
 
#include <iostream>
#include <string>
using namespace std;
 
#include <atlconv.h>
 
// Operating modes accepted by the `flag` parameter of unicode_bytes().
enum
{
    UNICODE_CALC_SIZE = 1,  // only count the code units the input decodes to
    UNICODE_GET_BYTES = 2   // decode and store the code units into the output buffer
};

// Returns the numeric value (0-15) of the hexadecimal digit `c`,
// or -1 when `c` is not a hexadecimal digit.
static int unicode_hex_digit_value(char c)
{
    if (c>='0' && c<='9')
    {
        return c-'0';
    }
    if (c>='a' && c<='f')
    {
        return c-'a'+10;
    }
    if (c>='A' && c<='F')
    {
        return c-'A'+10;
    }
    return -1;
}

// Converts text containing \uXXXX escape sequences into wchar_t code units.
//
// Decoding rules:
//   - A backslash followed by 'u' consumes up to four following characters and
//     produces ONE code unit. Hex digits are read until the first non-hex
//     character or the end of the input; missing or invalid digits are not an
//     error (no valid digit at all yields the value 0).
//   - Every other character, including a backslash that does not start a \u
//     escape, is copied through as one code unit holding its unsigned byte value.
//
// Parameters:
//   p_unicode_escape_chars  NUL-terminated input text; it is only read.
//   bytes                   output buffer, used only with UNICODE_GET_BYTES. It
//                           must have room for the count reported by a
//                           UNICODE_CALC_SIZE call. No terminator is written.
//   flag                    UNICODE_CALC_SIZE to count only, UNICODE_GET_BYTES to
//                           decode into `bytes`.
//
// Returns the number of code units counted or written. Returns 0 when the input
// is NULL, when `bytes` is NULL in UNICODE_GET_BYTES mode, or when `flag` is
// neither of the two modes.
//
// Typical use:
//   int n_length=unicode_bytes(text,NULL,UNICODE_CALC_SIZE);
//   wchar_t *buffer=new wchar_t[n_length+1];
//   unicode_bytes(text,buffer,UNICODE_GET_BYTES);
//   buffer[n_length]=0;
//   ...
//   delete[] buffer;
int unicode_bytes(char* p_unicode_escape_chars,wchar_t *bytes,int flag)
{
    const bool store_units=(flag==UNICODE_GET_BYTES);
    const bool count_only=(flag==UNICODE_CALC_SIZE);

    if (p_unicode_escape_chars==NULL || (store_units && bytes==NULL))
    {
        return 0;
    }

    const int length=static_cast<int>(strlen(p_unicode_escape_chars));
    int unicode_count=0;

    for (int char_index=0;char_index<length;char_index++)
    {
        const char current=p_unicode_escape_chars[char_index];

        // Default: pass the character through as its unsigned byte value.
        unsigned int code_unit=static_cast<unsigned char>(current);

        const bool is_escape=(current=='\\'
                              && char_index+1<length
                              && p_unicode_escape_chars[char_index+1]=='u');
        if (is_escape)
        {
            // Never look past the terminating NUL, even if the escape is truncated.
            const int digits_begin=char_index+2;
            int digits_end=digits_begin+4;
            if (digits_end>length)
            {
                digits_end=length;
            }

            code_unit=0;
            for (int digit_index=digits_begin;digit_index<digits_end;digit_index++)
            {
                const int digit=unicode_hex_digit_value(p_unicode_escape_chars[digit_index]);
                if (digit<0)
                {
                    break;
                }
                code_unit=code_unit*16+static_cast<unsigned int>(digit);
            }

            // Skip 'u' and the four digit positions; the loop increment then
            // moves past the last of them. Both modes skip identically so the
            // counting pass and the decoding pass always agree.
            char_index+=5;
        }

        if (store_units)
        {
            bytes[unicode_count++]=static_cast<wchar_t>(code_unit);
        }
        else if (count_only)
        {
            unicode_count++;
        }
    }

    return unicode_count;
}
 
string UnEscape(char* p_unicode_escape_chars)
{
    int nBytes=unicode_bytes(p_unicode_escape_chars,NULL,UNICODE_CALC_SIZE);
 
    wchar_t *p_bytes=new wchar_t[nBytes+sizeof(wchar_t)];
    unicode_bytes(p_unicode_escape_chars,p_bytes,UNICODE_GET_BYTES);
    p_bytes[nBytes]=0;
 
    USES_CONVERSION;
    string cs_return=W2A((wchar_t*)p_bytes);
 
    delete[] p_bytes;
 
    return cs_return;
}
 
int _tmain(int argc, _TCHAR* argv[])
{
    // 发送成功条
    // \u53d1\u9001\u6210\u529f1\u6761
    char* p_unicode_escape_chars="\\u53d1\\u9001\\u6210\\u529f1\\u6761";
    cout<<UnEscape(p_unicode_escape_chars)<<endl;
    return 0;
}
 

// Permalink: C++ version of UnEscape
// https://blog.qingfengju.com/index.asp?id=245
//
// Category: Win32/C++   Views: 15558   Published: 2011/3/27 12:21:14