A handy header file for Windows locale conversion

/********************************************************************
created: 2012/06/28
created: 28:6:2012 23:45
author: yew

purpose: helper functions for locale conversion
*********************************************************************/

#pragma once

#ifndef _WINNLS_
extern “C” __declspec(dllimport) int __stdcall MultiByteToWideChar(
unsigned int CodePage, unsigned long dwFlags,
const char* lpMultiByteStr, int cbMultiByte,
wchar_t* lpWideCharStr,int cchWideChar);
extern “C” __declspec(dllimport) int __stdcall WideCharToMultiByte(
unsigned int CodePage, unsigned long dwFlags,
const wchar_t* lpWideCharStr, int cchWideChar,
char* lpMultiByteStr, int cbMultiByte,
const char* lpDefaultChar, int* lpUsedDefaultChar);
#define CP_ACP 0 // default to ANSI code page
#define CP_OEMCP 1 // default to OEM code page
#define CP_THREAD_ACP 3 // current thread’s ANSI code page
#define CP_UTF7 65000 // UTF-7 translation
#define CP_UTF8 65001 // UTF-8 translation
#endif
#ifndef CP_UTF16
#define CP_UTF16 65002 // UTF-16 translation
#endif

#include <string>
#include <xstring>

namespace mylocale
{
namespace detail
{
// Minimal owner of an array allocated with new[]: the array is released
// with delete[] when the owner is destroyed.
//
// Copying is disabled. A copy would hold the same pointer as the original
// and both destructors would delete[] it.
template<typename T>
class scoped_array
{
    T* t_; // owned array; may be null

    // Declared private and intentionally left undefined to forbid copies.
    scoped_array(const scoped_array&);
    scoped_array& operator=(const scoped_array&);
public:
    // Takes ownership of t, which must have been allocated with new[]
    // (or be null).
    explicit scoped_array(T* t=NULL):t_(t){}
    // delete[] on a null pointer is a no-op, so no null check is needed.
    ~scoped_array(){delete[] t_;}
    // Returns the owned pointer; ownership stays with this object.
    inline T* get() const {return t_;}
};

// _str: reduces a string-like argument to a raw character pointer so the
// conversion routines only have to deal with const char* / const wchar_t*.

// Generic form: hands the argument back, implicitly converted to Target.
// Target cannot be deduced, so this form is only selected when the
// template arguments are spelled out explicitly.
template<typename Target,typename Source>
inline Target _str(const Source& arg) { return arg; }

// Raw narrow pointer: passed through unchanged.
inline const char* _str(const char* arg) { return arg; }

// Raw wide pointer: passed through unchanged.
inline const wchar_t* _str(const wchar_t* arg) { return arg; }

// std::string: exposes the internal null-terminated buffer. The pointer is
// only valid while the string it came from is alive and unmodified.
inline const char* _str(const std::string& arg)
{
    const char* const raw = arg.c_str();
    return raw;
}

// std::wstring: same as above for wide strings.
inline const wchar_t* _str(const std::wstring& arg)
{
    const wchar_t* const raw = arg.c_str();
    return raw;
}

// Same-encoding "conversion": builds a Target from the first len characters
// of arg without changing the encoding.
//
//   arg  source characters; a null pointer yields an empty Target
//   len  number of characters to take:
//          < 0  arg is null-terminated and is used as a whole
//          = 0  the result is an empty Target
//          > 0  exactly len characters are read from arg
//
// Target must be default-constructible and constructible from a
// null-terminated const T*. Because the result is built from a
// null-terminated pointer, it ends at the first embedded null character
// even if len reaches further.
template<typename Target,typename T>
Target t_to_t(const T* arg,int len)
{
    // Null input gives an empty result, like the code-page conversion paths.
    if (!arg || len==0)
        return Target();
    if (len<0)
        return arg;

    // Copy exactly len characters into a self-terminating temporary so the
    // source does not have to be null-terminated at position len.
    typedef std::basic_string<T> buffer_type;
    const buffer_type head(arg,static_cast<typename buffer_type::size_type>(len));
    return Target(head.c_str());
}

template<typename Target>
Target a_to_w(int cp,const char* arg,int len)
{
int n=MultiByteToWideChar(cp,0,arg,len,0,0);
if (n>0)
{
scoped_array<wchar_t> buf(new wchar_t[n+1]);
int n2=MultiByteToWideChar(cp,0,arg,len,buf.get(),n);
buf.get()[n2]=0;
return Target(buf.get());
}
return Target();
}
// Wide-source overload: the input already consists of wide characters, so
// it is only copied into Target via t_to_t.
template<typename Target>
inline Target a_to_w(int cp,const wchar_t* arg,int len)
{
    (void)cp; // not needed for a wide source
    return t_to_t<Target,wchar_t>(arg,len);
}
template<typename Target>
Target a_to_a(int from,int cp,const wchar_t* arg,int len)
{
int n=WideCharToMultiByte(cp,0,arg,len,0,0,0,0);
if (n>0)
{
scoped_array<char> buf(new char[n+1]);
int n2=WideCharToMultiByte(cp,0,arg,len,buf.get(),n,0,0);
buf.get()[n2]=0;
return Target(buf.get());
}
return Target();
}

template<typename Target>
Target a_to_a(int from,int to,const char* arg,int len)
{
if (from==to)
return t_to_t<Target,char>(arg,len);
else
{
// convert to utf16 first
int n=MultiByteToWideChar(from,0,arg,len,0,0);
if (n>0)
{
scoped_array<wchar_t> buf(new wchar_t[n+1]);
int n2=MultiByteToWideChar(from,0,arg,len,buf.get(),n);
buf.get()[n2]=0;

// convert to user code page
return a_to_a<Target>(from,to,buf.get(),n2);
}
return Target();
}
}

// Conversion front end for sources encoded in code page cp.
//
// Every function takes the result type as the explicit template argument
// Target; Source is deduced and reduced to a raw pointer by _str. A wide
// (wchar_t) source is routed to the wide overloads of a_to_w / a_to_a,
// which do not use cp. len is forwarded unchanged and defaults to -1.
template<int cp>
class base
{
public:
    // Source -> wide characters.
    template<typename Target,typename Source>
    static Target to_utf16(const Source& arg,int len=-1)
    {
        return a_to_w<Target>(cp,_str(arg),len);
    }

    // Source -> caller-chosen code page cp_user. The fixed-target helpers
    // below all funnel through this function.
    template<typename Target,typename Source>
    static Target to_user_cp(int cp_user,const Source& arg,int len=-1)
    {
        return a_to_a<Target>(cp,cp_user,_str(arg),len);
    }

    // Source -> CP_ACP.
    template<typename Target,typename Source>
    static Target to_ansi(const Source& arg,int len=-1)
    {
        return to_user_cp<Target>(CP_ACP,arg,len);
    }

    // Source -> CP_UTF8.
    template<typename Target,typename Source>
    static Target to_utf8(const Source& arg,int len=-1)
    {
        return to_user_cp<Target>(CP_UTF8,arg,len);
    }

    // Source -> CP_UTF7.
    template<typename Target,typename Source>
    static Target to_utf7(const Source& arg,int len=-1)
    {
        return to_user_cp<Target>(CP_UTF7,arg,len);
    }

    // Source -> CP_OEMCP.
    template<typename Target,typename Source>
    static Target to_oem(const Source& arg,int len=-1)
    {
        return to_user_cp<Target>(CP_OEMCP,arg,len);
    }

    // Source -> CP_THREAD_ACP.
    template<typename Target,typename Source>
    static Target to_thread(const Source& arg,int len=-1)
    {
        return to_user_cp<Target>(CP_THREAD_ACP,arg,len);
    }
};
}

// Source-encoding tags. Each one fixes the code page that narrow (char)
// input is interpreted in; the to_* functions come from detail::base.

// Narrow input is in CP_ACP.
class ansi
    : public detail::base<CP_ACP>
{
};

// Narrow input is in CP_UTF8.
class utf8
    : public detail::base<CP_UTF8>
{
};

// Tagged with CP_UTF16.
// NOTE(review): presumably meant for wchar_t sources, where the code page
// is not used -- confirm.
class utf16
    : public detail::base<CP_UTF16>
{
};

// Narrow input is in CP_UTF7.
class utf7
    : public detail::base<CP_UTF7>
{
};

// Narrow input is in CP_OEMCP.
class oem
    : public detail::base<CP_OEMCP>
{
};

// Narrow input is in CP_THREAD_ACP.
class thread
    : public detail::base<CP_THREAD_ACP>
{
};
}

/*
sample
std::string utf8test = mylocale::ansi::to_utf8<std::string>("中文text");
std::string utf8test = mylocale::FROM::to_TO<std::string>(SOURCE);

SOURCE: a string in the FROM code page, or a UTF-16 (wchar_t) string
FROM: the source code page of SOURCE; it is ignored if SOURCE is UTF-16
TO: the target code page
*/

Advertisements

1 Comment

  1. For users who work with UTF-8 std::string and UTF-16 std::wstring, the usage can be made more concise,
    like this:

    #define utf8_to_utf16 mylocale::utf8::to_utf16
    #define utf16_to_utf8 mylocale::utf8::to_utf8
    std::wstring utf16test = utf8_to_utf16("this is utf8");
    std::string utf8test = utf16_to_utf8(L"this is utf16");

    Like

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s