00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Text_Utf8Utils_h
00025 #define Tanl_Text_Utf8Utils_h
00026
00027
00028 #include <sys/types.h>
00029 #include <string.h>
00030
00031 #if defined(_WIN32)
00032 # include <tchar.h>
00033 #endif
00034
00035
00036 #include "text.h"
00037 #include "Char.h"
00038
00039 namespace Tanl {
00040 namespace Text {
00041
00045 namespace Unicode
00046 {
00047 typedef unsigned char byte;
00048
00049 void incUtfPtr(const char*& ptr);
00050 void decUtfPtr(const char*& ptr, const char* begin);
00051 size_t utfDiff(const char* end, const char* begin);
00052 bool isAscii(const char* begin, const char* end);
00053
00054 UCS2 fetchChar(const char*& begin, const char* end);
00055 inline UCS2 toChar(const char* begin, const char* end)
00056 {
00057 const char* it = begin;
00058 return fetchChar(it, end);
00059 }
00060 int byteLength(const char* it);
00064 int utfLength(UCS2 ch);
00065
00069 int utfLength(const UCS2* begin, const UCS2* end);
00073 int ucLength(const UCS2* ch);
00074 inline int uc2Length(const UCS2* ch) { return ucLength(ch); }
00078 int uc4Length(const UCS4* ch);
00089 int ToUtf8(byte*& it, byte* end, UCS2 ucc);
00090
00091 int ToUtf8(char*& it, Char c, char* end = 0) {
00092 return ToUtf8((byte*&)it, (byte*)end, (UCS2)c);
00093 }
00100 int ToUtf8(byte* it, const size_t max, const UCS2* ucbegin, const size_t len);
00101
00111 UCS2* ToUc(UCS2* buffer, const char* source, int length);
00122 UCS2* ToUc(UCS2* buffer, const UCS4* source, int length = -1);
00133 UCS2* ToUc(UCS2* buffer, const wchar_t* source, int length = -1);
00137 UCS2* utf8ToUc(UCS2* buffer, const char* source, int length = -1);
00138
00139 inline size_t stringLength(const char* str) { return strlen(str); }
00140 inline size_t stringLength(const UCS2* str) { return ucLength(str); }
00141 inline size_t stringLength(const UCS4* str) { return uc4Length(str); }
00142
00143 template <typename ToCharType, typename FromCharType>
00144 inline ToCharType* convertTo(ToCharType* buffer, const FromCharType* source, int length = -1)
00145 {
00146 if (length == -1)
00147 length = stringLength(source);
00148 ToCharType* tit = buffer;
00149 register const FromCharType* fit = source;
00150 register const FromCharType* end = source + length;
00151 for (; fit < end; ++tit, ++fit)
00152 *tit = *fit;
00153 return buffer;
00154 }
00155
00156 }
00157 }
00158 }
00159
00160
00161 #endif //Tanl_Text_Utf8Utils_h