00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Text_Char_h
00025 #define Tanl_Text_Char_h
00026
00027
00028 #include "include/config.h"
00029
00030 #include <cstring>
00031 #include <cwchar>
00032 #include <stdio.h>
00033 #include <string>
00034
00035 #include "Unicode.h"
00036
00037 namespace Tanl {
00038 namespace Text {
00039
00043 class Char
00044 {
00045 public:
00046
00047 typedef Char CharType;
00048 typedef UCS2 CodeUnit;
00049
00050 static const int MaxRadix = 36;
00051 static const int MinRadix = 2;
00052
00053 static const UCS2 MaxValue = 0xFFFF;
00054 static const UCS2 MinValue = 0x0000;
00055
00056 Char() { }
00057
00058 Char(UCS2 ucs) : ucs(ucs) { }
00059
00060 operator UCS2() const { return ucs; }
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00086 int CompareTo(Char const& other) const { return ucs - (UCS2)other; }
00087
00088 int CompareCase(Char const& other) const {
00089 return ToLower().ucs - other.ToLower().ucs; }
00090
00091 Unicode::Category GetCategory() const {
00092 return (Unicode::Category)Unicode::UnicodeTable[ucs].category;
00093 }
00094
00095 bool IsControl() const {
00096 return Unicode::UnicodeTable[ucs].flags & Unicode::Control; }
00097
00098 bool IsDigit() const {
00099 return Unicode::UnicodeTable[ucs].flags & Unicode::Number; }
00100
00101 bool IsLetter() const {
00102 return Unicode::UnicodeTable[ucs].flags & Unicode::Letter; }
00103
00104 bool IsLetterOrDigit() const { return IsDigit() || IsLetter(); }
00105
00106 bool IsLower() const {
00107 return Unicode::UnicodeTable[ucs].flags & Unicode::Lowercase; }
00108
00109 bool IsMark() const {
00110 return Unicode::UnicodeTable[ucs].flags & Unicode::Control;
00111 }
00112
00114 bool IsPunctuation() const {
00115 return Unicode::UnicodeTable[ucs].flags & Unicode::Punctuation;
00116 }
00117
00119 bool IsSpace() const {
00120 return Unicode::UnicodeTable[ucs].flags & Unicode::Whitespace ||
00121 ucs == 0x00A0;
00122 }
00123
00124 bool IsSymbol() const {
00125 return Unicode::UnicodeTable[ucs].flags & Unicode::Symbol;
00126 }
00127
00128 bool IsTitle() const {
00129 return Unicode::UnicodeTable[ucs].flags & Unicode::Titlecase;
00130 }
00131
00132 bool IsUpper() const {
00133 return Unicode::UnicodeTable[ucs].flags & Unicode::Uppercase;
00134 }
00135
00137 bool IsWhitespace() const {
00138 return Unicode::UnicodeTable[ucs].flags & Unicode::Whitespace;
00139 }
00140
00141 int ToDigit(int radix);
00142
00143 Char ToLower() const;
00144
00145 Char ToUpper() const;
00146
00150 Unicode::BlockCode BlockCode() { return Unicode::GetBlockCode(ucs); }
00151
00152 protected:
00153 UCS2 ucs;
00154 };
00155
00159 class Utf8Char : public Char
00160 {
00161 public:
00162 typedef Char CharType;
00163 typedef char CodeUnit;
00164 };
00165
/**
 * Tag type for plain C characters: both the character type and the
 * code unit type are char.
 */
class CChar
{
public:
  typedef char CodeUnit;        ///< storage unit
  typedef char CharType;        ///< character type
};
00175
00176 }
00177 }
00178
00179 namespace std {
00180
00181 #if defined(__GNUC__) && __GNUC__ < 3
00182
00183 struct string_char_traits<Tanl::Text::Char> {
00184
00185 typedef Tanl::Text::Char char_type;
00186
00187 static void assign(char_type& c1, const char_type& c2) { c1 = c2; }
00188 static bool eq(const char_type& c1, const char_type& c2) {
00189 return (c1 == c2);
00190 }
00191 static bool ne(const char_type& c1, const char_type& c2) {
00192 return (c1 != c2);
00193 }
00194 static bool lt(const char_type& c1, const char_type& c2) {
00195 return (c1 < c2);
00196 }
00197 static char_type eos() { return 0; }
00198 static bool is_del(char_type a) { return char_type(a).IsSpace(); }
00199 static int compare(const char_type* s1, const char_type* s2, size_t n) {
00200 return ::wmemcmp((wchar_t const*)s1, (wchar_t const*)s2, n);
00201 }
00202 static size_t length(const char_type* s) {
00203 size_t i = 0;
00204 while (!eq(s[i], char_type(0))) ++i;
00205 return i;
00206 }
00207 static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
00208 return (char_type*)::wmemcpy((wchar_t*)s1, (wchar_t const*)s2, n);
00209 }
00210 static char_type* move(char_type* s1, const char_type* s2, size_t n) {
00211 return (char_type*)::wmemmove((wchar_t*)s1, (wchar_t const*)s2, n);
00212 }
00213 static char_type* set(char_type* s, const char_type& c, size_t n) {
00214 return (char_type*)::wmemset((wchar_t*)(Tanl::Text::UCS2*)s, (Tanl::Text::UCS2)c, n);
00215 }
00216 };
00217
00218 #else
00219
00220 template<>
00221 struct char_traits<Tanl::Text::Char> {
00222
00223 typedef Tanl::Text::Char char_type;
00224 typedef int int_type;
00225 typedef streampos pos_type;
00226 typedef streamoff off_type;
00227 typedef mbstate_t state_type;
00228
00229 static void
00230 assign(char_type& c1, const char_type& c2) { c1 = c2; }
00231
00232
00233 static bool
00234 eq(const char_type& c1, const char_type& c2) {
00235 return (c1 == c2);
00236 }
00237
00238 static bool
00239 lt(const char_type& c1, const char_type& c2) { return(c1 < c2); }
00240
00241
00242 static int
00243 compare(const char_type* s1, const char_type* s2, size_t n) {
00244 for (size_t i = 0; i < n; ++i) {
00245 if (eq(s1[i], s2[i])) continue;
00246 if (lt(s1[i], s2[i])) return -1;
00247 else return 1;
00248 }
00249 return 0;
00250 }
00251
00252 static size_t
00253 length(const char_type* s) {
00254 size_t i = 0;
00255 while (!eq(s[i], char_type(0))) ++i;
00256 return i;
00257 }
00258
00259 static const char_type*
00260 find(const char_type* s, size_t n, const char_type& c) {
00261 size_t i = 0;
00262 while (!eq(s[i], c)) ++i;
00263 return s + i;
00264 }
00265
00266 static char_type*
00267 move(char_type* s1, const char_type* s2, size_t n) {
00268 return static_cast<char_type*>
00269 (::memmove(s1, s2, n * sizeof(char_type)));
00270 }
00271
00272 static char_type*
00273 copy(char_type* s1, const char_type* s2, size_t n) {
00274 return static_cast<char_type*>
00275 (::memcpy(s1, s2, n * sizeof(char_type)));
00276 }
00277
00278 static char_type*
00279 assign(char_type* s, size_t n, char_type a) {
00280 for (size_t i = 0; i < n; ++i)
00281 assign(s[i], a);
00282 return s;
00283 }
00284
00285 static char_type
00286 to_char_type(const int_type& i) { return char_type(i); }
00287
00288 static int_type
00289 to_int_type(const char_type& c) { return (Tanl::Text::UCS2)c; }
00290
00291 static bool
00292 eq_int_type(const int_type& i1, const int_type& i2) { return (i1 == i2); }
00293
00294 static int_type
00295 eof() { return static_cast<int_type>(EOF); }
00296
00297 static int_type
00298 not_eof(const int_type& i) { return (eof() == i) ? !eof() : i; }
00299 };
00300
00301 #endif // __GNUC__
00302
00303 }
00304
00305 #endif // Tanl_Text_Char_h