Tanl Linguistic Pipeline |
00001 /* 00002 ** Tanl 00003 ** text/Utf8Normalizer.h: normalize a sequence of UTF-8 characters. 00004 ** ---------------------------------------------------------------------- 00005 ** Copyright (c) 2008 Giuseppe Attardi (attardi@di.unipi.it). 00006 ** ---------------------------------------------------------------------- 00007 ** 00008 ** This file is part of Tanl. 00009 ** 00010 ** Tanl is free software; you can redistribute it and/or modify it 00011 ** under the terms of the GNU General Public License, version 3, 00012 ** as published by the Free Software Foundation. 00013 ** 00014 ** Tanl is distributed in the hope that it will be useful, 00015 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 ** GNU General Public License for more details. 00018 ** 00019 ** You should have received a copy of the GNU General Public License 00020 ** along with this program. If not, see <http://www.gnu.org/licenses/>. 00021 ** ---------------------------------------------------------------------- 00022 */ 00023 00024 #ifndef Tanl_Text_Utf8Normalizer_H 00025 #define Tanl_Text_Utf8Normalizer_H 00026 00027 #include "Normalizer.h" 00028 00029 namespace Tanl { 00030 namespace Text { 00031 namespace Unicode { 00032 00041 struct Utf8Normalizer : public Normalizer 00042 { 00043 size_t normalize(char* dest, char const* source, int length); 00044 }; 00045 00046 } // namespace Unicode 00047 } // namespace Text 00048 } // namespace Tanl 00049 00050 #endif // Tanl_Text_Utf8Normalizer_H