00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef DeSR_MorphExtractor_H
00025 #define DeSR_MorphExtractor_H
00026
00027 #include "text/RegExp.h"
00028
00029 namespace Tanl {
00030
/**
 * Polymorphic base for the per-language morphological feature extractors
 * declared in this header. Derived classes override operator() to fill a
 * Features record; the base implementation of operator() is defined
 * outside this header.
 */
struct MorphExtractor
{
  MorphExtractor() { }

  /**
   * Virtual destructor. The class exposes a virtual operator() and is
   * subclassed, so an extractor may be destroyed through a pointer to
   * MorphExtractor; without a virtual destructor that is undefined behavior.
   */
  virtual ~MorphExtractor() { }

  /**
   * Output record: one fixed-size, NUL-terminated character buffer per
   * feature. Every buffer starts out as the empty string.
   */
  struct Features {
    /** Initialize every buffer to the empty string. */
    Features() {
      Case[0] = '\0';
      extra[0] = '\0';
      gender[0] = '\0';
      mode[0] = '\0';
      subcat[0] = '\0';
      negative[0] = '\0';
      number[0] = '\0';
      person[0] = '\0';
      tense[0] = '\0';
      trans[0] = '\0';
      full[0] = '\0';
    }

    // Capitalized because `case` is a C++ keyword.
    char Case[20];
    char extra[20];
    char gender[20];
    char mode[20];
    char subcat[20];
    char negative[20];
    char number[20];
    char person[20];
    char tense[20];
    char trans[20];
    // Larger buffer; presumably holds the complete feature string -- TODO confirm.
    char full[256];
  };

  /**
   * Extract features into @a mf.
   *
   * @param start  beginning of the input character range.
   * @param end    end of the input character range (presumably one past the
   *               last character -- TODO confirm against the implementation).
   * @param mf     record that receives the extracted features.
   */
  virtual void operator() (char const* start, char const* end,
                           Features& mf) const;
};
00071
00075 struct ArabicMorphExtractor : public MorphExtractor
00076 {
00077 static Tanl::Text::RegExp::Pattern reCase;
00078 static Tanl::Text::RegExp::Pattern reGender;
00079 static Tanl::Text::RegExp::Pattern reNumber;
00080 static Tanl::Text::RegExp::Pattern rePerson;
00081
00082 void operator() (char const* start, char const* end, Features& mf) const;
00083 };
00084
00088 struct BasqueMorphExtractor : public MorphExtractor
00089 {
00090 static Tanl::Text::RegExp::Pattern reNumber;
00091
00092 BasqueMorphExtractor() {}
00093
00094 void operator() (char const* start, char const* end, Features& mf) const;
00095 };
00096
00100 struct BulgarianMorphExtractor : public MorphExtractor
00101 {
00102 static Tanl::Text::RegExp::Pattern reGender;
00103 static Tanl::Text::RegExp::Pattern reNumber;
00104 static Tanl::Text::RegExp::Pattern rePerson;
00105
00106 void operator() (char const* start, char const* end, Features& mf) const;
00107 };
00108
00112 struct CzechMorphExtractor : public MorphExtractor
00113 {
00114 static Tanl::Text::RegExp::Pattern reCase;
00115 static Tanl::Text::RegExp::Pattern reGender;
00116 static Tanl::Text::RegExp::Pattern reNumber;
00117 static Tanl::Text::RegExp::Pattern rePerson;
00118 static Tanl::Text::RegExp::Pattern reNegative;
00119 static Tanl::Text::RegExp::Pattern reGra;
00120
00121 void operator() (char const* start, char const* end, Features& mf) const;
00122 };
00123
00127 struct DanishMorphExtractor : public MorphExtractor
00128 {
00129 static Tanl::Text::RegExp::Pattern reGender;
00130 static Tanl::Text::RegExp::Pattern reNumber;
00131 static Tanl::Text::RegExp::Pattern rePerson;
00132 static Tanl::Text::RegExp::Pattern reCase;
00133
00134 void operator() (char const* start, char const* end, Features& mf) const;
00135 };
00136
00140 struct DutchMorphExtractor : public MorphExtractor
00141 {
00142 static Tanl::Text::RegExp::Pattern reGender;
00143 static Tanl::Text::RegExp::Pattern reNumber;
00144 static Tanl::Text::RegExp::Pattern rePerson;
00145 static Tanl::Text::RegExp::Pattern reCase;
00146
00147 void operator() (char const* start, char const* end, Features& mf) const;
00148 };
00149
00153 struct FrenchMorphExtractor : public MorphExtractor
00154 {
00155 static Tanl::Text::RegExp::Pattern reGender;
00156 static Tanl::Text::RegExp::Pattern reNumber;
00157 static Tanl::Text::RegExp::Pattern rePerson;
00216 static Tanl::Text::RegExp::Pattern reSubcat;
00217
00218 void operator() (char const* start, char const* end, Features& mf) const;
00219 };
00220
00221
00222
00223
00227 struct GreekMorphExtractor : public MorphExtractor
00228 {
00229 static Tanl::Text::RegExp::Pattern reGender;
00230 static Tanl::Text::RegExp::Pattern reNumber;
00231 static Tanl::Text::RegExp::Pattern rePerson;
00232 static Tanl::Text::RegExp::Pattern reCase;
00233
00234 void operator() (char const* start, char const* end, Features& mf) const;
00235 };
00236
00240 struct HungarianMorphExtractor : public MorphExtractor
00241 {
00242 static Tanl::Text::RegExp::Pattern reNumber;
00243 static Tanl::Text::RegExp::Pattern rePerson;
00244 static Tanl::Text::RegExp::Pattern reCase;
00245
00246 void operator() (char const* start, char const* end, Features& mf) const;
00247 };
00248
00252 struct IndianMorphExtractor : public MorphExtractor
00253 {
00254 static Tanl::Text::RegExp::Pattern reCase;
00255 static Tanl::Text::RegExp::Pattern reGender;
00256 static Tanl::Text::RegExp::Pattern reNumber;
00257 static Tanl::Text::RegExp::Pattern rePerson;
00258 static Tanl::Text::RegExp::Pattern reVibhakti;
00259 static Tanl::Text::RegExp::Pattern reTam;
00260
00261 void operator() (char const* start, char const* end, Features& mf) const;
00262 };
00263
00267 struct ItalianMorphExtractor : public MorphExtractor
00268 {
00269 static Tanl::Text::RegExp::Pattern reGender;
00270 static Tanl::Text::RegExp::Pattern reMode;
00271 static Tanl::Text::RegExp::Pattern reNumber;
00272 static Tanl::Text::RegExp::Pattern rePerson;
00273 static Tanl::Text::RegExp::Pattern reTense;
00274
00275 void operator() (char const* start, char const* end, Features& mf) const;
00276 };
00277
00281 struct ItalianTutMorphExtractor : public MorphExtractor
00282 {
00283 static Tanl::Text::RegExp::Pattern reCase;
00284 static Tanl::Text::RegExp::Pattern reGender;
00285 static Tanl::Text::RegExp::Pattern reMode;
00286 static Tanl::Text::RegExp::Pattern reNumber;
00287 static Tanl::Text::RegExp::Pattern rePerson;
00288 static Tanl::Text::RegExp::Pattern reSem;
00289 static Tanl::Text::RegExp::Pattern reTense;
00290 static Tanl::Text::RegExp::Pattern reTrans;
00291 static Tanl::Text::RegExp::Pattern reVTrans;
00292
00293 void operator() (char const* start, char const* end, Features& mf) const;
00294 };
00295
00299 struct PortugueseMorphExtractor : public MorphExtractor
00300 {
00301 static Tanl::Text::RegExp::Pattern reGender;
00302 static Tanl::Text::RegExp::Pattern reNumber;
00303 static Tanl::Text::RegExp::Pattern rePerson;
00304
00305 PortugueseMorphExtractor() {}
00306
00307 void operator() (char const* start, char const* end, Features& mf) const;
00308 };
00309
00313 struct SloveneMorphExtractor : public MorphExtractor
00314 {
00315 static Tanl::Text::RegExp::Pattern reCase;
00316 static Tanl::Text::RegExp::Pattern reGender;
00317 static Tanl::Text::RegExp::Pattern reNegative;
00318 static Tanl::Text::RegExp::Pattern reNumber;
00319 static Tanl::Text::RegExp::Pattern rePerson;
00320
00321 void operator() (char const* start, char const* end, Features& mf) const;
00322 };
00323
00327 struct SpanishMorphExtractor : public MorphExtractor
00328 {
00329 static Tanl::Text::RegExp::Pattern reGender;
00330 static Tanl::Text::RegExp::Pattern reNumber;
00331 static Tanl::Text::RegExp::Pattern rePerson;
00332
00333 void operator() (char const* start, char const* end, Features& mf) const;
00334 };
00335
00339 struct TurkishMorphExtractor : public MorphExtractor
00340 {
00341 static Tanl::Text::RegExp::Pattern reAFeats;
00342 static Tanl::Text::RegExp::Pattern rePFeats;
00343
00344 void operator() (char const* start, char const* end, Features& mf) const;
00345 };
00346
00347 }
00348
00349 #endif // DeSR_MorphExtractor_H