00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef DeSR_Language_H
00025 #define DeSR_Language_H
00026
00027 #include "MorphExtractor.h"
00028
00029 namespace Tanl {
00030
00036 struct Language
00037 {
00038 char const* verbCPos;
00039 char const* nounCPos;
00040 char const* prepCPos;
00041 char const* timeDep;
00042 char const* locDep;
00043 bool hasPostpositions;
00044 MorphExtractor* morphExtractor;
00045
00046 Language(char const* v = "VB", char const* n = "NN", char const* p = "IN") :
00047 verbCPos(v),
00048 nounCPos(n),
00049 prepCPos(p),
00050 timeDep(""),
00051 locDep(""),
00052 hasPostpositions(false),
00053 morphExtractor(new MorphExtractor())
00054 { }
00055
00056 ~Language() { delete morphExtractor; }
00057
00059 virtual bool rootPos(std::string const& pos) const { return true; }
00060
00061 virtual char const* rootLabel() const;
00062
00064 char const* code() const;
00065
00067 static Language const* get(char const* code);
00068
00070 virtual bool numbAgree(char x, char y) const { return false; }
00071
00073 virtual bool gendAgree(char x, char y) const { return false; }
00074
00076 virtual bool morphoLeft(std::string const& pos) const { return false; }
00077
00079 virtual bool morphoRight(std::string const& pos) const { return false; }
00080 };
00081
00085 struct ArabicLanguage : public Language
00086 {
00087 ArabicLanguage() { morphExtractor = new ArabicMorphExtractor(); }
00088 };
00089
00093 struct IndianLanguage : public Language
00094 {
00095 IndianLanguage() :
00096 Language("v", "n", "psp")
00097 {
00098 hasPostpositions = true;
00099 timeDep = "k7t";
00100 locDep = "k7p";
00101 morphExtractor = new IndianMorphExtractor();
00102 }
00103
00104 char const* rootLabel() const;
00105
00106 };
00107
00111 struct BanglaLanguage : public IndianLanguage
00112 {
00113 BanglaLanguage() :
00114 IndianLanguage() { }
00115 };
00116
00120 struct BasqueLanguage : public Language
00121 {
00122 BasqueLanguage() :
00123 Language("ADI", "IZE", "")
00124 { morphExtractor = new BasqueMorphExtractor(); }
00125 };
00126
00130 struct BulgarianLanguage : public Language
00131 {
00132 BulgarianLanguage() { morphExtractor = new MorphExtractor(); }
00133 };
00134
00138 struct CatalanLanguage : public Language
00139 {
00140 CatalanLanguage() :
00141 Language("v", "n", "s") {
00142 timeDep = "CCT";
00143 locDep = "CCL";
00144 morphExtractor = new SpanishMorphExtractor();
00145 }
00146
00147 bool morphoRight(std::string const& pos) const { return pos == "sp"; }
00148
00149 char const* rootLabel() const;
00150 };
00151
00155 struct ChineseLanguage : public Language
00156 {
00157 ChineseLanguage() { morphExtractor = new MorphExtractor(); }
00158 };
00159
00163 struct CzechLanguage : public Language
00164 {
00165 CzechLanguage() { morphExtractor = new CzechMorphExtractor(); }
00166 };
00167
00171 struct DanishLanguage : public Language
00172 {
00173 DanishLanguage() { morphExtractor = new DanishMorphExtractor(); }
00174 };
00175
00179 struct DutchLanguage : public Language
00180 {
00181 DutchLanguage() { morphExtractor = new DutchMorphExtractor(); }
00182 };
00183
00187 struct EnglishLanguage : public Language
00188 {
00189 EnglishLanguage() {
00190 timeDep = "TMP";
00191 locDep = "LOC";
00192 morphExtractor = new MorphExtractor();
00193 }
00194
00195 bool rootPos(std::string const& pos) const {
00196 return pos == "VBD" || pos == "VBP" || pos == "VBZ" || pos == "MD";
00197 }
00198 };
00199
00203 struct FrenchLanguage : public Language
00204 {
00205 FrenchLanguage() :
00206 Language("V", "N", "P")
00207 {
00208 morphExtractor = new FrenchMorphExtractor();
00209 }
00210
00211 bool numbAgree(char x, char y) const {
00212 return x == y;
00213 }
00214
00215 bool gendAgree(char x, char y) const {
00216 return x == y || x == '\0' || y == '\0';
00217 }
00218
00219 bool morphoRight(std::string const& pos) const { return pos == "P+D"; }
00220
00221 char const* rootLabel() const;
00222 };
00223
00227 struct GermanLanguage : public Language
00228 {
00229 GermanLanguage() { morphExtractor = new MorphExtractor(); }
00230 };
00231
00235 struct GreekLanguage : public Language
00236 {
00237 GreekLanguage() { morphExtractor = new MorphExtractor(); }
00238 };
00239
00243 struct ItalianLanguage : public Language
00244 {
00245 ItalianLanguage() :
00246 Language("V", "S", "E")
00247 {
00248 timeDep = "comp_temp";
00249 locDep = "comp_loc";
00250 morphExtractor = new ItalianMorphExtractor();
00251 }
00252
00253 bool numbAgree(char x, char y) const {
00254 return x == y || x == 'n' || y == 'n';
00255 }
00256
00257 bool gendAgree(char x, char y) const {
00258 return x == y || x == 'n' || y == 'n';
00259 }
00260
00261 bool morphoRight(std::string const& pos) const { return pos == "EA"; }
00262 };
00263
00267 struct ItalianTutLanguage : public Language
00268 {
00269 ItalianTutLanguage() :
00270 Language("VERB", "NOUN", "PREP")
00271 { morphExtractor = new ItalianTutMorphExtractor(); }
00272
00273 char const* rootLabel() const;
00274 };
00275
00279 struct JapaneseLanguage : public Language
00280 {
00281 JapaneseLanguage() { morphExtractor = new MorphExtractor(); }
00282 };
00283
00287 struct HindiLanguage : public IndianLanguage
00288 { };
00289
00293 struct HungarianLanguage : public Language
00294 {
00295 HungarianLanguage() { morphExtractor = new MorphExtractor(); }
00296 };
00297
00301 struct PortugueseLanguage : public Language
00302 {
00303 PortugueseLanguage() {
00304 morphExtractor = new PortugueseMorphExtractor();
00305 verbCPos = "v";
00306 }
00307 };
00308
00312 struct SloveneLanguage : public Language
00313 {
00314 SloveneLanguage() {
00315 morphExtractor = new SloveneMorphExtractor();
00316 }
00317 };
00318
00322 struct SpanishLanguage : public Language
00323 {
00324 SpanishLanguage() { morphExtractor = new SpanishMorphExtractor(); }
00325
00326 char const* rootLabel() const;
00327 };
00328
00332 struct SwedishLanguage : public Language
00333 {
00334 SwedishLanguage() { morphExtractor = new MorphExtractor(); }
00335
00336 bool rootPos(std::string const& pos) const {
00337 return pos.size() > 1 && pos[1] == 'V';
00338 }
00339 };
00340
00344 struct TeluguLanguage : public IndianLanguage
00345 {
00346 TeluguLanguage() :
00347 IndianLanguage() { }
00348 };
00349
00353 struct TurkishLanguage : public Language
00354 {
00355 TurkishLanguage() { morphExtractor = new TurkishMorphExtractor(); }
00356 };
00357
00358 }
00359
00360 #endif // DeSR_Language_H