00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Classifier_Event_H
00025 #define Tanl_Classifier_Event_H
00026
00027
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "include/Iterator.h"
#include "text/WordIndex.h"
00036
// Shorthands for the standard std::min / std::max function templates, which
// are declared in <algorithm>; that header must be included before use.
// The names are parenthesized so that a function-like macro called min or max
// (should another header define one) is not expanded in these definitions.
#define MIN (std::min)
#define MAX (std::max)
00039
00040
00041
/**
 * Read-only traversal: expands to the header of a for loop that declares
 * iterator @c Iter of type @c Type::const_iterator and advances it from
 * (Container).begin() up to (Container).end().
 * The statement or block written after the macro invocation is the loop body.
 * Note that @c Container is evaluated on every iteration.
 */
#define FOR_EACH(Type, Container, Iter) \
  for (Type::const_iterator Iter = (Container).begin(); \
       Iter != (Container).end(); \
       ++Iter)

/**
 * Mutable traversal: same as FOR_EACH, except that @c Iter is declared as a
 * @c Type::iterator, so elements can be modified through it.
 */
#define TO_EACH(Type, Container, Iter) \
  for (Type::iterator Iter = (Container).begin(); \
       Iter != (Container).end(); \
       ++Iter)
00047
00048 namespace Tanl {
00049 namespace Classifier {
00050
/** Unsigned integer type used for class identifiers. */
typedef unsigned int ClassID;
/** Textual name of a class. */
typedef std::string ClassName;
/** Unsigned integer type used for the identifiers stored in a Context. */
typedef unsigned int PID;
00054
/**
 * Ordered list of feature (predicate) strings.
 * It is a std::vector<std::string> extended with add() helpers that append
 * a new element at the end.
 */
class Features : public std::vector<std::string>
{
public:
  /**
   * Appends a copy of a C string.
   * @param predicate NUL-terminated string to append.
   */
  void add(char const* predicate) {
    this->push_back(std::string(predicate));
  }

  /**
   * Appends a copy of a string.
   * @param predicate string to append.
   */
  void add(std::string const& predicate) {
    this->push_back(predicate);
  }
};
00071
00081 class Context : public std::vector<PID>
00082 {
00083 public:
00084 Context() { }
00085
00090 Context(Features& feats, Tanl::Text::WordIndex& featIndex) {
00091 FOR_EACH (Features, feats, fit) {
00092 Tanl::Text::WordIndex::const_iterator found = featIndex.find(fit->c_str());
00093 if (found != featIndex.end())
00094 add(found->second);
00095 }
00096 }
00097
00098 void add(PID pid) { push_back(pid); }
00099 };
00100
00108 class FeatureEncoder : public Features
00109 {
00110 public:
00111 FeatureEncoder(Context& context, Tanl::Text::WordIndex& featIndex) :
00112 context(context), featIndex(featIndex)
00113 {
00114 context.clear();
00115 }
00116
00117 void add(char const* feature) {
00118 Tanl::Text::WordIndex::const_iterator found = featIndex.find(feature);
00119 if (found != featIndex.end())
00120 context.add(found->second);
00121 }
00122
00123 void add(std::string const& feature) {
00124 Tanl::Text::WordIndex::const_iterator found = featIndex.find(feature.c_str());
00125 if (found != featIndex.end())
00126 context.add(found->second);
00127 }
00128
00129 private:
00130 Context& context;
00131 Tanl::Text::WordIndex& featIndex;
00132 };
00133
00134
00145 template <class FeatureType = Features>
00146 class GenericEvent
00147 {
00148 public:
00149
00150 GenericEvent(ClassName className = "") : className(className) { }
00151
00152 GenericEvent(ClassName className, FeatureType features) :
00153 className(className),
00154 features(features)
00155 { }
00156
00157 ClassName className;
00158 FeatureType features;
00159 };
00160
00161 typedef GenericEvent<> Event;
00162
00163 inline std::ostream& operator <<(std::ostream& s, const Event& e)
00164 {
00165 s << e.className;
00166 FOR_EACH (std::vector<std::string>, e.features, vit)
00167 s << " " << *vit;
00168 return s;
00169 }
00170
00171
00181 class EventStream : public Iterator<Event*> {
00182
00183 public:
00184 EventStream() { }
00185
00189 char const** outcomes() { return outcomeLabels; }
00190
00191 private:
00192 char const** outcomeLabels;
00193 };
00194
00198 typedef Iterator<Context*> ContextStream;
00199
00200
00201
/**
 * Exception type for file-related failures; a std::runtime_error carrying
 * the supplied message.
 */
class FileError : public std::runtime_error {
public:
  /**
   * @param what NUL-terminated message; a null pointer is treated as an
   *             empty message rather than being passed on to the base class.
   */
  FileError(char const* what): std::runtime_error(what ? what : "") { }

  /**
   * @param what message, e.g. one assembled with std::string concatenation.
   */
  FileError(std::string const& what): std::runtime_error(what) { }
};
00209
/**
 * Exception type for event stream failures; a std::runtime_error carrying
 * the supplied message.
 */
class EventStreamError : public std::runtime_error {
public:
  /**
   * @param what NUL-terminated message; a null pointer is treated as an
   *             empty message rather than being passed on to the base class.
   */
  EventStreamError(char const* what): std::runtime_error(what ? what : "") { }

  /**
   * @param what message, e.g. one assembled with std::string concatenation.
   */
  EventStreamError(std::string const& what): std::runtime_error(what) { }
};
00217
00218 }
00219 }
00220
00221 #endif // Tanl_Classifier_Event_H