00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_POS_linear_interpolated_lm_H
00025 #define Tanl_POS_linear_interpolated_lm_H
00026
00027 #include "serialization.h"
00028
00029
00030 #include <tr1/unordered_map>
00031 #include <vector>
00032 #include <stdexcept>
00033
00034 namespace Tanl { namespace POS {
00035
/**
 * Exception type declared by this header.
 *
 * Adds nothing to std::runtime_error beyond a distinct type: the message
 * handed to the constructor is forwarded unchanged to the base class and is
 * available through what().
 */
class ProbError : public std::runtime_error
{
public:
    /** @param msg text forwarded to std::runtime_error. */
    ProbError(const std::string& msg) : std::runtime_error(msg) {}
};
00042
/**
 * Node of a tree of contexts, templated on the word type CT.
 *
 * Each node holds a frequency counter, a table mapping words to a double
 * value, and a map from int keys to heap-allocated child nodes.
 *
 * Ownership: a node owns its children. The destructor deletes them, and
 * copying a node clones the whole subtree, so two nodes never share a child
 * pointer (sharing would make both destructors delete the same child).
 *
 * Member functions that are only declared here are not defined in this
 * header; their definitions are expected in linear_interpolated_lm.cpp,
 * which this header includes at its end.
 */
template <class CT>
struct Context
{
    /** Child nodes keyed by an int; the mapped pointers are owned by this node. */
    typedef std::tr1::unordered_map<int, Context<CT>*> CMap;
    /** Per-word values stored at this node. */
    typedef std::tr1::unordered_map<CT, double> WordFreq;

    unsigned freq;   ///< counter reported by total_context_freq()
    CMap childs;     ///< owned child nodes
    WordFreq words;  ///< word table reported by get_words()

    /** Creates an empty node with a zero frequency counter. */
    Context() :
        freq(0)
    { }

    /**
     * Deep copy: duplicates the counter and word table and clones every
     * child subtree of @p other.
     */
    Context(const Context& other) :
        freq(other.freq),
        words(other.words)
    {
        try {
            clone_children_from(other.childs);
        } catch (...) {
            // The destructor does not run for a partially constructed
            // object, so release the children cloned so far by hand.
            delete_children();
            throw;
        }
    }

    /**
     * Copy assignment via copy-and-swap: the new subtree is fully built
     * before the old one is released, so a failed copy leaves *this intact.
     */
    Context& operator=(const Context& other)
    {
        if (this != &other) {
            Context copy(other);
            const unsigned old_freq = freq;
            freq = copy.freq;
            copy.freq = old_freq;
            childs.swap(copy.childs);
            words.swap(copy.words);
            // `copy` now holds the previous state and frees it on scope exit.
        }
        return *this;
    }

    /** Deletes all owned child nodes (recursively, through their destructors). */
    ~Context()
    {
        delete_children();
    }

    /** Writes this node to @p out (defined outside this header). */
    void serialize(std::ostream &out);

    /** Reads this node from @p in (defined outside this header). */
    void serialize(std::istream &in);

    /**
     * Defined outside this header.
     * NOTE(review): presumably records @p word under the @p n context ids
     * starting at @p context -- confirm against the implementation.
     */
    void add_word(std::vector<int>::const_iterator context, int n, CT& word);

    /** @return the word table of this node. */
    WordFreq& get_words() { return words; }
    /** @return the word table of this node (read-only). */
    const WordFreq& get_words() const { return words; }
    /** @return the frequency counter of this node. */
    unsigned total_context_freq() const { return freq; }
    /** @return the number of distinct words stored at this node. */
    size_t word_count_at_context() const { return words.size(); }

    /** Defined outside this header; returns a vector of weights for @p level. */
    std::vector<double> calculate_lambdas(int level);

    /**
     * Defined outside this header.
     * NOTE(review): name suggests the stored counts are converted to
     * probabilities using @p lambdas -- confirm against the implementation.
     */
    void counts_to_prob(std::vector<double>& lambdas);

    /**
     * Defined outside this header.
     * NOTE(review): presumably the probability of @p word given @p context
     * -- confirm against the implementation.
     */
    double wordprob(CT const& word, std::vector<int>& context);

private:
    void estimate_at_context(WordFreq& parent_words,
                             std::vector<double>::const_iterator lambdas);

    void adjust_lambdas(std::vector<double>& lambdas,
                        std::vector<Context<CT>*>& context_nodes,
                        int max_level);

    /** Clones every child in @p source into this node's child map. */
    void clone_children_from(const CMap& source)
    {
        typename CMap::const_iterator it = source.begin();
        for (; it != source.end(); ++it) {
            // Insert the (null) slot first: if the allocation below throws,
            // the map never holds a dangling or unowned pointer.
            Context<CT>*& slot = childs[it->first];
            if (it->second)
                slot = new Context<CT>(*it->second);
        }
    }

    /** Deletes every owned child and empties the child map. */
    void delete_children()
    {
        typename CMap::iterator it = childs.begin();
        for (; it != childs.end(); ++it)
            delete it->second;
        childs.clear();
    }
};
00116
00117 template <class CT>
00118 struct ProbLM : public Context<CT>
00119 {
00120 static const int bos = -1;
00121 };
00122
00123 }
00124 }
00125
00126 #include "linear_interpolated_lm.cpp"
00127
00128 #endif // Tanl_POS_linear_interpolated_lm_H