00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "LBFGS.h"
00025 #include <math.h>
00026 #include <iostream>
00027 #include <fstream>
00028 #include <algorithm>
00029 #include <map>
00030 #include <set>
00031
00032 #include <limits>
00033
00034
00035
00036 #include <assert.h>
00037 #include <stdio.h>
00038
00039 #include "lbfgsAlg.h"
00040 #include "include/Timer.h"
00041
00042 using namespace std;
00043 using namespace Tanl::Text;
00044
00045 #define lbfgs(x, f, g, opt) \
00046 ::LBFGS::minimize(opt.n, opt.m, x, f, g, opt.diagco, opt.diag, \
00047 opt.iprint, opt.eps, opt.xtol, opt.w, opt.iflag, \
00048 opt.niter, opt.nfuns)
00049
00050 namespace Tanl {
00051 namespace Classifier {
00052
00053 struct compare1st {
00054 public:
00055 bool operator()(std::pair<ClassID, int> const& k1,
00056 std::pair<ClassID, int> const& k2) {
00057 return k1 < k2;
00058 }
00059 };
00060
00062
00063 LBFGS::LBFGS(int iterations, int cutoff, double eps) :
00064 MaxEnt(iterations, cutoff),
00065 opt(iterations, eps),
00066 lambda(0)
00067 { }
00068
00069 LBFGS::LBFGS(EventStream& es, int iterations, int cutoff, double eps) :
00070 MaxEnt(iterations, cutoff),
00071 opt(iterations, eps),
00072 lambda(0)
00073 {
00074 train(es);
00075 }
00076
00077 void LBFGS::train(EventStream& es)
00078 {
00079 read(es);
00080 train();
00081 }
00082
00083 void LBFGS::train() {
00084 counter.clear();
00085
00086
00087
00088
00089 numEvents = buildIndex(events, predIndex, eventMap, outcomeLabels, 0, verbose);
00090
00091 numTokens = eventMap.size();
00092 numPreds = predLabels.size();
00093 numOutcomes = outcomeLabels.size();
00094
00095 # ifdef SHOWTIME
00096 Timer timer;
00097 timer.stop();
00098 char buffer[80];
00099 timer.duration(buffer, sizeof(buffer));
00100 cerr << "buildIndex: " << buffer << endl;
00101 timer.start();
00102 # endif
00103
00104 if (numTokens == 0)
00105 throw EventStreamError("No events in stream");
00106
00107
00108 FeatureMap featSum;
00109
00110
00111
00112
00113
00114
00115
00116 typedef map<PID, set<ClassID> > PredOutcomes;
00117 PredOutcomes predOutcomes;
00118 FOR_EACH (EventMap, eventMap, it) {
00119 ClassID oID = it->first.first;
00120 int evCount = it->second;
00121 const vector<PID>& predicates = it->first.second;
00122 for (unsigned j = 0; j < predicates.size(); j++) {
00123 PID pid = predicates[j];
00124 Feature f(pid, oID);
00125 featSum[f] += evCount;
00126 predOutcomes[pid].insert(oID);
00127 }
00128 }
00129
00130
00131
00132
00133
00134
00135 contribTable.clear();
00136 contribTable.resize(numPreds);
00137 int featCount = 0;
00138 FOR_EACH (PredOutcomes, predOutcomes, it) {
00139 ContribTable::value_type& contribs = contribTable[it->first];
00140 FOR_EACH (set<ClassID>, it->second, oit)
00141 contribs.push_back(make_pair(*oit, featCount++));
00142 }
00143
00144 lambda = new double[featCount];
00145 std::fill(lambda, lambda + featCount, 0.0);
00146
00147
00148
00149
00150
00151
00152 double* observedExpects = new double[featCount];
00153 for (PID pid = 0; pid < numPreds; ++pid) {
00154 ContribTable::value_type& param = contribTable[pid];
00155 for (size_t j = 0; j < param.size(); ++j) {
00156 Feature f(pid, param[j].first);
00157 observedExpects[param[j].second] = - featSum[f];
00158 }
00159 }
00160 # ifdef SHOWTIME
00161 timer.stop();
00162 timer.duration(buffer, sizeof(buffer));
00163 cerr << "init: " << buffer << endl;
00164 # endif
00165
00166 static const double LOG_ZERO = log(DBL_MIN);
00167 double* prob = new double[numOutcomes];
00168 double* grad = new double[featCount];
00169
00170 opt.n = featCount;
00171 opt.diag = new double[featCount];
00172 opt.w = new double[featCount * (2*opt.m+1) + 2*opt.m];
00173
00174 while (opt.niter < iterations) {
00175
00176 double ll = 0.0;
00177 int correct = 0;
00178 std::copy(observedExpects, observedExpects + featCount, grad);
00179
00180
00181
00182 FOR_EACH (EventMap, eventMap, it) {
00183 ClassID outcome = it->first.first;
00184 int evCount = it->second;
00185 const vector<PID>& xj = it->first.second;
00186
00187 ClassID best = estimate(xj, prob);
00188 if (best == outcome)
00189 correct += evCount;
00190
00191 for (unsigned i = 0; i < xj.size(); i++) {
00192 int pID = xj[i];
00193 FOR_EACH (ContribTable::value_type, contribTable[pID], cit) {
00194 ClassID oid = cit->first;
00195 int fid = cit->second;
00196 grad[fid] += prob[oid] * evCount;
00197 }
00198 }
00199 assert(isfinite(prob[outcome]));
00200 double t = log(prob[outcome]);
00201 ll -= evCount * (isfinite(t) ? t : LOG_ZERO);
00202 }
00203
00204 lbfgs(lambda, ll, grad, opt);
00205
00206 if (verbose) {
00207 cerr << " " << opt.niter+1 << ". loglikelihood = " << -ll
00208 << " correct = " << correct * 100.0 / numEvents << "%" << endl;
00209 }
00210 if (opt.iflag < 0)
00211 throw runtime_error("lbfgs routine returned error: ");
00212 else if (opt.iflag == 0)
00213 break;
00214 }
00215 delete[] grad;
00216 delete[] prob;
00217 delete[] observedExpects;
00218 }
00219
00220 ClassID LBFGS::estimate(const vector<PID>& preds, double prob[])
00221 {
00222
00223
00224
00225
00226
00227 std::fill(prob, prob + numOutcomes, 0.0);
00228 for (unsigned i = 0; i < preds.size(); i++) {
00229 PID pid = preds[i];
00230 FOR_EACH (ContribTable::value_type, contribTable[pid], it)
00231 prob[it->first] += lambda[it->second];
00232 }
00233
00234 ClassID best = 0;
00235 double max_prob = 0.0;
00236 double sum = 0.0;
00237 for (ClassID oid = 0; oid < numOutcomes; oid++) {
00238 prob[oid] = exp(prob[oid]);
00239 if (!isfinite(prob[oid]))
00240 prob[oid] = numeric_limits<double>::max();
00241 sum += prob[oid];
00242 if (prob[oid] >= max_prob) {
00243 max_prob = prob[oid];
00244 best = oid;
00245 }
00246 }
00247 for (ClassID oid = 0; oid < numOutcomes; oid++) {
00248 prob[oid] /= sum;
00249 }
00250 return best;
00251 }
00252
00253 #define HEADER "#txt,maxent"
00254 #define LINE 16384
00255
00256 void LBFGS::save(char const* path)
00257 {
00258 ofstream ofs(path, ios::binary | ios::trunc);
00259 writeHeader(ofs);
00260 writeData(ofs);
00261 }
00262
00263 void LBFGS::writeHeader(ofstream& ofs)
00264 {
00265
00266 ofs << HEADER << endl;
00267
00268
00269 ofs << numPreds << endl;
00270 for (unsigned i = 0; i < numPreds; ++i)
00271 ofs << predLabels[i] << endl;
00272 predLabels.clear();
00273 predLabels.resize(0);
00274
00275
00276 ofs << numOutcomes << endl;
00277 for (size_t i = 0; i < numOutcomes; ++i) {
00278 char const* label = outcomeLabels[i];
00279 ofs << label << endl;
00280 free((void*)label);
00281 }
00282 outcomeLabels.clear();
00283 outcomeLabels.resize(0);
00284 }
00285
00286 void LBFGS::writeData(ofstream& ofs)
00287 {
00288 ofs.precision(20);
00289
00290
00291 size_t featCount = 0;
00292 for (unsigned i = 0; i < numPreds; ++i) {
00293 ContribTable::value_type& contrib = contribTable[i];
00294 ofs << contrib.size();
00295 FOR_EACH (ContribTable::value_type, contrib, cit) {
00296 ofs << " " << cit->first;
00297 featCount++;
00298 }
00299 ofs << endl;
00300 }
00301
00302
00303 ofs << featCount << endl;
00304 for (unsigned i = 0; i < featCount; ++i)
00305 ofs << lambda[i] << endl;
00306 # ifdef DEBUG
00307 for (int pid = 0; pid < numPreds; ++pid) {
00308 ContribTable::value_type& contrib = contribTable[pid];
00309 FOR_EACH (ContribTable::value_type, contrib, cit) {
00310 char const* outcome = OutcomeName(cit->first);
00311 string const& pred = PredicateName(pid);
00312 printf("lambda(%s, %s)=%.4f\n", outcome, pred.c_str(), lambda[cit->second]);
00313 }
00314 }
00315 # endif
00316 }
00317
00318 }
00319 }