00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef DeSR_WordCounts_H
00025 #define DeSR_WordCounts_H
00026
00027
00028 #include "include/unordered_map.h"
00029
00030
00031 #include "lib/strtok_r.h"
00032
00033
00034 #include <string>
00035
00036 namespace Parser {
00037
/**
 * Table of occurrence counts keyed by word.
 */
struct WordCounts : public unordered_map<std::string, int> {
  /**
   * Record one more occurrence of @p w.
   *
   * @param w the word being counted.
   * @return the number of occurrences of @p w recorded so far, including
   *         this one (1 the first time a word is added).
   */
  int add(std::string const& w) {
    // operator[] value-initializes a missing entry to 0, so a single lookup
    // serves both the first and any repeated occurrence.
    return ++(*this)[w];
  }

  /**
   * Look up the count of @p w without modifying the table.
   *
   * This hides the inherited count(), which only reports presence.
   *
   * @param w the word to look up.
   * @return the count recorded for @p w, or 0 if it was never added.
   */
  int count(std::string const& w) const {
    const_iterator wcit = find(w);
    return (wcit == end()) ? 0 : wcit->second;
  }
};
00057
00061 class WordFreq : public unordered_map<const char*, float, hash<const char*>, eqstr>
00062 {
00063 public:
00064
00065 typedef unordered_map<const char*, float, hash<const char*>, eqstr> HashMap;
00066
00070 WordFreq(char const* file) { load(file); }
00071 WordFreq(std::string& file) { load(file.c_str()); }
00072
00073 ~WordFreq() {
00074 for (iterator it = this->begin(); it != this->end(); ++it)
00075 free((void*)it->first);
00076 }
00077
00081 bool contains(char const* ngram) {
00082 return this->find(ngram) != this->end();
00083 }
00084
00088 bool contains(std::string const& ngram) {
00089 return this->find(ngram.c_str()) != this->end();
00090 }
00091
00095 std::pair<iterator,bool> insert(const value_type& __obj) {
00096 iterator _Where = this->find(__obj.first);
00097 key_type _Keyval = (_Where == this->end()) ?
00098 ::strdup(__obj.first) :
00099 _Where->first;
00100 return HashMap::insert(value_type(_Keyval, __obj.second));
00101 }
00102
00103 mapped_type& operator[](const key_type& _Keyval) {
00104
00105 iterator _Where = this->find(_Keyval);
00106 if (_Where == this->end())
00107 _Where = this->insert(value_type(_Keyval, mapped_type())).first;
00108 return _Where->second;
00109 }
00110
00114 void load(char const* file) {
00115 std::ifstream ifs(file);
00116 load(ifs);
00117 }
00118
00123 void load(std::ifstream& ifs) {
00124 # define MAX_LINE_LEN 4096
00125 char line[MAX_LINE_LEN];
00126
00127 while (ifs.getline(line, MAX_LINE_LEN)) {
00128 char* next = line;
00129 char const* ngram = strtok_r(0, "\t", &next);
00130 float count = atof(strtok_r(0, " ", &next));
00131 insert(std::make_pair(ngram, count));
00132 }
00133 # undef MAX_LINE_LEN
00134 }
00135 };
00136
00137 }
00138
00139 #endif // DeSR_WordCounts_H