00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "Common/PostingList.h"
00027
00028 namespace IXE {
00029
00030
00052
00053
00054 PostingList::const_iterator&
00055 PostingList::const_iterator::operator ++()
00056 {
00057
00058
00059 assert(rest_ > 0);
00060 if (--rest_ == 0)
00061 return *this;
00062
00063 posting.index = parseEptacode(c_);
00064
00065 if (!posting.colors.empty())
00066 posting.colors.erase(posting.colors.begin(), posting.colors.end());
00067 if (*c_ == 0x80)
00068 {
00069 ++c_;
00070 while (*c_ != 0x80)
00071 posting.colors.insert(parseEptacode(c_) - 1);
00072 ++c_;
00073 }
00074
00075 posting.termFrequency = parseEptacode(c_);
00076 hitlen = parseEptacode(c_) + posting.termFrequency;
00077 hitsCursor = PostingList::HitsCursor(posting.termFrequency, c_);
00078 c_ += hitlen;
00079 return *this;
00080 }
00081
00088 PostingList::const_iterator&
00089 PostingList::const_iterator::next(DocID min)
00090 {
00091 if (min <= posting.index)
00092 return *this;
00093
00094 # ifndef noSKIP_LIST
00095 if (table_ &&
00096 min >= table_[0].id) {
00097
00098
00099
00100 int current = size_ - rest_;
00101 int start = MAX(0, current / Postings_Segment_Size - 1);
00102 int end = tablesz_;
00103 int mid = (start + end) / 2;
00104
00105 while (min > table_[start].id && mid > start) {
00106 if (min < table_[mid].id)
00107 end = mid;
00108 else
00109 start = mid;
00110 mid = (start + end) / 2;
00111 }
00112
00113 int block_first = (start + 1) * (Postings_Segment_Size);
00114 if (block_first > current) {
00115 c_ = (const byte*)table_ + table_[start].offset;
00116
00117 rest_ = size_ - block_first + 1;
00118 ++(*this);
00119 }
00120 }
00121 # endif // SKIP_LIST
00122
00123 # if 1 // Optimize by inlining ++()
00124 while (rest_ && min > posting.index) {
00125
00126 if (--rest_ == 0)
00127 break;
00128 posting.index = parseEptacode(c_);
00129
00130 if (!posting.colors.empty())
00131 posting.colors.erase(posting.colors.begin(), posting.colors.end());
00132 if (*c_ == 0x80)
00133 {
00134 ++c_;
00135 while (*c_ != 0x80)
00136 posting.colors.insert(parseEptacode(c_) - 1);
00137 ++c_;
00138 }
00139
00140 posting.termFrequency = parseEptacode(c_);
00141 hitlen = parseEptacode(c_) + posting.termFrequency;
00142 c_ += hitlen;
00143 }
00144 hitsCursor = PostingList::HitsCursor(posting.termFrequency, c_ - hitlen);
00145 # else
00146 while (rest_ && min > posting.index)
00147 operator ++();
00148 # endif
00149
00150 return *this;
00151 }
00152
00153 PostingList::remap_iterator &
00154 PostingList::remap_iterator::operator ++()
00155 {
00156 # ifndef oldMerge
00157 assert(rest_ > 0);
00158
00159 const_iterator::operator++();
00160 if (rest_ == 0)
00161 return *this;
00162
00163 while (remap->find(posting.index) != remapEnd) {
00164 const_iterator::operator++();
00165 if (rest_ == 0)
00166 return *this;
00167 }
00168
00169 Remap::const_iterator remf = remap->lower_bound(posting.index);
00170 if (remf != remapEnd) {
00171 posting.index -= remf->second;
00172 if (remf->first>posting.index)
00173 posting.index -= 1;
00174 }
00175 else
00176 posting.index -= remap->size();
00177
00178 posting.index += displace;
00179
00180 return *this;
00181
00182 # else //oldMerge
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 assert(rest_ > 0);
00211
00212
00213
00214 Count restSave = rest_;
00215 byte const* cSave = c_;
00216
00217 while (rest_) {
00218 cSave = c_;
00219 const_iterator::operator++();
00220 if (!remapped.contains(posting.index))
00221 break;
00222 }
00223
00224
00225
00226 DocID index = posting.index;
00227
00228
00229
00230 while (remapIt != remapEnd) {
00231 if (remapIt->second < index) {
00232 displace--;
00233 ++remapIt;
00234 continue;
00235 }
00236 if (remapIt->first >= displace + index)
00237
00238 break;
00239
00240
00241 DocID postindex = remapIt->first;
00242 DocID indexRemapped = remapIt->second;
00243 ++remapIt;
00244
00245 bool found = next(indexRemapped) != const_iterator();
00246
00247
00248
00249
00250 rest_ = restSave;
00251 c_ = cSave;
00252
00253
00254
00255 if (found && posting.index == indexRemapped) {
00256 remapped.insert(indexRemapped);
00257
00258 posting.index = postindex;
00259 --rest_;
00260
00261 return *this;
00262 }
00263 }
00264
00265
00266 rest_ = restSave;
00267 c_ = cSave;
00268
00269 const_iterator::operator++();
00270 posting.index += displace;
00271 return *this;
00272 # endif //oldMerge
00273 }
00274
00275 }