00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef IXE_PostingList_H
00026 #define IXE_PostingList_H
00027
00028
00029 #include "platform.h"
00030
00031
00032 #include <cassert>
00033 #include <cerrno>
00034 #include <cstddef>
00035 #include <fstream>
00036 #include <iterator>
00037 #include <sys/types.h>
00038
00039
00040 #include "Common/eptacode.h"
00041 #include "Common/lexicon.h"
00042 #include "Common/Remap.h"
00043 #include "include/Set.h"
00044 #include "Index/Posting.h"
00045
00046 using namespace IXE::io;
00047
00048 namespace IXE {
00049
00050 #ifndef noSKIP_LIST
00051 struct PostingOffset {
00052 unsigned int offset;
00053 DocID id;
00054 };
00055 #endif
00056
00068 class PostingList
00069 {
00070 public:
00072
00073 typedef Size size_type;
00074 typedef ptrdiff_t difference_type;
00075 typedef Posting value_type;
00076 typedef value_type* pointer;
00077 typedef value_type const* const_pointer;
00078 typedef value_type& reference;
00079 typedef value_type const& const_reference;
00080
00082
00083 PostingList() : ptr_(0), size_(0) { }
00084
00088 PostingList(byte const* ptr) : ptr_(ptr)
00089 {
00090 size_ = parseEptacode(ptr_);
00091 }
00092
00097 PostingList(FileHandle* fh, off64_t offset, size_type size) {
00098 open(fh, offset, size);
00099 }
00100
00101
00102
00103 PostingList(FileHandle* fh, Lexicon::const_iterator& cit) { open(fh, cit); }
00104
00106
00107 void open(FileHandle* fh, off64_t offset, size_type size)
00108 {
00109 if (!postings.open(fh, offset, size))
00110 throw MmapError(string("Opening PostingList: ") + ::strerror(errno));
00111 ptr_ = (byte const*)postings.begin();
00112 size_ = parseEptacode(ptr_);
00113 }
00114
00115
00116
00117
00118
00119
00120
00121
00122 void open(FileHandle* fh, Lexicon::const_iterator& cit)
00123 {
00124 off64_t postingStart = (*cit)->postingsOffset();
00125 open(fh, postingStart, (Size)((*(cit + 1))->postingsOffset() - postingStart));
00126 }
00127
00128
00129
00130
00131 size_type size() const { return size_; }
00132
00134
00138 class HitsCursor {
00139 public:
00140
00141 HitsCursor() { }
00142
00143 HitsCursor(short count, byte const* hitsOffset) :
00144 current(hitsOffset),
00145 count(count),
00146 pos(0) { }
00147
00152 int next(unsigned min) {
00153 if (pos >= min)
00154 return pos;
00155 while (count) {
00156 count--;
00157 pos += parseEptacode(current);
00158 if (pos >= min)
00159 return pos;
00160 }
00161 return noPosition;
00162 }
00163
00164 inline byte const* Current() { return current; }
00165
00169 unsigned nth(int n) {
00170 if (n > count)
00171 return noPosition;
00172 byte const* scan = current;
00173 unsigned res;
00174 while (n--)
00175 res = parseEptacode(scan);
00176 return res;
00177 }
00178
00179 unsigned operator *() { return pos; }
00180
00181 private:
00182 byte const* current;
00183 short count;
00184 unsigned int pos;
00185 };
00186
00188
00189 class const_iterator;
00190 friend class const_iterator;
00191
00192 class const_iterator : public
00193 std::iterator<std::forward_iterator_tag, value_type, difference_type>
00194 {
00195 friend class PostingList;
00196
00197 public:
00198
00199 const_iterator() : rest_(0) { }
00200
00201 reference operator *() { return posting; }
00202 pointer operator ->() { return &posting; }
00203
00204 const_iterator& operator ++();
00205
00206 const_iterator operator ++(int) {
00207 const_iterator tmp = *this;
00208 ++*this;
00209 return tmp;
00210 }
00211
00212 const_iterator& next(DocID min);
00213
00214 bool atEnd() { return rest_ == 0; }
00215
00216 size_type size() const { return rest_; }
00217
00218 friend inline bool
00219 operator ==(const_iterator const &i, const_iterator const &j) {
00220 return i.rest_ == j.rest_;
00221 }
00222
00223 friend inline bool
00224 operator !=(const_iterator const &i, const_iterator const &j) {
00225 return !(i == j);
00226 }
00227
00231 void copyHits(std::fstream& o) {
00232 o << eptacode(hitlen - posting.termFrequency);
00233
00234
00235 o.write((char*)hitsCursor.Current(), hitlen);
00236 }
00237
00241 size_type index() const { return size_ - rest_; }
00242
00243 HitsCursor hitsCursor;
00244
00245 protected:
00246 const_iterator(size_type s, byte const *p) :
00247 rest_(s), c_(p)
00248 # ifndef noSKIP_LIST
00249 , size_(s), tablesz_(0)
00250 # endif
00251 {
00252 assert(p);
00253 # ifndef noSKIP_LIST
00254 if (size_ > Min_Postings_Table) {
00255 # ifndef BYTE_ALIGN
00256 c_ = (byte*)ALIGN(((off_t)c_), sizeof(int));
00257 # endif
00258 table_ = (PostingOffset*)c_;
00259
00260
00261
00262 tablesz_ = size_ / Postings_Segment_Size;
00263 c_ += tablesz_ * sizeof(PostingOffset);
00264 } else
00265 table_ = 0;
00266 # endif // noSKIP_LIST
00267
00268 rest_++;
00269 operator ++();
00270 }
00271
00272
00277 size_type rest_;
00278 byte const* c_;
00279
00280 # ifndef noSKIP_LIST
00281
00284 size_type tablesz_;
00288 size_type size_;
00300 PostingOffset* table_;
00301 # endif
00302 value_type posting;
00303 Size hitlen;
00304 };
00305
00306 const_iterator begin() const { return const_iterator(size_, ptr_); }
00307 const_iterator end() const { return const_iterator(); }
00308
00309
00314 class remap_iterator : public const_iterator
00315 {
00316 public:
00321 remap_iterator(size_type s, byte const *p, int displace, Remap& remap) :
00322
00323 displace(displace),
00324 #ifndef oldMerge
00325 remap(&remap),
00326 remapEnd(remap.end())
00327 #else
00328 remapIt(remap.begin()),
00329 remapEnd(remap.end())
00330 #endif
00331 {
00332 assert(p);
00333 rest_ = s;
00334 c_ = p;
00335 # ifndef noSKIP_LIST
00336 size_ = s;
00337 tablesz_ = 0;
00338 # endif
00339 # ifndef noSKIP_LIST
00340 if (size_ > Min_Postings_Table) {
00341 # ifndef BYTE_ALIGN
00342 c_ = (byte*)ALIGN(((off_t)c_), sizeof(int));
00343 # endif
00344 table_ = (PostingOffset*)c_;
00345
00346
00347
00348 tablesz_ = size_ / Postings_Segment_Size;
00349 c_ += tablesz_ * sizeof(PostingOffset);
00350 } else
00351 table_ = 0;
00352 # endif // noSKIP_LIST
00353
00354
00355
00356
00357 ++rest_;
00358 operator ++();
00359
00360 }
00361
00362 remap_iterator& operator ++();
00363
00364 private:
00365 int displace;
00366 #ifndef oldMerge
00367 Remap * remap;
00368 Remap::const_iterator remapEnd;
00369 #else //oldMerge
00370 Remap::const_iterator remapIt;
00371 Remap::const_iterator remapEnd;
00372 Set<DocID> remapped;
00373 #endif //oldMerge
00374 };
00375
00376 remap_iterator remap_begin(int displace, Remap& remap) const {
00377 return remap_iterator(size_, ptr_, displace, remap); }
00378
00379 private:
00380 MappedFileView postings;
00381 byte const* ptr_;
00382 mutable size_type size_;
00383 };
00384
00385 }
00386
00387 #endif