00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef IXE_IndexTable_H
00026 #define IXE_IndexTable_H
00027
00028
00029 #include "platform.h"
00030
00031
00032 #include <cstddef>
00033 #include <iterator>
00034 #include <sys/types.h>
00035
00036
00037 #include "include/ixe.h"
00038 #include "io/mappedFile.h"
00039
00040 using namespace IXE::io;
00041
00042 namespace IXE {
00043
/**
 * Version stamp made of a major and a minor number.
 *
 * Two stamps compare equal when both numbers match.  isSwapped() detects a
 * stamp whose two numbers are the byte-reversed images of another stamp's,
 * which is what a 16-bit value looks like after being stored with the
 * opposite byte order.
 */
struct FileFormat {
    short major;    ///< major version number
    short minor;    ///< minor version number

    /** @return true when both version numbers are identical. */
    bool operator ==(FileFormat const& other) const {
        return (major == other.major && minor == other.minor);
    }

    /** @return true when either version number differs. */
    inline bool operator !=(FileFormat const& other) const {
        return !(*this == other);
    }

    /**
     * Exchange the two low-order bytes of @a v.
     *
     * The work is done on an unsigned value masked to 16 bits: shifting the
     * promoted (signed) int directly would keep bits above bit 15 after the
     * left shift and would sign-extend on the right shift.
     *
     * @param v  value whose low 16 bits are to be byte-reversed.
     * @return   the byte-reversed low 16 bits of @a v.
     */
    static unsigned swapBytes(short v) {
        unsigned const u = static_cast<unsigned>(v) & 0xFFFFu;
        return ((u >> 8) | (u << 8)) & 0xFFFFu;
    }

    /**
     * Check whether this stamp is the byte-swapped image of @a other.
     *
     * The relation is symmetric: a.isSwapped(b) == b.isSwapped(a).
     *
     * @param other  stamp to compare against.
     * @return true when both numbers of this stamp, with their two bytes
     *         exchanged, equal the corresponding numbers of @a other.
     */
    bool isSwapped(FileFormat const& other) const {
        return swapBytes(major) == (static_cast<unsigned>(other.major) & 0xFFFFu) &&
               swapBytes(minor) == (static_cast<unsigned>(other.minor) & 0xFFFFu);
    }
};
00064
00068 struct FileHeader {
00069 FileFormat format;
00070 Count num_docs;
00071 Count num_columns;
00072 # ifdef __alpha
00073 Count _align;
00074 # endif
00075
00076 FileHeader() { }
00077
00078 FileHeader(FileFormat& format, Count num_docs, Count num_columns) :
00079 format(format), num_docs(num_docs), num_columns(num_columns)
00080 { }
00081
00082 inline int get_num_docs() const {
00083 return
00084 # if 0 // WORDS_BIGENDIAN
00085 (fh->format.isSwapped(fileFormatVersion)) ? int4get(&num_docs) :
00086 # endif
00087 num_docs;
00088 }
00089
00090 inline int get_num_columns() const {
00091 return
00092 # if 0 // WORDS_BIGENDIAN
00093 (fh->format.isSwapped(fileFormatVersion)) ? int4get(&num_columns) :
00094 # endif
00095 num_columns;
00096 }
00097 };
00098
00099 extern FileFormat fileFormatVersion;
00100
00101
/**
 * Numeric identifiers of the individual indexes.
 *
 * The values are spelled out explicitly so that they stay fixed if further
 * enumerators are added.
 */
enum index_id {
    stop_word_index     = 0,
    color_index         = 1
};
00106
00123 template <class Entry>
00124 class IndexTable
00125 {
00126 public:
00128
00129 typedef Size size_type;
00130 typedef ptrdiff_t difference_type;
00131
00132 typedef Entry value_type;
00133 typedef Entry* pointer;
00134 typedef Entry const* const_pointer;
00135 typedef Entry& reference;
00136 typedef Entry const& const_reference;
00137
00139
00140 IndexTable() : table(0) { }
00141 IndexTable(mappedFile const &file, index_id id);
00142
00144
00153 void access(mappedFile const& file, off32_t& offset) {
00154 begin_ = file.begin();
00155 off32_t const *p = reinterpret_cast<off32_t const *>(begin_ + offset);
00156 entries = p[0];
00157 table = reinterpret_cast<Entry const *>(&p[1]);
00158
00159 offset += sizeof(entries) + (entries + 1) * sizeof(Entry)
00160 + *reinterpret_cast<off32_t const*>(&table[entries]);
00161 }
00162
00163 size_type size() const { return entries; }
00164
00170 const_pointer operator [](size_type i) const { return &table[i]; }
00171
00173
00174 class const_iterator :
00175 public std::iterator<std::random_access_iterator_tag,
00176 value_type, difference_type>
00177 {
00178 public:
00179 IndexTable<Entry> const* index;
00180 size_type i;
00181
00182 protected:
00183 const_iterator(IndexTable const* index, size_type i) :
00184 index(index), i(i) { }
00185 friend class IndexTable;
00186
00187 public:
00188 const_iterator() { }
00189
00190 const_pointer operator *() const { return (*index)[i]; }
00191 const_iterator& operator ++() { ++i; return *this; }
00192 const_iterator& operator --() { --i; return *this; }
00193
00194 const_iterator operator ++(int) { return const_iterator(index, i++); }
00195 const_iterator operator --(int) { return const_iterator(index, i--); }
00196 const_iterator& operator +=(int n) { i += n; return *this; }
00197 const_iterator& operator -=(int n) { i -= n; return *this; }
00198
00199 bool
00200 operator ==(const_iterator const& j) {
00201 return i == j.i;
00202 }
00203 bool
00204 operator !=(const_iterator const& j) {
00205 return !(*this == j);
00206 }
00207 bool
00208 operator <(const_iterator const& j) {
00209 return i < j.i;
00210 }
00211
00212 typename const_iterator::difference_type
00213 operator -(const_iterator const& j) {
00214 return this->i - j.i;
00215 }
00216
00217 friend inline bool
00218 operator ==(const_iterator const& i, const_iterator const& j) {
00219 return i.i == j.i;
00220 }
00221 friend inline bool
00222 operator !=(const_iterator const& i, const_iterator const& j) {
00223 return !(i == j);
00224 }
00225
00226 friend const_iterator
00227 operator +(const_iterator const& i, int n) {
00228 return typename IndexTable<Entry>::const_iterator(i.index, i.i + n);
00229 }
00230 friend const_iterator
00231 operator -(const_iterator const& i, int n) {
00232 return IndexTable<Entry>::const_iterator(i.index, i.i - n);
00233 }
00234 friend typename const_iterator::difference_type
00235 operator -(const_iterator const& i, const_iterator const& j) {
00236 return i.i - j.i;
00237 }
00238 size_type position() { return i; }
00239 };
00240
00241 # if defined(__DECCXX) || defined(_MSC_VER) || (__GNUC__ >= 3)
00242 friend class const_iterator;
00243 # else
00244 friend class IndexTable::const_iterator;
00245 # endif
00246
00247 const_iterator begin() const {
00248 return const_iterator(this, 0);
00249 }
00250 const_iterator end() const {
00251 return const_iterator(this, entries);
00252 }
00253 const_iterator position_at(size_type i) const {
00254 return const_iterator(this, i);
00255 }
00256 protected:
00257 mappedFile::const_iterator begin_;
00258 size_type entries;
00259 Entry const* table;
00260 };
00261
00266 class BigramTable : public IndexTable<TermID> {
00267 public:
00268 void access(mappedFile const& file, off32_t& offset);
00269 value_type operator [](size_type i) const { return table[i]; }
00270 value_type& operator [](size_type i) {
00271 return ((value_type*)table)[i];
00272 }
00273 };
00274
00275 class StringTable : public IndexTable<Size> {
00276 public:
00277 StringTable() { }
00278
00279 StringTable(mappedFile const& file, index_id id);
00280
00281
00282 class const_iterator : public IndexTable<Size>::const_iterator
00283 {
00284 public:
00285 const_iterator() { }
00286
00287 const_iterator(StringTable const* index_, size_type i_) :
00288 IndexTable<Size>::const_iterator(index_, i_) { }
00289
00290
00291 const_iterator& operator ++() { ++i; return *this; }
00292
00293 char const* operator *() const {
00294 return (*(StringTable*)index)[i];
00295 }
00296
00297 # ifdef __DECCXX
00298 StringTable::const_iterator&
00299 operator =(IndexTable<Size>::const_iterator const& other) {
00300 index = other.index;
00301 i = other.i;
00302 return *this;
00303 }
00304 # endif
00305 };
00306
00307 char const* operator [](size_type i) const {
00308 return begin_ + table[i];
00309 }
00310
00311 const_iterator begin() const {
00312 return const_iterator(this, 0);
00313 }
00314 const_iterator end() const {
00315 return const_iterator(this, entries);
00316 }
00317 const_iterator position_at(size_type i) const {
00318 return const_iterator(this, i);
00319 }
00320
00321
00322 void clear() { entries = 0; }
00323
00324 };
00325
00326 }
00327
00328 #endif