Tanl Linguistic Pipeline
/*
**  Tanl
**  tag/TanlPos/Vocab.h
** ----------------------------------------------------------------------
**  Copyright (c) 2005  Giuseppe Attardi (attardi@di.unipi.it).
** ----------------------------------------------------------------------
**
**  This file is part of Tanl.
**
**  Tanl is free software; you can redistribute it and/or modify it
**  under the terms of the GNU General Public License, version 3,
**  as published by the Free Software Foundation.
**
**  Tanl is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**  GNU General Public License for more details.
**
**  You should have received a copy of the GNU General Public License
**  along with this program.  If not, see <http://www.gnu.org/licenses/>.
** ----------------------------------------------------------------------
*/

#ifndef Tanl_POS_vocab_H
#define Tanl_POS_vocab_H

// <iosfwd> declares std::istream / std::ostream, which the serialize()
// declarations below name; do not rely on <string> pulling them in.
#include <iosfwd>
#include <string>
#include <vector>
#include <tr1/unordered_map>

namespace Tanl { namespace POS {

/**
 *  Vocabulary holding two lookup tables: one from word strings to integer
 *  ids and one from integer ids back to word strings.
 *
 *  Only size() is defined in this header; every other member function is
 *  declared here and implemented elsewhere.
 *  NOTE(review): the two tables are presumably kept in sync by the
 *  out-of-line members -- confirm against the implementation file.
 */
struct Vocab
{
  /** Table keyed by word string, yielding the word's integer id. */
  std::tr1::unordered_map<std::string, int>	word2id;
  /** Table keyed by integer id, yielding the word string. */
  std::tr1::unordered_map<int, std::string>	id2word;

  /**
   *  Output-stream overload of serialize.
   *  NOTE(review): presumably writes the vocabulary to @p out -- confirm.
   *  @param out  stream handed to the implementation.
   */
  void		serialize(std::ostream &out);

  /**
   *  Input-stream overload of serialize.
   *  NOTE(review): despite the name, this presumably loads the vocabulary
   *  from @p in -- confirm.
   *  @param in   stream handed to the implementation.
   */
  void		serialize(std::istream &in);

  /**
   *  Map a single word to an integer index.
   *  NOTE(review): the result for a word that is not yet in the vocabulary
   *  is decided by the implementation, which is not visible here -- confirm
   *  before relying on it.
   *  @param w  the word to look up.
   *  @return   the integer index associated with @p w.
   */
  int		toindex(std::string const& w);

  /**
   *  Map a sequence of words to a sequence of integer indices.
   *  NOTE(review): presumably applies the single-word overload to each
   *  element of @p ngram, in order -- confirm.
   *  @param ngram  the words to convert; taken by non-const reference.
   *  @return       a vector of integer indices.
   */
  std::vector<int> toindex(std::vector<std::string>& ngram);

  /**
   *  Map an integer index back to a word.
   *  NOTE(review): the returned pointer presumably refers to an entry owned
   *  by id2word and may be null for an unknown index -- confirm ownership
   *  and the not-found result against the implementation.
   *  @param idx  the index to look up.
   *  @return     pointer to the word string.
   */
  std::string*	toword(int idx);

  /**
   *  @return the number of entries in word2id, converted to int (the
   *          return type predates this change and is kept for callers).
   */
  int		size() const { return static_cast<int>(word2id.size()); }

};

} // namespace POS
} // namespace Tanl

#endif // Tanl_POS_vocab_H