// Tanl Linguistic Pipeline
00001 /* 00002 ** Tanl 00003 ** Corpus/SentenceReader.h 00004 ** ---------------------------------------------------------------------- 00005 ** Copyright (c) 2006 Giuseppe Attardi (attardi@di.unipi.it). 00006 ** ---------------------------------------------------------------------- 00007 ** 00008 ** This file is part of Tanl. 00009 ** 00010 ** Tanl is free software; you can redistribute it and/or modify it 00011 ** under the terms of the GNU General Public License, version 3, 00012 ** as published by the Free Software Foundation. 00013 ** 00014 ** Tanl is distributed in the hope that it will be useful, 00015 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 ** GNU General Public License for more details. 00018 ** 00019 ** You should have received a copy of the GNU General Public License 00020 ** along with this program. If not, see <http://www.gnu.org/licenses/>. 00021 ** ---------------------------------------------------------------------- 00022 */ 00023 00024 #ifndef Tanl_Corpus_SentenceReader_H 00025 #define Tanl_Corpus_SentenceReader_H 00026 00027 // library 00028 #include "text/RegExp.h" 00029 00030 // standard 00031 #include <istream> 00032 #include <string> 00033 #include <vector> 00034 00035 // local 00036 #include "Token.h" 00037 #include "include/Enumerator.h" 00038 00039 namespace Tanl { 00040 00041 class Corpus; 00042 00046 class SentenceReader : public Enumerator<Sentence*> 00047 { 00048 public: 00049 00055 SentenceReader(std::istream* is, Corpus* corpus); 00056 00058 virtual bool MoveNext(); 00059 00061 virtual Sentence* Current(); 00062 00064 virtual void reset(); 00065 00066 virtual ~SentenceReader() {}; 00067 00068 Corpus* corpus; 00069 00070 protected: 00071 Sentence* sentence; 00072 std::istream* is; 00073 std::string line; 00074 }; 00075 00076 } // namespace Tanl 00077 00078 #endif // Tanl_Corpus_SentenceReader_H