00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Text_RegExp_h
00025 #define Tanl_Text_RegExp_h
00026
00027
00028 #include "text/text.h"
00029 #include "text/pcre/pcre.h"
00030
00031
00032 #include <string>
00033 #include <vector>
00034 #include <stdexcept>
00035
00036 namespace Tanl {
00037 namespace Text {
00041 namespace RegExp {
00042
/**
 * Exception type of the RegExp namespace.
 *
 * A thin std::runtime_error subclass: the message handed to the
 * constructor is what what() returns.
 */
class RegExpException : public std::runtime_error
{
public:
  /**
   * @param msg  description of the failure, forwarded unchanged to
   *             std::runtime_error.
   */
  RegExpException(const std::string& msg) :
    std::runtime_error(msg)
  { }
};
00047
00048 enum CompileFlags
00049 {
00050 IgnoreCase = PCRE_CASELESS,
00051 NoNewLine = PCRE_MULTILINE,
00052 DotAll = PCRE_DOTALL,
00053 Extended = PCRE_EXTENDED,
00054 Anchored = PCRE_ANCHORED,
00055 DollarEndOnly = PCRE_DOLLAR_ENDONLY,
00056 Extra = PCRE_EXTRA,
00057 UnGreedy = PCRE_UNGREEDY,
00058 Utf8 = PCRE_UTF8
00059 };
00060
00061 enum EvaluateFlags {
00062 NotBol = PCRE_NOTBOL,
00063 NotEmpty = PCRE_NOTEMPTY,
00064 NotEol = PCRE_NOTEOL
00065
00066 };
00067
/**
 * Storage for the positions of the groups found by a match.
 *
 * Internally this is a flat std::vector<int> holding three ints per
 * group, all initialised to -1. The first 2 * size() ints are exposed
 * through operator[] as (first, second) pairs; the remaining third is
 * never reachable through this interface.
 * NOTE(review): the 3-ints-per-group layout presumably mirrors the
 * `ovector` expected by pcre_exec (two thirds offsets, one third
 * workspace) -- confirm against Pattern::match.
 *
 * The vector base is inherited privately, so only the members declared
 * here are available to clients.
 */
class MatchGroups : std::vector<int>
{
  typedef std::vector<int> Base;

public:
  /**
   * @param size  number of groups to reserve room for; expected to be
   *              non-negative. Every slot starts out as -1.
   */
  MatchGroups(int size) :
    Base(3 * size, -1)
  { }

  /**
   * @return the number of groups (one third of the underlying ints).
   */
  size_t size() const { return Base::size() / 3; }

  /**
   * Access the @a i-th group as a pair of ints.
   *
   * The pair is a view onto ints 2*i and 2*i+1 of the underlying
   * vector. This relies on std::pair<int, int> being laid out as two
   * consecutive ints, which holds on common implementations but is not
   * guaranteed by the standard. No bounds checking is performed.
   *
   * @param i  group index, expected in [0, size()).
   * @return a mutable reference to the pair for group @a i.
   */
  std::pair<int, int>& operator [](int i) {
    return *reinterpret_cast<std::pair<int, int>*>(&Base::operator[](2 * i));
  }

  /**
   * Read-only counterpart of operator[], so that groups can be
   * inspected through a const MatchGroups.
   */
  std::pair<int, int> const& operator [](int i) const {
    return *reinterpret_cast<std::pair<int, int> const*>(
        &Base::operator[](2 * i));
  }
};
00090
00114 class Pattern
00115 {
00116 private:
00117 int _errorCode;
00118 pcre* _pcre;
00119 pcre_extra* _pcre_extra;
00120
00124 int subpatterns;
00125
00126 public:
00127
00128 Pattern() { }
00129
00138 Pattern(std::string const& expression, int cflags = 0);
00139
00148 Pattern(char const* expression, int cflags = 0);
00149
00153 Pattern(Pattern const& other) {
00154 _errorCode = other._errorCode;
00155 _pcre = other._pcre;
00156 if (_pcre)
00157 pcre_refcount(_pcre, 1);
00158 _pcre_extra = other._pcre_extra;
00159 subpatterns = other.subpatterns;
00160 }
00161
00162 ~Pattern();
00163
00167 Pattern& operator =(Pattern const& other) {
00168 if (this != &other) {
00169 _errorCode = other._errorCode;
00170 _pcre = other._pcre;
00171 if (_pcre)
00172 pcre_refcount(_pcre, 1);
00173 _pcre_extra = other._pcre_extra;
00174 subpatterns = other.subpatterns;
00175 }
00176 return *this;
00177 }
00178
00185 bool test(std::string const& str, int eflags = 0) const;
00186
00195 bool test(char const* str, size_t len = 0, int eflags = 0);
00196
00203 int matchSize(std::string const& text, int eflags = 0);
00214 int match(const char* start, const char* end,
00215 MatchGroups& pos, int eflags = 0);
00225 int match(std::string const& text, MatchGroups& pos, int eflags = 0);
00233 std::vector<std::string> match(std::string const& str, int eflags = 0);
00234
00252 std::string replace(std::string& text, std::string& rewrite, bool replaceAll = false);
00253
00257 static std::string escape(std::string& str);
00258
00263 static const unsigned char* setLocale(char const* locale);
00264
00266 static const unsigned char* CharTables;
00267 };
00268
00269 }
00270 }
00271 }
00272
00273 #endif // Tanl_Text_RegExp_h