bes  Updated for version 3.20.10
BESRegex.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2005 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 //#define DODS_DEBUG
28 
29 #include "config.h"
30 
31 #if 0
32 #ifndef WIN32
33 #include <alloca.h>
34 #endif
35 #include <stdlib.h>
36 
37 #include <sys/types.h>
38 #include <regex.h>
39 
40 #include <new>
41 #include <string>
42 #include <vector>
43 #include <stdexcept>
44 #endif
45 
46 #include <string>
47 #include <vector>
48 
49 #include <regex.h>
50 
51 //#include <libdap/Error.h>
52 #include <libdap/debug.h>
53 #include <libdap/util.h>
54 
55 #include "BESError.h"
56 #include "BESRegex.h"
57 
58 #if 0
59 #include "util.h"
60 #include "debug.h"
61 #endif
62 
63 using namespace std;
64 
65 void
66 BESRegex::init(const char *t)
67 {
68 #if !USE_CPP_11_REGEX
69  d_preg = static_cast<void*>(new regex_t);
70 
71  int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
72  if (result != 0) {
73  size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
74  static_cast<char*>(nullptr),
75  static_cast<size_t>(0));
76 
77  vector<char> msg(msg_len+1);
78  regerror(result, static_cast<regex_t*>(d_preg), &msg[0], msg_len);
79  string err = string("BESRegex error: ") + string(&msg[0], msg_len);
80  throw BESError(err, BES_SYNTAX_USER_ERROR, __FILE__, __LINE__);
81  }
82 #else
83  d_exp = regex(t);
84 #endif
85 }
86 
87 #if 0
88 void
89 BESRegex::init(const string &t)
90 {
91  d_exp = regex(t);
92 }
93 #endif
94 
95 #if !USE_CPP_11_REGEX
96 BESRegex::~BESRegex()
97 {
98  regfree(static_cast<regex_t*>(d_preg));
99  delete static_cast<regex_t*>(d_preg); d_preg = 0;
100 }
101 #endif
102 
103 #if 0
107 BESRegex::BESRegex(const char* t)
108 {
109  init(t);
110 }
111 
114 BESRegex::BESRegex(const char* t, int)
115 {
116  init(t);
117 }
118 #endif
119 
126 int
127 BESRegex::match(const char *s, int len, int pos) const
128 {
129 #if !USE_CPP_11_REGEX
130  if (len > 32766) // Integer overflow protection
131  return -1;
132 
133  regmatch_t *pmatch = new regmatch_t[len+1];
134  string ss = s;
135 
136  int result = regexec(static_cast<regex_t*>(d_preg),
137  ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
138  int matchnum;
139  if (result == REG_NOMATCH)
140  matchnum = -1;
141  else
142  matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
143 
144  delete[] pmatch; pmatch = 0;
145 
146  return matchnum;
147 #else
148  if (pos > len)
149  throw Error("Position exceed length in BESRegex::match()");
150 
151  smatch match;
152  auto target = string(s+pos, len-pos);
153  bool found = regex_search(target, match, d_exp);
154  if (found)
155  return (int)match.length();
156  else
157  return -1;
158 #endif
159 }
160 
166 int
167 BESRegex::match(const string &s) const
168 {
169 #if USE_CPP_11_REGEX
170  smatch match;
171  bool found = regex_search(s, match, d_exp);
172  if (found)
173  return (int)match.length();
174  else
175  return -1;
176 #else
177  return match(s.c_str(), s.length(), 0);
178 #endif
179 }
180 
191 int
192 BESRegex::search(const char *s, int len, int& matchlen, int pos) const
193 {
194 #if !USE_CPP_11_REGEX
195  // sanitize allocation
196  if (!libdap::size_ok(sizeof(regmatch_t), len+1))
197  return -1;
198 
199  // alloc space for len matches, which is theoretical max.
200  // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
201  // then len+1 is a an integer overflow and this might be exploited by
202  // an attacker. It's not likely there will be more than a handful of
203  // matches, so I am going to limit this value to 32766. jhrg 3/4/09
204  if (len > 32766)
205  return -1;
206 
207  regmatch_t *pmatch = new regmatch_t[len+1];
208  string ss = s;
209 
210  int result = regexec(static_cast<regex_t*>(d_preg),
211  ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
212  if (result == REG_NOMATCH) {
213  delete[] pmatch; pmatch = 0;
214  return -1;
215  }
216 
217  // Match found, find the first one (pmatch lists the longest first)
218  int m = 0;
219  for (int i = 1; i < len; ++i)
220  if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
221  m = i;
222 
223  matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
224  int matchpos = pmatch[m].rm_so;
225 
226  delete[] pmatch; pmatch = 0;
227  return matchpos;
228 #else
229  smatch match;
230  // This is needed because in C++14, the first arg to regex_search() cannot be a
231  // temporary string. It seems the C++11 compilers on some linux dists are using
232  // regex headers that enforce c++14 rules. jhrg 12/2/21
233  auto target = string(s+pos, len-pos);
234  bool found = regex_search(target, match, d_exp);
235  matchlen = (int)match.length();
236  if (found)
237  return (int)match.position();
238  else
239  return -1;
240 #endif
241 }
242 
249 int
250 BESRegex::search(const string &s, int& matchlen) const
251 {
252 #if USE_CPP_11_REGEX
253  smatch match;
254  bool found = regex_search(s, match, d_exp);
255  matchlen = (int)match.length();
256  if (found)
257  return (int)match.position();
258  else
259  return -1;
260 #else
261  // search(const char *s, int len, int& matchlen, int pos) const
262  return search(s.c_str(), s.length(), matchlen, 0);
263 #endif
264 }
265 
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
BESRegex(const char *s)
initialize a BESRegex with a C string
Definition: BESRegex.h:77
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
int search(const char *s, int len, int &matchlen, int pos=0) const
How much of the string does the pattern match.
Definition: BESRegex.cc:192