libdap  Updated for version 3.20.9
libdap4 is an implementation of OPeNDAP's DAP protocol.
GNURegex.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2005 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 //#define DODS_DEBUG
28 
29 #include "config.h"
30 
31 #if 0
32 #ifndef WIN32
33 #include <alloca.h>
34 #endif
35 #include <stdlib.h>
36 
37 #include <sys/types.h>
38 #include <regex.h>
39 
40 #include <new>
41 #include <string>
42 #include <vector>
43 #include <stdexcept>
44 #endif
45 
#include <memory>
#include <vector>

#include <regex.h>

#include "GNURegex.h"
#include "Error.h"

#include "debug.h"
#include "util.h"
55 
56 #if 0
57 #include "util.h"
58 #include "debug.h"
59 #endif
60 
61 using namespace std;
62 
63 namespace libdap {
64 
65 void
66 Regex::init(const char *t)
67 {
68 #if !USE_CPP_11_REGEX
69  DBG( cerr << "Regex::init() - BEGIN" << endl);
70 
71  DBG( cerr << "Regex::init() - creating new regex..." << endl);
72  d_preg = static_cast<void*>(new regex_t);
73 
74  DBG( cerr << "Regex::init() - Calling regcomp()..." << endl);
75  int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
76 
77  if (result != 0) {
78  DBG( cerr << "Regex::init() - Call to regcomp FAILED" << endl);
79  DBG( cerr << "Regex::init() - Calling regerror()..." << endl);
80  size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
81  static_cast<char*>(NULL),
82  static_cast<size_t>(0));
83 
84  DBG( cerr << "Regex::init() - Creating message" << endl);
85  vector<char> msg(msg_len+1);
86  //char *msg = new char[msg_len+1];
87  DBG( cerr << "Regex::init() - Calling regerror() again..." << endl);
88  regerror(result, static_cast<regex_t*>(d_preg), &msg[0], msg_len);
89  DBG( cerr << "Regex::init() - Throwing libdap::Error" << endl);
90  throw Error(string("Regex error: ") + string(&msg[0]));
91  //delete[] msg;
92  //throw e;
93  }
94  DBG( cerr << "Regex::init() - Call to regcomp() SUCCEEDED" << endl);
95  DBG( cerr << "Regex::init() - END" << endl);
96 #else
97  d_exp = regex(t);
98 #endif
99 }
100 
// Disabled code: everything between '#if 0' and '#endif' is discarded by the
// preprocessor, so this std::string overload of Regex::init() is not compiled
// from this file. As written it would assign a std::regex built from t to d_exp.
101 #if 0
102 void
103 Regex::init(const string &t)
104 {
105  d_exp = regex(t);
106 }
107 #endif
108 
109 #if !USE_CPP_11_REGEX
110 Regex::~Regex()
111 {
112  regfree(static_cast<regex_t*>(d_preg));
113  delete static_cast<regex_t*>(d_preg); d_preg = 0;
114 }
115 #endif
116 
// Disabled code: everything between '#if 0' and '#endif' is discarded by the
// preprocessor, so these two constructors are not compiled from this file.
// As written, both simply forward the pattern to init(); the second one
// ignores its unnamed int argument.
117 #if 0
121 Regex::Regex(const char* t)
122 {
123  init(t);
124 }
125 
128 Regex::Regex(const char* t, int)
129 {
130  init(t);
131 }
132 #endif
133 
140 int
141 Regex::match(const char *s, int len, int pos) const
142 {
143 #if !USE_CPP_11_REGEX
144  if (len > 32766) // Integer overflow protection
145  return -1;
146 
147  regmatch_t *pmatch = new regmatch_t[len+1];
148  string ss = s;
149 
150  int result = regexec(static_cast<regex_t*>(d_preg),
151  ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
152  int matchnum;
153  if (result == REG_NOMATCH)
154  matchnum = -1;
155  else
156  matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
157 
158  delete[] pmatch; pmatch = 0;
159 
160  return matchnum;
161 #else
162  if (pos > len)
163  throw Error("Position exceed length in Regex::match()");
164 
165  smatch match;
166  auto target = string(s+pos, len-pos);
167  bool found = regex_search(target, match, d_exp);
168  if (found)
169  return (int)match.length();
170  else
171  return -1;
172 #endif
173 }
174 
180 int
181 Regex::match(const string &s) const
182 {
183 #if USE_CPP_11_REGEX
184  smatch match;
185  bool found = regex_search(s, match, d_exp);
186  if (found)
187  return (int)match.length();
188  else
189  return -1;
190 #else
191  return match(s.c_str(), s.length(), 0);
192 #endif
193 }
194 
205 int
206 Regex::search(const char *s, int len, int& matchlen, int pos) const
207 {
208 #if !USE_CPP_11_REGEX
209  // sanitize allocation
210  if (!size_ok(sizeof(regmatch_t), len+1))
211  return -1;
212 
213  // alloc space for len matches, which is theoretical max.
214  // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
215  // then len+1 is a an integer overflow and this might be exploited by
216  // an attacker. It's not likely there will be more than a handful of
217  // matches, so I am going to limit this value to 32766. jhrg 3/4/09
218  if (len > 32766)
219  return -1;
220 
221  regmatch_t *pmatch = new regmatch_t[len+1];
222  string ss = s;
223 
224  int result = regexec(static_cast<regex_t*>(d_preg),
225  ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
226  if (result == REG_NOMATCH) {
227  delete[] pmatch; pmatch = 0;
228  return -1;
229  }
230 
231  // Match found, find the first one (pmatch lists the longest first)
232  int m = 0;
233  for (int i = 1; i < len; ++i)
234  if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
235  m = i;
236 
237  matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
238  int matchpos = pmatch[m].rm_so;
239 
240  delete[] pmatch; pmatch = 0;
241  return matchpos;
242 #else
243  smatch match;
244  // This is needed because in C++14, the first arg to regex_search() cannot be a
245  // temporary string. It seems the C++11 compilers on some linux dists are using
246  // regex headers that enforce c++14 rules. jhrg 12/2/21
247  auto target = string(s+pos, len-pos);
248  bool found = regex_search(target, match, d_exp);
249  matchlen = (int)match.length();
250  if (found)
251  return (int)match.position();
252  else
253  return -1;
254 #endif
255 }
256 
263 int
264 Regex::search(const string &s, int& matchlen) const
265 {
266 #if USE_CPP_11_REGEX
267  smatch match;
268  bool found = regex_search(s, match, d_exp);
269  matchlen = (int)match.length();
270  if (found)
271  return (int)match.position();
272  else
273  return -1;
274 #else
275  // search(const char *s, int len, int& matchlen, int pos) const
276  return search(s.c_str(), s.length(), matchlen, 0);
277 #endif
278 }
279 
280 } // namespace libdap
281 
A class for error processing.
Definition: Error.h:94
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
bool size_ok(unsigned int sz, unsigned int nelem)
sanitize the size of an array. Test for integer overflow when dynamically allocating an array.
Definition: util.cc:1150