libdap Updated for version 3.20.11
libdap4 is an implementation of OPeNDAP's DAP protocol.
GNURegex.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2005 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27//#define DODS_DEBUG
28
29#include "config.h"
30
31#if 0
32#ifndef WIN32
33#include <alloca.h>
34#endif
35#include <stdlib.h>
36
37#include <sys/types.h>
38#include <regex.h>
39
40#include <new>
41#include <string>
42#include <vector>
43#include <stdexcept>
44#endif
45
46#include <vector>
47
48#include <regex.h>
49
50#include "GNURegex.h"
51#include "Error.h"
52
53#include "debug.h"
54#include "util.h"
55
56#if 0
57#include "util.h"
58#include "debug.h"
59#endif
60
61using namespace std;
62
63namespace libdap {
64
65void
66Regex::init(const char *t)
67{
68#if !USE_CPP_11_REGEX
69 DBG( cerr << "Regex::init() - BEGIN" << endl);
70
71 DBG( cerr << "Regex::init() - creating new regex..." << endl);
72 d_preg = static_cast<void*>(new regex_t);
73
74 DBG( cerr << "Regex::init() - Calling regcomp()..." << endl);
75 int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
76
77 if (result != 0) {
78 DBG( cerr << "Regex::init() - Call to regcomp FAILED" << endl);
79 DBG( cerr << "Regex::init() - Calling regerror()..." << endl);
80 size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
81 static_cast<char*>(NULL),
82 static_cast<size_t>(0));
83
84 DBG( cerr << "Regex::init() - Creating message" << endl);
85 vector<char> msg(msg_len+1);
86 //char *msg = new char[msg_len+1];
87 DBG( cerr << "Regex::init() - Calling regerror() again..." << endl);
88 regerror(result, static_cast<regex_t*>(d_preg), msg.data(), msg_len);
89 DBG( cerr << "Regex::init() - Throwing libdap::Error" << endl);
90 throw Error(string("Regex error: ") + string(msg.data()));
91 //delete[] msg;
92 //throw e;
93 }
94 DBG( cerr << "Regex::init() - Call to regcomp() SUCCEEDED" << endl);
95 DBG( cerr << "Regex::init() - END" << endl);
96#else
97 d_exp = regex(t);
98#endif
99}
100
#if 0
// Disabled code: overload of init() that takes the pattern as a std::string
// and assigns a std::regex built from it to d_exp.
void Regex::init(const string &t)
{
    d_exp = std::regex(t);
}
#endif
108
109#if !USE_CPP_11_REGEX
110Regex::~Regex()
111{
112 regfree(static_cast<regex_t*>(d_preg));
113 delete static_cast<regex_t*>(d_preg); d_preg = 0;
114}
115#endif
116
#if 0
// Disabled code: out-of-line constructors. Both forward the pattern to init().

Regex::Regex(const char* t)
{
    this->init(t);
}

// The second (unnamed) int parameter is ignored.
Regex::Regex(const char* t, int)
{
    this->init(t);
}
#endif
133
140int
141Regex::match(const char *s, int len, int pos) const
142{
143#if !USE_CPP_11_REGEX
144 if (len > 32766) // Integer overflow protection
145 return -1;
146
147 regmatch_t *pmatch = new regmatch_t[len+1];
148 string ss = s;
149
150 int result = regexec(static_cast<regex_t*>(d_preg),
151 ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
152 int matchnum;
153 if (result == REG_NOMATCH)
154 matchnum = -1;
155 else
156 matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
157
158 delete[] pmatch; pmatch = 0;
159
160 return matchnum;
161#else
162 if (pos > len)
163 throw Error("Position exceed length in Regex::match()");
164
165 smatch match;
166 auto target = string(s+pos, len-pos);
167 bool found = regex_search(target, match, d_exp);
168 if (found)
169 return (int)match.length();
170 else
171 return -1;
172#endif
173}
174
180int
181Regex::match(const string &s) const
182{
183#if USE_CPP_11_REGEX
184 smatch match;
185 bool found = regex_search(s, match, d_exp);
186 if (found)
187 return (int)match.length();
188 else
189 return -1;
190#else
191 return match(s.c_str(), s.length(), 0);
192#endif
193}
194
205int
206Regex::search(const char *s, int len, int& matchlen, int pos) const
207{
208#if !USE_CPP_11_REGEX
209 // sanitize allocation
210 if (!size_ok(sizeof(regmatch_t), len+1))
211 return -1;
212
213 // alloc space for len matches, which is theoretical max.
214 // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
215 // then len+1 is a an integer overflow and this might be exploited by
216 // an attacker. It's not likely there will be more than a handful of
217 // matches, so I am going to limit this value to 32766. jhrg 3/4/09
218 if (len > 32766)
219 return -1;
220
221 regmatch_t *pmatch = new regmatch_t[len+1];
222 string ss = s;
223
224 int result = regexec(static_cast<regex_t*>(d_preg),
225 ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
226 if (result == REG_NOMATCH) {
227 delete[] pmatch; pmatch = 0;
228 return -1;
229 }
230
231 // Match found, find the first one (pmatch lists the longest first)
232 int m = 0;
233 for (int i = 1; i < len; ++i)
234 if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
235 m = i;
236
237 matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
238 int matchpos = pmatch[m].rm_so;
239
240 delete[] pmatch; pmatch = 0;
241 return matchpos;
242#else
243 smatch match;
244 // This is needed because in C++14, the first arg to regex_search() cannot be a
245 // temporary string. It seems the C++11 compilers on some linux dists are using
246 // regex headers that enforce c++14 rules. jhrg 12/2/21
247 auto target = string(s+pos, len-pos);
248 bool found = regex_search(target, match, d_exp);
249 matchlen = (int)match.length();
250 if (found)
251 return (int)match.position();
252 else
253 return -1;
254#endif
255}
256
263int
264Regex::search(const string &s, int& matchlen) const
265{
266#if USE_CPP_11_REGEX
267 smatch match;
268 bool found = regex_search(s, match, d_exp);
269 matchlen = (int)match.length();
270 if (found)
271 return (int)match.position();
272 else
273 return -1;
274#else
275 // search(const char *s, int len, int& matchlen, int pos) const
276 return search(s.c_str(), s.length(), matchlen, 0);
277#endif
278}
279
280} // namespace libdap
281
A class for error processing.
Definition: Error.h:94
Regex(const char *s)
initialize a Regex with a C string
Definition: GNURegex.h:79
int search(const char *s, int len, int &matchlen, int pos=0) const
How much of the string does the pattern match.
Definition: GNURegex.cc:206
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: GNURegex.cc:141
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
bool size_ok(unsigned int sz, unsigned int nelem)
sanitize the size of an array. Test for integer overflow when dynamically allocating an array.
Definition: util.cc:1152