bes  Updated for version 3.20.10
HttpUtils.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 // This file is part of the BES http package, part of the Hyrax data server.
3 
4 // Copyright (c) 2020 OPeNDAP, Inc.
5 // Author: Nathan Potter <ndp@opendap.org>
6 //
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public
18 // License along with this library; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 //
21 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22 
23 // Authors:
24 // ndp Nathan Potter <ndp@opendap.org>
25 
26 #include "config.h"
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #include <cstdlib>
33 #include <cstring>
34 #include <string>
35 #include <map>
36 #include <vector>
37 #include <sstream>
38 #include <time.h>
39 
40 #include <curl/curl.h>
41 
42 #include <BESUtil.h>
43 #include <BESCatalogUtils.h>
44 #include <BESCatalogList.h>
45 #include <BESCatalog.h>
46 #include <BESRegex.h>
47 #include <TheBESKeys.h>
48 #include <BESInternalError.h>
49 #include <BESNotFoundError.h>
50 #include <BESSyntaxUserError.h>
51 #include <BESDebug.h>
52 
53 #include "HttpNames.h"
54 #include "HttpUtils.h"
55 #include "ProxyConfig.h"
56 
57 #define MODULE "http"
58 
59 using namespace std;
60 using namespace http;
61 
62 // These are static class members
63 
64 #define prolog string("HttpUtils::").append(__func__).append("() - ")
65 
66 namespace http {
71 void load_mime_list_from_keys(map<string, string> &mime_list)
72 {
73  // MimeTypes - translate from a mime type to a module name
74  bool found = false;
75  vector<string> vals;
76  TheBESKeys::TheKeys()->get_values(HTTP_MIMELIST_KEY, vals, found);
77  if (found && vals.size()) {
78  vector<string>::iterator i = vals.begin();
79  vector<string>::iterator e = vals.end();
80  for (; i != e; i++) {
81  size_t colon = (*i).find(":");
82  if (colon == string::npos) {
83  string err = (string) "Malformed " + HTTP_MIMELIST_KEY + " " + (*i) +
84  " specified in the gateway configuration";
85  throw BESSyntaxUserError(err, __FILE__, __LINE__);
86  }
87  string mod = (*i).substr(0, colon);
88  string mime = (*i).substr(colon + 1);
89  mime_list[mod] = mime;
90  }
91  }
92 }
93 
94 
95 
109 void get_type_from_disposition(const string &disp, string &type)
110 {
111  // If this function extracts a filename from disp and it matches a handler's
112  // regex using the Catalog Utils, this will be set to a non-empty value.
113  type = "";
114 
115  size_t fnpos = disp.find("filename");
116  if (fnpos != string::npos) {
117  // Got the filename attribute, now get the
118  // filename, which is after the pound sign (#)
119  size_t pos = disp.find("#", fnpos);
120  if (pos == string::npos) pos = disp.find("=", fnpos);
121  if (pos != string::npos) {
122  // Got the filename to the end of the
123  // string, now get it to either the end of
124  // the string or the start of the next
125  // attribute
126  string filename;
127  size_t sp = disp.find(" ", pos);
128  if (pos != string::npos) {
129  // space before the next attribute
130  filename = disp.substr(pos + 1, sp - pos - 1);
131  } else {
132  // to the end of the string
133  filename = disp.substr(pos + 1);
134  }
135 
136  // now see if it's wrapped in quotes
137  if (filename[0] == '"') {
138  filename = filename.substr(1);
139  }
140  if (filename[filename.length() - 1] == '"') {
141  filename = filename.substr(0, filename.length() - 1);
142  }
143 
144  // we have the filename now, run it through
145  // the type match to get the file type.
146 
148  type = utils->get_handler_name(filename);
149  }
150  }
151 }
152 
153 void get_type_from_content_type(const string &ctype, string &type)
154 {
155  BESDEBUG(MODULE, prolog << "BEGIN content-type: " << ctype << endl);
156  map<string,string> mime_list;
157  load_mime_list_from_keys(mime_list);
158  map<string, string>::iterator i = mime_list.begin();
159  map<string, string>::iterator e = mime_list.end();
160  bool done = false;
161  for (; i != e && !done; i++) {
162  BESDEBUG(MODULE, prolog << "Comparing content type '" << ctype << "' against mime list element '" << (*i).second << "'" << endl);
163  BESDEBUG(MODULE, prolog << "first: " << (*i).first << " second: " << (*i).second << endl);
164  if ((*i).second == ctype) {
165  BESDEBUG(MODULE, prolog << "MATCH" << endl);
166  type = (*i).first;
167  done = true;
168  }
169  }
170  BESDEBUG(MODULE, prolog << "END" << endl);
171 }
172 
173 void get_type_from_url(const string &url, string &type) {
174  const BESCatalogUtils *utils = BESCatalogList::TheCatalogList()->find_catalog("catalog")->get_catalog_utils();
175 
176  type = utils->get_handler_name(url);
177 }
178 
184  size_t max_redirects=0;
185  bool found = false;
186  string value;
187  TheBESKeys::TheKeys()->get_value(HTTP_MAX_REDIRECTS_KEY, value, found);
188  if (found && !value.empty()) {
189  std::istringstream(value) >> max_redirects; // Returns 0 if the parse fails.
190  }
191  if(!max_redirects){
192  max_redirects = HTTP_MAX_REDIRECTS_DEFAULT;
193  }
194  return max_redirects;
195 }
196 
#if 0
    // Disabled code, kept for reference.
    //
    // Splits target_url at its first '?' and records the pieces in url_info:
    // the full URL, the part before the '?', the query string, every
    // "key=value" pair found in the query string (split on '&'), and the
    // current time in seconds as the ingest time. Entries are added with
    // insert(), so a key already present in url_info is not overwritten.
    void HttpUtils::decompose_url(const string target_url, map<string,string> &url_info)
    {
        typedef map<string, string>::value_type info_entry;

        string url_base = target_url;
        string query_string;

        const size_t query_index = target_url.find_first_of("?");
        BESDEBUG(MODULE, prolog << "query_index: " << query_index << endl);
        if (query_index != string::npos) {
            url_base = target_url.substr(0, query_index);
            query_string = target_url.substr(query_index + 1);
        }

        url_info.insert(info_entry(HTTP_TARGET_URL_KEY, target_url));
        BESDEBUG(MODULE, prolog << HTTP_TARGET_URL_KEY << ": " << target_url << endl);
        url_info.insert(info_entry(HTTP_URL_BASE_KEY, url_base));
        BESDEBUG(MODULE, prolog << HTTP_URL_BASE_KEY <<": " << url_base << endl);
        url_info.insert(info_entry(HTTP_QUERY_STRING_KEY, query_string));
        BESDEBUG(MODULE, prolog << HTTP_QUERY_STRING_KEY << ": " << query_string << endl);

        if (!query_string.empty()) {
            vector<string> records;
            BESUtil::tokenize(query_string, records, "&");
            for (const string &record : records) {
                const size_t eq = record.find('=');
                if (eq == string::npos) {
                    continue; // records without '=' are ignored
                }
                const string key = record.substr(0, eq);
                const string value = record.substr(eq + 1);
                BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
                url_info.insert(info_entry(key, value));
            }
        }

        // Record the current time as the ingest time.
        const time_t now = time(NULL);
        stringstream unix_time;
        unix_time << now;
        url_info.insert(info_entry(HTTP_INGEST_TIME_KEY, unix_time.str()));
    }

#endif
257 
258 }
259 
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
virtual BESCatalog * default_catalog() const
The default catalog.
std::string get_handler_name(const std::string &item) const
Find the handler name that will process.
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
error thrown if there is a user syntax error in the request or any other user error
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:1086
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
size_t load_max_redirects_from_keys()
Definition: HttpUtils.cc:183
void get_type_from_disposition(const string &disp, string &type)
Definition: HttpUtils.cc:109
void load_mime_list_from_keys(map< string, string > &mime_list)
Definition: HttpUtils.cc:71