libdap  Updated for version 3.20.9
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 #include <cerrno>
49 
50 //#define DODS_DEBUG2
51 //#define HTTP_TRACE
52 //#define DODS_DEBUG
53 
54 #undef USE_GETENV
55 
56 
57 #include "debug.h"
58 #include "mime_util.h"
59 #include "media_types.h"
60 #include "GNURegex.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
63 #include "RCReader.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
66 
67 using namespace std;
68 
69 namespace libdap {
70 
// Debugging switches. They are ordinary globals with external linkage (not
// static) so that they can be changed from a debugger such as gdb or ddd;
// many debuggers cannot access static variables. As a consequence they are
// not MT-Safe. 08/07/02 jhrg

// Non-zero turns on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Non-zero turns on libcurl's VERY verbose mode (data in/out is traced too).
int www_trace_extensive = 0;

// Non-zero asks that the temporary files be kept; useful for debugging.
int dods_keep_temps = 0;
84 
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP client-error codes CLIENT_ERR_MIN..CLIENT_ERR_MAX;
// the entry for a code is found at index (code - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n\
 Often this means that the OPeNDAP server is missing or needs attention.\n\
 Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP server-error codes SERVER_ERR_MIN..SERVER_ERR_MAX;
// the entry for a code is found at index (code - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The matching entry of http_client_errors (400..417) or
    http_server_errors (500..505); a generic 'Unknown Error' message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    const bool outside_client_range = status < CLIENT_ERR_MIN || status > CLIENT_ERR_MAX;
    if (!outside_client_range)
        return http_client_errors[status - CLIENT_ERR_MIN];

    const bool outside_server_range = status < SERVER_ERR_MIN || status > SERVER_ERR_MAX;
    if (!outside_server_range)
        return http_server_errors[status - SERVER_ERR_MIN];

    return "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.";
}
135 
136 static ObjectType
137 determine_object_type(const string &header_value)
138 {
139  // DAP4 Data: application/vnd.opendap.dap4.data
140  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141 
142  string::size_type plus = header_value.find('+');
143  string base_type;
144  string type_extension = "";
145  if (plus != string::npos) {
146  base_type= header_value.substr(0, plus);
147  type_extension = header_value.substr(plus+1);
148  }
149  else
150  base_type = header_value;
151 
152  if (base_type == DMR_Content_Type
153  || (base_type.find("application/") != string::npos
154  && base_type.find("dap4.dataset-metadata") != string::npos)) {
155  if (type_extension == "xml")
156  return dap4_dmr;
157  else
158  return unknown_type;
159  }
160  else if (base_type == DAP4_DATA_Content_Type
161  || (base_type.find("application/") != string::npos
162  && base_type.find("dap4.data") != string::npos)) {
163  return dap4_data;
164  }
165  else if (header_value.find("text/html") != string::npos) {
166  return web_error;
167  }
168  else
169  return unknown_type;
170 }
171 
176 class ParseHeader : public unary_function<const string &, void>
177 {
178  ObjectType type; // What type of object is in the stream?
179  string server; // Server's version string.
180  string protocol; // Server's protocol version.
181  string location; // Url returned by server
182 
183 public:
184  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185  { }
186 
187  void operator()(const string &line)
188  {
189  string name, value;
190  parse_mime_header(line, name, value);
191 
192  DBG2(cerr << name << ": " << value << endl);
193 
194  // Content-Type is used to determine the content of DAP4 responses, but allow the
195  // Content-Description header to override CT o preserve operation with DAP2 servers.
196  // jhrg 11/12/13
197  if (type == unknown_type && name == "content-type") {
198  type = determine_object_type(value); // see above
199  }
200  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201  type = get_description_type(value); // defined in mime_util.cc
202  }
203  // The second test (== "dods/0.0") tests if xopendap-server has already
204  // been seen. If so, use that header in preference to the old
205  // XDODS-Server header. jhrg 2/7/06
206  else if (name == "xdods-server" && server == "dods/0.0") {
207  server = value;
208  }
209  else if (name == "xopendap-server") {
210  server = value;
211  }
212  else if (name == "xdap") {
213  protocol = value;
214  }
215  else if (server == "dods/0.0" && name == "server") {
216  server = value;
217  }
218  else if (name == "location") {
219  location = value;
220  }
221  }
222 
223  ObjectType get_object_type()
224  {
225  return type;
226  }
227 
228  string get_server()
229  {
230  return server;
231  }
232 
233  string get_protocol()
234  {
235  return protocol;
236  }
237 
238  string get_location() {
239  return location;
240  }
241 };
242 
/** libcurl header callback: store one response header line.

    The line is stored without its trailing newline (or CR LF pair). Empty
    lines and HTTP status lines (lines that start with "HTTP/") are not
    stored.

    @param ptr The header bytes; not NUL terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the buffer holds size * nmemb bytes.
    @param resp_hdrs Pointer to the vector<string> that receives the header.
    @return The number of bytes consumed, always size * nmemb. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    const size_t total = size * nmemb;
    const char *data = static_cast<const char *>(ptr);

    // Drop the line terminator, but only if it is really there. Working from
    // the total byte count also keeps an empty buffer from underflowing the
    // length computation.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line = (length > 0) ? std::string(data, length) : std::string();

    // Store all non-empty headers that are not HTTP status lines. Only a line
    // that starts with "HTTP/" is a status line; a header whose value merely
    // mentions HTTP must be kept.
    const bool is_status_line = complete_line.compare(0, 5, "HTTP/") == 0;
    if (hdrs && !complete_line.empty() && !is_status_line) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
279 
281 static int
282 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283 {
284  string message(msg, size);
285 
286  switch (info) {
287  case CURLINFO_TEXT:
288  cerr << "Text: " << message; break;
289  case CURLINFO_HEADER_IN:
290  cerr << "Header in: " << message; break;
291  case CURLINFO_HEADER_OUT:
292  cerr << "Header out: " << message; break;
293  case CURLINFO_DATA_IN:
294  if (www_trace_extensive)
295  cerr << "Data in: " << message; break;
296  case CURLINFO_DATA_OUT:
297  if (www_trace_extensive)
298  cerr << "Data out: " << message; break;
299  case CURLINFO_END:
300  cerr << "End: " << message; break;
301 #ifdef CURLINFO_SSL_DATA_IN
302  case CURLINFO_SSL_DATA_IN:
303  cerr << "SSL Data in: " << message; break;
304 #endif
305 #ifdef CURLINFO_SSL_DATA_OUT
306  case CURLINFO_SSL_DATA_OUT:
307  cerr << "SSL Data out: " << message; break;
308 #endif
309  default:
310  if (www_trace_extensive)
311  cerr << "Curl info: " << message; break;
312  }
313  return 0;
314 }
315 
319 void
320 HTTPConnect::www_lib_init()
321 {
322  curl_global_init(CURL_GLOBAL_DEFAULT);
323 
324  d_curl = curl_easy_init();
325  if (!d_curl)
326  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
327 
328  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
329 
330  curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
331 
332  // Now set options that will remain constant for the duration of this
333  // CURL object.
334 
335  // Set the proxy host.
336  if (!d_rcr->get_proxy_server_host().empty()) {
337  DBG(cerr << "Setting up a proxy server." << endl);
338  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
339  << endl);
340  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
341  << endl);
342  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
343  << endl);
344  curl_easy_setopt(d_curl, CURLOPT_PROXY,
345  d_rcr->get_proxy_server_host().c_str());
346  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
347  d_rcr->get_proxy_server_port());
348 
349  // As of 4/21/08 only NTLM, Digest and Basic work.
350 #ifdef CURLOPT_PROXYAUTH
351  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
352 #endif
353 
354  // Password might not be required. 06/21/04 jhrg
355  if (!d_rcr->get_proxy_server_userpw().empty())
356  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
357  d_rcr->get_proxy_server_userpw().c_str());
358  }
359 
360  // We have to set FailOnError to false for any of the non-Basic
361  // authentication schemes to work. 07/28/03 jhrg
362  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
363 
364  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
365  // choosing the the 'safest' one supported by the server.
366  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
367  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
368 
369  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
370  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
371  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
372  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
373  // param of save_raw_http_headers to a vector<string> object.
374 
375  // Follow 302 (redirect) responses
376  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
377  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
378 
379  // If the user turns off SSL validation...
380  if (d_rcr->get_validate_ssl() == 0) {
381  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
382  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
383  }
384 
385  // Set libcurl to use netrc to access data behind URS auth.
386  // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
387  curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
388 
389  // Look to see if cookies are turned on in the .dodsrc file. If so,
390  // activate here. We honor 'session cookies' (cookies without an
391  // expiration date) here so that session-based SSO systems will work as
392  // expected.
393  if (!d_cookie_jar.empty()) {
394  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
395  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
396  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
397  }
398 
399  if (www_trace) {
400  cerr << "Curl version: " << curl_version() << endl;
401  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
402  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
403  }
404 }
405 
409 class BuildHeaders : public unary_function<const string &, void>
410 {
411  struct curl_slist *d_cl;
412 
413 public:
414  BuildHeaders() : d_cl(0)
415  {}
416 
417  void operator()(const string &header)
418  {
419  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
420  << endl);
421  d_cl = curl_slist_append(d_cl, header.c_str());
422  }
423 
424  struct curl_slist *get_headers()
425  {
426  return d_cl;
427  }
428 };
429 
444 long
445 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
446 {
447  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
448 
449 #ifdef WIN32
450  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
451  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
452  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
453  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
454  // this issue is that one should not pass a FILE * to a windows DLL. Close
455  // inspection of libcurl yields that their default write function when using
456  // the CURLOPT_WRITEDATA is just "fwrite".
457  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
459 #else
460  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
461 #endif
462 
463  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
464  ostream_iterator<string>(cerr, "\n")));
465 
466  BuildHeaders req_hdrs;
467  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
468  req_hdrs);
469  if (headers)
470  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
471 
472  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
473 
474  // Turn off the proxy for this URL?
475  bool temporary_proxy = false;
476  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
477  DBG(cerr << "Suppress proxy for url: " << url << endl);
478  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
479  }
480 
481  string::size_type at_sign = url.find('@');
482  // Assume username:password present *and* assume it's an HTTP URL; it *is*
483  // HTTPConnect, after all. 7 is position after "http://"; the second arg
484  // to substr() is the sub string length.
485  if (at_sign != url.npos)
486  d_upstring = url.substr(7, at_sign - 7);
487 
488  if (!d_upstring.empty())
489  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
490 
491  // Pass save_raw_http_headers() a pointer to the vector<string> where the
492  // response headers may be stored. Callers can use the resp_hdrs
493  // value/result parameter to get the raw response header information .
494  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
495 
496  // This is the call that causes curl to go and get the remote resource and "write it down"
497  // utilizing the configuration state that has been previously conditioned by various perturbations
498  // of calls to curl_easy_setopt().
499  CURLcode res = curl_easy_perform(d_curl);
500 
501  // Free the header list and null the value in d_curl.
502  curl_slist_free_all(req_hdrs.get_headers());
503  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
504 
505  // Reset the proxy?
506  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
507  curl_easy_setopt(d_curl, CURLOPT_PROXY,
508  d_rcr->get_proxy_server_host().c_str());
509 
510  if (res != 0)
511  throw Error(d_error_buffer);
512 
513  long status;
514  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
515  if (res != 0)
516  throw Error(d_error_buffer);
517 
518  char *ct_ptr = 0;
519  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
520  if (res == CURLE_OK && ct_ptr)
521  d_content_type = ct_ptr;
522  else
523  d_content_type = "";
524 
525  return status;
526 }
527 
531 bool
532 HTTPConnect::url_uses_proxy_for(const string &url)
533 {
534  if (d_rcr->is_proxy_for_used()) {
535  // NB: This could be improved by moving the Regex instance into
536  // the RCReader class, but the proxy stuff is all deprecated.
537  // jhrg 12/1/21
538  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
539  int index = 0, matchlen;
540  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
541  }
542 
543  return false;
544 }
545 
549 bool
550 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
551 {
552  return d_rcr->is_no_proxy_for_used()
553  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
554 }
555 
556 // Public methods. Mostly...
557 
564 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
565  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
566 
567 {
568  d_accept_deflate = rcr->get_deflate();
569  d_rcr = rcr;
570 
571  // Load in the default headers to send with a request. The empty Pragma
572  // headers overrides libcurl's default Pragma: no-cache header (which
573  // will disable caching by Squid, et c.). The User-Agent header helps
574  // make server logs more readable. 05/05/03 jhrg
575  d_request_headers.push_back(string("Pragma:"));
576  string user_agent = string("User-Agent: ") + string(CNAME)
577  + string("/") + string(CVER);
578  d_request_headers.push_back(user_agent);
579  if (d_accept_deflate)
580  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
581 
582  // HTTPCache::instance returns a valid ptr or 0.
583  if (d_rcr->get_use_cache())
584  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
585  else
586  d_http_cache = 0;
587 
588  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
589  << ")" << endl);
590 
591  if (d_http_cache) {
592  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
593  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
594  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
595  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
596  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
597  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
598  }
599 
600  d_cookie_jar = rcr->get_cookie_jar();
601 
602  www_lib_init(); // This may throw either Error or InternalErr
603 }
604 
605 HTTPConnect::~HTTPConnect()
606 {
607  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
608 
609  curl_easy_cleanup(d_curl);
610 
611  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
612 }
613 
/** Predicate that is true for strings that start with a given header name.

    The header text is stored by value. Keeping only a reference is unsafe
    because the predicate is usually built from a temporary, e.g.
    HeaderMatch("Content-Type:"), and would dangle as soon as it is kept past
    the end of that expression.

    @note This class does not derive from std::unary_function: that base was
    deprecated in C++11 and removed in C++17, and find_if() does not need
    it. */
class HeaderMatch {
    std::string d_header;
public:
    /** @param header The text a matching string must start with. */
    explicit HeaderMatch(const std::string &header) : d_header(header) {}

    /** @param arg The string (header line) to test.
        @return True if arg starts with the stored header text. */
    bool operator()(const std::string &arg) const
    {
        // A prefix comparison; unlike find() it never scans the rest of arg.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
621 
634 HTTPResponse *
635 HTTPConnect::fetch_url(const string &url)
636 {
637 #ifdef HTTP_TRACE
638  cout << "GET " << url << " HTTP/1.0" << endl;
639 #endif
640 
641  HTTPResponse *stream;
642 
643  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
644  stream = caching_fetch_url(url);
645  }
646  else {
647  stream = plain_fetch_url(url);
648  }
649 
650 #ifdef HTTP_TRACE
651  stringstream ss;
652  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
653  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
654  ss << stream->get_headers()->at(i) << endl;
655  }
656  cout << ss.str();
657 #endif
658 
659  ParseHeader parser;
660 
661  // An apparent quirk of libcurl is that it does not pass the Content-type
662  // header to the callback used to save them, but check and add it from the
663  // saved state variable only if it's not there (without this a test failed
664  // in HTTPCacheTest). jhrg 11/12/13
665  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
666  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
667  stream->get_headers()->push_back("Content-Type: " + d_content_type);
668 
669  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
670 
671 #ifdef HTTP_TRACE
672  cout << endl << endl;
673 #endif
674 
675  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
676  if (parser.get_location() != "" &&
677  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
678  delete stream;
679  return fetch_url(parser.get_location());
680  }
681 
682  stream->set_type(parser.get_object_type()); // uses the value of content-description
683 
684  stream->set_version(parser.get_server());
685  stream->set_protocol(parser.get_protocol());
686 
687  if (d_use_cpp_streams) {
688  stream->transform_to_cpp();
689  }
690 
691  return stream;
692 }
693 
694 // Look around for a reasonable place to put a temporary file. Check first
695 // the value of the TMPDIR env var. If that does not yeild a path that's
696 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
697 // defined in stdio.h. If both come up empty, then use `./'.
698 
699 // Change this to a version that either returns a string or an open file
700 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
701 // (see open()) to make it more secure. Ideal solution: get deserialize()
702 // methods to read from a stream returned by libcurl, not from a temporary
703 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
704 static string
705 get_tempfile_template(const string &file_template)
706 {
707  string c;
708 
709  // Windows has one idea of the standard name(s) for a temporary files dir
710 #ifdef WIN32
711  // white list for a WIN32 directory
712  Regex directory("[-a-zA-Z0-9_:\\]*");
713 
714  // If we're OK to use getenv(), try it.
715 #ifdef USE_GETENV
716  c = getenv("TEMP");
717  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
718  goto valid_temp_directory;
719 
720  c= getenv("TMP");
721  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
722  goto valid_temp_directory;
723 #endif // USE_GETENV
724 
725  // The windows default
726  c = "c:\tmp";
727  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
728  goto valid_temp_directory;
729 
730 #else // Unix/Linux/OSX has another...
731  // white list for a directory
732  const Regex directory("[-a-zA-Z0-9_/]*");
733 #ifdef USE_GETENV
734  c = getenv("TMPDIR");
735  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
736  goto valid_temp_directory;
737 #endif // USE_GETENV
738 
739  // Unix defines this sometimes - if present, use it.
740 #ifdef P_tmpdir
741  if (access(P_tmpdir, W_OK | R_OK) == 0) {
742  c = P_tmpdir;
743  goto valid_temp_directory;
744  }
745 #endif
746 
747  // The Unix default
748  c = "/tmp";
749  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
750  goto valid_temp_directory;
751 
752 #endif // WIN32
753 
754  // If we found nothing useful, use the current directory
755  c = ".";
756 
757 valid_temp_directory:
758 
759 #ifdef WIN32
760  c += "\\" + file_template;
761 #else
762  c += "/" + file_template;
763 #endif
764 
765  return c;
766 }
767 
786 string
787 get_temp_file(FILE *&stream) throw(Error)
788 {
789  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
790 
791  vector<char> pathname(dods_temp.length() + 1);
792 
793  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
794 
795  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
796 
797  // Open truncated for update. NB: mkstemp() returns a file descriptor.
798 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
799  stream = fopen(_mktemp(&pathname[0]), "w+b");
800 #else
801  // Make sure that temp files are accessible only by the owner.
802  int mask = umask(077);
803  if (mask < 0)
804  throw Error("Could not set the file creation mask: " + string(strerror(errno)));
805  int fd = mkstemp(&pathname[0]);
806  if (fd < 0)
807  throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
808 
809  stream = fdopen(fd, "w+");
810  umask(mask);
811 #endif
812 
813  if (!stream)
814  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
815 
816  dods_temp = &pathname[0];
817  return dods_temp;
818 }
819 
820 
826 void
827 close_temp(FILE *s, const string &name)
828 {
829  int res = fclose(s);
830  if (res)
831  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
832 
833  res = unlink(name.c_str());
834  if (res != 0)
835  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
836 }
837 
859 HTTPResponse *
860 HTTPConnect::caching_fetch_url(const string &url)
861 {
862  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
863 
864  vector<string> *headers = new vector<string>;
865  string file_name;
866  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
867  if (!s) {
868  // url not in cache; get it and cache it
869  DBGN(cerr << "no; getting response and caching." << endl);
870  delete headers; headers = 0;
871  time_t now = time(0);
872  HTTPResponse *rs = plain_fetch_url(url);
873  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
874 
875  return rs;
876  }
877  else { // url in cache
878  DBGN(cerr << "yes... ");
879 
880  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
881  DBGN(cerr << "and it's valid; using cached response." << endl);
882  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
883  return crs;
884  }
885  else { // url in cache but not valid; validate
886  DBGN(cerr << "but it's not valid; validating... ");
887 
888  d_http_cache->release_cached_response(s); // This closes 's'
889  headers->clear();
890  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
891  FILE *body = 0;
892  string dods_temp = get_temp_file(body);
893  time_t now = time(0); // When was the request made (now).
894  long http_status;
895 
896  try {
897  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
898  rewind(body);
899  }
900  catch (Error &e) {
901  close_temp(body, dods_temp);
902  delete headers;
903  throw ;
904  }
905 
906  switch (http_status) {
907  case 200: { // New headers and new body
908  DBGN(cerr << "read a new response; caching." << endl);
909 
910  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
911  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
912 
913  return rs;
914  }
915 
916  case 304: { // Just new headers, use cached body
917  DBGN(cerr << "cached response valid; updating." << endl);
918 
919  close_temp(body, dods_temp);
920  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
921  string file_name;
922  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
923  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
924  return crs;
925  }
926 
927  default: { // Oops.
928  close_temp(body, dods_temp);
929  if (http_status >= 400) {
930  delete headers; headers = 0;
931  string msg = "Error while reading the URL: ";
932  msg += url;
933  msg
934  += ".\nThe OPeNDAP server returned the following message:\n";
935  msg += http_status_to_string(http_status);
936  throw Error(msg);
937  }
938  else {
939  delete headers; headers = 0;
940  throw InternalErr(__FILE__, __LINE__,
941  "Bad response from the HTTP server: " + long_to_string(http_status));
942  }
943  }
944  }
945  }
946  }
947 
948  throw InternalErr(__FILE__, __LINE__, "Should never get here");
949 }
950 
962 HTTPResponse *
963 HTTPConnect::plain_fetch_url(const string &url)
964 {
965  DBG(cerr << "Getting URL: " << url << endl);
966  FILE *stream = 0;
967  string dods_temp = get_temp_file(stream);
968  vector<string> *resp_hdrs = new vector<string>;
969 
970  int status = -1;
971  try {
972  status = read_url(url, stream, resp_hdrs); // Throws Error.
973  if (status >= 400) {
974  // delete resp_hdrs; resp_hdrs = 0;
975  string msg = "Error while reading the URL: ";
976  msg += url;
977  msg += ".\nThe OPeNDAP server returned the following message:\n";
978  msg += http_status_to_string(status);
979  throw Error(msg);
980  }
981  }
982 
983  catch (Error &e) {
984  delete resp_hdrs;
985  close_temp(stream, dods_temp);
986  throw;
987  }
988 
989 #if 0
990  if (d_use_cpp_streams) {
991  fclose(stream);
992  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
993  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
994  }
995  else {
996 #endif
997  rewind(stream);
998  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
999 #if 0
1000 }
1001 #endif
1002 }
1003 
1015 void
1017 {
1018  d_accept_deflate = deflate;
1019 
1020  if (d_accept_deflate) {
1021  if (find(d_request_headers.begin(), d_request_headers.end(),
1022  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1023  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1024  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1025  ostream_iterator<string>(cerr, "\n")));
1026  }
1027  else {
1028  vector<string>::iterator i;
1029  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1030  bind2nd(equal_to<string>(),
1031  string("Accept-Encoding: deflate, gzip, compress")));
1032  d_request_headers.erase(i, d_request_headers.end());
1033  }
1034 }
1035 
1044 void
1045 HTTPConnect::set_xdap_protocol(int major, int minor)
1046 {
1047  // Look for, and remove if one exists, an XDAP-Accept header
1048  vector<string>::iterator i;
1049  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1050  HeaderMatch("XDAP-Accept:"));
1051  if (i != d_request_headers.end())
1052  d_request_headers.erase(i);
1053 
1054  // Record and add the new header value
1055  d_dap_client_protocol_major = major;
1056  d_dap_client_protocol_minor = minor;
1057  ostringstream xdap_accept;
1058  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1059 
1060  d_request_headers.push_back(xdap_accept.str());
1061 
1062  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1063  ostream_iterator<string>(cerr, "\n")));
1064 }
1065 
1081 void
1082 HTTPConnect::set_credentials(const string &u, const string &p)
1083 {
1084  if (u.empty())
1085  return;
1086 
1087  // Store the credentials locally.
1088  d_username = u;
1089  d_password = p;
1090 
1091  d_upstring = u + ":" + p;
1092 }
1093 
1094 } // namespace libdap
A class for error processing.
Definition: Error.h:94
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1156
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1571
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1249
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1388
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1319
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1480
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:635
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition: InternalErr.h:65
Regular expression matching.
Definition: GNURegex.h:57
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:912
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:787
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:827
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58