libdap Updated for version 3.20.11
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCache.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28// #define DODS_DEBUG
29// #define DODS_DEBUG2
30#undef USE_GETENV
31
32#include <pthread.h>
33#include <limits.h>
34#include <unistd.h> // for stat
35#include <sys/types.h> // for stat and mkdir
36#include <sys/stat.h>
37
38#include <cstring>
39#include <cerrno>
40
41#include <iostream>
42#include <sstream>
43#include <algorithm>
44#include <iterator>
45#include <set>
46
47#include "Error.h"
48#include "InternalErr.h"
49#include "ResponseTooBigErr.h"
50#ifndef WIN32
51#include "SignalHandler.h"
52#endif
53#include "HTTPCacheInterruptHandler.h"
54#include "HTTPCacheTable.h"
55#include "HTTPCache.h"
56#include "HTTPCacheMacros.h"
57#include "SignalHandlerRegisteredErr.h"
58
59#include "util_mit.h"
60#include "debug.h"
61
62using namespace std;
63
64namespace libdap {
65
66HTTPCache *HTTPCache::_instance = 0;
67
68// instance_mutex is used to ensure that only one instance is created.
69// That is, it protects the body of the HTTPCache::instance() method. This
70// mutex is initialized from within the static function once_init_routine(),
71// and the call to that takes place using pthread_once(), where the control
72// variable once_block ensures the routine runs only once. All of this ensures that no matter
73// how many threads call the instance() method, only one instance is ever
74// made.
75static pthread_mutex_t instance_mutex;
76static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77
78
// Default tuning values for the cache. Values annotated with "M" are counts
// of megabytes and are multiplied by MEGA where a byte count is required.

// Parenthesized so the macro expands safely inside larger expressions
// (e.g. `x % NO_LM_EXPIRATION`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10 // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
89
90static void
91once_init_routine()
92{
93 int status;
94 status = INIT(&instance_mutex);
95
96 if (status != 0)
97 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
98}
99
// Return the single HTTPCache object, creating it on the first call.
//
// The first call constructs the object from cache_root and force and
// registers delete_instance() with atexit(). Later calls return the existing
// object; their arguments are not used because the constructor is only
// reached when _instance is null.
//
// pthread_once() runs once_init_routine() to set up instance_mutex, and the
// rest of the body runs with instance_mutex held. Any exception thrown inside
// the try block is rethrown after the mutex is released.
//
// NOTE(review): this listing is missing original lines 157, 160, 164, 167,
// 171 and 174 (the statements that define old_eh and that own the three
// string literals below). Restore them from the upstream source before
// compiling or modifying this function.
128HTTPCache *
129HTTPCache::instance(const string &cache_root, bool force)
130{
131 int status = pthread_once(&once_block, once_init_routine);
132 if (status != 0)
133 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
134
135 LOCK(&instance_mutex);
136
137 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
138
139 try {
140 if (!_instance) {
141 _instance = new HTTPCache(cache_root, force);
142
143 DBG(cerr << "New instance: " << _instance << ", cache root: "
144 << _instance->d_cache_root << endl);
145
146 atexit(delete_instance);
147
148#ifndef WIN32
149 // Register the interrupt handler. If we've already registered
150 // one, barf. If this becomes a problem, hack SignalHandler so
151 // that we can chain these handlers... 02/10/04 jhrg
152 //
153 // Technically we're leaking memory here. However, since this
154 // class is a singleton, we know that only three objects will
155 // ever be created and they will all exist until the process
156 // exits. We can let this slide... 02/12/04 jhrg
158 if (old_eh) {
159 SignalHandler::instance()->register_handler(SIGINT, old_eh);
161 "Could not register event handler for SIGINT without superseding an existing one.");
162 }
163
165 if (old_eh) {
166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
168 "Could not register event handler for SIGPIPE without superseding an existing one.");
169 }
170
172 if (old_eh) {
173 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
175 "Could not register event handler for SIGTERM without superseding an existing one.");
176 }
177#endif
178 }
179 }
180 catch (...) {
181 DBG2(cerr << "The constructor threw an Error!" << endl);
182 UNLOCK(&instance_mutex);
183 throw;
184 }
185
186 UNLOCK(&instance_mutex);
187 DBGN(cerr << "returning " << hex << _instance << dec << endl);
188
189 return _instance;
190}
191
195void
196HTTPCache::delete_instance()
197{
198 DBG(cerr << "Entering delete_instance()..." << endl);
199
200 if (HTTPCache::_instance) {
201 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
202 delete HTTPCache::_instance;
203 HTTPCache::_instance = 0;
204
205 //Now remove the signal handlers
206 delete SignalHandler::instance()->remove_handler(SIGINT);
207 delete SignalHandler::instance()->remove_handler(SIGPIPE);
208 delete SignalHandler::instance()->remove_handler(SIGTERM);
209 }
210
211 DBG(cerr << "Exiting delete_instance()" << endl);
212}
213
228HTTPCache::HTTPCache(string cache_root, bool force) :
229 d_locked_open_file(0),
230 d_cache_enabled(false),
231 d_cache_protected(false),
232
233 d_cache_disconnected(DISCONNECT_NONE),
234
235 d_expire_ignored(false),
236 d_always_validate(false),
237 d_total_size(CACHE_TOTAL_SIZE * MEGA),
238 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
239 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
240 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
241 d_default_expiration(NO_LM_EXPIRATION),
242 d_max_age(-1),
243 d_max_stale(-1),
244 d_min_fresh(-1),
245 d_http_cache_table(0)
246{
247 DBG(cerr << "Entering the constructor for " << this << "... ");
248#if 0
249 int status = pthread_once(&once_block, once_init_routine);
250 if (status != 0)
251 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
252#endif
253 INIT(&d_cache_mutex);
254
255 // This used to throw an Error object if we could not get the
256 // single user lock. However, that results in an invalid object. It's
257 // better to have an instance that has default values. If we cannot get
258 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
259 //
260 // I fixed this block so that the cache root is set before we try to get
261 // the single user lock. That was the fix for bug #661. To make that
262 // work, I had to move the call to create_cache_root out of
263 // set_cache_root(). 09/08/03 jhrg
264
265 set_cache_root(cache_root);
266 int block_size;
267
268 if (!get_single_user_lock(force))
269 throw Error(internal_error, "Could not get single user lock for the cache");
270
271#ifdef WIN32
272 // Windows is unable to provide us this information. 4096 appears
273 // a best guess. It is likely to be in the range [2048, 8192] on
274 // windows, but will the level of truth of that statement vary over
275 // time ?
276 block_size = 4096;
277#else
278 struct stat s;
279 if (stat(cache_root.c_str(), &s) == 0)
280 block_size = s.st_blksize;
281 else
282 throw Error(internal_error, "Could not set file system block size.");
283#endif
284 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
285 d_cache_enabled = true;
286
287 DBGN(cerr << "exiting" << endl);
288}
289
303{
304 DBG(cerr << "Entering the destructor for " << this << "... ");
305
306 try {
307 if (startGC())
308 perform_garbage_collection();
309
310 d_http_cache_table->cache_index_write();
311 }
312 catch (Error &e) {
313 // If the cache index cannot be written, we've got problems. However,
314 // unless we're debugging, still free up the cache table in memory.
315 // How should we let users know they cache index is not being
316 // written?? 10/03/02 jhrg
317 DBG(cerr << e.get_error_message() << endl);
318 }
319
320 delete d_http_cache_table;
321
322 release_single_user_lock();
323
324 DBGN(cerr << "exiting destructor." << endl);
325 DESTROY(&d_cache_mutex);
326}
327
328
332
336bool
337HTTPCache::stopGC() const
338{
339 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
340}
341
348bool
349HTTPCache::startGC() const
350{
351 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
352 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
353}
354
369void
370HTTPCache::perform_garbage_collection()
371{
372 DBG(cerr << "Performing garbage collection" << endl);
373
374 // Remove all the expired responses.
375 expired_gc();
376
377 // Remove entries larger than max_entry_size.
378 too_big_gc();
379
380 // Remove entries starting with zero hits, 1, ..., until stopGC()
381 // returns true.
382 hits_gc();
383}
384
390void
391HTTPCache::expired_gc()
392{
393 if (!d_expire_ignored) {
394 d_http_cache_table->delete_expired_entries();
395 }
396}
397
414void
415HTTPCache::hits_gc()
416{
417 int hits = 0;
418
419 if (startGC()) {
420 while (!stopGC()) {
421 d_http_cache_table->delete_by_hits(hits);
422 hits++;
423 }
424 }
425}
426
431void HTTPCache::too_big_gc() {
432 if (startGC())
433 d_http_cache_table->delete_by_size(d_max_entry_size);
434}
435
437
448bool HTTPCache::get_single_user_lock(bool force)
449{
450 if (!d_locked_open_file) {
451 FILE * fp = NULL;
452
453 try {
454 // It's OK to call create_cache_root if the directory already
455 // exists.
456 create_cache_root(d_cache_root);
457 }
458 catch (Error &e) {
459 // We need to catch and return false because this method is
460 // called from a ctor and throwing at this point will result in a
461 // partially constructed object. 01/22/04 jhrg
462 DBG(cerr << "Failure to create the cache root" << endl);
463 return false;
464 }
465
466 // Try to read the lock file. If we can open for reading, it exists.
467 string lock = d_cache_root + CACHE_LOCK;
468 if ((fp = fopen(lock.c_str(), "r")) != NULL) {
469 int res = fclose(fp);
470 if (res) {
471 DBG(cerr << "Failed to close " << (void *)fp << endl);
472 }
473 if (force)
474 REMOVE(lock.c_str());
475 else
476 return false;
477 }
478
479 if ((fp = fopen(lock.c_str(), "w")) == NULL) {
480 DBG(cerr << "Could not open for write access" << endl);
481 return false;
482 }
483
484 d_locked_open_file = fp;
485 return true;
486 }
487
488 DBG(cerr << "locked_open_file is true" << endl);
489 return false;
490}
491
494void
495HTTPCache::release_single_user_lock()
496{
497 if (d_locked_open_file) {
498 int res = fclose(d_locked_open_file);
499 if (res) {
500 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
501 }
502 d_locked_open_file = 0;
503 }
504
505 string lock = d_cache_root + CACHE_LOCK;
506 REMOVE(lock.c_str());
507}
508
511
515string
517{
518 return d_cache_root;
519}
520
521
530void
531HTTPCache::create_cache_root(const string &cache_root)
532{
533#ifdef WIN32
534 string::size_type cur = cache_root[1] == ':' ? 3 : 1;
535 typedef int mode_t;
536
537 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538 string dir = cache_root.substr(0, cur);
539 struct stat stat_info;
540 if (stat(dir.c_str(), &stat_info) == -1) {
541 DBG2(cerr << "Cache....... Creating " << dir << endl);
542 mode_t mask = UMASK(0);
543 if (MKDIR(dir.c_str(), 0777) < 0) {
544 DBG2(cerr << "Error: can't create." << endl);
545 UMASK(mask);
546 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
547 }
548 UMASK(mask);
549 }
550 else {
551 DBG2(cerr << "Cache....... Found " << dir << endl);
552 }
553 cur++;
554 }
555#else
556 // OSX and Linux
557
558 // Save the mask
559 mode_t mask = umask(0);
560
561 // Ignore the error if the directory exists
562 errno = 0;
563 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
564 umask(mask);
565 throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
566 }
567
568 // Restore themask
569 umask(mask);
570
571#endif
572}
573
588void
589HTTPCache::set_cache_root(const string &root)
590{
591 if (root != "") {
592 d_cache_root = root;
593 // cache root should end in /.
594 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
595 d_cache_root += DIR_SEPARATOR_CHAR;
596 }
597 else {
598 // If no cache root has been indicated then look for a suitable
599 // location.
600#ifdef USE_GETENV
601 char * cr = (char *) getenv("DODS_CACHE");
602 if (!cr) cr = (char *) getenv("TMP");
603 if (!cr) cr = (char *) getenv("TEMP");
604 if (!cr) cr = (char*)CACHE_LOCATION;
605 d_cache_root = cr;
606#else
607 d_cache_root = CACHE_LOCATION;
608#endif
609
610 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
611 d_cache_root += DIR_SEPARATOR_CHAR;
612
613 d_cache_root += CACHE_ROOT;
614 }
615
616 // Test d_hhtp_cache_table because this method can be called before that
617 // instance is created and also can be called later to change the cache
618 // root. jhrg 05.14.08
619 if (d_http_cache_table)
620 d_http_cache_table->set_cache_root(d_cache_root);
621}
622
634void
636{
637 lock_cache_interface();
638
639 d_cache_enabled = mode;
640
641 unlock_cache_interface();
642}
643
646bool
648{
649 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
650 << endl);
651 return d_cache_enabled;
652}
653
663void
665{
666 lock_cache_interface();
667
668 d_cache_disconnected = mode;
669
670 unlock_cache_interface();
671}
672
677{
678 return d_cache_disconnected;
679}
680
689void
691{
692 lock_cache_interface();
693
694 d_expire_ignored = mode;
695
696 unlock_cache_interface();
697}
698
699/* Is the cache ignoring Expires headers returned with responses that have
700 been cached? */
701
702bool
703HTTPCache::is_expire_ignored() const
704{
705 return d_expire_ignored;
706}
707
723void
724HTTPCache::set_max_size(unsigned long size)
725{
726 lock_cache_interface();
727
728 try {
729 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
730 MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
731 unsigned long old_size = d_total_size;
732 d_total_size = new_size;
733 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
734 d_gc_buffer = d_total_size / CACHE_GC_PCT;
735
736 if (new_size < old_size && startGC()) {
737 perform_garbage_collection();
738 d_http_cache_table->cache_index_write();
739 }
740 }
741 catch (...) {
742 unlock_cache_interface();
743 DBGN(cerr << "Unlocking interface." << endl);
744 throw;
745 }
746
747 DBG2(cerr << "Cache....... Total cache size: " << d_total_size
748 << " with " << d_folder_size
749 << " bytes for meta information and folders and at least "
750 << d_gc_buffer << " bytes free after every gc" << endl);
751
752 unlock_cache_interface();
753}
754
757unsigned long
759{
760 return d_total_size / MEGA;
761}
762
771void
773{
774 lock_cache_interface();
775
776 try {
777 unsigned long new_size = size * MEGA;
778 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
779 unsigned long old_size = d_max_entry_size;
780 d_max_entry_size = new_size;
781 if (new_size < old_size && startGC()) {
782 perform_garbage_collection();
783 d_http_cache_table->cache_index_write();
784 }
785 }
786 }
787 catch (...) {
788 unlock_cache_interface();
789 throw;
790 }
791
792 DBG2(cerr << "Cache...... Max entry cache size is "
793 << d_max_entry_size << endl);
794
795 unlock_cache_interface();
796}
797
802unsigned long
804{
805 return d_max_entry_size / MEGA;
806}
807
818void
820{
821 lock_cache_interface();
822
823 d_default_expiration = exp_time;
824
825 unlock_cache_interface();
826}
827
830int
832{
833 return d_default_expiration;
834}
835
840void
842{
843 d_always_validate = validate;
844}
845
849bool
851{
852 return d_always_validate;
853}
854
871void
872HTTPCache::set_cache_control(const vector<string> &cc)
873{
874 lock_cache_interface();
875
876 try {
877 d_cache_control = cc;
878
879 vector<string>::const_iterator i;
880 for (i = cc.begin(); i != cc.end(); ++i) {
881 string header = (*i).substr(0, (*i).find(':'));
882 string value = (*i).substr((*i).find(": ") + 2);
883 if (header != "Cache-Control") {
884 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
885 }
886 else {
887 if (value == "no-cache" || value == "no-store")
888 d_cache_enabled = false;
889 else if (value.find("max-age") != string::npos) {
890 string max_age = value.substr(value.find("=") + 1);
891 d_max_age = parse_time(max_age.c_str());
892 }
893 else if (value == "max-stale")
894 d_max_stale = 0; // indicates will take anything;
895 else if (value.find("max-stale") != string::npos) {
896 string max_stale = value.substr(value.find("=") + 1);
897 d_max_stale = parse_time(max_stale.c_str());
898 }
899 else if (value.find("min-fresh") != string::npos) {
900 string min_fresh = value.substr(value.find("=") + 1);
901 d_min_fresh = parse_time(min_fresh.c_str());
902 }
903 }
904 }
905 }
906 catch (...) {
907 unlock_cache_interface();
908 throw;
909 }
910
911 unlock_cache_interface();
912}
913
914
919vector<string>
921{
922 return d_cache_control;
923}
924
926
935bool
936HTTPCache::is_url_in_cache(const string &url)
937{
938 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
939
940 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
941 bool status = entry != 0;
942 if (entry) {
943 entry->unlock_read_response();
944 }
945 return status;
946}
947
/** Tell whether a header line mentions one of the hop-by-hop header names.
    The test is a case-sensitive substring search over the whole line, so a
    name appearing anywhere in the text (including the value) matches.
    write_metadata() uses this to leave such headers out of the stored
    metadata.

    @param header One header line.
    @return True if any of the names below occurs in header. */
bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != std::string::npos)
            return true;
    }

    return false;
}
963
975void
976HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
977{
978 string fname = cachename + CACHE_META;
979 d_open_files.push_back(fname);
980
981 FILE *dest = fopen(fname.c_str(), "w");
982 if (!dest) {
983 throw InternalErr(__FILE__, __LINE__,
984 "Could not open named cache entry file.");
985 }
986
987 vector<string>::const_iterator i;
988 for (i = headers.begin(); i != headers.end(); ++i) {
989 if (!is_hop_by_hop_header(*i)) {
990 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
991 if (s != 1) {
992 fclose(dest);
993 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
994 }
995 s = fwrite("\n", 1, 1, dest);
996 if (s != 1) {
997 fclose(dest);
998 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
999 }
1000 }
1001 }
1002
1003 int res = fclose(dest);
1004 if (res) {
1005 DBG(cerr << "HTTPCache::write_metadata - Failed to close "
1006 << dest << endl);
1007 }
1008
1009 d_open_files.pop_back();
1010}
1011
1022void
1023HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1024{
1025 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1026 if (!md) {
1027 throw InternalErr(__FILE__, __LINE__,
1028 "Could not open named cache entry meta data file.");
1029 }
1030
1031 const size_t line_buf_len = 1024;
1032 char line[line_buf_len];
1033 while (!feof(md) && fgets(line, line_buf_len, md)) {
1034 line[std::min(line_buf_len, strnlen(line, line_buf_len))-1] = '\0'; // erase newline
1035 headers.push_back(string(line));
1036 }
1037
1038 int res = fclose(md);
1039 if (res) {
1040 DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1041 << md << endl);
1042 }
1043}
1044
1066int
1067HTTPCache::write_body(const string &cachename, const FILE *src)
1068{
1069 d_open_files.push_back(cachename);
1070
1071 FILE *dest = fopen(cachename.c_str(), "wb");
1072 if (!dest) {
1073 throw InternalErr(__FILE__, __LINE__,
1074 "Could not open named cache entry file.");
1075 }
1076
1077 // Read and write in 1k blocks; an attempt at doing this efficiently.
1078 // 09/30/02 jhrg
1079 char line[1024];
1080 size_t n;
1081 int total = 0;
1082 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1083 total += fwrite(line, 1, n, dest);
1084 DBG2(sleep(3));
1085 }
1086
1087 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1088 int res = fclose(dest);
1089 res = res & unlink(cachename.c_str());
1090 if (res) {
1091 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1092 << dest << endl);
1093 }
1094 throw InternalErr(__FILE__, __LINE__,
1095 "I/O error transferring data to the cache.");
1096 }
1097
1098 rewind(const_cast<FILE *>(src));
1099
1100 int res = fclose(dest);
1101 if (res) {
1102 DBG(cerr << "HTTPCache::write_body - Failed to close "
1103 << dest << endl);
1104 }
1105
1106 d_open_files.pop_back();
1107
1108 return total;
1109}
1110
1119FILE *
1120HTTPCache::open_body(const string &cachename)
1121{
1122 DBG(cerr << "cachename: " << cachename << endl);
1123
1124 FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1125 if (!src)
1126 throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1127
1128 return src;
1129}
1130
// Store a response in the cache.
//
// Parameters:
//   url          - URL of the response; nothing is cached unless it contains
//                  "http:" or "https:".
//   request_time - passed to the cache table's calculate_time().
//   headers      - response headers; parsed into the entry and written to
//                  the entry's metadata file.
//   body         - stream holding the response body; copied by write_body().
//
// Returns true when the response was added to the cache table. Returns false
// when the URL is not http/https, when the parsed headers mark the entry as
// no-cache, or when ResponseTooBigErr is caught.
//
// The whole body runs with the cache interface locked; every return path and
// the outer catch release that lock. Any existing entry for url is removed
// first. After adding the entry, once the table reports more than
// DUMP_FREQUENCY new entries, garbage collection runs (if startGC() is true)
// and the index is written.
//
// NOTE(review): this listing is missing original line 1177, the statement
// that declares and creates `entry`. Restore it from the upstream source
// before compiling or modifying this function.
// NOTE(review): an exception other than ResponseTooBigErr thrown inside the
// inner try reaches the outer catch, which only unlocks the cache interface;
// `entry` is neither write-unlocked nor deleted on that path.
1156bool
1157HTTPCache::cache_response(const string &url, time_t request_time,
1158 const vector<string> &headers, const FILE *body)
1159{
1160 lock_cache_interface();
1161
1162 DBG(cerr << "Caching url: " << url << "." << endl);
1163
1164 try {
1165 // If this is not an http or https URL, don't cache.
1166 if (url.find("http:") == string::npos &&
1167 url.find("https:") == string::npos) {
1168 unlock_cache_interface();
1169 return false;
1170 }
1171
1172 // This does nothing if url is not already in the cache. It's
1173 // more efficient to do this than to first check and see if the entry
1174 // exists. 10/10/02 jhrg
1175 d_http_cache_table->remove_entry_from_cache_table(url);
1176
1178 entry->lock_write_response();
1179
1180 try {
1181 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1182 if (entry->is_no_cache()) {
1183 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1184 << "(" << url << ")" << endl);
1185 entry->unlock_write_response();
1186 delete entry; entry = 0;
1187 unlock_cache_interface();
1188 return false;
1189 }
1190
1191 // corrected_initial_age, freshness_lifetime, response_time.
1192 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1193
1194 d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1195 // move these write function to cache table
1196 entry->set_size(write_body(entry->get_cachename(), body));
1197 write_metadata(entry->get_cachename(), headers);
1198 d_http_cache_table->add_entry_to_cache_table(entry);
1199 entry->unlock_write_response();
1200 }
1201 catch (ResponseTooBigErr &e) {
1202 // Oops. Bummer. Clean up and exit.
1203 DBG(cerr << e.get_error_message() << endl);
1204 REMOVE(entry->get_cachename().c_str());
1205 REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1206 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1207 << ")" << endl);
1208 entry->unlock_write_response();
1209 delete entry; entry = 0;
1210 unlock_cache_interface();
1211 return false;
1212 }
1213
1214 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1215 if (startGC())
1216 perform_garbage_collection();
1217
1218 d_http_cache_table->cache_index_write(); // resets new_entries
1219 }
1220 }
1221 catch (...) {
1222 unlock_cache_interface();
1223 throw;
1224 }
1225
1226 unlock_cache_interface();
1227
1228 return true;
1229}
1230
1249vector<string>
1251{
1252 lock_cache_interface();
1253
1254 HTTPCacheTable::CacheEntry *entry = 0;
1255 vector<string> headers;
1256
1257 DBG(cerr << "Getting conditional request headers for " << url << endl);
1258
1259 try {
1260 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1261 if (!entry)
1262 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1263
1264 if (entry->get_etag() != "")
1265 headers.push_back(string("If-None-Match: ") + entry->get_etag());
1266
1267 if (entry->get_lm() > 0) {
1268 time_t lm = entry->get_lm();
1269 headers.push_back(string("If-Modified-Since: ")
1270 + date_time_str(&lm));
1271 }
1272 else if (entry->get_max_age() > 0) {
1273 time_t max_age = entry->get_max_age();
1274 headers.push_back(string("If-Modified-Since: ")
1275 + date_time_str(&max_age));
1276 }
1277 else if (entry->get_expires() > 0) {
1278 time_t expires = entry->get_expires();
1279 headers.push_back(string("If-Modified-Since: ")
1280 + date_time_str(&expires));
1281 }
1282 entry->unlock_read_response();
1283 unlock_cache_interface();
1284 }
1285 catch (...) {
1286 unlock_cache_interface();
1287 if (entry) {
1288 entry->unlock_read_response();
1289 }
1290 throw;
1291 }
1292
1293 return headers;
1294}
1295
/** Strict weak ordering for header lines that compares only the header
    name, i.e. the text before the first ':' (the whole string when there is
    no colon). Two headers with the same name are equivalent whatever their
    values, which is what update_response() relies on when merging headers
    in a std::set.

    No longer derives from std::binary_function: that base was deprecated in
    C++11 and removed in C++17, and the ordered containers do not need it. */
struct HeaderLess
{
    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the name prefixes in place; a count of npos means
        // "through the end of the string".
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1305
1319void
1320HTTPCache::update_response(const string &url, time_t request_time,
1321 const vector<string> &headers)
1322{
1323 lock_cache_interface();
1324
1325 HTTPCacheTable::CacheEntry *entry = 0;
1326 DBG(cerr << "Updating the response headers for: " << url << endl);
1327
1328 try {
1329 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1330 if (!entry)
1331 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1332
1333 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1334 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1335
1336 // Update corrected_initial_age, freshness_lifetime, response_time.
1337 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1338
1339 // Merge the new headers with those in the persistent store. How:
1340 // Load the new headers into a set, then merge the old headers. Since
1341 // set<> ignores duplicates, old headers with the same name as a new
1342 // header will got into the bit bucket. Define a special compare
1343 // functor to make sure that headers are compared using only their
1344 // name and not their value too.
1345 set<string, HeaderLess> merged_headers;
1346
1347 // Load in the new headers
1348 copy(headers.begin(), headers.end(),
1349 inserter(merged_headers, merged_headers.begin()));
1350
1351 // Get the old headers and load them in.
1352 vector<string> old_headers;
1353 read_metadata(entry->get_cachename(), old_headers);
1354 copy(old_headers.begin(), old_headers.end(),
1355 inserter(merged_headers, merged_headers.begin()));
1356
1357 // Read the values back out. Use reverse iterators with back_inserter
1358 // to preserve header order. NB: vector<> does not support push_front
1359 // so we can't use front_inserter(). 01/09/03 jhrg
1360 vector<string> result;
1361 copy(merged_headers.rbegin(), merged_headers.rend(),
1362 back_inserter(result));
1363
1364 write_metadata(entry->get_cachename(), result);
1365 entry->unlock_write_response();
1366 unlock_cache_interface();
1367 }
1368 catch (...) {
1369 if (entry) {
1370 entry->unlock_read_response();
1371 }
1372 unlock_cache_interface();
1373 throw;
1374 }
1375}
1376
1388bool
1389HTTPCache::is_url_valid(const string &url)
1390{
1391 lock_cache_interface();
1392
1393 bool freshness;
1394 HTTPCacheTable::CacheEntry *entry = 0;
1395
1396 DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1397
1398 try {
1399 if (d_always_validate) {
1400 unlock_cache_interface();
1401 return false; // force re-validation.
1402 }
1403
1404 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1405 if (!entry)
1406 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1407
1408 // If we supported range requests, we'd need code here to check if
1409 // there was only a partial response in the cache. 10/02/02 jhrg
1410
1411 // In case this entry is of type "must-revalidate" then we consider it
1412 // invalid.
1413 if (entry->get_must_revalidate()) {
1414 entry->unlock_read_response();
1415 unlock_cache_interface();
1416 return false;
1417 }
1418
1419 time_t resident_time = time(NULL) - entry->get_response_time();
1420 time_t current_age = entry->get_corrected_initial_age() + resident_time;
1421
1422 // Check that the max-age, max-stale, and min-fresh directives
1423 // given in the request cache control header is followed.
1424 if (d_max_age >= 0 && current_age > d_max_age) {
1425 DBG(cerr << "Cache....... Max-age validation" << endl);
1426 entry->unlock_read_response();
1427 unlock_cache_interface();
1428 return false;
1429 }
1430 if (d_min_fresh >= 0
1431 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1432 DBG(cerr << "Cache....... Min-fresh validation" << endl);
1433 entry->unlock_read_response();
1434 unlock_cache_interface();
1435 return false;
1436 }
1437
1438 freshness = (entry->get_freshness_lifetime()
1439 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1440 entry->unlock_read_response();
1441 unlock_cache_interface();
1442 }
1443 catch (...) {
1444 if (entry) {
1445 entry->unlock_read_response();
1446 }
1447 unlock_cache_interface();
1448 throw;
1449 }
1450
1451 return freshness;
1452}
1453
1481FILE * HTTPCache::get_cached_response(const string &url,
1482 vector<string> &headers, string &cacheName) {
1483 lock_cache_interface();
1484
1485 FILE *body = 0;
1486 HTTPCacheTable::CacheEntry *entry = 0;
1487
1488 DBG(cerr << "Getting the cached response for " << url << endl);
1489
1490 try {
1491 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1492 if (!entry) {
1493 unlock_cache_interface();
1494 return 0;
1495 }
1496
1497 cacheName = entry->get_cachename();
1498 read_metadata(entry->get_cachename(), headers);
1499
1500 DBG(cerr << "Headers just read from cache: " << endl);
1501 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1502
1503 body = open_body(entry->get_cachename());
1504
1505 DBG(cerr << "Returning: " << url << " from the cache." << endl);
1506
1507 d_http_cache_table->bind_entry_to_data(entry, body);
1508 }
1509 catch (...) {
1510 // Why make this unlock operation conditional on entry?
1511 if (entry)
1512 unlock_cache_interface();
1513 if (body != 0)
1514 fclose(body);
1515 throw;
1516 }
1517
1518 unlock_cache_interface();
1519
1520 return body;
1521}
1522
1534FILE *
1535HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1536{
1537 string discard_name;
1538 return get_cached_response(url, headers, discard_name);
1539}
1540
// Convenience wrapper: calls the three-argument get_cached_response() with
// throw-away objects for the headers and the cache name, and returns its
// result.
//
// NOTE(review): this listing is missing original line 1552, the declarator
// of this function. The body uses a parameter named `url`, so it is
// presumably another get_cached_response overload taking only the URL;
// confirm against the upstream source.
1551FILE *
1553{
1554 string discard_name;
1555 vector<string> discard_headers;
1556 return get_cached_response(url, discard_headers, discard_name);
1557}
1558
1571void
1573{
1574 lock_cache_interface();
1575
1576 try {
1577 // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1578 d_http_cache_table->uncouple_entry_from_data(body);
1579 }
1580 catch (...) {
1581 unlock_cache_interface();
1582 throw;
1583 }
1584
1585 unlock_cache_interface();
1586}
1587
// Delete every entry in the cache table. Runs with the cache interface
// locked. If the table reports that read responses are locked, throws Error
// ("Attempt to purge the cache with entries in use.") instead of deleting
// anything. Exceptions are rethrown after the interface lock is released.
//
// NOTE(review): this listing is missing original line 1601, the declarator
// of this function, and the name is not in the cross-reference index at the
// end of the file; restore it from the upstream source.
1600void
1602{
1603 lock_cache_interface();
1604
1605 try {
1606 if (d_http_cache_table->is_locked_read_responses())
1607 throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1608
1609 d_http_cache_table->delete_all_entries();
1610 }
1611 catch (...) {
1612 unlock_cache_interface();
1613 throw;
1614 }
1615
1616 unlock_cache_interface();
1617}
1618
1619} // namespace libdap
A class for error processing.
Definition: Error.h:94
std::string get_error_message() const
Definition: Error.cc:243
void create_location(CacheEntry *entry)
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void add_entry_to_cache_table(CacheEntry *entry)
void remove_entry_from_cache_table(const string &url)
CacheDisconnectedMode get_cache_disconnected() const
Definition: HTTPCache.cc:676
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1157
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
vector< string > get_cache_control()
Definition: HTTPCache.cc:920
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
string get_cache_root() const
Definition: HTTPCache.cc:516
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition: HTTPCache.cc:664
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1572
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1250
unsigned long get_max_entry_size() const
Definition: HTTPCache.cc:803
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
unsigned long get_max_size() const
Definition: HTTPCache.cc:758
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
bool get_always_validate() const
Definition: HTTPCache.cc:850
int get_default_expiration() const
Definition: HTTPCache.cc:831
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1389
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1320
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
void set_cache_control(const vector< string > &cc)
Definition: HTTPCache.cc:872
virtual ~HTTPCache()
Definition: HTTPCache.cc:302
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1481
bool is_cache_enabled() const
Definition: HTTPCache.cc:647
A class for software fault reporting.
Definition: InternalErr.h:65
EventHandler * register_handler(int signum, EventHandler *eh, bool ignore_by_default=false)
static SignalHandler * instance()
EventHandler * remove_handler(int signum)
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:954
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:273
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:153