bes Updated for version 3.20.10
history_utils.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the Hyrax data server.
4
5// Copyright (c) 2021 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifdef HAVE_UNISTD_H
31#include <unistd.h>
32#endif
33
34#include <sys/stat.h>
35
36#include <fstream>
37#include <sstream> // std::stringstream
38#include <thread>
39#include <future>
40
41// rapidjson
42#include <stringbuffer.h>
43#include <writer.h>
44#include "document.h"
45
46#include <libdap/D4Group.h>
47#include <libdap/D4Attributes.h>
48#include <libdap/DataDDS.h>
49
50#include "BESContextManager.h"
51#include "BESDapResponseBuilder.h"
52#include "DapFunctionUtils.h"
53#include "BESDebug.h"
54#include "BESUtil.h"
55#include "TempFile.h"
56
57#include "FONcBaseType.h"
58#include "FONcTransmitter.h"
59#include "FONcTransform.h"
60
61using namespace std;
62using namespace rapidjson;
63
64#define NEW_LINE ((char)0x0a)
65#define CF_HISTORY_KEY "history"
66#define CF_HISTORY_CONTEXT "cf_history_entry"
67#define HISTORY_JSON_KEY "history_json"
68#define HISTORY_JSON_CONTEXT "history_json_entry"
69
70#define MODULE "fonc"
71#define prolog string("history_utils::").append(__func__).append("() - ")
72
#if 0
// Disabled (compiled out by the surrounding #if 0).
// Parses the first element of each vector as JSON, pushes the old document onto
// the new one, and replaces the content of global_attr with the serialized result.
void appendHistoryJson(vector<string> *global_attr, vector<string> jsonNew)
{
    const char *previous_json = global_attr->at(0).c_str();
    const char *incoming_json = jsonNew.at(0).c_str();

    Document docNew, docOld;
    Document::AllocatorType &allocator = docOld.GetAllocator();

    // Parse the incoming value first, then the previous one.
    docNew.SetArray();
    docNew.Parse(incoming_json);
    docOld.SetArray();
    docOld.Parse(previous_json);
    docNew.PushBack(docOld, allocator);

    // Serialize the merged document.
    StringBuffer serialized;
    Writer<StringBuffer> json_writer(serialized);
    docNew.Accept(json_writer);

    // The serialized document becomes the only element of global_attr.
    global_attr->clear();
    global_attr->push_back(serialized.GetString());
}
#endif
95
96
97
105string create_cf_history_txt(const string &request_url)
106{
107 // This code will be used only when the 'cf_history_context' is not set,
108 // which should be never in an operating server. However, when we are
109 // testing, often only the besstandalone code is running and the existing
110 // baselines don't set the context, so we have this. It must do something
111 // so the tests are not hopelessly obscure and filter out junk that varies
112 // by host (e.g., the names of cached files that have been decompressed).
113 // jhrg 6/3/16
114
115 string cf_history_entry;
116 std::stringstream ss;
117 time_t raw_now;
118 struct tm *timeinfo;
119 time(&raw_now); /* get current time; same as: timer = time(NULL) */
120 timeinfo = localtime(&raw_now);
121
122 char time_str[100];
123 strftime(time_str, 100, "%Y-%m-%d %H:%M:%S", timeinfo);
124
125 ss << time_str << " " << "Hyrax" << " " << request_url << '\n';
126 cf_history_entry = ss.str();
127 BESDEBUG(MODULE, prolog << "New cf history entry: '" << cf_history_entry << "'" << endl);
128 return cf_history_entry;
129}
130
/**
 * @brief Emit a default history_json entry through the supplied writer.
 *
 * Writes one JSON object with the keys "$schema", "date_time", "program",
 * "version" and "parameters"; "parameters" is an array holding a single object
 * whose "request_url" member is the given URL. "date_time" is the current local
 * time formatted as YYYY-MM-DDTHH:MM:SS.
 *
 * @tparam Writer Any type providing StartObject(), EndObject(), StartArray(),
 *         EndArray(), Key(const char *) and String(const char *); in this file
 *         it is instantiated with a rapidjson Writer<StringBuffer>.
 * @param request_url Value stored under "request_url".
 * @param writer Receives the JSON events.
 */
template <typename Writer>
void create_json_history_obj(const std::string &request_url, Writer& writer)
{
    // This is the fallback used when the 'history_json_entry' context is not set.
    // Per the original authors that should not happen in an operating server, but
    // tests that run only besstandalone do not set the context. jhrg 6/3/16, sk 6/17/21

    const char *const schema_url =
        "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-0.1.0.json";

    const time_t now = time(nullptr);
    char stamp[100];
    strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%S", localtime(&now));

    writer.StartObject();

    writer.Key("$schema");
    writer.String(schema_url);

    writer.Key("date_time");
    writer.String(stamp);

    writer.Key("program");
    writer.String("hyrax");

    // NOTE: the version string is hard-coded here.
    writer.Key("version");
    writer.String("1.16.3");

    // "parameters" is a one-element array: [ { "request_url": <request_url> } ]
    writer.Key("parameters");
    writer.StartArray();
    writer.StartObject();
    writer.Key("request_url");
    writer.String(request_url.c_str());
    writer.EndObject();
    writer.EndArray();

    writer.EndObject();
}
178
184string get_cf_history_entry (const string &request_url)
185{
186 bool foundIt = false;
187 string cf_history_entry = BESContextManager::TheManager()->get_context(CF_HISTORY_CONTEXT, foundIt);
188 if (!foundIt) {
189 // If the cf_history_entry context was not set by the incoming command then
190 // we compute and the value of the history string here.
191 cf_history_entry = create_cf_history_txt(request_url);
192 }
193 return cf_history_entry;
194}
195
#if 0
// Disabled (compiled out by the surrounding #if 0).
// Variant of get_history_json_entry() that returns the entry as the single
// element of a vector<string>.
vector<string> get_history_json_entry (const string &request_url)
{
    bool context_is_set = false;
    string entry = BESContextManager::TheManager()->get_context("history_json_entry", context_is_set);

    if (!context_is_set) {
        // The incoming command did not set the history_json_entry context, so the
        // entry is computed here.
        Document history_json_doc;
        history_json_doc.SetObject();
        StringBuffer serialized;
        Writer<StringBuffer> json_writer(serialized);
        create_json_history_obj(request_url, json_writer);
        entry = serialized.GetString();
    }

    BESDEBUG(MODULE,prolog << "Using history_json_entry: " << entry << endl);

    // Wrap the entry in the returned vector.
    return vector<string>(1, entry);
}
#endif
225
226
227
233string get_history_json_entry (const string &request_url)
234{
235 bool foundIt = false;
236 string history_json_entry = BESContextManager::TheManager()->get_context(HISTORY_JSON_CONTEXT, foundIt);
237 if (!foundIt) {
238 // If the history_json_entry context was not set as a context key on BESContextManager
239 // we compute and the value of the history string here.
240 Document history_json_doc;
241 history_json_doc.SetObject();
242 StringBuffer buffer;
243 Writer<StringBuffer> writer(buffer);
244 create_json_history_obj(request_url, writer);
245 history_json_entry = buffer.GetString();
246 }
247
248 BESDEBUG(MODULE,prolog << "Using history_json_entry: " << history_json_entry << endl);
249 return history_json_entry;
250}
251
260string json_append_entry_to_array(const string& source_array_str, const string& new_entry_str)
261{
262 Document target_array;
263 target_array.SetArray();
264 Document::AllocatorType &allocator = target_array.GetAllocator();
265 target_array.Parse(source_array_str.c_str()); // Parse json array
266
267 Document entry;
268 entry.Parse(new_entry_str.c_str()); // Parse new entry
269
270 target_array.PushBack(entry, allocator);
271
272 // Stringify JSON
273 StringBuffer buffer;
274 Writer<StringBuffer> writer(buffer);
275 target_array.Accept(writer);
276 return buffer.GetString();
277}
278
284void update_history_json_attr(D4Attribute *global_attribute, const string &request_url)
285{
286 BESDEBUG(MODULE,prolog << "Updating history_json entry for global DAP4 attribute: " << global_attribute->name() << endl);
287
288 string hj_entry_str = get_history_json_entry(request_url);
289 BESDEBUG(MODULE,prolog << "hj_entry_str: " << hj_entry_str << endl);
290
291 string history_json;
292
293 D4Attribute *history_json_attr = nullptr;
294 if(global_attribute->type() == D4AttributeType::attr_container_c){
295 history_json_attr = global_attribute->attributes()->find(HISTORY_JSON_KEY);
296 }
297 else if( global_attribute->name() == HISTORY_JSON_KEY){
298 history_json_attr = global_attribute;
299 }
300
301 if (!history_json_attr) {
302 // If there is no source history_json attribute then we make one from scratch
303 // and add it to the global_attribute
304 BESDEBUG(MODULE, prolog << "Adding history_json entry to global_attribute " << global_attribute->name() << endl);
305 history_json_attr = new D4Attribute(HISTORY_JSON_KEY, attr_str_c);
306 global_attribute->attributes()->add_attribute_nocopy(history_json_attr);
307
308 // Promote the entry to an json array, assigning it the value of the attribute
309 history_json = "[" + hj_entry_str +"]";
310 BESDEBUG(MODULE,prolog << "CREATED history_json: " << history_json << endl);
311
312 } else {
313 // We found an existing history_jason attribute!
314 // We know the convention is that this should be a single valued DAP attribute
315 // We need to get the existing json document, parse it, insert the entry into
316 // the document using rapidjson, and then serialize it to a new string value that
317 // We will use to overwrite the current value in the existing history_json_attr.
318 history_json = *history_json_attr->value_begin();
319 history_json=R"([{"$schema":"https:\/\/harmony.earthdata.nasa.gov\/schemas\/history\/0.1.0\/history-0.1.0.json","date_time":"2021-06-25T13:28:48.951+0000","program":"hyrax","version":"@HyraxVersion@","parameters":[{"request_url":"http:\/\/localhost:8080\/opendap\/hj\/coads_climatology.nc.dap.nc4?GEN1"}]}])";
320 BESDEBUG(MODULE,prolog << "FOUND history_json: " << history_json << endl);
321
322 // Append the entry to the exisiting history_json array
323 history_json = json_append_entry_to_array(history_json, hj_entry_str);
324 BESDEBUG(MODULE,prolog << "NEW history_json: " << history_json << endl);
325
326 }
327
328 // Now the we have the update history_json element, serialized to a string, we use it to
329 // the value of the existing D4Attribute history_json_attr
330 vector<string> attr_vals;
331 attr_vals.push_back(history_json);
332 history_json_attr->add_value_vector(attr_vals); // This replaces the value
333}
334
341string append_cf_history_entry(string cf_history, string cf_history_entry){
342
343 stringstream cf_hist_new;
344 if(!cf_history.empty()){
345 cf_hist_new << cf_history;
346 if(cf_history.back() != NEW_LINE)
347 cf_hist_new << NEW_LINE;
348 }
349 cf_hist_new << cf_history_entry;
350 if(cf_history_entry.back() != NEW_LINE)
351 cf_hist_new << NEW_LINE;
352
353 BESDEBUG(MODULE, prolog << "Updated cf history: '" << cf_hist_new.str() << "'" << endl);
354 return cf_hist_new.str();
355}
356
362void update_cf_history_attr(D4Attribute *global_attribute, const string &request_url){
363 BESDEBUG(MODULE,prolog << "Updating cf history entry for global DAP4 attribute: " << global_attribute->name() << endl);
364
365 string cf_hist_entry = get_cf_history_entry(request_url);
366 BESDEBUG(MODULE, prolog << "New cf history entry: " << cf_hist_entry << endl);
367
368 string cf_history;
369 D4Attribute *history_attr = nullptr;
370 if(global_attribute->type() == D4AttributeType::attr_container_c){
371 history_attr = global_attribute->attributes()->find(CF_HISTORY_KEY);
372 }
373 else if( global_attribute->name() == CF_HISTORY_KEY){
374 history_attr = global_attribute;
375 }
376
377 if (!history_attr) {
378 //if there is no source cf history attribute make one and add it to the global_attribute.
379 BESDEBUG(MODULE, prolog << "Adding history entry to " << global_attribute->name() << endl);
380 history_attr = new D4Attribute(CF_HISTORY_KEY, attr_str_c);
381 global_attribute->attributes()->add_attribute_nocopy(history_attr);
382 }
383 else {
384 cf_history = history_attr->value(0);
385 }
386 cf_history = append_cf_history_entry(cf_history,cf_hist_entry);
387
388 std::vector<std::string> cf_hist_vec;
389 cf_hist_vec.push_back(cf_history);
390 history_attr->add_value_vector(cf_hist_vec);
391}
392
393
399void update_cf_history_attr(AttrTable *global_attr_tbl, const string &request_url) {
400
401 BESDEBUG(MODULE,prolog << "Updating cf history entry for global DAP2 attribute: " << global_attr_tbl->get_name() << endl);
402
403 string cf_hist_entry = get_cf_history_entry(request_url);
404 BESDEBUG(MODULE,prolog << "New cf history entry: '" << cf_hist_entry << "'" <<endl);
405
406 string cf_history = global_attr_tbl->get_attr(CF_HISTORY_KEY); // returns empty string if not found
407 BESDEBUG(MODULE,prolog << "Previous cf history: '" << cf_history << "'" << endl);
408
409 cf_history = append_cf_history_entry(cf_history,cf_hist_entry);
410 BESDEBUG(MODULE,prolog << "Updated cf history: '" << cf_history << "'" << endl);
411
412 global_attr_tbl->del_attr(CF_HISTORY_KEY, -1);
413 int attr_count = global_attr_tbl->append_attr(CF_HISTORY_KEY, "string", cf_history);
414 BESDEBUG(MODULE,prolog << "Found " << attr_count << " value(s) for the cf history attribute." << endl);
415}
416
422void update_history_json_attr(AttrTable *global_attr_tbl, const string &request_url) {
423
424 BESDEBUG(MODULE,prolog << "Updating history_json entry for global DAP2 attribute: " << global_attr_tbl->get_name() << endl);
425
426 string hj_entry_str = get_history_json_entry(request_url);
427 BESDEBUG(MODULE,prolog << "New history_json entry: " << hj_entry_str << endl);
428
429 string history_json = global_attr_tbl->get_attr(HISTORY_JSON_KEY);
430 BESDEBUG(MODULE,prolog << "Previous history_json: " << history_json << endl);
431
432 if (history_json.empty()) {
433 //if there is no source history_json attribute
434 BESDEBUG(MODULE, prolog << "Creating new history_json entry to global attribute: " << global_attr_tbl->get_name() << endl);
435 history_json = "[" + hj_entry_str +"]"; // Hack to make the entry into a json array.
436 } else {
437 history_json = json_append_entry_to_array(history_json,hj_entry_str);
438 global_attr_tbl->del_attr(HISTORY_JSON_KEY, -1);
439 }
440 BESDEBUG(MODULE,prolog << "New history_json: " << history_json << endl);
441 int attr_count = global_attr_tbl->append_attr(HISTORY_JSON_KEY, "string", history_json);
442 BESDEBUG(MODULE,prolog << "Found " << attr_count << " value(s) for the history_json attribute." << endl);
443
444}
445
446
453void updateHistoryAttributes(DDS *dds, const string &ce)
454{
455 string request_url = dds->filename();
456 // remove path info
457 request_url = request_url.substr(request_url.find_last_of('/')+1);
458 // remove 'uncompress' cache mangling
459 request_url = request_url.substr(request_url.find_last_of('#')+1);
460 if(!ce.empty()) request_url += "?" + ce;
461
462 // Add the new entry to the "history" attribute
463 // Get the top level Attribute table.
464 AttrTable &globals = dds->get_attr_table();
465
466 // Since many files support "CF" conventions the history tag may already exist in the source data
467 // and we should add an entry to it if possible.
468 bool added_history = false; // Used to indicate that we located a toplevel AttrTable whose name ends in "_GLOBAL" and that has an existing "history" attribute.
469// unsigned int num_attrs = globals.get_size();
470 if (globals.is_global_attribute()) {
471 // Here we look for a top level AttrTable whose name ends with "_GLOBAL" which is where, by convention,
472 // data ingest handlers place global level attributes found in the source dataset.
473 auto i = globals.attr_begin();
474 auto e = globals.attr_end();
475 for (; i != e; i++) {
476 AttrType attrType = globals.get_attr_type(i);
477 string attr_name = globals.get_name(i);
478 // Test the entry...
479 if (attrType == Attr_container && BESUtil::endsWith(attr_name, "_GLOBAL")) {
480 // We are going to append to an existing history attribute if there is one
481 // Or just add a history attribute if there is not one. In a most
482 // handy API moment, append_attr() does just this.
483
484 AttrTable *global_attr_tbl = globals.get_attr_table(i);
485 update_cf_history_attr(global_attr_tbl,request_url);
486 update_history_json_attr(global_attr_tbl,request_url);
487 added_history = true;
488 BESDEBUG(MODULE, prolog << "Added history entries to " << attr_name << endl);
489 }
490 }
491 if(!added_history){
492 auto dap_global_at = globals.append_container("DAP_GLOBAL");
493 dap_global_at->set_name("DAP_GLOBAL");
494 dap_global_at->set_is_global_attribute(true);
495
496 update_cf_history_attr(dap_global_at,request_url);
497 update_history_json_attr(dap_global_at,request_url);
498 BESDEBUG(MODULE, prolog << "No top level AttributeTable name matched '*_GLOBAL'. "
499 "Created DAP_GLOBAL AttributeTable and added history attributes to it." << endl);
500 }
501 }
502}
503
504
511void updateHistoryAttributes(DMR *dmr, const string &ce)
512{
513 string request_url = dmr->filename();
514 // remove path info
515 request_url = request_url.substr(request_url.find_last_of('/')+1);
516 // remove 'uncompress' cache mangling
517 request_url = request_url.substr(request_url.find_last_of('#')+1);
518 if(!ce.empty()) request_url += "?" + ce;
519
520 bool added_cf_history = false;
521 bool added_json_history = false;
522 D4Group* root_grp = dmr->root();
523 D4Attributes *root_attrs = root_grp->attributes();
524 for (auto attrs = root_attrs->attribute_begin(); attrs != root_attrs->attribute_end(); ++attrs) {
525 string name = (*attrs)->name();
526 BESDEBUG(MODULE, prolog << "Attribute name is "<< name << endl);
527 if ((*attrs)->type() == D4AttributeType::attr_container_c && BESUtil::endsWith(name, "_GLOBAL")) {
528 // Update Climate Forecast history attribute.
529 update_cf_history_attr(*attrs, request_url);
530 added_cf_history = true;
531
532 // Update NASA's history_json attribute
533 update_history_json_attr(*attrs, request_url);
534 added_json_history = true;
535 }
536 else if(name == CF_HISTORY_KEY){ // A top level cf history attribute
537 update_cf_history_attr(*attrs, request_url);
538 added_cf_history = true;
539 }
540 else if( name == HISTORY_JSON_KEY){ // A top level history_json attribute
541 update_cf_history_attr(*attrs, request_url);
542 added_json_history = true;
543 }
544 }
545 if(!added_cf_history || !added_json_history){
546 auto *dap_global = new D4Attribute("DAP_GLOBAL",attr_container_c);
547 root_attrs->add_attribute_nocopy(dap_global);
548 // CF history attribute
549 if(!added_cf_history){
550 update_cf_history_attr(dap_global, request_url);
551 }
552 // NASA's history_json attribute
553 if(!added_json_history){
554 update_history_json_attr(dap_global,request_url);
555 }
556 }
557}
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:961
A document for parsing JSON text as DOM.
Definition: document.h:2203
Allocator & GetAllocator()
Get the allocator of this document.
Definition: document.h:2491
GenericDocument & Parse(const typename SourceEncoding::Ch *str)
Parse JSON text from a read-only string (with Encoding conversion)
Definition: document.h:2404
Represents an in-memory output stream.
Definition: stringbuffer.h:41
JSON writer.
Definition: writer.h:90
Concept for allocating, resizing and freeing memory block.
main RapidJSON namespace