Bridges-C++  3.2.0
Bridges(C++API)
DataSource.h
Go to the documentation of this file.
1 #ifndef DATA_SOURCE_H
2 #define DATA_SOURCE_H
3 
4 #include <vector>
5 #include <string>
6 #include <unordered_map>
7 
8 using namespace std;
9 
10 #include <JSONutil.h>
12 #include "./data_src/Game.h"
13 #include "./data_src/Shakespeare.h"
17 #include "./data_src/Song.h"
19 #include "./data_src/OSMData.h"
20 #include "./data_src/OSMVertex.h"
21 #include "./data_src/OSMEdge.h"
23 #include "ColorGrid.h"
24 #include "base64.h"
25 #include <GraphAdjList.h>
26 #include <ServerComm.h>
27 #include <Bridges.h>
28 #include "rapidjson/document.h"
29 #include "assert.h"
30 #include "rapidjson/error/en.h"
31 #include <fstream>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <stdio.h>
35 
36 
37 #include <Cache.h>
38 
39 
40 namespace bridges {
41  using namespace bridges::dataset;
42  using namespace bridges::datastructure;
43 
44 
45 
46 
59  class DataSource {
60 
61  private:
62 
63  int debug() const {
64  return 0;
65  }
66  bridges::Bridges* bridges_inst;
67  bridges::lruCache my_cache;
68 
69  string getOSMBaseURL() const {
70  //return "http://cci-bridges-osm-t.uncc.edu/";
71  return "http://cci-bridges-osm.uncc.edu/";
72  }
73 
74  public:
76  : bridges_inst(br), my_cache(120) {}
77 
79  : DataSource(&br) {}
80 
81 
94  vector<Game> getGameData() {
95  using namespace rapidjson;
96  Document d;
97 
98  // request the game dataset and parse it
99  d.Parse(
100  ServerComm::makeRequest("http://bridgesdata.herokuapp.com/api/games",
101  {"Accept: application/json"}).c_str());
102 
103  const Value& D = d["data"];
104  vector<Game> wrapper;
105  for (SizeType i = 0; i < D.Size(); i++) {
106  const Value& V = D[i];
107  const Value& G = V["genre"];
108 
109  vector<string> genre;
110  for (SizeType j = 0; j < G.Size(); j++) {
111  genre.push_back(G[j].GetString());
112  }
113  wrapper.push_back(
114  Game( V["game"].GetString(),
115  V["platform"].GetString(),
116  V["rating"].GetDouble(),
117  genre ) );
118  }
119  return wrapper;
120  }
132  vector<ActorMovieIMDB> getActorMovieIMDBData(int number = 0) {
133  using namespace rapidjson;
134  Document d;
135  vector<ActorMovieIMDB> wrapper;
136  string url = "http://bridgesdata.herokuapp.com/api/imdb?limit=" +
137  to_string(number);
138 
139  // retrieve the data and parse
140  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
141 
142  const Value& D = d["data"];
143  for (SizeType i = 0; i < D.Size(); i++) {
144  const Value& V = D[i];
145  wrapper.push_back(
147  V["actor"].GetString(),
148  V["movie"].GetString()
149  )
150  );
151  }
152  return wrapper;
153  }
154 
155 
166  vector<ActorMovieIMDB> getActorMovieIMDBData2() {
167  using namespace rapidjson;
168  Document d;
169  vector<ActorMovieIMDB> wrapper;
170  string url = "http://bridgesdata.herokuapp.com/api/imdb2";
171 
172  // retrieve the data and parse
173  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
174 
175  const Value& D = d["data"];
176  for (SizeType i = 0; i < D.Size(); i++) {
177  const Value& V = D[i];
178  string actor = V["actor"].GetString();
179  string movie = V["movie"].GetString();
180  double rating = V["rating"].GetDouble();
181  const Value& G = V["genres"];
182  vector<string> genres;
183  for (SizeType j = 0; j < G.Size(); j++) {
184  genres.push_back(G[j].GetString());
185  }
186  wrapper.push_back(ActorMovieIMDB( actor, movie, (float)rating, genres));
187  }
188  return wrapper;
189  }
190 
202  vector<EarthquakeUSGS> getEarthquakeUSGSData(int number = 0) {
203  using namespace rapidjson;
204  Document d;
205  vector<EarthquakeUSGS> wrapper;
206  if (number <= 0) {
207  d.Parse(ServerComm::makeRequest( "http://earthquakes-uncc.herokuapp.com/eq",
208  {"Accept: application/json"}).c_str());
209  for (SizeType i = 0; i < d.Size(); i++) {
210  const Value& V = d[i]["properties"];
211  const Value& G = d[i]["geometry"]["coordinates"];
212  wrapper.push_back(
214  V["mag"].GetDouble(),
215  G[0].GetDouble(),
216  G[1].GetDouble(),
217  V["place"].GetString(),
218  V["title"].GetString(),
219  V["url"].GetString(),
220  V["time"].GetString() )
221  );
222  }
223  }
224  else {
225  d.Parse(ServerComm::makeRequest( "http://earthquakes-uncc.herokuapp.com/eq/latest/" +
226  to_string(number), {"Accept: application/json"}).c_str());
227 
228  const Value& D = d["Earthquakes"];
229  for (SizeType i = 0; i < D.Size(); i++) {
230  const Value& V = D[i]["properties"];
231  const Value& G = D[i]["geometry"]["coordinates"];
232  // wrapper.push_back({V["mag"].GetDouble(),G[0].GetDouble(),G[1].GetDouble(),V["place"].GetString(),V["title"].GetString(),V["url"].GetString(),V["time"].GetString()});
233  wrapper.push_back(
235  V["mag"].GetDouble(),
236  G[0].GetDouble(),
237  G[1].GetDouble(),
238  V["place"].GetString(),
239  V["title"].GetString(),
240  V["url"].GetString(),
241  V["time"].GetString() )
242  );
243  }
244  }
245  return wrapper;
246  }
262  vector<Shakespeare> getShakespeareData(string type = "",
263  bool textonly = false) {
264  using namespace rapidjson;
265  Document d;
266  vector<Shakespeare> wrapper;
267 
268  string url = "http://bridgesdata.herokuapp.com/api/shakespeare/";
269 
270  if (type == "plays" || type == "poems")
271  url += "/" + type;
272  if (textonly) {
273  url += "?format=simple";
274  }
275  // retrieve the data and parse
276  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
277 
278  const Value& D = d["data"];
279  for (SizeType i = 0; i < D.Size(); i++) {
280  const Value& V = D[i];
281  wrapper.push_back(
282  Shakespeare(
283  V["title"].GetString(),
284  V["type"].GetString(),
285  V["text"].GetString()
286  )
287  );
288 
289  }
290  return wrapper;
291  }
309  Song getSong(string songTitle, string artistName) {
310  using namespace rapidjson;
311 
312  Document d;
313  vector<Song> wrapper;
314  string url = "http://bridgesdata.herokuapp.com/api/songs/find/";
315  // retrieve the data and parse
316  if (songTitle.size() > 0)
317  url += songTitle;
318  else {
319  throw "Incorrect use of getSong. songTitle should be given.";
320  }
321 
322  if (artistName.size())
323  url += "?artistName=" + artistName;
324  else {
325  throw "Incorrect use of getSong. artistName should be given.";
326  }
327  // check for spaces in url and replace them by '%20'
328  string::size_type n = 0;
329  while ( (n = url.find(" ", n)) != string::npos) {
330  url.replace(n, 1, "%20");
331  n++;
332  }
333 
334  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
335 
336  string artist = (d.HasMember("artist")) ?
337  d["artist"].GetString() : string();
338  string song = (d.HasMember("song")) ?
339  d["song"].GetString() : string();
340  string album = (d.HasMember("album")) ?
341  d["album"].GetString() : string();
342  string lyrics = (d.HasMember("lyrics")) ?
343  d["lyrics"].GetString() : string();
344  string release_date = (d.HasMember("release_date")) ?
345  d["release_date"].GetString() : string();
346 
347  return Song (artist, song, album, lyrics, release_date);
348  }
364  vector<Song> getSongData() {
365  using namespace rapidjson;
366 
367  Document d;
368  vector<Song> all_songs;
369 
370  string url = "http://bridgesdata.herokuapp.com/api/songs/";
371  // retrieve the data and parse
372 
373  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
374 
375  const Value& D = d["data"];
376 
377  // get the songs and put them into a vector
378  for (SizeType i = 0; i < D.Size(); i++) {
379  const Value& v = D[i];
380 
381  //cout << v["artist"].GetString() << endl;
382  string artist = (v.HasMember("artist")) ? v["artist"].GetString() : string();
383  string song = (v.HasMember("song")) ? v["song"].GetString() : string();
384  string album = (v.HasMember("album")) ? v["album"].GetString() : string();
385  string lyrics = (v.HasMember("lyrics")) ? v["lyrics"].GetString() : string();
386  string release_date = (v.HasMember("release_date")) ?
387  v["release_date"].GetString() : string();
388  all_songs.push_back( Song ( artist, song, album, lyrics, release_date) );
389 
390  }
391  return all_songs;
392  }
404  vector<GutenbergBook> getGutenbergBookData(int num = 0) {
405  using namespace rapidjson;
406 
407  Document d;
408  vector<GutenbergBook> wrapper;
409  string url = "http://bridgesdata.herokuapp.com/api/books";
410  if (num > 0) {
411  url += "?limit=" + to_string(num);
412  }
413 
414  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
415  const Value& D = d["data"];
416  for (SizeType i = 0; i < D.Size(); i++) {
417  const Value& V = D[i];
418 
419  const Value& A = V["author"];
420  const Value& L = V["languages"];
421 
422  vector<string> lang;
423  for (SizeType j = 0; j < L.Size(); j++) {
424  lang.push_back(L[j].GetString());
425  }
426 
427  const Value& G = V["genres"];
428  vector<string> genre;
429  for (SizeType j = 0; j < G.Size(); j++) {
430  genre.push_back(G[j].GetString());
431  }
432 
433  const Value& S = V["subjects"];
434  vector<string> subject;
435  for (SizeType j = 0; j < S.Size(); j++) {
436  subject.push_back(S[j].GetString());
437  }
438 
439  const Value& M = V["metrics"];
440  wrapper.push_back(
442  A["name"].GetString(),
443  A["birth"].GetInt(),
444  A["death"].GetInt(),
445  V["title"].GetString(),
446  lang,
447  genre,
448  subject,
449  M["characters"].GetInt(),
450  M["words"].GetInt(),
451  M["sentences"].GetInt(),
452  M["difficultWords"].GetInt(),
453  V["url"].GetString(),
454  V["downloads"].GetInt()
455  )
456  );
457  }
458  return wrapper;
459  }
466  vector<CancerIncidence> getCancerIncidenceData(int num = 0) {
467  using namespace rapidjson;
468 
469  Document d;
470  vector<CancerIncidence> wrapper;
471  string url = "http://bridgesdata.herokuapp.com/api/cancer/withlocations";
472  if (num > 0) {
473  url += "?limit=" + to_string(num);
474  }
475 
476  d.Parse(ServerComm::makeRequest( url, {"Accept: application/json"}).c_str());
477 
478  // get the JSON dataset
479  const Value& D = d["data"];
480 
481  CancerIncidence c;
482  for (SizeType i = 0; i < D.Size(); i++) {
483  const Value& v = D[i];
484  const Value& age = v["Age"];
485 
486  c.setAgeAdjustedRate( age["Age Adjusted Rate"].GetDouble());
487  c.setAgeAdjustedCI_Lower(age["Age Adjusted CI Lower"].GetDouble());
488  c.setAgeAdjustedCI_Upper(age["Age Adjusted CI Upper"].GetDouble());
489 
490  c.setYear(v["Year"].GetInt());
491 
492  const Value& data = v["Data"];
493  c.setCrudeRate(data["Crude Rate"].GetDouble());
494  c.setCrudeRate_CI_Lower(data["Crude CI Lower"].GetDouble());
495  c.setCrudeRate_CI_Upper(data["Crude CI Upper"].GetDouble());
496  c.setRace(data["Race"].GetString());
497  c.setPopulation(data["Population"].GetInt());
498  c.setEventType(data["Event Type"].GetString());
499  c.setCount(data["Count"].GetInt());
500 
501  c.setAffectedArea(v["Area"].GetString());
502 
503  const Value& loc = v["loc"];
504  c.setLocationX(loc[0].GetDouble());
505  c.setLocationY(loc[1].GetDouble());
506 
507  wrapper.push_back(c);
508  }
509  return wrapper;
510  }
518  OSMData getOSMDataFromJSON (const string& osm_json) {
519  using namespace rapidjson;
520 
521  Document osm_data;
522 
523  osm_data.Parse(osm_json.c_str());
524 
525  // create an osm data object
526  OSMData osm;
527 
528  if (osm_data.HasMember("nodes")) {
529  vector<OSMVertex> vertices;
530  Value& nodes = osm_data["nodes"];
531 
532  vector<long> vertex_ids;
533  // get the vertices
534  for (SizeType i = 0; i < nodes.Size(); i++) {
535  const Value& node = nodes[i];
536  OSMVertex::OSMVertexID id = node[0].GetInt64();
537 
538  vertex_ids.push_back(id);
539  double lat = node[1].GetDouble(), longit = node[2].GetDouble();
540  vertices.push_back(OSMVertex(id, lat, longit));
541  }
542  osm.setVertices(vertices);
543  }
544  // add vertices to object
545  // get the edges
546 
547  if (osm_data.HasMember("edges")) {
548  vector<OSMEdge> edges;
549  Value& links = osm_data["edges"];
550 
551  for (SizeType i = 0; i < links.Size(); i++) {
552  const Value& link = links[i];
553  OSMVertex::OSMVertexID id1 = link[0].GetInt64();
554  OSMVertex::OSMVertexID id2 = link[1].GetInt64();
555  double dist = link[2].GetDouble();
556 
557  edges.push_back(OSMEdge(id1, id2, dist));
558  }
559  osm.setEdges(edges);
560  }
561  // add edges to object
562 
563  if (osm_data.HasMember("meta")) {
564  // get lat long range
565  Value& meta = osm_data["meta"];
566  double lat_min = meta["lat_min"].GetDouble();
567  double lat_max = meta["lat_max"].GetDouble();
568  double longit_min = meta["lon_min"].GetDouble();
569  double longit_max = meta["lon_max"].GetDouble();
570  osm.setLatLongRange(lat_min, lat_max, longit_min, longit_max);
571  // get dataset name
572  osm.setName(meta["name"].GetString());
573  }
574  return osm;
575  }
576 
577 
593  OSMData getOSMData (double lat_min, double long_min,
594  double lat_max, double long_max, string level = "default") {
595 
596  //URL for hash request
597  string hash_url = getOSMBaseURL() + "hash?minLon=" + std::to_string(long_min) +
598  "&minLat=" + std::to_string(lat_min) +
599  "&maxLon=" + std::to_string(long_max) +
600  "&maxLat=" + std::to_string(lat_max) +
601  "&level=" + ServerComm::encodeURLPart(level);
602 
603  //URL to request map
604  string url =
605  getOSMBaseURL() + "coords?minLon=" + std::to_string(long_min) +
606  "&minLat=" + std::to_string(lat_min) +
607  "&maxLon=" + std::to_string(long_max) +
608  "&maxLat=" + std::to_string(lat_max) +
609  "&level=" + ServerComm::encodeURLPart(level);
610 
611  //trys to get hash value for bounding box map
612  if (debug())
613  std::cerr << "Hitting hash URL: " << hash_url << "\n";
614  string hash_value = ServerComm::makeRequest(hash_url, {"Accept: application/json"});
615 
616 
617  std::string osm_json;
618  //std::cerr<<"url: "<<url<<"\n";
619 
620  //Checks to see if map requested is stored in local cache
621  if (my_cache.inCache(hash_value) == true) { //local map is up-to-date
622  try {
623  if (my_cache.inCache(hash_value)) {
624  osm_json = my_cache.getDoc(hash_value);
625  }
626  }
627  catch (CacheException& ce) {
628  //something went bad trying to access the cache
629  std::cout << "Exception while reading from cache. Ignoring cache and continue." << std::endl;
630  }
631 
632  }
633  else if (hash_value.compare("false") == 0 || my_cache.inCache(hash_value) == false) {
634  //Server response is false or somehow map got saved as false
635 
636  if (debug())
637  std::cerr << "Hitting json URL: " << url << "\n";
638 
639  osm_json = ServerComm::makeRequest(url, {"Accept: application/json"}); //Requests the map data then requests the maps hash
640  if (debug())
641  std::cerr << "Hitting hash URL: " << hash_url << "\n";
642 
643  hash_value = ServerComm::makeRequest(hash_url, {"Accept: application/json"});
644 
645  if (hash_value.compare("false") == 0) {
646  std::cerr << "Error while gathering hash data for generated map..." << std::endl;
647  std::cerr << osm_json << std::endl;
648  abort();
649  }
650 
651  //Saves map to cache directory
652  try {
653  my_cache.putDoc(hash_value, osm_json);
654 
655  }
656  catch (CacheException& ce) {
657 
658  //something went bad trying to access the cache
659  std::cerr << "Exception while storing in cache. Weird but not critical." << std::endl;
660  if (debug())
661  std::cerr << "Tried to store hash=" << hash_value << " key=" << osm_json << std::endl;
662  }
663 
664  }
665  return getOSMDataFromJSON(osm_json);
666  }
667 
/**
 * Placeholder for an amenity query over a bounding box.
 * The body is empty: no request is made and nothing is returned.
 *
 * @param minLat unused
 * @param minLon unused
 * @param maxLat unused
 * @param maxLon unused
 * @param amenity unused
 */
void getAmenityData(double minLat, double minLon,
		double maxLat, double maxLon, std::string amenity) {
	// intentionally empty
}
671 
672  void getAmenityData(const std::string& location, const std::string& amenity) {
673  std::string url = getOSMBaseURL() + "amenity?location=" + location
674  + "&amenity=" + amenity;
675 
676  std::string hash_url = getOSMBaseURL() + "hash?location=" + location
677  + "&amenity=" + amenity;
678 
679 
680  }
681 
682 
695  OSMData getOSMData (string location, string level = "default") {
696  //URL for hash request
697  string hash_url = getOSMBaseURL() + "hash?location=" + ServerComm::encodeURLPart(location) +
698  "&level=" + ServerComm::encodeURLPart(level);
699 
700  //URL to request map
701  string url =
702  getOSMBaseURL() + "loc?location=" + ServerComm::encodeURLPart(location) +
703  "&level=" + ServerComm::encodeURLPart(level);
704 
705  //trys to get hash value for bounding box map
706  if (debug())
707  std::cerr << "Hitting hash URL: " << hash_url << "\n";
708  string hash_value = ServerComm::makeRequest(hash_url, {"Accept: application/json"});
709 
710 
711  std::string osm_json;
712 
713  if (debug())
714  std::cerr << "url: " << url << "\n";
715 
716  if (my_cache.inCache(hash_value) == true) { //local map is up-to-date
717  try {
718  if (my_cache.inCache(hash_value)) {
719  osm_json = my_cache.getDoc(hash_value);
720  }
721  }
722  catch (CacheException& ce) { //something went bad trying to access the cache
723  std::cout << "Exception while reading from cache. Ignoring cache and continue." << std::endl;
724  }
725 
726  }
727  else if (hash_value.compare("false") == 0 || my_cache.inCache(hash_value) == false) { //Server response is false or somehow map got saved as false
728  if (debug())
729  std::cerr << "Hitting json URL: " << url << "\n";
730  osm_json = ServerComm::makeRequest(url, {"Accept: application/json"}); //Requests the map data then requests the maps hash
731  if (debug())
732  std::cerr << "Hitting hash URL: " << hash_url << "\n";
733  hash_value = ServerComm::makeRequest(hash_url, {"Accept: application/json"});
734  if (hash_value.compare("false") == 0) {
735  std::cerr << "Error while gathering hash data for generated map..." << std::endl;
736  std::cerr << osm_json << std::endl;
737  abort();
738  }
739 
740  //Saves map to cache directory
741  try {
742  my_cache.putDoc(hash_value, osm_json);
743  }
744  catch (CacheException& ce) {
745  //something went bad trying to access the cache
746  std::cerr << "Exception while storing in cache. Weird but not critical." << std::endl;
747  if (debug())
748  std::cerr << "Tried to store hash=" << hash_value << " key=" << osm_json << std::endl;
749  }
750 
751  }
752  return getOSMDataFromJSON(osm_json);
753  }
754 
755 
817  const std::string& user,
818  int assignment,
819  int subassignment = 0) {
820 
822 
823  std::string s = this->getAssignment(user, assignment, subassignment);
824 
825  rapidjson::Document doc;
826  doc.Parse(s.c_str());
827  if (doc.HasParseError())
828  throw "Malformed JSON";
829 
830  //Access doc["assignmentJSON"]
831  const auto& assjson = doc.FindMember("assignmentJSON");
832 
833  if (assjson == doc.MemberEnd())
834  throw "Malformed GraphAdjacencyList JSON: no assignmentJSON";
835 
836  //Access doc["assignmentJSON"]["data"]
837  const auto& dataArray = assjson->value.FindMember("data");
838 
839  if (dataArray == assjson->value.MemberEnd()
840  || dataArray->value.IsArray() == false)
841  throw "Malformed GraphAdjacencyList JSON: No data";
842 
843  const auto& data = dataArray->value.GetArray()[0];
844 
845  //Access doc["assignmentJSON"]["data"][0]["visual"]
846  const auto& dataVisual = data.FindMember("visual");
847 
848  if (dataVisual == data.MemberEnd() ||
849  dataVisual->value.IsString() == false)
850  throw "Malformed GraphAdjacencyList JSON";
851 
852  std::string assignment_type = dataVisual->value.GetString();
853 
854  if (assignment_type != "GraphAdjacencyList")
855  throw "Malformed GraphAdjacencyList JSON: Not a GraphAdjacencyList";
856 
857  //reconstructing vertices out of nodes, and using the optional "name" as the data associated
858  {
859  const auto& nodes = data.FindMember("nodes");
860  if (nodes == data.MemberEnd() ||
861  nodes->value.IsArray() == false)
862  throw "Malformed GraphAdjacencyList JSON: malformed nodes";
863 
864 
865  const auto& nodeArray = nodes->value.GetArray();
866  int nbVertex = nodeArray.Size();
867  for (int i = 0; i < nbVertex; ++i) {
868  std::string name;
869 
870  const auto& vertexJSONstr = nodeArray[i];
871 
872  const auto& nameJSON = vertexJSONstr.FindMember("name");
873  if (nameJSON != vertexJSONstr.MemberEnd()
874  && nameJSON->value.IsString()) {
875  name = nameJSON->value.GetString();
876  }
877  gr.addVertex(i, name);
878  }
879  }
880 
881  //reconstructing links, and using "label" as data associated with the link
882  {
883  const auto& links = data.FindMember("links");
884  if (links == data.MemberEnd() ||
885  links->value.IsArray() == false)
886  throw "Malformed GraphAdjacencyList JSON: malformed links";
887 
888  const auto& linkArray = links->value.GetArray();
889  int nbLink = linkArray.Size();
890  for (int i = 0; i < nbLink; ++i) {
891  std::string name;
892  int src;
893  int dest;
894  int wgt;
895 
896  const auto& linkJSONstr = linkArray[i];
897 
898  //checking label. Maybe does not exist? (is that field optional?)
899  const auto& nameJSON = linkJSONstr.FindMember("label");
900  if (nameJSON != linkJSONstr.MemberEnd()
901  && nameJSON->value.IsString()) {
902  name = nameJSON->value.GetString();
903  }
904 
905  //checking source
906  const auto& srcJSON = linkJSONstr.FindMember("source");
907  if (srcJSON == linkJSONstr.MemberEnd()
908  || srcJSON->value.IsInt() == false) {
909  throw "Malformed GraphAdjacencyList JSON: malformed link";
910  }
911  src = srcJSON->value.GetInt();
912 
913 
914  //checking destination
915  const auto& dstJSON = linkJSONstr.FindMember("target");
916  if (dstJSON == linkJSONstr.MemberEnd()
917  || dstJSON->value.IsInt() == false) {
918  throw "Malformed GraphAdjacencyList JSON: malformed link";
919  }
920  dest = dstJSON->value.GetInt();
921 
922  //checking weight. //why is weight a mandatory parameter?
923  const auto& wgtJSON = linkJSONstr.FindMember("weight");
924  if (wgtJSON == linkJSONstr.MemberEnd()
925  || wgtJSON->value.IsInt() == false) {
926  throw "Malformed GraphAdjacencyList JSON: malformed link";
927  }
928  wgt = wgtJSON->value.GetInt();
929 
930 
931  //adding edge.
932  gr.addEdge(src, dest, name);
933  }
934  }
935 
936  return gr;
937  }
938 
947  int assignment,
948  int subassignment = 0) {
949 
950  std::string s = this->getAssignment(user, assignment, subassignment);
951 
952  rapidjson::Document doc;
953  doc.Parse(s.c_str());
954  if (doc.HasParseError())
955  throw "Malformed JSON";
956 
957  try {
958  std::string assignment_type = doc["assignment_type"].GetString();
959 
960  if (assignment_type != "ColorGrid")
961  throw "Malformed ColorGrid JSON: Not a ColorGrid";
962  }
963  catch (rapidjson_exception re) {
964  throw "Malformed JSON: Not a Bridges assignment?";
965  }
966 
967 
968  try {
969  auto& data = doc["data"][0];
970 
971  std::string encoding = data["encoding"].GetString();
972  if (encoding != "RAW" && encoding != "RLE")
973  throw "Malformed ColorGrid JSON: encoding not supported";
974 
975 
976  //Access doc["data"][0]["dimensions"]
977  const auto& dimensions = data["dimensions"];
978  int dimx = dimensions[0].GetInt();
979  int dimy = dimensions[1].GetInt();
980 
981  if (debug())
982  std::cerr << "Dimensions: " << dimx << "x" << dimy << std::endl;
983 
984  //Access doc["data"][0]["nodes"][0]
985  std::string base64_encoded_assignment = data["nodes"][0].GetString();
986 
987 
988  std::vector<bridges::BYTE> decoded = bridges::base64::decode(base64_encoded_assignment);
989 
990  bridges::ColorGrid cg (dimx, dimy);
991 
992 
993  if (encoding == "RAW") {
994  if (debug())
995  std::cerr << "decoding RAW" << std::endl;
996  if (debug())
997  std::cerr << "length: " << decoded.size() << std::endl;
998  if (decoded.size() < dimx * dimy * 4)
999  throw "Malformed ColorGrid JSON: nodes is smaller than expected";
1000 
1001  //first pixel
1002  //std::cerr<<(int)decoded[0]<<" "<<(int)decoded[1]<<" "<<(int)decoded[2]<<" "<<(int)decoded[3]<<std::endl;
1003 
1004  //bridges::ColorGrid* ptr = new bridges::ColorGrid (dimx, dimy);
1005 
1006  size_t base = 0;
1007 
1008  for (int x = 0; x < dimx; ++x) {
1009  for (int y = 0; y < dimy; ++y) {
1010  bridges::Color c ((int)decoded[base],
1011  (int)decoded[base + 1],
1012  (int)decoded[base + 2],
1013  (int)decoded[base + 3]
1014  );
1015 
1016  cg.set(x, y, c);
1017  base += 4;
1018  }
1019  }
1020  }
1021  else if (encoding == "RLE") {
1022  if (debug())
1023  std::cerr << "Decoding RLE" << std::endl;
1024 
1025  int currentInDecoded = 0;
1026  int currentInCG = 0;
1027  while (currentInDecoded != decoded.size()) {
1028  if (currentInDecoded + 5 > decoded.size())
1029  throw "Malformed ColorGrid JSON: nodes is not a multiple of 5";
1030 
1031 
1032 
1033  int repeat = (BYTE) decoded[currentInDecoded++];
1034  int r = (BYTE) decoded[currentInDecoded++];
1035  int g = (BYTE) decoded[currentInDecoded++];
1036  int b = (BYTE) decoded[currentInDecoded++];
1037  int a = (BYTE) decoded[currentInDecoded++];
1038 
1039  if (debug())
1040  std::cerr << "indecoded: " << currentInDecoded
1041  << " repeat: " << (int)repeat
1042  << " color(" << (int)r << "," << (int)g << "," << (int)b << "," << (int)a << ")"
1043  << std::endl;
1044 
1045  bridges::Color c (r, g, b, a);
1046 
1047  while (repeat >= 0) {
1048  int posX = currentInCG / dimy;
1049  int posY = currentInCG % dimy;
1050  if (posX >= dimx || posY >= dimy) {
1051  if (debug())
1052  std::cerr << posX << " " << dimx << " " << posY << " " << dimy << std::endl;
1053  throw "Malformed ColorGrid JSON: Too much data in nodes";
1054  }
1055  cg.set(posX, posY, c);
1056 
1057  currentInCG++;
1058  repeat --;
1059  }
1060  }
1061  if (debug())
1062  std::cerr << "written " << currentInCG << " pixels" << std::endl;
1063  if (currentInCG != dimx * dimy)
1064  throw "Malformed ColorGrid JSON: Not enough data in nodes";
1065  }
1066 
1067  return cg;
1068  }
1069  catch (rapidjson_exception re) {
1070  throw "Malformed ColorGrid JSON";
1071  }
1072 
1073  }
1074  private:
1075  /***
1076  * This function obtains the JSON representation of a particular subassignment.
1077  *
1078  * @return a string that is the JSON representation of the subassignment as stored by the Bridges server.
1079  * @param user the name of the user who uploaded the assignment
1080  * @param assignment the ID of the assignment to get
1081  * @param subassignment the ID of the subassignment to get
1082  ***/
1083  std::string getAssignment(std::string user,
1084  int assignment,
1085  int subassignment = 0) {
1086  std::vector<std::string> headers;
1087 
1088  std::stringstream ss;
1089 
1091  if (bridges_inst)
1092  ss << bridges_inst->getServerURL();
1093  else
1094  ss << bridges::Bridges::getDefaultServerURL();
1095  ss << "/assignmentJSON/"
1096  << assignment << ".";
1097  ss << std::setfill('0') << std::setw(2) << subassignment;
1098  ss << "/" << user;
1099 
1100  std::string url = ss.str();
1101 
1102  // std::cout<<"URL: "<<url<<std::endl;
1103 
1104  std::string s = bridges::ServerComm::makeRequest(url, headers);
1105 
1106  return s;
1107  }
1108 
1109 
/**
 * Erase the first occurrence of a substring from a string, in place.
 * The string is left unchanged when the substring does not occur.
 *
 * @param str string to modify
 * @param toRemove substring whose first occurrence is erased
 */
void removeFirstOccurence (std::string & str, const std::string & toRemove) {
	const std::string::size_type where = str.find(toRemove);
	if (where == std::string::npos)
		return; // nothing to remove
	str.erase(where, toRemove.size());
}
1116 
1126  void getWikidataActorMovieDirect (int yearbegin, int yearend, std::vector<MovieActorWikidata>& vout) {
1127  std::string codename = "wikidata-actormovie-" + std::to_string(yearbegin) + "-" + std::to_string(yearend);
1128  std::string json;
1129  bool from_cache = false;
1130  try {
1131  if (my_cache.inCache(codename)) {
1132  json = my_cache.getDoc(codename);
1133  from_cache = true;
1134  }
1135  }
1136  catch (CacheException& ce) {
1137  //something went bad trying to access the cache
1138  std::cout << "Exception while reading from cache. Ignoring cache and continue." << std::endl;
1139  }
1140 
1141 
1142  if (!from_cache) {
1143  std::vector<std::string> http_headers;
1144  http_headers.push_back("User-Agent: bridges-cxx"); //wikidata kicks you out if you don't have a useragent
1145  http_headers.push_back("Accept: application/json"); //tell wikidata we are OK with JSON
1146 
1147  string url = "https://query.wikidata.org/sparql?";
1148 
1149  //Q1860 is "English"
1150  //P364 is "original language of film or TV show"
1151  //P161 is "cast member"
1152  //P577 is "publication date"
1153  //A11424 is "film"
1154  //P31 is "instance of"
1155  // "instance of film" is necessary to filter out tv shows
1156  std::string sparqlquery = "SELECT ?movie ?movieLabel ?actor ?actorLabel WHERE \
1157 {\
1158  ?movie wdt:P31 wd:Q11424.\
1159  ?movie wdt:P161 ?actor.\
1160  ?movie wdt:P364 wd:Q1860.\
1161  ?movie wdt:P577 ?date.\
1162  FILTER(YEAR(?date) >= " + std::to_string(yearbegin) + " && YEAR(?date) <= " + std::to_string(yearend) + ").\
1163  SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \
1164 }";
1165  url += "query=" + ServerComm::encodeURLPart(sparqlquery);
1166  url += "&";
1167  url += "format=json";
1168 
1169  // get the OSM data json
1170  json = ServerComm::makeRequest(url, http_headers);
1171 
1172  try {
1173  my_cache.putDoc(codename, json);
1174  }
1175  catch (CacheException& ce) {
1176  //something went bad trying to access the cache
1177  std::cerr << "Exception while storing in cache. Weird but not critical." << std::endl;
1178  }
1179  }
1180 
1181  {
1182  using namespace rapidjson;
1183  rapidjson::Document doc;
1184  doc.Parse(json.c_str());
1185  if (doc.HasParseError())
1186  throw "Malformed JSON";
1187 
1188  try {
1189  const auto& resultsArray = doc["results"]["bindings"].GetArray();
1190 
1191  for (auto& mak_json : resultsArray) {
1192  MovieActorWikidata mak;
1193 
1194  // all wikidata uri start with "http://www.wikidata.org/entity/"
1195  // so strip it out because it does not help discriminate and
1196  // consume memory and runtime to compare string
1197  std::string actoruri = mak_json["actor"]["value"].GetString();
1198  std::string movieuri = mak_json["movie"]["value"].GetString();
1199  removeFirstOccurence (actoruri, "http://www.wikidata.org/entity/");
1200 
1201  removeFirstOccurence (movieuri, "http://www.wikidata.org/entity/");
1202 
1203 
1204  mak.setActorURI(actoruri);
1205  mak.setMovieURI(movieuri);
1206  mak.setActorName(mak_json["actorLabel"]["value"].GetString());
1207  mak.setMovieName(mak_json["movieLabel"]["value"].GetString());
1208  vout.push_back(mak);
1209  }
1210 
1211  }
1212  catch (rapidjson_exception re) {
1213  throw "Malformed JSON: Not from wikidata?";
1214  }
1215  }
1216  }
1217  public:
1218 
1226  std::vector<MovieActorWikidata> getWikidataActorMovie (int yearbegin, int yearend) {
1227  //Internally this function get the data year by year. This
1228  //is pretty bad because it hits wikidata the first time
1229  //for multiple years. But it enables to work around
1230  //wikidata's time limit. This also works well because the
1231  //Cache will store each year independently and so without
1232  //redundancy. Though I (Erik) am not completely sure that a
1233  //movie can be appear in different years, for instance it
1234  //can be released in the US in 2005 but in canada in
1235  //2006...
1236 
1237  std::vector<MovieActorWikidata> ret;
1238  for (int y = yearbegin; y <= yearend; ++y) {
1239  cout << "getting year " << y << endl;
1240  getWikidataActorMovieDirect (y, y, ret);
1241  }
1242  return ret;
1243  }
1244 
1245 
1258  double latitMin, double longitMin,
1259  double latitMax, double longitMax, double res = 0.0166) {
1260 
1261  // set up the elevation data url to get the data, given
1262  // a lat/long bounding box
1263  string server_str =
1264  "http://bridges-data-server-elevation.bridgesuncc.org/";
1265 
1266  string elev_str = "elevation?";
1267 
1268  string bbox_str =
1269  "&minLon=" + std::to_string(longitMin) +
1270  "&minLat=" + std::to_string(latitMin) +
1271  "&maxLon=" + std::to_string(longitMax) +
1272  "&maxLat=" + std::to_string(latitMax);
1273 
1274  string resn_str = "&resX=" + std::to_string(res)
1275  + "&resY=" + std::to_string(res);
1276 
1277  string elev_data_url =
1278  server_str + elev_str + bbox_str + resn_str;
1279 
1280  if (debug())
1281  cerr << "Hitting data URL: " << elev_data_url << "\n";
1282  string hash_str = "hash?";
1283  string hash_url = server_str + hash_str + bbox_str + resn_str;
1284 
1285  if (debug())
1286  cerr << "Hitting hash URL: " << hash_url << "\n";
1287 
1288  // get hash value for elevation data
1289  string hash_value = ServerComm::makeRequest(hash_url,
1290  {"Accept: application/json"});
1291 
1292  string elev_json;
1293 
1294  //Checks to see if elevation data is already in local cache
1295  if (my_cache.inCache(hash_value)) { // already exists
1296  try {
1297  elev_json = my_cache.getDoc(hash_value);
1298  }
1299  catch (CacheException& ce) {
1300  //something went bad trying to access the cache
1301  cout << "Exception while reading from cache. Ignoring cache." << std::endl;
1302  }
1303  }
1304  else { //Server response is false or not cached
1305 
1306  if (debug())
1307  cerr << "Hitting json URL: " << elev_data_url << "\n";
1308 
1309  // get the eleveation data
1310  elev_json = ServerComm::makeRequest(elev_data_url,
1311  {"Accept: application/json"});
1312 
1313  if (debug())
1314  cerr << "Hitting elev data URL: " << elev_data_url << "\n";
1315 
1316  string hash_value = ServerComm::makeRequest(hash_url,
1317  {"Accept: application/json"});
1318 
1319  if (hash_value != "false") {
1320  // store map in cache
1321  try {
1322  my_cache.putDoc(hash_value, elev_json);
1323  }
1324  catch (CacheException& ce) {
1325  //something went bad trying to access the cache
1326  cerr << "Exception while storing in cache. Weird but not critical."
1327  << endl;
1328  }
1329  }
1330  }
1331  return getElevationDataFromJSON(elev_json);
1332  }
1333 
1334  // get Elevation data from the JSON
1336 
1337  // use a string stream to parse the data, which is not really a JSON,
1338  // but raw text
1339  stringstream ss(elev_json);
1340 
1341  int rows, cols, elev_val;
1342  double ll_x, ll_y, cell_size;
1343  string tmp;
1344 
1345  // get the dimensions, origin
1346  ss >> tmp >> cols >> tmp >> rows >>
1347  tmp >> ll_x >> tmp >> ll_y >>
1348  tmp >> cell_size;
1349 
1350 
1351  // create the elevation object
1352  ElevationData elev_data (rows, cols);
1353  elev_data.setxll(ll_x);
1354  elev_data.setyll(ll_y);
1355  elev_data.setCellSize(cell_size);
1356 
1357  // load the elevation data
1358  for (int i = 0; i < rows; i++) {
1359  for (int j = 0; j < cols; j++) {
1360  ss >> elev_val;
1361  elev_data.setVal(i, j, elev_val);
1362  }
1363  }
1364  return elev_data;
1365  }
1366 
1367  }; // class DataSource
1368 } // namespace bridges
1369 #endif
void setEventType(const string &et)
Set event type.
Definition: CancerIncidence.h:233
void setYear(int y)
Definition: CancerIncidence.h:183
void setMovieURI(std::string mu)
Definition: MovieActorWikidata.h:32
void setAffectedArea(const string &area)
Set cancer incidence area.
Definition: CancerIncidence.h:269
A Gutenberg Book object metadata only, used along with the books data source.
Definition: GutenbergBook.h:25
void setCrudeRate(double cr)
Definition: CancerIncidence.h:126
Definition: Cache.h:14
vector< Shakespeare > getShakespeareData(string type="", bool textonly=false)
Get data of Shakespeare works (plays, poems)
Definition: DataSource.h:262
Definition: ActorMovieIMDB.h:10
A Shakespeare Data source object containing sonnets, poems and plays.
Definition: Shakespeare.h:31
virtual void putDoc(const std::string &hash_value, const std::string &content) override
Definition: Cache.h:172
std::vector< MovieActorWikidata > getWikidataActorMovie(int yearbegin, int yearend)
This function returns the Movie and Actors playing in them between two years.
Definition: DataSource.h:1226
This is a helper class for accessing actor-movie data from Wikidata.
Definition: MovieActorWikidata.h:15
void setCrudeRate_CI_Lower(double cr_l)
Definition: CancerIncidence.h:145
Definition: Cache.h:144
void setCrudeRate_CI_Upper(double cr_u)
Set crude rate CI (upper)
Definition: CancerIncidence.h:165
vector< Game > getGameData()
Get meta data of the IGN games collection.
Definition: DataSource.h:94
STL namespace.
This class provides methods to represent adjacency list based graphs.
Definition: Element.h:19
void setActorName(std::string an)
Definition: MovieActorWikidata.h:56
A Song object, used along with the Songs data source.
Definition: Song.h:24
long OSMVertexID
Definition: OSMVertex.h:30
vector< Song > getSongData()
Get data of the songs (including lyrics) using the Genius API https://docs.genius.com/ Valid endpoints: https://bridgesdata.herokuapp.com/api/songs/.
Definition: DataSource.h:364
Class that holds Open Street Map edges.
Definition: OSMEdge.h:21
void setLocationX(double locX)
Set location (X coord)
Definition: CancerIncidence.h:303
This is a class in BRIDGES for representing an image.
Definition: ColorGrid.h:22
bridges::ColorGrid getColorGridFromAssignment(const std::string &user, int assignment, int subassignment=0)
Definition: DataSource.h:946
Definition: JSONutil.h:6
void setName(const string &n)
change the name of the dataset
Definition: OSMData.h:224
void setAgeAdjustedCI_Lower(double ci_l)
Definition: CancerIncidence.h:90
void setCount(int c)
Set cancer incidence count.
Definition: CancerIncidence.h:286
virtual std::string getDoc(const std::string &hash_value) override
Definition: Cache.h:156
ElevationData getElevationDataFromJSON(string elev_json)
Definition: DataSource.h:1335
vector< ActorMovieIMDB > getActorMovieIMDBData(int number=0)
Get ActorMovie IMDB data. Data is retrieved and formatted into a list of ActorMovieIMDB objects...
Definition: DataSource.h:132
This class represents Color, and supports rgba, hexadecimal and named color values.
Definition: Color.h:51
void getAmenityData(const std::string &location, const std::string &amenity)
Definition: DataSource.h:672
This class contains methods to connect and transmit a user's data structure representation to the Bri...
Definition: Bridges.h:39
void addVertex(const K &k, const E1 &e=E1())
Adds a vertex to the graph.
Definition: GraphAdjList.h:174
void setActorURI(std::string au)
Definition: MovieActorWikidata.h:40
void setLocationY(double locY)
Set location (Y coord)
Definition: CancerIncidence.h:321
vector< EarthquakeUSGS > getEarthquakeUSGSData(int number=0)
Get USGS earthquake data. USGS Tweet data (https://earthquake.usgs.gov/earthquakes/map/) retrieved...
Definition: DataSource.h:202
These methods convert byte arrays into base64 codes and are used in BRIDGES to represent the color a...
Definition: alltypes.h:4
ElevationData getElevationData(double latitMin, double longitMin, double latitMax, double longitMax, double res=0.0166)
Definition: DataSource.h:1257
This class provides an API to various data sources used in BRIDGES.
Definition: DataSource.h:59
void setPopulation(int pop)
Set population size.
Definition: CancerIncidence.h:251
void setyll(int y_ll)
Definition: ElevationData.h:183
void setMovieName(std::string mn)
Definition: MovieActorWikidata.h:48
void setVertices(const vector< OSMVertex > &verts)
replace the vertices stored by this new set.
Definition: OSMData.h:269
vector< GutenbergBook > getGutenbergBookData(int num=0)
Get meta data of the Gutenberg book collection. This function retrieves, and formats the data into a ...
Definition: DataSource.h:404
A class to hold actor movie data – using IMDB dataset.
Definition: ActorMovieIMDB.h:23
void setLatLongRange(double *lat_range, double *longit_range)
set the latitude and longitude range of the dataset
Definition: OSMData.h:108
void setAgeAdjustedCI_Upper(double ci_u)
Definition: CancerIncidence.h:109
vector< CancerIncidence > getCancerIncidenceData(int num=0)
Retrieves the CDC dataset of Cancer Incidence. Data is retrieved into a vector of records See CancerI...
Definition: DataSource.h:466
unsigned char BYTE
Definition: base64.h:44
Class that holds elevation data.
Definition: ElevationData.h:23
bridges::GraphAdjList< int, std::string > getGraphFromAssignment(const std::string &user, int assignment, int subassignment=0)
old interface for the OSM data set.
Definition: DataSource.h:816
void getAmenityData(double minLat, double minLon, double maxLat, double maxLon, std::string amenity)
Definition: DataSource.h:668
Song getSong(string songTitle, string artistName)
Get data of a particular song (including lyrics) using the Genius API (https://docs.genius.com/), given the song title and artist name. Valid endpoints: http://bridgesdata.herokuapp.com/api/songs/find/ Valid queryParams: song title, artist name.
Definition: DataSource.h:309
Class that holds Open Street Map Data.
Definition: OSMData.h:34
void setRace(const string &r)
Set race.
Definition: CancerIncidence.h:216
DataSource(bridges::Bridges *br=nullptr)
Definition: DataSource.h:75
void set(int row, int col, E val)
Set the grid value for the (row, col) element.
Definition: Grid.h:192
OSMData getOSMData(double lat_min, double long_min, double lat_max, double long_max, string level="default")
Get OpenStreetMap data given a bounding rectangle of lat/long values.
Definition: DataSource.h:593
void setxll(int x_ll)
Definition: ElevationData.h:166
OSMData getOSMData(string location, string level="default")
Definition: DataSource.h:695
DataSource(bridges::Bridges &br)
Definition: DataSource.h:78
void setCellSize(int cell_size)
Definition: ElevationData.h:201
void setAgeAdjustedRate(double aar)
Definition: CancerIncidence.h:69
virtual bool inCache(const std::string &hash_value) override
Definition: Cache.h:165
void setEdges(const vector< OSMEdge > &e)
set edges
Definition: OSMData.h:307
A Game object, used along with the Games data source.
Definition: Game.h:29
A class representing the attributes for cancer incidence.
Definition: CancerIncidence.h:26
OSMData getOSMDataFromJSON(const string &osm_json)
Retrieves the Open Street Map data from a prebuilt JSON of OSM dataset.
Definition: DataSource.h:518
Class that holds earthquake data, for use with USGS-retrieved quake data.
Definition: EarthquakeUSGS.h:21
void setVal(int r, int c, int val)
Definition: ElevationData.h:131
vector< ActorMovieIMDB > getActorMovieIMDBData2()
Get ActorMovie IMDB data. Data is retrieved and formatted into a list of ActorMovieIMDB objects...
Definition: DataSource.h:166
Definition: Array.h:9
void addEdge(const K &src, const K &dest, const E2 &data=E2())
Add an edge with data.
Definition: GraphAdjList.h:197
Class that holds Open Street Map vertices.
Definition: OSMVertex.h:28
vector< BYTE > decode(string const &encoded_string)
Definition: base64.h:103