#!/usr/bin/python3

# Provenance: bin/human_exports_seis.py (mode 100755) as added by commit
# 4e70090b403dfacc8201c3f0223d84b34a1b81f3, "Export structured sequence
# data to JSON files.", D. Berge, Sat, 26 Sep 2020 22:57:36 +0200.

"""
Export data that is entered directly into Dougal
as opposed to being read from external sources.

This data will be read back in when the database
is recreated for an existing survey.

Structured sequence data is exported to JSON files, one per sequence.
The script is meant to be run by runner.sh. It will not overwrite
existing files: if a sequence is modified after the first export, the
resulting file needs to be removed by the user before a re-export will
occur. The idea is to eventually export on demand when a new row is
added to final_lines.
"""

import json
import os
import pathlib
import string
from glob import glob

import requests

import configuration
#from datastore import Datastore

# Base URL of the HTTP API queried for sequences and events.
API_URL = "http://localhost:3000/api"

# Seconds to wait for the connection and for each chunk of the response.
# This is not a limit on the total download time, so it only trips when
# the server has stopped responding altogether.
REQUEST_TIMEOUT = 300

# Only sequences with one of these statuses are exported.
EXPORTABLE_STATUSES = ("final", "ntbp")

# Characters that sane_name() lets through. Built once; set membership
# is O(1) per character.
_ALLOWED_CHARS = frozenset(
    string.ascii_letters + string.digits + " _-#+&^%$!();:.,"
)


def sane_name(filename):
    """
    Return filename with every character outside the whitelist removed.

    The whitelist is ASCII letters, digits and the characters
    " _-#+&^%$!();:.,". Path separators are therefore dropped.
    """
    return "".join(c for c in filename if c in _ALLOWED_CHARS)


def write_file(filename, payload):
    """
    Serialise payload as indented JSON into filename.

    filename may be a pathlib.Path or a plain string. Missing parent
    directories are created. The data is first written to
    "<filename>.tmp" in the same directory and then moved into place,
    so that a partially written export never appears under the final
    name. If anything fails, the temporary file is removed and the
    exception is re-raised.
    """
    filename = pathlib.Path(filename)
    print("Writing to", filename)
    tmpname = filename.parent / (filename.name + ".tmp")
    filename.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(tmpname, "w", encoding="utf-8") as fd:
            json.dump(payload, fd, indent=4)
        os.replace(tmpname, filename)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        try:
            os.unlink(tmpname)
        except FileNotFoundError:
            pass
        raise


def seis_data(survey):
    """
    Export the events of every finished sequence of survey.

    survey is a mapping providing "id" and, for the export to happen,
    an export directory under survey["exports"]["human"]["path"].
    Without that entry a message is printed and nothing is exported.

    The list of sequences is requested from the API; for each sequence
    whose status is "final" or "ntbp", the events are requested with
    an "application/vnd.seis+json" Accept header and stored under
    <path>/SSE/sequenceNNN.json. Sequences whose file already exists
    are skipped. Failed requests are reported and skipped.

    Raises ValueError if the configured export path does not exist.
    """
    try:
        path_prefix = survey["exports"]["human"]["path"]
    except KeyError:
        print("Survey does not define an export path for human data")
        return

    if not pathlib.Path(path_prefix).exists():
        print(path_prefix)
        raise ValueError("Export path does not exist")

    print(f"Requesting sequences for {survey['id']}")
    url = f"{API_URL}/project/{survey['id']}/sequence"
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    print(r.status_code, url)
    if r.status_code != requests.codes.ok:
        # An error body is not a list of sequences; do not iterate it.
        print(f"Could not retrieve sequences for {survey['id']}")
        return

    headers = {"Accept": "application/vnd.seis+json"}
    for sequence in r.json():
        if sequence['status'] not in EXPORTABLE_STATUSES:
            continue

        number = sequence['sequence']
        filename = pathlib.Path(
            path_prefix, "SSE", "sequence{:0>3d}.json".format(number)
        )
        if filename.exists():
            print(f"Skipping export for sequence {number} – file already exists")
            continue

        print(f"Processing sequence {number}")
        url = f"{API_URL}/project/{survey['id']}/event?sequence={number}"
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if r.status_code == requests.codes.ok:
            write_file(filename, r.json())
        else:
            # Nothing is written, so the next run will try again.
            print(f"Export of sequence {number} failed:", r.status_code, url)


def main():
    """Run the export for every survey returned by configuration.surveys()."""
    print("Reading configuration")
    surveys = configuration.surveys()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')
        seis_data(survey)

    print("Done")


if __name__ == '__main__':
    main()