Export structured sequence data to JSON files.

Script meant to be run by runner.sh.

It will not overwrite existing files. If a
sequence is modified after the first export,
the resulting file needs to be removed by the
user before a re-export will occur.

The idea is to eventually export on demand
when a new row is added to final_lines.
This commit is contained in:
D. Berge
2020-09-26 22:57:36 +02:00
parent acf58df59f
commit 4e70090b40

74
bin/human_exports_seis.py Executable file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/python3
"""
Export data that is entered directly into Dougal
as opposed to being read from external sources.
This data will be read back in when the database
is recreated for an existing survey.
"""
import os
from glob import glob
import pathlib
import string
import configuration
import requests
import json
#from datastore import Datastore
# Characters allowed in a sanitised name. Built once at import time so that
# each membership test in sane_name() is a constant-time set lookup.
_SANE_NAME_CHARS = frozenset(
    string.ascii_letters + string.digits + " _-#+&^%$!();:.,"
)


def sane_name(filename):
    """
    Return *filename* with every character outside the allowed set removed.

    The allowed set is ASCII letters, digits, the space character and the
    punctuation ``_-#+&^%$!();:.,``. Path separators, control characters and
    non-ASCII characters are dropped, not replaced. The result may be an
    empty string if no character of the input is allowed.
    """
    return ''.join(c for c in filename if c in _SANE_NAME_CHARS)
def write_file(filename, payload):
    """
    Serialise *payload* as indented JSON into *filename*.

    The data is first written to a sibling file named ``<filename>.tmp``
    and then moved over the final name, so a reader never sees a partially
    written export under the final name. Missing parent directories are
    created.

    filename -- destination path (``pathlib.Path`` or ``str``).
    payload  -- any object that ``json.dump`` can serialise.

    Any exception raised while writing or moving the file is propagated
    after the temporary file has been removed.
    """
    # Accept plain strings as well as Path objects.
    filename = pathlib.Path(filename)
    print("Writing to", filename)
    tmpname = filename.parent / (filename.name + ".tmp")
    filename.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(tmpname, "w", encoding="utf-8") as fd:
            json.dump(payload, fd, indent=4)
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises on Windows if the target exists.
        os.replace(tmpname, filename)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        try:
            os.unlink(tmpname)
        except FileNotFoundError:
            pass
        raise
def seis_data(survey, *, base_url="http://localhost:3000/api", timeout=60):
    """
    Export the events of a survey's sequences to one JSON file per sequence.

    The list of sequences is requested from the API; for every sequence
    whose status is "final" or "ntbp", the sequence's events are requested
    with an ``Accept: application/vnd.seis+json`` header and written to
    ``<export path>/SSE/sequenceNNN.json``. Files that already exist are
    never overwritten; remove them by hand to force a re-export.

    survey   -- survey configuration mapping. Must contain "id"; the export
                path is read from survey["exports"]["human"]["path"].
    base_url -- root URL of the API (keyword only).
    timeout  -- timeout in seconds passed to every HTTP request
                (keyword only).

    Returns None. If the survey defines no export path, a message is
    printed and nothing else happens.

    Raises ValueError if the export path does not exist, and
    requests.HTTPError if the sequence list cannot be retrieved. A failed
    request for an individual sequence is reported and skipped.
    """
    try:
        pathPrefix = survey["exports"]["human"]["path"]
    except KeyError:
        print("Survey does not define an export path for human data")
        return

    if not pathlib.Path(pathPrefix).exists():
        print(pathPrefix)
        raise ValueError("Export path does not exist")

    project_url = f"{base_url}/project/{survey['id']}"

    print(f"Requesting sequences for {survey['id']}")
    sequences_url = f"{project_url}/sequence"
    # Without a timeout, requests waits forever on an unresponsive server.
    listing = requests.get(sequences_url, timeout=timeout)
    print(listing.status_code, sequences_url)
    # Fail with a clear HTTP error instead of trying to iterate over an
    # error payload as if it were the list of sequences.
    listing.raise_for_status()

    events_url = f"{project_url}/event"
    headers = {"Accept": "application/vnd.seis+json"}

    for sequence in listing.json():
        if sequence['status'] not in ("final", "ntbp"):
            continue

        number = sequence['sequence']
        filename = pathlib.Path(pathPrefix, "SSE", f"sequence{number:0>3d}.json")
        if filename.exists():
            print(f"Skipping export for sequence {number} file already exists")
            continue

        print(f"Processing sequence {number}")
        response = requests.get(
            events_url,
            params={"sequence": number},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code == requests.codes.ok:
            write_file(filename, response.json())
        else:
            # Keep going with the other sequences, but say what went wrong.
            print(f"Export failed for sequence {number}: HTTP {response.status_code}")
def main():
    """
    Read the survey configuration and run the export for each survey.
    """
    print("Reading configuration")
    configured = configuration.surveys()

    print("Reading surveys")
    for entry in configured:
        print(f'Survey: {entry["id"]} ({entry["schema"]})')
        seis_data(entry)

    print("Done")


if __name__ == '__main__':
    main()