mirror of
https://gitlab.com/wgp/dougal/software.git
synced 2025-12-06 11:17:08 +00:00
Export structured sequence data to JSON files.
This script is meant to be run by runner.sh. It will not overwrite existing files: if a sequence is modified after its first export, the user must remove the resulting file before a re-export will occur. The idea is to eventually export on demand when a new raw is added to final_lines.
This commit is contained in:
74
bin/human_exports_seis.py
Executable file
74
bin/human_exports_seis.py
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
"""
|
||||
Export data that is entered directly into Dougal
|
||||
as opposed to being read from external sources.
|
||||
|
||||
This data will be read back in when the database
|
||||
is recreated for an existing survey.
|
||||
"""
|
||||
|
||||
import os
|
||||
from glob import glob
|
||||
import pathlib
|
||||
import string
|
||||
import configuration
|
||||
import requests
|
||||
import json
|
||||
#from datastore import Datastore
|
||||
|
||||
def sane_name(filename):
    """Return *filename* with every non-whitelisted character removed.

    The whitelist consists of the ASCII letters, the digits and the
    characters `` _-#+&^%$!();:.,``. Anything else (path separators,
    quotes, non-ASCII characters, ...) is dropped, not replaced.
    """
    permitted = string.ascii_letters + string.digits + " _-#+&^%$!();:.,"
    return ''.join(ch for ch in filename if ch in permitted)
|
||||
|
||||
def write_file(filename, payload):
    """Serialise *payload* as indented JSON and write it to *filename*.

    The data is first written to a sibling ``<name>.tmp`` file which is
    then moved over the target, so the target never holds a partially
    written document.

    Args:
        filename: Destination path (``pathlib.Path`` or string). Missing
            parent directories are created.
        payload: Any object that ``json.dump`` can serialise.

    Raises:
        TypeError, ValueError: If *payload* cannot be serialised to JSON.
        OSError: If the directory or file cannot be created or replaced.
    """
    # Accept plain strings as well as Path objects.
    filename = pathlib.Path(filename)
    print("Writing to", filename)

    tmpname = filename.parent / (filename.name + ".tmp")
    filename.parent.mkdir(parents=True, exist_ok=True)

    try:
        with open(tmpname, "w", encoding="utf-8") as fd:
            json.dump(payload, fd, indent=4)
        # os.replace overwrites an existing target on every platform,
        # whereas os.rename fails on Windows if the target exists.
        os.replace(tmpname, filename)
    except BaseException:
        # Do not leave a stale temporary file behind on failure.
        try:
            os.unlink(tmpname)
        except FileNotFoundError:
            pass
        raise
|
||||
|
||||
def seis_data(survey):
    """Export the events of each final / NTBP sequence of *survey* to JSON.

    The sequence list is requested from the local API. For every sequence
    whose status is ``final`` or ``ntbp`` and which has no export file yet,
    the sequence's events are requested with the
    ``application/vnd.seis+json`` media type and written to
    ``<export path>/SSE/sequenceNNN.json``. Existing files are never
    overwritten.

    Args:
        survey: Survey configuration mapping. Reads ``survey["id"]`` and
            ``survey["exports"]["human"]["path"]``.

    Returns:
        None. Returns early (after printing a message) if the survey
        defines no export path or the sequence list cannot be retrieved.

    Raises:
        ValueError: If the configured export path does not exist.
        requests.exceptions.RequestException: On connection errors or
            timeouts while talking to the API.
    """
    try:
        pathPrefix = survey["exports"]["human"]["path"]
    except (KeyError, TypeError):
        # TypeError covers an "exports" or "human" entry that is present
        # but empty (None) in the configuration.
        print("Survey does not define an export path for human data")
        return

    if not pathlib.Path(pathPrefix).exists():
        print(pathPrefix)
        raise ValueError("Export path does not exist")

    # (connect, read) timeouts in seconds: without them a stalled server
    # would block the script forever.
    timeout = (10, 300)

    print(f"Requesting sequences for {survey['id']}")
    list_url = f"http://localhost:3000/api/project/{survey['id']}/sequence"
    list_response = requests.get(list_url, timeout=timeout)
    print(list_response.status_code, list_url)
    if list_response.status_code != requests.codes.ok:
        # An error body is not a sequence list; do not try to iterate it.
        print(f"Could not retrieve sequences for {survey['id']}")
        return

    headers = {"Accept": "application/vnd.seis+json"}

    for sequence in list_response.json():
        if sequence['status'] not in ("final", "ntbp"):
            continue

        number = sequence['sequence']
        filename = pathlib.Path(pathPrefix, "SSE", "sequence{:0>3d}.json".format(number))
        if filename.exists():
            print(f"Skipping export for sequence {number} – file already exists")
            continue

        print(f"Processing sequence {number}")
        event_url = f"http://localhost:3000/api/project/{survey['id']}/event?sequence={number}"
        event_response = requests.get(event_url, headers=headers, timeout=timeout)
        if event_response.status_code == requests.codes.ok:
            write_file(filename, event_response.json())
        else:
            # Report the failure; the missing file means the export is
            # attempted again on the next run.
            print(f"Export failed for sequence {number}:", event_response.status_code, event_url)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load the survey configuration, then run the
    # export for each configured survey in turn.
    print("Reading configuration")
    all_surveys = configuration.surveys()

    print("Reading surveys")
    for entry in all_surveys:
        print("Survey: {} ({})".format(entry["id"], entry["schema"]))
        seis_data(entry)

    print("Done")
|
||||
Reference in New Issue
Block a user