Files
dougal-software/bin/import_raw_p190.py

99 lines
2.5 KiB
Python
Raw Normal View History

2020-08-08 23:59:13 +02:00
#!/usr/bin/python3
"""
Import raw P1/90 files.

For each survey in configuration.surveys(), check for new
or modified raw P1/90 files and (re-)import them into the
database.
"""
import os
2020-08-29 10:40:55 +02:00
import sys
import pathlib
2020-08-08 23:59:13 +02:00
import re
import time
2020-08-08 23:59:13 +02:00
import configuration
import p190
from datastore import Datastore
if __name__ == '__main__':
    # Script entry point.
    #
    # Walks every survey returned by configuration.surveys(), looks for raw
    # P1/90 files matching the survey's configured paths/globs, and imports
    # each file that the database does not already know about.

    print("Reading configuration")
    surveys = configuration.surveys()
    # Minimum age, in seconds, a file must have (now - mtime) before it is
    # imported; defaults to 10 when the configuration does not set it.
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)

    print("Connecting to database")
    db = Datastore()
    db.connect()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')
        db.set_survey(survey["schema"])

        try:
            raw_p190 = survey["raw"]["p190"]
        except KeyError:
            # A survey without raw P1/90 settings must not stop the whole
            # run: skip it and carry on with the remaining surveys.
            print("No raw P1/90 configuration")
            continue

        pattern = raw_p190["pattern"]
        rx = re.compile(pattern["regex"])

        # Reset for every survey so that a survey without an "ntbp" section
        # neither raises NameError nor inherits the previous survey's regex.
        ntbpRx = None
        if "ntbp" in survey["raw"]:
            ntbpRx = re.compile(survey["raw"]["ntbp"]["pattern"]["regex"])

        for fileprefix in raw_p190["paths"]:
            print(f"Path prefix: {fileprefix}")

            for globspec in raw_p190["globs"]:
                for filepath in pathlib.Path(fileprefix).glob(globspec):
                    filepath = str(filepath)
                    print(f"Found {filepath}")

                    if db.file_in_db(filepath):
                        print("Already in DB")
                        continue

                    # Skip files modified very recently.
                    # NOTE(review): presumably to avoid reading files that
                    # are still being written -- confirm.
                    age = time.time() - os.path.getmtime(filepath)
                    if age < file_min_age:
                        print("Skipping file because too new", filepath)
                        continue

                    print("Importing")

                    # The file name (not the full path) must match the
                    # configured regex; its groups are paired with the
                    # configured capture names to build file_info.
                    match = rx.match(os.path.basename(filepath))
                    if not match:
                        error_message = f"File path not match the expected format! ({filepath} ~ {pattern['regex']})"
                        print(error_message, file=sys.stderr)
                        print("This file will be ignored!")
                        continue

                    file_info = dict(zip(pattern["captures"], match.groups()))

                    # The ntbp flag is true only when an ntbp regex is
                    # configured and it matches the full file path.
                    ntbp = ntbpRx is not None and ntbpRx.match(filepath) is not None

                    p190_data = p190.from_file(filepath, with_objrefs=True)
                    p190_data_timestamped = p190.apply_tstamps(
                        p190_data,
                        raw_p190["timestamp_format"],
                        fix_bad_seconds=True
                    )
                    p190_data_normalised = p190.normalise(p190_data_timestamped)

                    print("Saving")

                    p190_records = p190.p190_type("S", p190_data_normalised)

                    # Collect one entry per "H" record with header type "09":
                    # the description split on " to " followed by the
                    # whitespace-separated data fields.
                    file_data = dict()
                    file_data["offsets_p190"] = [
                        h["description"].strip().split(" to ") + h["data"].split()
                        for h in p190_data_normalised
                        if h["record_type"] == "H" and h["header_type"] == "09"
                    ]

                    db.save_raw_p190(p190_records, file_info, filepath, survey["epsg"], file_data, ntbp)

    print("Done")