Files
dougal-software/bin/import_final_p190.py
D. Berge 0fdb42c593 Do not import files that have just been modified.
We now check that a file is at least a few seconds old
before attempting to import it.

The actual minimum age can be configured in etc/config.yaml or
else it defaults to 10 seconds.

The idea is that this should give the OS enough time to fully
write the file before we import it.

The timestamp being looked at is the modification time.

Fixes #92.
2021-05-07 13:50:32 +02:00

93 lines
2.3 KiB
Python
Executable File

#!/usr/bin/python3
"""
Import final p190.
For each survey in configuration.surveys(), check for new
or modified final P1/90 files and (re-)import them into the
database.
"""
import os
import sys
import pathlib
import re
import time
import configuration
import p190
from datastore import Datastore
if __name__ == '__main__':
    print("Reading configuration")
    surveys = configuration.surveys()
    # Minimum age in seconds (by modification time) a file must reach before
    # we import it, giving the OS time to finish writing it (issue #92).
    # Configurable via imports.file_min_age in the config; defaults to 10.
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)
    print("Connecting to database")
    db = Datastore()
    db.connect()
    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')
        db.set_survey(survey["schema"])
        try:
            final_p190 = survey["final"]["p190"]
        except KeyError:
            # BUGFIX: was exit(0), which terminated the whole program on the
            # first survey without a final-P1/90 section and silently skipped
            # every remaining survey.  Skip only this survey instead.
            print("No final P1/90 configuration")
            continue
        pattern = final_p190["pattern"]
        # Compile once per survey; used to match every candidate filename below.
        rx = re.compile(pattern["regex"])
        for fileprefix in final_p190["paths"]:
            print(f"Path prefix: {fileprefix}")
            for globspec in final_p190["globs"]:
                for filepath in pathlib.Path(fileprefix).glob(globspec):
                    filepath = str(filepath)
                    print(f"Found {filepath}")
                    if db.file_in_db(filepath):
                        print("Already in DB")
                        continue
                    # Skip files modified too recently: they may still be
                    # in the process of being written.
                    age = time.time() - os.path.getmtime(filepath)
                    if age < file_min_age:
                        print("Skipping file because too new", filepath)
                        continue
                    print("Importing")
                    match = rx.match(os.path.basename(filepath))
                    if not match:
                        # Grammar fix: message previously read "File path not
                        # match the expected format!".
                        error_message = f"File path does not match the expected format! ({filepath} ~ {pattern['regex']})"
                        print(error_message, file=sys.stderr)
                        print("This file will be ignored!")
                        continue
                    # Pair each configured capture name with its regex group.
                    file_info = dict(zip(pattern["captures"], match.groups()))
                    p190_data = p190.from_file(filepath, with_objrefs=True)
                    p190_data_timestamped = p190.apply_tstamps(
                        p190_data,
                        final_p190["timestamp_format"],
                        fix_bad_seconds=True
                    )
                    p190_data_normalised = p190.normalise(p190_data_timestamped)
                    print("Saving")
                    p190_records = p190.p190_type("S", p190_data_normalised)
                    # NOTE(review): file_data is built from the H09 header
                    # records but is never passed to save_final_p190 or used
                    # elsewhere in this file — looks like unfinished work.
                    # Confirm intent before removing.
                    file_data = dict()
                    file_data["offsets_p190"] = [h["description"].strip().split(" to ")+h["data"].split() for h in p190_data_normalised if h["record_type"] == "H" and h["header_type"] == "09"]
                    db.save_final_p190(p190_records, file_info, filepath, survey["epsg"])
    print("Done")