Mirror of https://gitlab.com/wgp/dougal/software.git, synced 2025-12-06 11:07:08 +00:00
We now check that a file is at least a few seconds old before attempting to import it. The minimum age can be configured in etc/config.yaml; otherwise it defaults to 10 seconds. The idea is that this should give the OS enough time to fully write the file before we import it. The timestamp checked is the file's modification time. Fixes #92.
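For reference, a minimal sketch of the age gate described above, assuming configuration.read() returns the parsed etc/config.yaml as a dict (the imports/file_min_age key path is what the import script below reads; the old_enough helper is hypothetical):

import os
import time
import configuration

# 'imports' -> 'file_min_age' is the key path the import script reads; 10 seconds is the default.
file_min_age = configuration.read().get("imports", {}).get("file_min_age", 10)

def old_enough(path):
    """True once the file's modification time is at least file_min_age seconds in the past."""
    return (time.time() - os.path.getmtime(path)) >= file_min_age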
99 lines · 2.5 KiB · Python · Executable File
#!/usr/bin/python3

"""
Import raw p190.

For each survey in configuration.surveys(), check for new
or modified final P1/90 files and (re-)import them into the
database.
"""

import os
import sys
import pathlib
import re
import time
import configuration
import p190
from datastore import Datastore

if __name__ == '__main__':

    print("Reading configuration")
    surveys = configuration.surveys()
    # Minimum age (in seconds) a file must reach before we import it;
    # configurable in etc/config.yaml under imports.file_min_age, default 10.
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)

    print("Connecting to database")
    db = Datastore()
    db.connect()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')

        db.set_survey(survey["schema"])

        try:
            raw_p190 = survey["raw"]["p190"]
        except KeyError:
            print("No raw P1/90 configuration")
            exit(0)

        # File name pattern used to extract metadata (captures) from each P1/90 file name.
        pattern = raw_p190["pattern"]
        rx = re.compile(pattern["regex"])

        # Optional "ntbp" pattern: files matching it are flagged accordingly when saved.
        if "ntbp" in survey["raw"]:
            ntbpRx = re.compile(survey["raw"]["ntbp"]["pattern"]["regex"])
        else:
            # Make sure ntbpRx is always defined (and not left over from a previous survey).
            ntbpRx = None

        for fileprefix in raw_p190["paths"]:
            print(f"Path prefix: {fileprefix}")

            for globspec in raw_p190["globs"]:
                for filepath in pathlib.Path(fileprefix).glob(globspec):
                    filepath = str(filepath)
                    print(f"Found {filepath}")

                    if not db.file_in_db(filepath):

                        # Skip files modified too recently: the OS may still be writing them.
                        age = time.time() - os.path.getmtime(filepath)
                        if age < file_min_age:
                            print("Skipping file because too new", filepath)
                            continue

                        print("Importing")

                        match = rx.match(os.path.basename(filepath))
                        if not match:
                            error_message = f"File path does not match the expected format! ({filepath} ~ {pattern['regex']})"
                            print(error_message, file=sys.stderr)
                            print("This file will be ignored!")
                            continue

                        # Map the regex capture groups onto their configured names.
                        file_info = dict(zip(pattern["captures"], match.groups()))
                        if ntbpRx:
                            ntbp = ntbpRx.match(filepath) is not None
                        else:
                            ntbp = False

                        # Parse, timestamp and normalise the P1/90 data.
                        p190_data = p190.from_file(filepath, with_objrefs=True)
                        p190_data_timestamped = p190.apply_tstamps(
                            p190_data,
                            raw_p190["timestamp_format"],
                            fix_bad_seconds=True
                        )
                        p190_data_normalised = p190.normalise(p190_data_timestamped)

                        print("Saving")

                        p190_records = p190.p190_type("S", p190_data_normalised)

                        # Offset definitions come from the H09 header records.
                        file_data = dict()
                        file_data["offsets_p190"] = [
                            h["description"].strip().split(" to ") + h["data"].split()
                            for h in p190_data_normalised
                            if h["record_type"] == "H" and h["header_type"] == "09"
                        ]

                        db.save_raw_p190(p190_records, file_info, filepath, survey["epsg"], file_data, ntbp)
                    else:
                        print("Already in DB")

    print("Done")