Mirror of https://gitlab.com/wgp/dougal/software.git, synced 2025-12-06 11:07:08 +00:00
We now check that a file is at least a few seconds old before attempting to import it. The minimum age can be configured in etc/config.yaml; otherwise it defaults to 10 seconds. The idea is that this should give the OS enough time to fully write the file before we import it. The timestamp checked is the file's modification time. Fixes #92.
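For reference, a minimal sketch of the age gate described above, assuming configuration.read() returns the parsed etc/config.yaml as a dict (the imports/file_min_age key path is what the import script below reads; the old_enough helper is hypothetical):

import os
import time
import configuration

# 'imports' -> 'file_min_age' is the key path the import script reads; 10 seconds is the default.
file_min_age = configuration.read().get("imports", {}).get("file_min_age", 10)

def old_enough(path):
    """True once the file's modification time is at least file_min_age seconds in the past."""
    return (time.time() - os.path.getmtime(path)) >= file_min_age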
99 lines · 2.5 KiB · Python · Executable File
#!/usr/bin/python3

"""
Import raw p190.

For each survey in configuration.surveys(), check for new
or modified final P1/90 files and (re-)import them into the
database.
"""

import os
import sys
import pathlib
import re
import time
import configuration
import p190
from datastore import Datastore

if __name__ == '__main__':

    print("Reading configuration")
    surveys = configuration.surveys()
    # Minimum age (in seconds) a file must reach before we import it;
    # configurable in etc/config.yaml under imports.file_min_age, default 10.
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)

    print("Connecting to database")
    db = Datastore()
    db.connect()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')

        db.set_survey(survey["schema"])

        try:
            raw_p190 = survey["raw"]["p190"]
        except KeyError:
            print("No raw P1/90 configuration")
            exit(0)

        # File name pattern used to extract metadata (captures) from each P1/90 file name.
        pattern = raw_p190["pattern"]
        rx = re.compile(pattern["regex"])

        # Optional "ntbp" pattern: files matching it are flagged accordingly when saved.
        if "ntbp" in survey["raw"]:
            ntbpRx = re.compile(survey["raw"]["ntbp"]["pattern"]["regex"])
        else:
            # Make sure ntbpRx is always defined (and not left over from a previous survey).
            ntbpRx = None

        for fileprefix in raw_p190["paths"]:
            print(f"Path prefix: {fileprefix}")

            for globspec in raw_p190["globs"]:
                for filepath in pathlib.Path(fileprefix).glob(globspec):
                    filepath = str(filepath)
                    print(f"Found {filepath}")

                    if not db.file_in_db(filepath):

                        # Skip files modified too recently: the OS may still be writing them.
                        age = time.time() - os.path.getmtime(filepath)
                        if age < file_min_age:
                            print("Skipping file because too new", filepath)
                            continue

                        print("Importing")

                        match = rx.match(os.path.basename(filepath))
                        if not match:
                            error_message = f"File path does not match the expected format! ({filepath} ~ {pattern['regex']})"
                            print(error_message, file=sys.stderr)
                            print("This file will be ignored!")
                            continue

                        # Map the regex capture groups onto their configured names.
                        file_info = dict(zip(pattern["captures"], match.groups()))
                        if ntbpRx:
                            ntbp = ntbpRx.match(filepath) is not None
                        else:
                            ntbp = False

                        # Parse, timestamp and normalise the P1/90 data.
                        p190_data = p190.from_file(filepath, with_objrefs=True)
                        p190_data_timestamped = p190.apply_tstamps(
                            p190_data,
                            raw_p190["timestamp_format"],
                            fix_bad_seconds=True
                        )
                        p190_data_normalised = p190.normalise(p190_data_timestamped)

                        print("Saving")

                        p190_records = p190.p190_type("S", p190_data_normalised)

                        # Offset definitions come from the H09 header records.
                        file_data = dict()
                        file_data["offsets_p190"] = [
                            h["description"].strip().split(" to ") + h["data"].split()
                            for h in p190_data_normalised
                            if h["record_type"] == "H" and h["header_type"] == "09"
                        ]

                        db.save_raw_p190(p190_records, file_info, filepath, survey["epsg"], file_data, ntbp)
                    else:
                        print("Already in DB")

    print("Done")