#!/usr/bin/python3

"""
Import raw P1/11 data.

For each survey returned by the datastore, check for new
or modified final P1/11 files and (re-)import them into the
database.
"""
import os
import sys
import pathlib
import re
import time

import configuration
import p111
import fwr
from datastore import Datastore

if __name__ == '__main__':

    print("Reading configuration")
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)

    print("Connecting to database")
    db = Datastore()
    surveys = db.surveys()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')

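        # Each survey is stored under its own database schema; point the datastore
        # at it before doing anything survey-specific.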
        db.set_survey(survey["schema"])

        try:
            raw_p111 = survey["raw"]["p111"]
        except KeyError:
            print("No raw P1/11 configuration")
            sys.exit(0)

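        # Two alternative ways to derive file metadata from the file name are
        # supported: a regex with positional captures (`pattern`) and a fixed-width
        # field description parsed with fwr (`lineNameInfo`). If both are present,
        # the fwr result replaces the regex-derived file_info further down.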
        lineNameInfo = raw_p111.get("lineNameInfo")
        pattern = raw_p111.get("pattern")
        if not lineNameInfo:
            if not pattern:
                print("ERROR! Missing raw.p111.lineNameInfo in project configuration. Cannot import raw P111")
                raise Exception("Missing raw.p111.lineNameInfo")
            else:
                print("WARNING! No `lineNameInfo` in project configuration (raw.p111). You should add it to the settings.")

        rx = None
        if pattern and pattern.get("regex"):
            rx = re.compile(pattern["regex"])

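        # Files matching the optional "ntbp" pattern are flagged as NTBP (assumed
        # here to mean "not to be processed") when saved or updated below.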
if "ntbp" in survey["raw"]:
|
2020-09-01 10:59:29 +02:00
|
|
|
ntbpRx = re.compile(survey["raw"]["ntbp"]["pattern"]["regex"])
|
2020-08-31 13:04:46 +02:00
|
|
|
|
2020-08-10 22:52:43 +02:00
|
|
|
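        # Configured path prefixes are logical: translate_path() maps them to the
        # local filesystem for scanning, and untranslate_path() maps each hit back
        # so that the database records the logical path.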
        for fileprefix in raw_p111["paths"]:
            realprefix = configuration.translate_path(fileprefix)
            print(f"Path prefix: {fileprefix} → {realprefix}")

            for globspec in raw_p111["globs"]:
                for physical_filepath in pathlib.Path(realprefix).glob(globspec):
                    physical_filepath = str(physical_filepath)
                    logical_filepath = configuration.untranslate_path(physical_filepath)
                    print(f"Found {logical_filepath}")

                    if ntbpRx:
                        ntbp = ntbpRx.search(physical_filepath) is not None
                    else:
                        ntbp = False

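                    # Only files not already recorded in the database are parsed and
                    # saved; files that are already there just get their NTBP flag
                    # refreshed below.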
                    if not db.file_in_db(logical_filepath):
                        age = time.time() - os.path.getmtime(physical_filepath)
                        if age < file_min_age:
                            print("Skipping file because it is too new:", logical_filepath)
                            continue

                        print("Importing")

                        if rx:
                            match = rx.match(os.path.basename(logical_filepath))
                            if not match:
                                error_message = f"File path does not match the expected format! ({logical_filepath} ~ {pattern['regex']})"
                                print(error_message, file=sys.stderr)
                                print("This file will be ignored!")
                                continue

                            file_info = dict(zip(pattern["captures"], match.groups()))
                            file_info["meta"] = {}

                        if lineNameInfo:
                            basename = os.path.basename(physical_filepath)
                            fields = lineNameInfo.get("fields", {})
                            fixed = lineNameInfo.get("fixed")
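                            # fwr.parse_line() is expected to return a dict of field
                            # values on success; a str result or a ValueError signals
                            # a file name that does not fit the fixed-width layout.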
                            try:
                                parsed_line = fwr.parse_line(basename, fields, fixed)
                            except ValueError as err:
                                parsed_line = "Line format error: " + str(err)
                            if isinstance(parsed_line, str):
                                print(parsed_line, file=sys.stderr)
                                print("This file will be ignored!")
                                continue

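                            # Note: this replaces any file_info built from the regex
                            # pattern above, so the fixed-width fields take precedence.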
                            file_info = {}
                            file_info["sequence"] = parsed_line["sequence"]
                            file_info["line"] = parsed_line["line"]
                            del parsed_line["sequence"]
                            del parsed_line["line"]
                            file_info["meta"] = {
                                "fileInfo": parsed_line
                            }

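                        # Parse the P1/11 file and keep only its type "S" (source)
                        # records; files without source records are not saved.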
                        p111_data = p111.from_file(physical_filepath)

                        print("Saving")

                        p111_records = p111.p111_type("S", p111_data)
                        if p111_records:
                            file_info["meta"]["lineName"] = p111.line_name(p111_data)
                            db.save_raw_p111(p111_records, file_info, logical_filepath, survey["epsg"], ntbp=ntbp)
                        else:
                            print("No source records found in file")
                    else:
                        print("Already in DB")

                        # Update the NTBP status to whatever the latest is,
                        # as it might have changed.
                        db.set_ntbp(logical_filepath, ntbp)
                        if ntbp:
                            print("Sequence is NTBP")

    print("Done")