Files
dougal-software/bin/import_smsrc.py
D. Berge c99a625b60 Add function to retrieve survey configurations from DB.
As the survey definitions will no longer be stored in files
under etc/surveys/ but directly on the database, this
function replaces configuration.surveys()
2023-08-30 14:27:15 +02:00

84 lines
1.9 KiB
Python
Executable File

#!/usr/bin/python3
"""
Import SmartSource data.
For each survey in configuration.surveys(), check for new
or modified final gun header files and (re-)import them into the
database.
"""
import os
import sys
import pathlib
import re
import time
import configuration
import smsrc
from datastore import Datastore
if __name__ == '__main__':
print("Reading configuration")
file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)
print("Connecting to database")
db = Datastore()
surveys = db.surveys()
print("Reading surveys")
for survey in surveys:
print(f'Survey: {survey["id"]} ({survey["schema"]})')
db.set_survey(survey["schema"])
try:
raw_smsrc = survey["raw"]["smsrc"]
except KeyError:
print("No SmartSource data configuration")
continue
flags = 0
if "flags" in raw_smsrc:
configuration.rxflags(raw_smsrc["flags"])
pattern = raw_smsrc["pattern"]
rx = re.compile(pattern["regex"], flags)
for fileprefix in raw_smsrc["paths"]:
print(f"Path prefix: {fileprefix}")
for globspec in raw_smsrc["globs"]:
for filepath in pathlib.Path(fileprefix).glob(globspec):
filepath = str(filepath)
print(f"Found {filepath}")
if not db.file_in_db(filepath):
age = time.time() - os.path.getmtime(filepath)
if age < file_min_age:
print("Skipping file because too new", filepath)
continue
print("Importing")
match = rx.match(os.path.basename(filepath))
if not match:
error_message = f"File path not matching the expected format! ({filepath} ~ {pattern['regex']})"
print(error_message, file=sys.stderr)
print("This file will be ignored!")
continue
file_info = dict(zip(pattern["captures"], match.groups()))
smsrc_records = smsrc.from_file(filepath)
print("Saving")
db.save_raw_smsrc(smsrc_records, file_info, filepath)
else:
print("Already in DB")
print("Done")