Review notes (free-form text ahead of the diff header, as in a mailed patch).

Purpose of the patch: read the SmartSource header configuration from
survey["raw"]["source"]["smsrc"]["header"], make the file-name regex
optional, and add an alternative file-name parser driven by "lineNameInfo"
(via fwr.parse_line).

Layout: the patch is re-flowed to one diff line per source line. Indentation
(4 spaces per level) and the position of blank context lines are
RECONSTRUCTED -- confirm them against bin/import_smsrc.py before applying.
The "index" line is carried over unchanged and does not describe the revised
post-image.

Changes made to the added lines during review:
- Skip the survey when neither a regex nor lineNameInfo is configured. In
  that case neither file-name branch runs, so file_info would not be
  assigned for the file (or would keep the value from the previous file).
  NOTE(review): assumes file_info is consumed further down -- confirm.
- Replace the commented-out flags code with a note explaining it.
- Use isinstance() instead of type(...) == str.
- Build file_info with dict.pop() instead of index-then-del.
- Hunk headers recomputed for the new line counts.

diff --git a/bin/import_smsrc.py b/bin/import_smsrc.py
index b3fd4e7..7f91e81 100755
--- a/bin/import_smsrc.py
+++ b/bin/import_smsrc.py
@@ -15,6 +15,7 @@ import re
 import time
 import configuration
 import smsrc
+import fwr
 from datastore import Datastore
 
 if __name__ == '__main__':
@@ -33,17 +34,27 @@ if __name__ == '__main__':
         db.set_survey(survey["schema"])
 
         try:
-            raw_smsrc = survey["raw"]["smsrc"]
+            raw_smsrc = survey["raw"]["source"]["smsrc"]["header"]
         except KeyError:
             print("No SmartSource data configuration")
             continue
 
-        flags = 0
-        if "flags" in raw_smsrc:
-            configuration.rxflags(raw_smsrc["flags"])
+        # NOTE: the optional "flags" setting is not applied. The previous
+        # code called configuration.rxflags() but discarded its result, so
+        # the pattern was always compiled with flags=0; that behaviour is
+        # kept here.
 
-        pattern = raw_smsrc["pattern"]
-        rx = re.compile(pattern["regex"], flags)
+        lineNameInfo = raw_smsrc.get("lineNameInfo")
+        pattern = raw_smsrc.get("pattern")
+        rx = None
+        if pattern and pattern.get("regex"):
+            rx = re.compile(pattern["regex"])
+
+        # With neither a regex nor lineNameInfo, no file-name branch below
+        # would run and no file_info would be built for this survey's files.
+        if rx is None and not lineNameInfo:
+            print('No SmartSource file name configuration ("pattern" or "lineNameInfo")', file=sys.stderr)
+            continue
 
         for fileprefix in raw_smsrc["paths"]:
             realprefix = configuration.translate_path(fileprefix)
@@ -64,14 +75,37 @@ if __name__ == '__main__':
 
                         print("Importing")
 
-                        match = rx.match(os.path.basename(logical_filepath))
-                        if not match:
-                            error_message = f"File path not matching the expected format! ({logical_filepath} ~ {pattern['regex']})"
-                            print(error_message, file=sys.stderr)
-                            print("This file will be ignored!")
-                            continue
+                        if rx:
+                            match = rx.match(os.path.basename(logical_filepath))
+                            if not match:
+                                error_message = f"File path not matching the expected format! ({logical_filepath} ~ {pattern['regex']})"
+                                print(error_message, file=sys.stderr)
+                                print("This file will be ignored!")
+                                continue
 
-                        file_info = dict(zip(pattern["captures"], match.groups()))
+                            file_info = dict(zip(pattern["captures"], match.groups()))
+                            file_info["meta"] = {}
+
+                        # When lineNameInfo is configured its result replaces any regex-derived file_info.
+                        if lineNameInfo:
+                            basename = os.path.basename(physical_filepath)
+                            fields = lineNameInfo.get("fields", {})
+                            fixed = lineNameInfo.get("fixed")
+                            try:
+                                parsed_line = fwr.parse_line(basename, fields, fixed)
+                            except ValueError as err:
+                                parsed_line = "Line format error: " + str(err)
+                            # A str here is an error message rather than parsed fields.
+                            if isinstance(parsed_line, str):
+                                print(parsed_line, file=sys.stderr)
+                                print("This file will be ignored!")
+                                continue
+
+                            file_info = {
+                                "sequence": parsed_line.pop("sequence"),
+                                "line": parsed_line.pop("line"),
+                                "meta": {"fileInfo": parsed_line},
+                            }
 
                         smsrc_records = smsrc.from_file(physical_filepath)
 