2020-08-10 22:54:36 +02:00
#!/usr/bin/python3
"""
Import final P1/11.

For each survey returned by the datastore (Datastore.surveys()), check
for new or modified final P1/11 files and (re-)import them into the
database.
"""
import os
2020-08-29 10:40:55 +02:00
import sys
2020-08-28 13:49:43 +02:00
import pathlib
2020-08-10 22:54:36 +02:00
import re
2021-05-07 13:50:32 +02:00
import time
2020-08-10 22:54:36 +02:00
import configuration
import p111
2024-05-04 17:35:05 +02:00
import fwr
2020-08-10 22:54:36 +02:00
from datastore import Datastore
2021-05-21 15:15:15 +02:00
def add_pending_remark(db, sequence):
    """Prepend the PENDING banner to the remarks of a raw line.

    Reads ``remarks`` from the ``raw_lines`` row matching *sequence* and,
    unless the text already starts with the banner, writes it back with
    the banner prepended and calls ``db.maybe_commit()``.

    Args:
        db: Object exposing ``conn.cursor()`` (usable as a context
            manager) and ``maybe_commit()``.
        sequence: Value matched against ``raw_lines.sequence``.

    Returns:
        None. Nothing is written when no row exists for *sequence* or
        when the banner is already present.
    """
    text = '<!-- @@DGL:PENDING@@ --><h4 style="color:red;cursor:help;" title="Edit the sequence file or directory name to import final data">Marked as <code>PENDING</code>.</h4><!-- @@/DGL:PENDING@@ -->\n'
    # Anchored at the start of the remarks: the banner is always prepended,
    # so an existing one can only be found there.
    rx = re.compile("^(<!-- @@DGL:PENDING@@ -->.*<!-- @@/DGL:PENDING@@ -->\n)")

    with db.conn.cursor() as cursor:
        qry = "SELECT remarks FROM raw_lines WHERE sequence = %s;"
        cursor.execute(qry, (sequence,))
        row = cursor.fetchone()
        if row is None:
            # No raw line for this sequence: there is nothing to annotate.
            return

        # Treat a NULL remarks column like an empty string so that both the
        # regex match and the concatenation below are well defined.
        remarks = row[0] or ""
        if rx.match(remarks) is None:
            qry = "UPDATE raw_lines SET remarks = %s WHERE sequence = %s;"
            cursor.execute(qry, (text + remarks, sequence))
            db.maybe_commit()
def del_pending_remark(db, sequence):
    """Remove the PENDING banner from the remarks of a raw line.

    Reads ``remarks`` from the ``raw_lines`` row matching *sequence*; if
    the text starts with the PENDING banner, the banner is stripped, the
    row is updated and ``db.maybe_commit()`` is called.

    Args:
        db: Object exposing ``conn.cursor()`` (usable as a context
            manager) and ``maybe_commit()``.
        sequence: Value matched against ``raw_lines.sequence``.

    Returns:
        None. Nothing is written when the row does not exist, when its
        remarks are NULL, or when no banner is present.
    """
    # Anchored at the start of the remarks, which is where the banner is
    # inserted; without re.MULTILINE it can match at most once.
    rx = re.compile("^(<!-- @@DGL:PENDING@@ -->.*<!-- @@/DGL:PENDING@@ -->\n)")

    with db.conn.cursor() as cursor:
        qry = "SELECT remarks FROM raw_lines WHERE sequence = %s;"
        cursor.execute(qry, (sequence,))
        row = cursor.fetchone()
        if row is None or row[0] is None:
            # Unknown sequence or NULL remarks: no banner to remove.
            return

        # subn() strips the banner and reports whether anything changed,
        # so the row is only rewritten when a banner was actually present.
        remarks, replaced = rx.subn("", row[0])
        if replaced:
            qry = "UPDATE raw_lines SET remarks = %s WHERE sequence = %s;"
            cursor.execute(qry, (remarks, sequence))
            db.maybe_commit()
2021-05-21 15:15:15 +02:00
2020-08-10 22:54:36 +02:00
def _parse_file_info(logical_filepath, physical_filepath, rx, pattern, lineNameInfo):
    """Derive the sequence / line information for one final P1/11 file.

    Two sources are consulted, in this order:

    1. ``rx`` (compiled from ``pattern["regex"]``) is matched against the
       base name of *logical_filepath*; its groups are paired with
       ``pattern["captures"]``.
    2. ``lineNameInfo`` drives ``fwr.parse_line`` on the base name of
       *physical_filepath*. When present, its result replaces whatever
       the regex produced.

    Args:
        logical_filepath: Path as stored in the database.
        physical_filepath: Path as found on disk.
        rx: Compiled regular expression, or None.
        pattern: The ``final.p111.pattern`` configuration, or None. Only
            read when *rx* is set.
        lineNameInfo: The ``final.p111.lineNameInfo`` configuration, or
            None / empty.

    Returns:
        A dict holding at least a ``"meta"`` dict, or None when the file
        name could not be parsed (a diagnostic has then been printed).
    """
    file_info = None

    if rx:
        match = rx.match(os.path.basename(logical_filepath))
        if not match:
            error_message = f"File path not match the expected format! ({logical_filepath} ~ {pattern['regex']})"
            print(error_message, file=sys.stderr)
            print("This file will be ignored!")
            return None
        file_info = dict(zip(pattern["captures"], match.groups()))
        file_info["meta"] = {}

    if lineNameInfo:
        basename = os.path.basename(physical_filepath)
        fields = lineNameInfo.get("fields", {})
        fixed = lineNameInfo.get("fixed")
        try:
            parsed_line = fwr.parse_line(basename, fields, fixed)
        except ValueError as err:
            parsed_line = "Line format error: " + str(err)
        # NOTE(review): fwr.parse_line is defined elsewhere; this check
        # suggests it may also report errors by returning a message string
        # instead of raising. Confirm against fwr.
        if isinstance(parsed_line, str):
            print(parsed_line, file=sys.stderr)
            print("This file will be ignored!")
            return None
        # "sequence" and "line" are promoted to top-level keys; whatever
        # else was parsed is kept as file metadata.
        sequence = parsed_line.pop("sequence")
        line = parsed_line.pop("line")
        file_info = {
            "sequence": sequence,
            "line": line,
            "meta": {"fileInfo": parsed_line},
        }

    if file_info is None:
        # Neither a usable regex nor lineNameInfo: there is no way to tell
        # which sequence this file belongs to.
        print("ERROR! No usable final.p111.lineNameInfo or final.p111.pattern.regex in project configuration", file=sys.stderr)
        print("This file will be ignored!")

    return file_info


def main():
    """Import new final P1/11 files for every survey in the datastore.

    For each survey, every file matching ``final.p111.paths`` x
    ``final.p111.globs`` is examined:

    * Files not yet in the database are imported, unless they were
      modified less than ``imports.file_min_age`` seconds ago (default
      10) or their name cannot be parsed.
    * Files whose physical path matches ``final.pending.pattern.regex``
      are treated as PENDING: their final sequence data is removed from
      the database and the raw line gets a PENDING remark.

    Raises:
        Exception: if a survey has neither ``final.p111.lineNameInfo``
            nor ``final.p111.pattern`` configured.
    """
    print("Reading configuration")
    file_min_age = configuration.read().get('imports', {}).get('file_min_age', 10)

    print("Connecting to database")
    db = Datastore()
    surveys = db.surveys()

    print("Reading surveys")
    for survey in surveys:
        print(f'Survey: {survey["id"]} ({survey["schema"]})')
        db.set_survey(survey["schema"])

        try:
            final_p111 = survey["final"]["p111"]
        except KeyError:
            print("No final P1/11 configuration")
            # Skip only this survey; the remaining ones must still be
            # processed.
            continue

        lineNameInfo = final_p111.get("lineNameInfo")
        pattern = final_p111.get("pattern")
        if not lineNameInfo:
            if not pattern:
                print("ERROR! Missing final.p111.lineNameInfo in project configuration. Cannot import final P111")
                raise Exception("Missing final.p111.lineNameInfo")
            else:
                print("WARNING! No `lineNameInfo` in project configuration (final.p111). You should add it to the settings.")

        rx = None
        if pattern and pattern.get("regex"):
            rx = re.compile(pattern["regex"])

        # Reset for every survey so that a survey without a "pending"
        # section neither fails on an unbound name nor inherits the regex
        # of the previous survey.
        pendingRx = None
        if "pending" in survey["final"]:
            pendingRx = re.compile(survey["final"]["pending"]["pattern"]["regex"])

        for fileprefix in final_p111["paths"]:
            realprefix = configuration.translate_path(fileprefix)
            print(f"Path prefix: {fileprefix} → {realprefix}")

            for globspec in final_p111["globs"]:
                for physical_filepath in pathlib.Path(realprefix).glob(globspec):
                    physical_filepath = str(physical_filepath)
                    logical_filepath = configuration.untranslate_path(physical_filepath)
                    print(f"Found {logical_filepath}")

                    pending = False
                    if pendingRx:
                        pending = pendingRx.search(physical_filepath) is not None

                    if not db.file_in_db(logical_filepath):
                        # Leave very recent files alone; they are picked up
                        # on a later run once they are old enough.
                        age = time.time() - os.path.getmtime(physical_filepath)
                        if age < file_min_age:
                            print("Skipping file because too new", logical_filepath)
                            continue

                        print("Importing")
                        file_info = _parse_file_info(
                            logical_filepath, physical_filepath, rx, pattern, lineNameInfo)
                        if file_info is None:
                            continue

                        if pending:
                            print("Skipping / removing final file because marked as PENDING", logical_filepath)
                            db.del_sequence_final(file_info["sequence"])
                            add_pending_remark(db, file_info["sequence"])
                            continue
                        else:
                            del_pending_remark(db, file_info["sequence"])

                        p111_data = p111.from_file(physical_filepath)

                        print("Saving")
                        p111_records = p111.p111_type("S", p111_data)
                        file_info["meta"]["lineName"] = p111.line_name(p111_data)

                        db.save_final_p111(p111_records, file_info, logical_filepath, survey["epsg"])
                    else:
                        print("Already in DB")
                        if pending:
                            # The sequence must be derived from THIS file;
                            # nothing computed for an earlier file may be
                            # reused here.
                            file_info = _parse_file_info(
                                logical_filepath, physical_filepath, rx, pattern, lineNameInfo)
                            if file_info is None:
                                continue
                            print("Removing from database because marked as PENDING")
                            db.del_sequence_final(file_info["sequence"])
                            add_pending_remark(db, file_info["sequence"])

    print("Done")


if __name__ == '__main__':
    main()