diff --git a/bin/datastore.py b/bin/datastore.py index 91d4d08..151f06a 100644 --- a/bin/datastore.py +++ b/bin/datastore.py @@ -4,6 +4,7 @@ import psycopg2 import configuration import preplots import p111 +from hashlib import md5 # Because it's good enough """ Interface to the PostgreSQL database. @@ -11,13 +12,16 @@ Interface to the PostgreSQL database. def file_hash(file): """ - Calculate a file hash based on its size, inode, modification and creation times. + Calculate a file hash based on its name, size, modification and creation times. The hash is used to uniquely identify files in the database and detect if they have changed. """ + h = md5() + h.update(file.encode()) + name_digest = h.hexdigest()[:16] st = os.stat(file) - return ":".join([str(v) for v in [st.st_size, st.st_mtime, st.st_ctime, st.st_ino]]) + return ":".join([str(v) for v in [st.st_size, st.st_mtime, st.st_ctime, name_digest]]) class Datastore: """ @@ -390,9 +394,9 @@ class Datastore: with self.conn.cursor() as cursor: cursor.execute("BEGIN;") - + hash = self.add_file(filepath, cursor) - + if not records or len(records) == 0: print("File has no records (or none have been detected)") # We add the file to the database anyway to signal that we have @@ -412,13 +416,13 @@ class Datastore: """ cursor.execute(qry, (fileinfo["sequence"], fileinfo["line"], ntbp, incr, json.dumps(fileinfo["meta"]))) - + qry = """ UPDATE raw_lines SET meta = meta || %s WHERE sequence = %s; """ - + cursor.execute(qry, (json.dumps(fileinfo["meta"]), fileinfo["sequence"])) qry = """ @@ -452,7 +456,7 @@ class Datastore: with self.conn.cursor() as cursor: cursor.execute("BEGIN;") - + hash = self.add_file(filepath, cursor) qry = """ @@ -462,13 +466,13 @@ class Datastore: """ cursor.execute(qry, (fileinfo["sequence"], fileinfo["line"], json.dumps(fileinfo["meta"]))) - + qry = """ UPDATE raw_lines SET meta = meta || %s WHERE sequence = %s; """ - + cursor.execute(qry, (json.dumps(fileinfo["meta"]), fileinfo["sequence"])) qry = """ @@ -495,7 +499,7 @@ class Datastore: if filedata is not None: self.save_file_data(filepath, json.dumps(filedata), cursor) - + cursor.execute("CALL final_line_post_import(%s);", (fileinfo["sequence"],)) self.maybe_commit() @@ -662,7 +666,7 @@ class Datastore: """ Remove final data for a sequence. """ - + if cursor is None: cur = self.conn.cursor() else: @@ -674,4 +678,4 @@ class Datastore: self.maybe_commit() # We do not commit if we've been passed a cursor, instead # we assume that we are in the middle of a transaction - +