mirror of
https://gitlab.com/wgp/dougal/software.git
synced 2025-12-06 09:27:07 +00:00
Merge branch '173-do-not-use-inodes-as-part-of-a-file-s-fingerprint' into 'devel'
Resolve "Do not use inodes as part of a file's fingerprint" Closes #173 See merge request wgp/dougal/software!19
This commit is contained in:
@@ -4,6 +4,7 @@ import psycopg2
|
|||||||
import configuration
|
import configuration
|
||||||
import preplots
|
import preplots
|
||||||
import p111
|
import p111
|
||||||
|
from hashlib import md5 # Because it's good enough
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Interface to the PostgreSQL database.
|
Interface to the PostgreSQL database.
|
||||||
@@ -11,13 +12,16 @@ Interface to the PostgreSQL database.
|
|||||||
|
|
||||||
def file_hash(file):
|
def file_hash(file):
|
||||||
"""
|
"""
|
||||||
Calculate a file hash based on its size, inode, modification and creation times.
|
Calculate a file hash based on its name, size, modification and creation times.
|
||||||
|
|
||||||
The hash is used to uniquely identify files in the database and detect if they
|
The hash is used to uniquely identify files in the database and detect if they
|
||||||
have changed.
|
have changed.
|
||||||
"""
|
"""
|
||||||
|
h = md5()
|
||||||
|
h.update(file.encode())
|
||||||
|
name_digest = h.hexdigest()[:16]
|
||||||
st = os.stat(file)
|
st = os.stat(file)
|
||||||
return ":".join([str(v) for v in [st.st_size, st.st_mtime, st.st_ctime, st.st_ino]])
|
return ":".join([str(v) for v in [st.st_size, st.st_mtime, st.st_ctime, name_digest]])
|
||||||
|
|
||||||
class Datastore:
|
class Datastore:
|
||||||
"""
|
"""
|
||||||
|
|||||||
84
etc/db/upgrades/upgrade10-83be83e4→53ed096e-v0.2.0.sql
Normal file
84
etc/db/upgrades/upgrade10-83be83e4→53ed096e-v0.2.0.sql
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
-- Upgrade the database from commit 83be83e4 to 53ed096e.
|
||||||
|
--
|
||||||
|
-- New schema version: 0.2.0
|
||||||
|
--
|
||||||
|
-- ATTENTION:
|
||||||
|
--
|
||||||
|
-- ENSURE YOU HAVE BACKED UP THE DATABASE BEFORE RUNNING THIS SCRIPT.
|
||||||
|
--
|
||||||
|
--
|
||||||
|
-- NOTE: This upgrade affects all schemas in the database.
|
||||||
|
-- NOTE: Each application starts a transaction, which must be committed
|
||||||
|
-- or rolled back.
|
||||||
|
--
|
||||||
|
-- This migrates the file hashes to address issue #173.
|
||||||
|
-- The new hashes use size, modification time, creation time and the
|
||||||
|
-- first half of the MD5 hex digest of the file's absolute path.
|
||||||
|
--
|
||||||
|
-- It's a minor (rather than patch) version number increment because
|
||||||
|
-- changes to `bin/datastore.py` mean that the data is no longer
|
||||||
|
-- compatible with the hashing function.
|
||||||
|
--
|
||||||
|
-- To apply, run as the dougal user:
|
||||||
|
--
|
||||||
|
-- psql <<EOF
|
||||||
|
-- \i $THIS_FILE
|
||||||
|
-- COMMIT;
|
||||||
|
-- EOF
|
||||||
|
--
|
||||||
|
-- NOTE: It can take a while if run on a large database.
|
||||||
|
-- NOTE: It can be applied multiple times without ill effect.
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
CREATE OR REPLACE PROCEDURE show_notice (notice text) AS $$
|
||||||
|
BEGIN
|
||||||
|
RAISE NOTICE '%', notice;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE PROCEDURE migrate_hashes (schema_name text) AS $$
|
||||||
|
BEGIN
|
||||||
|
RAISE NOTICE 'Migrating schema %', schema_name;
|
||||||
|
-- We need to set the search path because some of the trigger
|
||||||
|
-- functions reference other tables in survey schemas assuming
|
||||||
|
-- they are in the search path.
|
||||||
|
EXECUTE format('SET search_path TO %I,public', schema_name);
|
||||||
|
EXECUTE format('UPDATE %I.files SET hash = array_to_string(array_append(trim_array(string_to_array(hash, '':''), 1), left(md5(path), 16)), '':'')', schema_name);
|
||||||
|
EXECUTE 'SET search_path TO public'; -- Back to the default search path for good measure
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE PROCEDURE upgrade_10 () AS $$
|
||||||
|
DECLARE
|
||||||
|
row RECORD;
|
||||||
|
BEGIN
|
||||||
|
FOR row IN
|
||||||
|
SELECT schema_name FROM information_schema.schemata
|
||||||
|
WHERE schema_name LIKE 'survey_%'
|
||||||
|
ORDER BY schema_name
|
||||||
|
LOOP
|
||||||
|
CALL migrate_hashes(row.schema_name);
|
||||||
|
END LOOP;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CALL upgrade_10();
|
||||||
|
|
||||||
|
CALL show_notice('Cleaning up');
|
||||||
|
DROP PROCEDURE migrate_hashes (schema_name text);
|
||||||
|
DROP PROCEDURE upgrade_10 ();
|
||||||
|
|
||||||
|
CALL show_notice('Updating db_schema version');
|
||||||
|
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.2.0"}')
|
||||||
|
ON CONFLICT (key) DO UPDATE
|
||||||
|
SET value = public.info.value || '{"db_schema": "0.2.0"}' WHERE public.info.key = 'version';
|
||||||
|
|
||||||
|
|
||||||
|
CALL show_notice('All done. You may now run "COMMIT;" to persist the changes');
|
||||||
|
DROP PROCEDURE show_notice (notice text);
|
||||||
|
|
||||||
|
--
|
||||||
|
--NOTE Run `COMMIT;` now if all went well
|
||||||
|
--
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
"license": "UNLICENSED",
|
"license": "UNLICENSED",
|
||||||
"private": true,
|
"private": true,
|
||||||
"config": {
|
"config": {
|
||||||
"db_schema": "^0.1.0"
|
"db_schema": "^0.2.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=14.0.0"
|
"node": ">=14.0.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user