Files
dougal-software/etc/db/upgrades/upgrade11-v0.2.1-new-event-log-schema.sql
2022-02-27 19:56:12 +01:00

269 lines
8.7 KiB
PL/PgSQL

-- Add new event log schema.
--
-- New schema version: 0.2.1
--
-- ATTENTION:
--
-- ENSURE YOU HAVE BACKED UP THE DATABASE BEFORE RUNNING THIS SCRIPT.
--
--
-- NOTE: This upgrade affects all schemas in the database.
-- NOTE: Each application starts a transaction, which must be committed
-- or rolled back.
--
-- This is a redesign of the event logging mechanism. The old mechanism
-- relied on a distinction between sequence events (i.e., those which can
-- be associated to a shotpoint within a sequence), timed events (those
-- which occur outside any acquisition sequence) and so-called virtual
-- events (deduced from the data). It was inflexible and inefficient,
-- as most of the time we needed to merge those two types of events into
-- a single view.
--
-- The new mechanism:
-- - uses a single table
-- - accepts sequence event entries for shots or sequences which may not (yet)
-- exist. (https://gitlab.com/wgp/dougal/software/-/issues/170)
-- - keeps edit history (https://gitlab.com/wgp/dougal/software/-/issues/138)
-- - Keeps track of when an entry was made or subsequently edited.
--
-- To apply, run as the dougal user:
--
-- psql <<EOF
-- \i $THIS_FILE
-- COMMIT;
-- EOF
--
-- NOTE: It can take a while if run on a large database.
-- NOTE: It can be applied multiple times without ill effect, as long
-- as the new tables did not previously exist. If they did, they will
-- be emptied before migrating the data.
--
-- WARNING: Applying this upgrade migrates the old event data. It does
-- NOT yet drop the old tables, which is handled in a separate script,
-- leaving the actions here technically reversible without having to
-- restore from backup.
BEGIN;
CREATE OR REPLACE PROCEDURE show_notice (notice text) AS $$
BEGIN
RAISE NOTICE '%', notice;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_survey_schema (schema_name text) AS $$
BEGIN
RAISE NOTICE 'Updating schema %', schema_name;
-- We need to set the search path because some of the trigger
-- functions reference other tables in survey schemas assuming
-- they are in the search path.
EXECUTE format('SET search_path TO %I,public', schema_name);
CREATE SEQUENCE IF NOT EXISTS event_log_uid_seq
AS integer
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
CREATE TABLE IF NOT EXISTS event_log_full (
-- uid is a unique id for each entry in the table,
-- including revisions of an existing entry.
uid integer NOT NULL PRIMARY KEY DEFAULT nextval('event_log_uid_seq'),
-- All revisions of an entry share the same id.
-- If inserting a new entry, id = uid.
id integer NOT NULL,
-- No default tstamp because, for instance, a user could
-- enter a sequence/point event referring to the future.
-- An external process should scan those at regular intervals
-- and populate the tstamp as needed.
tstamp timestamptz NULL,
sequence integer NULL,
point integer NULL,
remarks text NOT NULL DEFAULT '',
labels text[] NOT NULL DEFAULT ARRAY[]::text[],
-- TODO: Need a geometry column? Let us check performance as it is
-- and if needed either add a geometry column + spatial index.
meta jsonb NOT NULL DEFAULT '{}'::jsonb,
validity tstzrange NOT NULL CHECK (NOT isempty(validity)),
-- We accept either:
-- - Just a tstamp
-- - Just a sequence / point pair
-- - All three
-- We don't accept:
-- - A sequence without a point or vice-versa
-- - Nothing being provided
CHECK (
(tstamp IS NOT NULL AND sequence IS NOT NULL AND point IS NOT NULL) OR
(tstamp IS NOT NULL AND sequence IS NULL AND point IS NULL) OR
(tstamp IS NULL AND sequence IS NOT NULL AND point IS NOT NULL)
)
);
CREATE INDEX IF NOT EXISTS event_log_id ON event_log_full USING btree (id);
CREATE OR REPLACE FUNCTION event_log_full_insert() RETURNS TRIGGER AS $inner$
BEGIN
NEW.id := COALESCE(NEW.id, NEW.uid);
NEW.validity := tstzrange(current_timestamp, NULL);
NEW.meta = COALESCE(NEW.meta, '{}'::jsonb);
NEW.labels = COALESCE(NEW.labels, ARRAY[]::text[]);
IF cardinality(NEW.labels) > 0 THEN
-- Remove duplicates
SELECT array_agg(DISTINCT elements)
INTO NEW.labels
FROM (SELECT unnest(NEW.labels) AS elements) AS labels;
END IF;
RETURN NEW;
END;
$inner$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER event_log_full_insert_tg
BEFORE INSERT ON event_log_full
FOR EACH ROW EXECUTE FUNCTION event_log_full_insert();
-- The public.notify() trigger to alert clients that something has changed
CREATE TRIGGER event_log_full_notify_tg
AFTER INSERT OR DELETE OR UPDATE
ON event_log_full FOR EACH ROW EXECUTE FUNCTION public.notify('event');
--
-- VIEW event_log
--
-- This is what is exposed to the user most of the time.
-- It shows the current version of records in the event_log_full
-- table.
--
-- The user applies edits to this table directly, which are
-- processed via triggers.
--
CREATE OR REPLACE VIEW event_log AS
SELECT
id, tstamp, sequence, point, remarks, labels, meta,
uid <> id AS has_edits,
lower(validity) AS modified_on
FROM event_log_full
WHERE validity @> current_timestamp;
CREATE OR REPLACE FUNCTION event_log_update() RETURNS TRIGGER AS $inner$
BEGIN
IF (TG_OP = 'INSERT') THEN
-- Any id that is provided will be ignored. The generated
-- id will match uid.
INSERT INTO event_log_full
(tstamp, sequence, point, remarks, labels, meta)
VALUES (NEW.tstamp, NEW.sequence, NEW.point, NEW.remarks, NEW.labels, NEW.meta);
RETURN NEW;
ELSIF (TG_OP = 'UPDATE') THEN
-- Set end of validity and create a new entry with id
-- matching that of the old entry.
-- NOTE: Do not allow updating an event that has meta.readonly = true
IF EXISTS
(SELECT *
FROM event_log_full
WHERE id = OLD.id AND (meta->>'readonly')::boolean IS TRUE)
THEN
RAISE check_violation USING MESSAGE = 'Cannot modify read-only entry';
RETURN NULL;
END IF;
UPDATE event_log_full
SET validity = tstzrange(lower(validity), current_timestamp)
WHERE validity @> current_timestamp AND id = OLD.id;
-- Any attempt to modify id will be ignored.
INSERT INTO event_log_full
(id, tstamp, sequence, point, remarks, labels, meta)
VALUES (OLD.id, NEW.tstamp, NEW.sequence, NEW.point, NEW.remarks, NEW.labels, NEW.meta);
RETURN NEW;
ELSIF (TG_OP = 'DELETE') THEN
-- Set end of validity.
-- NOTE: We *do* allow deleting an event that has meta.readonly = true
-- This could be of interest if for instance we wanted to keep the history
-- of QC results for a point, provided that the QC routines write to
-- event_log and not event_log_full
UPDATE event_log_full
SET validity = tstzrange(lower(validity), current_timestamp)
WHERE validity @> current_timestamp AND id = OLD.id;
RETURN NULL;
END IF;
END;
$inner$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER event_log_tg
INSTEAD OF INSERT OR UPDATE OR DELETE ON event_log
FOR EACH ROW EXECUTE FUNCTION event_log_update();
-- NOTE
-- This is where we migrate the actual data
RAISE NOTICE 'Migrating schema %', schema_name;
-- We start by deleting any data that the new tables might
-- have had if they already existed.
DELETE FROM event_log_full;
-- We purposefully bypass event_log here, as the tables we're
-- migrating from only contain a single version of each event.
INSERT INTO event_log_full (tstamp, sequence, point, remarks, labels, meta)
SELECT
tstamp, sequence, point, remarks, labels,
meta || json_build_object('geometry', geometry, 'readonly', virtual)::jsonb
FROM events;
UPDATE event_log_full SET meta = meta - 'geometry' WHERE meta->>'geometry' IS NULL;
UPDATE event_log_full SET meta = meta - 'readonly' WHERE (meta->'readonly')::boolean IS false;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_12 () AS $$
DECLARE
row RECORD;
BEGIN
FOR row IN
SELECT schema_name FROM information_schema.schemata
WHERE schema_name LIKE 'survey_%'
ORDER BY schema_name
LOOP
CALL pg_temp.upgrade_survey_schema(row.schema_name);
END LOOP;
END;
$$ LANGUAGE plpgsql;
CALL pg_temp.upgrade_12();
CALL show_notice('Cleaning up');
DROP PROCEDURE pg_temp.upgrade_survey_schema (schema_name text);
DROP PROCEDURE pg_temp.upgrade_12 ();
CALL show_notice('Updating db_schema version');
-- This is technically still compatible with 0.2.0 as we are only adding
-- some more tables and views but not yet dropping the old ones, which we
-- will do separately so that these scripts do not get too big.
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.2.1"}')
ON CONFLICT (key) DO UPDATE
SET value = public.info.value || '{"db_schema": "0.2.1"}' WHERE public.info.key = 'version';
CALL show_notice('All done. You may now run "COMMIT;" to persist the changes');
DROP PROCEDURE show_notice (notice text);
--
--NOTE Run `COMMIT;` now if all went well
--