Files
dougal-software/etc/db/upgrades/upgrade12-v0.2.2-new-event-log-schema.sql

361 lines
12 KiB
MySQL
Raw Permalink Normal View History

2022-02-27 17:19:17 +01:00
-- Add new event log schema.
--
-- New schema version: 0.2.2
2022-02-27 17:19:17 +01:00
--
-- ATTENTION:
--
-- ENSURE YOU HAVE BACKED UP THE DATABASE BEFORE RUNNING THIS SCRIPT.
--
-- REQUIRES POSTGRESQL VERSION 14 OR NEWER
-- (Because of CREATE OR REPLACE TRIGGER)
--
2022-02-27 17:19:17 +01:00
--
-- NOTE: This upgrade affects all schemas in the database.
-- NOTE: Each application starts a transaction, which must be committed
-- or rolled back.
--
-- This is a redesign of the event logging mechanism. The old mechanism
-- relied on a distinction between sequence events (i.e., those which can
-- be associated to a shotpoint within a sequence), timed events (those
-- which occur outside any acquisition sequence) and so-called virtual
-- events (deduced from the data). It was inflexible and inefficient,
-- as most of the time we needed to merge those two types of events into
-- a single view.
--
-- The new mechanism:
-- - uses a single table
-- - accepts sequence event entries for shots or sequences which may not (yet)
-- exist. (https://gitlab.com/wgp/dougal/software/-/issues/170)
-- - keeps edit history (https://gitlab.com/wgp/dougal/software/-/issues/138)
-- - Keeps track of when an entry was made or subsequently edited.
--
-- To apply, run as the dougal user:
--
-- psql <<EOF
-- \i $THIS_FILE
-- COMMIT;
-- EOF
--
-- NOTE: It can take a while if run on a large database.
-- NOTE: It can be applied multiple times without ill effect, as long
-- as the new tables did not previously exist. If they did, they will
-- be emptied before migrating the data.
--
-- WARNING: Applying this upgrade migrates the old event data. It does
-- NOT yet drop the old tables, which is handled in a separate script,
-- leaving the actions here technically reversible without having to
-- restore from backup.
BEGIN;
CREATE OR REPLACE PROCEDURE show_notice (notice text) AS $$
BEGIN
RAISE NOTICE '%', notice;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_survey_schema (schema_name text) AS $$
BEGIN
RAISE NOTICE 'Updating schema %', schema_name;
-- We need to set the search path because some of the trigger
-- functions reference other tables in survey schemas assuming
-- they are in the search path.
EXECUTE format('SET search_path TO %I,public', schema_name);
CREATE SEQUENCE IF NOT EXISTS event_log_uid_seq
AS integer
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
CREATE TABLE IF NOT EXISTS event_log_full (
-- uid is a unique id for each entry in the table,
-- including revisions of an existing entry.
uid integer NOT NULL PRIMARY KEY DEFAULT nextval('event_log_uid_seq'),
-- All revisions of an entry share the same id.
-- If inserting a new entry, id = uid.
id integer NOT NULL,
-- No default tstamp because, for instance, a user could
-- enter a sequence/point event referring to the future.
-- An external process should scan those at regular intervals
-- and populate the tstamp as needed.
tstamp timestamptz NULL,
sequence integer NULL,
point integer NULL,
remarks text NOT NULL DEFAULT '',
labels text[] NOT NULL DEFAULT ARRAY[]::text[],
-- TODO: Need a geometry column? Let us check performance as it is
-- and if needed either add a geometry column + spatial index.
meta jsonb NOT NULL DEFAULT '{}'::jsonb,
validity tstzrange NOT NULL CHECK (NOT isempty(validity)),
-- We accept either:
-- - Just a tstamp
-- - Just a sequence / point pair
-- - All three
-- We don't accept:
-- - A sequence without a point or vice-versa
-- - Nothing being provided
CHECK (
(tstamp IS NOT NULL AND sequence IS NOT NULL AND point IS NOT NULL) OR
(tstamp IS NOT NULL AND sequence IS NULL AND point IS NULL) OR
(tstamp IS NULL AND sequence IS NOT NULL AND point IS NOT NULL)
)
);
CREATE INDEX IF NOT EXISTS event_log_id ON event_log_full USING btree (id);
CREATE OR REPLACE FUNCTION event_log_full_insert() RETURNS TRIGGER AS $inner$
BEGIN
NEW.id := COALESCE(NEW.id, NEW.uid);
NEW.validity := tstzrange(current_timestamp, NULL);
NEW.meta = COALESCE(NEW.meta, '{}'::jsonb);
NEW.labels = COALESCE(NEW.labels, ARRAY[]::text[]);
IF cardinality(NEW.labels) > 0 THEN
-- Remove duplicates
SELECT array_agg(DISTINCT elements)
INTO NEW.labels
FROM (SELECT unnest(NEW.labels) AS elements) AS labels;
END IF;
RETURN NEW;
END;
$inner$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER event_log_full_insert_tg
BEFORE INSERT ON event_log_full
FOR EACH ROW EXECUTE FUNCTION event_log_full_insert();
-- The public.notify() trigger to alert clients that something has changed
CREATE OR REPLACE TRIGGER event_log_full_notify_tg
2022-02-27 17:19:17 +01:00
AFTER INSERT OR DELETE OR UPDATE
ON event_log_full FOR EACH ROW EXECUTE FUNCTION public.notify('event');
--
-- VIEW event_log
--
-- This is what is exposed to the user most of the time.
-- It shows the current version of records in the event_log_full
-- table.
--
-- The user applies edits to this table directly, which are
-- processed via triggers.
--
CREATE OR REPLACE VIEW event_log AS
SELECT
id, tstamp, sequence, point, remarks, labels, meta,
uid <> id AS has_edits,
lower(validity) AS modified_on
FROM event_log_full
WHERE validity @> current_timestamp;
CREATE OR REPLACE FUNCTION event_log_update() RETURNS TRIGGER AS $inner$
BEGIN
IF (TG_OP = 'INSERT') THEN
-- Complete the tstamp if possible
IF NEW.sequence IS NOT NULL AND NEW.point IS NOT NULL AND NEW.tstamp IS NULL THEN
SELECT COALESCE(
tstamp_from_sequence_shot(NEW.sequence, NEW.point),
tstamp_interpolate(NEW.sequence, NEW.point)
)
INTO NEW.tstamp;
END IF;
2022-02-27 17:19:17 +01:00
-- Any id that is provided will be ignored. The generated
-- id will match uid.
INSERT INTO event_log_full
(tstamp, sequence, point, remarks, labels, meta)
VALUES (NEW.tstamp, NEW.sequence, NEW.point, NEW.remarks, NEW.labels, NEW.meta);
RETURN NEW;
ELSIF (TG_OP = 'UPDATE') THEN
-- Set end of validity and create a new entry with id
-- matching that of the old entry.
-- NOTE: Do not allow updating an event that has meta.readonly = true
IF EXISTS
(SELECT *
FROM event_log_full
WHERE id = OLD.id AND (meta->>'readonly')::boolean IS TRUE)
THEN
RAISE check_violation USING MESSAGE = 'Cannot modify read-only entry';
RETURN NULL;
END IF;
-- If the sequence / point has changed, and no new tstamp is provided, get one
IF NEW.sequence <> OLD.sequence OR NEW.point <> OLD.point
AND NEW.sequence IS NOT NULL AND NEW.point IS NOT NULL
AND NEW.tstamp IS NULL OR NEW.tstamp = OLD.tstamp THEN
SELECT COALESCE(
tstamp_from_sequence_shot(NEW.sequence, NEW.point),
tstamp_interpolate(NEW.sequence, NEW.point)
)
INTO NEW.tstamp;
END IF;
2022-02-27 17:19:17 +01:00
UPDATE event_log_full
SET validity = tstzrange(lower(validity), current_timestamp)
WHERE validity @> current_timestamp AND id = OLD.id;
-- Any attempt to modify id will be ignored.
INSERT INTO event_log_full
(id, tstamp, sequence, point, remarks, labels, meta)
VALUES (OLD.id, NEW.tstamp, NEW.sequence, NEW.point, NEW.remarks, NEW.labels, NEW.meta);
RETURN NEW;
ELSIF (TG_OP = 'DELETE') THEN
-- Set end of validity.
-- NOTE: We *do* allow deleting an event that has meta.readonly = true
-- This could be of interest if for instance we wanted to keep the history
-- of QC results for a point, provided that the QC routines write to
-- event_log and not event_log_full
UPDATE event_log_full
SET validity = tstzrange(lower(validity), current_timestamp)
WHERE validity @> current_timestamp AND id = OLD.id;
RETURN NULL;
END IF;
END;
$inner$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER event_log_tg
INSTEAD OF INSERT OR UPDATE OR DELETE ON event_log
FOR EACH ROW EXECUTE FUNCTION event_log_update();
-- NOTE
-- This is where we migrate the actual data
RAISE NOTICE 'Migrating schema %', schema_name;
-- We start by deleting any data that the new tables might
-- have had if they already existed.
DELETE FROM event_log_full;
-- We purposefully bypass event_log here, as the tables we're
-- migrating from only contain a single version of each event.
INSERT INTO event_log_full (tstamp, sequence, point, remarks, labels, meta)
SELECT
tstamp, sequence, point, remarks, labels,
meta || json_build_object('geometry', geometry, 'readonly', virtual)::jsonb
FROM events;
UPDATE event_log_full SET meta = meta - 'geometry' WHERE meta->>'geometry' IS NULL;
UPDATE event_log_full SET meta = meta - 'readonly' WHERE (meta->'readonly')::boolean IS false;
-- This function used the superseded `events` view.
-- We need to drop it because we're changing the return type.
DROP FUNCTION IF EXISTS label_in_sequence (_sequence integer, _label text);
CREATE OR REPLACE FUNCTION label_in_sequence (_sequence integer, _label text)
RETURNS event_log
LANGUAGE sql
AS $inner$
SELECT * FROM event_log WHERE sequence = _sequence AND _label = ANY(labels);
$inner$;
-- This function used the superseded `events` view (and a strange logic).
CREATE OR REPLACE PROCEDURE handle_final_line_events (_seq integer, _label text, _column text)
LANGUAGE plpgsql
AS $inner$
DECLARE
_line final_lines_summary%ROWTYPE;
_column_value integer;
_tg_name text := 'final_line';
_event event_log%ROWTYPE;
event_id integer;
BEGIN
SELECT * INTO _line FROM final_lines_summary WHERE sequence = _seq;
_event := label_in_sequence(_seq, _label);
_column_value := row_to_json(_line)->>_column;
--RAISE NOTICE '% is %', _label, _event;
--RAISE NOTICE 'Line is %', _line;
--RAISE NOTICE '% is % (%)', _column, _column_value, _label;
IF _event IS NULL THEN
--RAISE NOTICE 'We will populate the event log from the sequence data';
INSERT INTO event_log (sequence, point, remarks, labels, meta)
VALUES (
-- The sequence
_seq,
-- The shotpoint
_column_value,
-- Remark. Something like "FSP <linename>"
format('%s %s', _label, (SELECT meta->>'lineName' FROM final_lines WHERE sequence = _seq)),
-- Label
ARRAY[_label],
-- Meta. Something like {"auto" : {"FSP" : "final_line"}}
json_build_object('auto', json_build_object(_label, _tg_name))
);
ELSE
--RAISE NOTICE 'We may populate the sequence meta from the event log';
--RAISE NOTICE 'Unless the event log was populated by us previously';
--RAISE NOTICE 'Populated by us previously? %', _event.meta->'auto'->>_label = _tg_name;
IF _event.meta->'auto'->>_label IS DISTINCT FROM _tg_name THEN
--RAISE NOTICE 'Adding % found in events log to final_line meta', _label;
UPDATE final_lines
SET meta = jsonb_set(meta, ARRAY[_label], to_jsonb(_event.point))
WHERE sequence = _seq;
END IF;
END IF;
END;
$inner$;
2022-02-27 17:19:17 +01:00
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_12 () AS $$
DECLARE
row RECORD;
BEGIN
FOR row IN
SELECT schema_name FROM information_schema.schemata
WHERE schema_name LIKE 'survey_%'
ORDER BY schema_name
LOOP
CALL pg_temp.upgrade_survey_schema(row.schema_name);
END LOOP;
END;
$$ LANGUAGE plpgsql;
CALL pg_temp.upgrade_12();
CALL show_notice('Cleaning up');
DROP PROCEDURE pg_temp.upgrade_survey_schema (schema_name text);
DROP PROCEDURE pg_temp.upgrade_12 ();
CALL show_notice('Updating db_schema version');
-- This is technically still compatible with 0.2.0 as we are only adding
-- some more tables and views but not yet dropping the old ones, which we
-- will do separately so that these scripts do not get too big.
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.2.2"}')
2022-02-27 17:19:17 +01:00
ON CONFLICT (key) DO UPDATE
SET value = public.info.value || '{"db_schema": "0.2.2"}' WHERE public.info.key = 'version';
2022-02-27 17:19:17 +01:00
CALL show_notice('All done. You may now run "COMMIT;" to persist the changes');
DROP PROCEDURE show_notice (notice text);
--
--NOTE Run `COMMIT;` now if all went well
--