Merge branch '182-periodically-scan-the-events-table-for-missing-information' into 'devel'

Resolve "Periodically scan the events table for missing information"

Closes #182

See merge request wgp/dougal/software!26
This commit is contained in:
D. Berge
2022-05-01 18:24:35 +00:00
7 changed files with 319 additions and 10 deletions

View File

@@ -679,3 +679,19 @@ class Datastore:
# We do not commit if we've been passed a cursor, instead
# we assume that we are in the middle of a transaction
def housekeep_event_log(self, cursor = None):
"""
Call housekeeping actions on the event log
"""
if cursor is None:
cur = self.conn.cursor()
else:
cur = cursor
qry = "CALL augment_event_data();"
cur.execute(qry)
if cursor is None:
self.maybe_commit()
# We do not commit if we've been passed a cursor, instead
# we assume that we are in the middle of a transaction

25
bin/housekeep_database.py Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/python3
"""
Do housekeeping actions on the database.
"""
import configuration
from datastore import Datastore
if __name__ == '__main__':
print("Reading configuration")
surveys = configuration.surveys()
print("Connecting to database")
db = Datastore()
print("Reading surveys")
for survey in surveys:
print(f'Survey: {survey["id"]} ({survey["schema"]})')
db.set_survey(survey["schema"])
db.housekeep_event_log()
print("Done")

View File

@@ -119,6 +119,9 @@ print_log "Process ASAQC queue"
# export NODE_TLS_REJECT_UNAUTHORIZED=0
run $DOUGAL_ROOT/lib/www/server/queues/asaqc/index.js
print_log "Run database housekeeping actions"
run $BINDIR/housekeep_database.py
rm "$LOCKFILE"
print_info "End run"

View File

@@ -3,7 +3,7 @@
--
-- Dumped from database version 14.2
-- Dumped by pg_dump version 14.1
-- Dumped by pg_dump version 14.2
SET statement_timeout = 0;
SET lock_timeout = 0;
@@ -20,7 +20,7 @@ SET row_security = off;
-- Name: dougal; Type: DATABASE; Schema: -; Owner: postgres
--
CREATE DATABASE dougal WITH TEMPLATE = template0 ENCODING = 'UTF8' LC_COLLATE = 'C' LC_CTYPE = 'en_GB.UTF-8';
CREATE DATABASE dougal WITH TEMPLATE = template0 ENCODING = 'UTF8' LOCALE = 'en_GB.UTF-8';
ALTER DATABASE dougal OWNER TO postgres;
@@ -82,7 +82,7 @@ CREATE EXTENSION IF NOT EXISTS btree_gist WITH SCHEMA public;
--
-- Name: EXTENSION btree_gist; Type: COMMENT; Schema: -; Owner:
-- Name: EXTENSION btree_gist; Type: COMMENT; Schema: -; Owner:
--
COMMENT ON EXTENSION btree_gist IS 'support for indexing common datatypes in GiST';
@@ -96,7 +96,7 @@ CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public;
--
-- Name: EXTENSION postgis; Type: COMMENT; Schema: -; Owner:
-- Name: EXTENSION postgis; Type: COMMENT; Schema: -; Owner:
--
COMMENT ON EXTENSION postgis IS 'PostGIS geometry, geography, and raster spatial types and functions';
@@ -110,7 +110,7 @@ CREATE EXTENSION IF NOT EXISTS postgis_sfcgal WITH SCHEMA public;
--
-- Name: EXTENSION postgis_sfcgal; Type: COMMENT; Schema: -; Owner:
-- Name: EXTENSION postgis_sfcgal; Type: COMMENT; Schema: -; Owner:
--
COMMENT ON EXTENSION postgis_sfcgal IS 'PostGIS SFCGAL functions';
@@ -124,7 +124,7 @@ CREATE EXTENSION IF NOT EXISTS postgis_topology WITH SCHEMA topology;
--
-- Name: EXTENSION postgis_topology; Type: COMMENT; Schema: -; Owner:
-- Name: EXTENSION postgis_topology; Type: COMMENT; Schema: -; Owner:
--
COMMENT ON EXTENSION postgis_topology IS 'PostGIS topology spatial types and functions';
@@ -144,6 +144,34 @@ CREATE TYPE public.queue_item_status AS ENUM (
ALTER TYPE public.queue_item_status OWNER TO postgres;
--
-- Name: geometry_from_tstamp(timestamp with time zone, numeric); Type: FUNCTION; Schema: public; Owner: postgres
--
CREATE FUNCTION public.geometry_from_tstamp(ts timestamp with time zone, tolerance numeric, OUT geometry public.geometry, OUT delta numeric) RETURNS record
LANGUAGE sql
AS $$
SELECT
geometry,
extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts ) AS delta
FROM real_time_inputs
WHERE
geometry IS NOT NULL AND
abs(extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts )) < tolerance
ORDER BY abs(extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts ))
LIMIT 1;
$$;
ALTER FUNCTION public.geometry_from_tstamp(ts timestamp with time zone, tolerance numeric, OUT geometry public.geometry, OUT delta numeric) OWNER TO postgres;
--
-- Name: FUNCTION geometry_from_tstamp(ts timestamp with time zone, tolerance numeric, OUT geometry public.geometry, OUT delta numeric); Type: COMMENT; Schema: public; Owner: postgres
--
COMMENT ON FUNCTION public.geometry_from_tstamp(ts timestamp with time zone, tolerance numeric, OUT geometry public.geometry, OUT delta numeric) IS 'Get geometry from timestamp';
--
-- Name: notify(); Type: FUNCTION; Schema: public; Owner: postgres
--
@@ -400,6 +428,13 @@ ALTER TABLE ONLY public.queue_items
ADD CONSTRAINT queue_items_pkey PRIMARY KEY (item_id);
--
-- Name: meta_tstamp_idx; Type: INDEX; Schema: public; Owner: postgres
--
CREATE INDEX meta_tstamp_idx ON public.real_time_inputs USING btree (((meta ->> 'tstamp'::text)) DESC);
--
-- Name: tstamp_idx; Type: INDEX; Schema: public; Owner: postgres
--
@@ -449,6 +484,7 @@ CREATE TRIGGER real_time_inputs_tg AFTER INSERT ON public.real_time_inputs FOR E
ALTER TABLE ONLY public.queue_items
ADD CONSTRAINT queue_items_parent_id_fkey FOREIGN KEY (parent_id) REFERENCES public.queue_items(item_id);
--
-- PostgreSQL database dump complete
--

View File

@@ -1,3 +1,3 @@
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.3.3"}')
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.3.4"}')
ON CONFLICT (key) DO UPDATE
SET value = public.info.value || '{"db_schema": "0.3.3"}' WHERE public.info.key = 'version';
SET value = public.info.value || '{"db_schema": "0.3.4"}' WHERE public.info.key = 'version';

View File

@@ -2,8 +2,8 @@
-- PostgreSQL database dump
--
-- Dumped from database version 14.1
-- Dumped by pg_dump version 14.1
-- Dumped from database version 14.2
-- Dumped by pg_dump version 14.2
SET statement_timeout = 0;
SET lock_timeout = 0;
@@ -274,6 +274,72 @@ $$;
ALTER FUNCTION _SURVEY__TEMPLATE_.assoc_tstamp() OWNER TO postgres;
--
-- Name: augment_event_data(); Type: PROCEDURE; Schema: _SURVEY__TEMPLATE_; Owner: postgres
--
CREATE PROCEDURE _SURVEY__TEMPLATE_.augment_event_data()
LANGUAGE sql
AS $$
-- Populate the timestamp of sequence / point events
UPDATE event_log_full
SET tstamp = tstamp_from_sequence_shot(sequence, point)
WHERE
tstamp IS NULL AND sequence IS NOT NULL AND point IS NOT NULL;
-- Populate the geometry of sequence / point events for which
-- there is raw_shots data.
UPDATE event_log_full
SET meta = meta ||
jsonb_build_object(
'geometry',
(
SELECT st_transform(geometry, 4326)::jsonb
FROM raw_shots rs
WHERE rs.sequence = event_log_full.sequence AND rs.point = event_log_full.point
)
)
WHERE
sequence IS NOT NULL AND point IS NOT NULL AND
NOT meta ? 'geometry';
-- Populate the geometry of time-based events
UPDATE event_log_full e
SET
meta = meta || jsonb_build_object('geometry',
(SELECT st_transform(g.geometry, 4326)::jsonb
FROM geometry_from_tstamp(e.tstamp, 3) g))
WHERE
tstamp IS NOT NULL AND
sequence IS NULL AND point IS NULL AND
NOT meta ? 'geometry';
-- Get rid of null geometries
UPDATE event_log_full
SET
meta = meta - 'geometry'
WHERE
jsonb_typeof(meta->'geometry') = 'null';
-- Simplify the GeoJSON when the CRS is EPSG:4326
UPDATE event_log_full
SET
meta = meta #- '{geometry, crs}'
WHERE
meta->'geometry'->'crs'->'properties'->>'name' = 'EPSG:4326';
$$;
ALTER PROCEDURE _SURVEY__TEMPLATE_.augment_event_data() OWNER TO postgres;
--
-- Name: PROCEDURE augment_event_data(); Type: COMMENT; Schema: _SURVEY__TEMPLATE_; Owner: postgres
--
COMMENT ON PROCEDURE _SURVEY__TEMPLATE_.augment_event_data() IS 'Populate missing timestamps and geometries in event_log_full';
--
-- Name: binning_parameters(); Type: FUNCTION; Schema: _SURVEY__TEMPLATE_; Owner: postgres
--

View File

@@ -0,0 +1,163 @@
-- Fix not being able to edit a time-based event.
--
-- New schema version: 0.3.4
--
-- ATTENTION:
--
-- ENSURE YOU HAVE BACKED UP THE DATABASE BEFORE RUNNING THIS SCRIPT.
--
--
-- NOTE: This upgrade affects all schemas in the database.
-- NOTE: Each application starts a transaction, which must be committed
-- or rolled back.
--
-- This creates a new procedure augment_event_data() which tries to
-- populate missing event_log data, namely timestamps and geometries.
--
-- To do this it also adds a function public.geometry_from_tstamp()
-- which, given a timestamp, tries to fetch a geometry from real_time_inputs.
--
-- To apply, run as the dougal user:
--
-- psql <<EOF
-- \i $THIS_FILE
-- COMMIT;
-- EOF
--
-- NOTE: It can be applied multiple times without ill effect.
--
BEGIN;
CREATE OR REPLACE PROCEDURE show_notice (notice text) AS $$
BEGIN
RAISE NOTICE '%', notice;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_survey_schema (schema_name text) AS $$
BEGIN
RAISE NOTICE 'Updating schema %', schema_name;
-- We need to set the search path because some of the trigger
-- functions reference other tables in survey schemas assuming
-- they are in the search path.
EXECUTE format('SET search_path TO %I,public', schema_name);
CREATE OR REPLACE PROCEDURE augment_event_data ()
LANGUAGE sql
AS $inner$
-- Populate the timestamp of sequence / point events
UPDATE event_log_full
SET tstamp = tstamp_from_sequence_shot(sequence, point)
WHERE
tstamp IS NULL AND sequence IS NOT NULL AND point IS NOT NULL;
-- Populate the geometry of sequence / point events for which
-- there is raw_shots data.
UPDATE event_log_full
SET meta = meta ||
jsonb_build_object(
'geometry',
(
SELECT st_transform(geometry, 4326)::jsonb
FROM raw_shots rs
WHERE rs.sequence = event_log_full.sequence AND rs.point = event_log_full.point
)
)
WHERE
sequence IS NOT NULL AND point IS NOT NULL AND
NOT meta ? 'geometry';
-- Populate the geometry of time-based events
UPDATE event_log_full e
SET
meta = meta || jsonb_build_object('geometry',
(SELECT st_transform(g.geometry, 4326)::jsonb
FROM geometry_from_tstamp(e.tstamp, 3) g))
WHERE
tstamp IS NOT NULL AND
sequence IS NULL AND point IS NULL AND
NOT meta ? 'geometry';
-- Get rid of null geometries
UPDATE event_log_full
SET
meta = meta - 'geometry'
WHERE
jsonb_typeof(meta->'geometry') = 'null';
-- Simplify the GeoJSON when the CRS is EPSG:4326
UPDATE event_log_full
SET
meta = meta #- '{geometry, crs}'
WHERE
meta->'geometry'->'crs'->'properties'->>'name' = 'EPSG:4326';
$inner$;
COMMENT ON PROCEDURE augment_event_data()
IS 'Populate missing timestamps and geometries in event_log_full';
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_17 () AS $$
DECLARE
row RECORD;
BEGIN
CALL show_notice('Adding index to real_time_inputs.meta->tstamp');
CREATE INDEX IF NOT EXISTS meta_tstamp_idx
ON public.real_time_inputs
USING btree ((meta->>'tstamp') DESC);
CALL show_notice('Creating function geometry_from_tstamp');
CREATE OR REPLACE FUNCTION public.geometry_from_tstamp(
IN ts timestamptz,
IN tolerance numeric,
OUT "geometry" geometry,
OUT "delta" numeric)
AS $inner$
SELECT
geometry,
extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts ) AS delta
FROM real_time_inputs
WHERE
geometry IS NOT NULL AND
abs(extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts )) < tolerance
ORDER BY abs(extract('epoch' FROM (meta->>'tstamp')::timestamptz - ts ))
LIMIT 1;
$inner$ LANGUAGE SQL;
COMMENT ON FUNCTION public.geometry_from_tstamp(timestamptz, numeric)
IS 'Get geometry from timestamp';
FOR row IN
SELECT schema_name FROM information_schema.schemata
WHERE schema_name LIKE 'survey_%'
ORDER BY schema_name
LOOP
CALL pg_temp.upgrade_survey_schema(row.schema_name);
END LOOP;
END;
$$ LANGUAGE plpgsql;
CALL pg_temp.upgrade_17();
CALL show_notice('Cleaning up');
DROP PROCEDURE pg_temp.upgrade_survey_schema (schema_name text);
DROP PROCEDURE pg_temp.upgrade_17 ();
CALL show_notice('Updating db_schema version');
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.3.4"}')
ON CONFLICT (key) DO UPDATE
SET value = public.info.value || '{"db_schema": "0.3.4"}' WHERE public.info.key = 'version';
CALL show_notice('All done. You may now run "COMMIT;" to persist the changes');
DROP PROCEDURE show_notice (notice text);
--
--NOTE Run `COMMIT;` now if all went well
--