Merge branch '287-the-project_summary-view-is-too-slow' into 'devel'

Resolve "The `project_summary` view is too slow"

Closes #287

See merge request wgp/dougal/software!55
This commit is contained in:
D. Berge
2023-11-02 14:29:35 +00:00
7 changed files with 262 additions and 4 deletions

View File

@@ -0,0 +1,147 @@
-- Turn project_summary into a materialised view
--
-- New schema version: 0.4.5
--
-- ATTENTION:
--
-- ENSURE YOU HAVE BACKED UP THE DATABASE BEFORE RUNNING THIS SCRIPT.
--
--
-- NOTE: This upgrade affects all schemas in the database.
-- NOTE: Each application starts a transaction, which must be committed
-- or rolled back.
--
-- The project_summary view is quite a bottleneck. While it itself is
-- not the real culprit (rather the underlying views are), this is one
-- relatively cheap way of improving responsiveness from the client's
-- point of view.
-- We leave the details of how / when to refresh the view to the non-
-- database code.
--
-- To apply, run as the dougal user:
--
-- psql <<EOF
-- \i $THIS_FILE
-- COMMIT;
-- EOF
--
-- NOTE: It can be applied multiple times without ill effect.
--
BEGIN;
CREATE OR REPLACE PROCEDURE pg_temp.show_notice (notice text) AS $$
BEGIN
RAISE NOTICE '%', notice;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade_survey_schema (schema_name text) AS $outer$
BEGIN
RAISE NOTICE 'Updating schema %', schema_name;
-- We need to set the search path because some of the trigger
-- functions reference other tables in survey schemas assuming
-- they are in the search path.
EXECUTE format('SET search_path TO %I,public', schema_name);
DROP VIEW project_summary;
CREATE MATERIALIZED VIEW project_summary AS
WITH fls AS (
SELECT
avg((final_lines_summary.duration / ((final_lines_summary.num_points - 1))::double precision)) AS shooting_rate,
avg((final_lines_summary.length / date_part('epoch'::text, final_lines_summary.duration))) AS speed,
sum(final_lines_summary.duration) AS prod_duration,
sum(final_lines_summary.length) AS prod_distance
FROM final_lines_summary
), project AS (
SELECT
p.pid,
p.name,
p.schema
FROM public.projects p
WHERE (split_part(current_setting('search_path'::text), ','::text, 1) = p.schema)
)
SELECT
project.pid,
project.name,
project.schema,
( SELECT count(*) AS count
FROM preplot_lines
WHERE (preplot_lines.class = 'V'::bpchar)) AS lines,
ps.total,
ps.virgin,
ps.prime,
ps.other,
ps.ntba,
ps.remaining,
( SELECT to_json(fs.*) AS to_json
FROM final_shots fs
ORDER BY fs.tstamp
LIMIT 1) AS fsp,
( SELECT to_json(fs.*) AS to_json
FROM final_shots fs
ORDER BY fs.tstamp DESC
LIMIT 1) AS lsp,
( SELECT count(*) AS count
FROM raw_lines rl) AS seq_raw,
( SELECT count(*) AS count
FROM final_lines rl) AS seq_final,
fls.prod_duration,
fls.prod_distance,
fls.speed AS shooting_rate
FROM preplot_summary ps,
fls,
project;
END;
$outer$ LANGUAGE plpgsql;
CREATE OR REPLACE PROCEDURE pg_temp.upgrade () AS $outer$
DECLARE
row RECORD;
current_db_version TEXT;
BEGIN
SELECT value->>'db_schema' INTO current_db_version FROM public.info WHERE key = 'version';
IF current_db_version >= '0.4.5' THEN
RAISE EXCEPTION
USING MESSAGE='Patch already applied';
END IF;
IF current_db_version != '0.4.4' THEN
RAISE EXCEPTION
USING MESSAGE='Invalid database version: ' || current_db_version,
HINT='Ensure all previous patches have been applied.';
END IF;
FOR row IN
SELECT schema_name FROM information_schema.schemata
WHERE schema_name LIKE 'survey_%'
ORDER BY schema_name
LOOP
CALL pg_temp.upgrade_survey_schema(row.schema_name);
END LOOP;
END;
$outer$ LANGUAGE plpgsql;
CALL pg_temp.upgrade();
CALL pg_temp.show_notice('Cleaning up');
DROP PROCEDURE pg_temp.upgrade_survey_schema (schema_name text);
DROP PROCEDURE pg_temp.upgrade ();
CALL pg_temp.show_notice('Updating db_schema version');
INSERT INTO public.info VALUES ('version', '{"db_schema": "0.4.5"}')
ON CONFLICT (key) DO UPDATE
SET value = public.info.value || '{"db_schema": "0.4.5"}' WHERE public.info.key = 'version';
CALL pg_temp.show_notice('All done. You may now run "COMMIT;" to persist the changes');
DROP PROCEDURE pg_temp.show_notice (notice text);
--
--NOTE Run `COMMIT;` now if all went well
--

View File

@@ -1,3 +1,4 @@
module.exports = {
get: require('./get'),
refresh: require('./refresh')
};

View File

@@ -0,0 +1,23 @@
const { setSurvey } = require('../../connection');
async function refresh (projectId, opts = {}) {
try {
const client = await setSurvey(projectId);
const text = `
REFRESH MATERIALIZED VIEW project_summary;
`;
const res = await client.query(text);
client.release();
return res.rows[0];
} catch (err) {
if (err.code == "42P01") {
throw { status: 404, message: "Not found" };
} else {
throw err;
}
}
}
module.exports = refresh;

View File

@@ -11,7 +11,7 @@
"license": "UNLICENSED",
"private": true,
"config": {
"db_schema": "^0.4.2",
"db_schema": "^0.4.5",
"api": "^0.4.0"
},
"engines": {

View File

@@ -4,10 +4,11 @@ const { ALERT, ERROR, WARNING, NOTICE, INFO, DEBUG } = require('DOUGAL_ROOT/debu
function init () {
const iids = [];
function start () {
async function start () {
INFO("Initialising %d periodic tasks", tasks.length);
for (let t of tasks) {
const iid = setInterval(t.task, t.timeout);
const fn = t.init ? await t.init() : t.task;
const iid = setInterval(fn, t.timeout);
iids.push(iid);
}
return iids;

View File

@@ -1,4 +1,5 @@
module.exports = [
require('./purge-notifications')
require('./purge-notifications'),
require('./refresh-project-summary')
];

View File

@@ -0,0 +1,85 @@
const db = require('../../lib/db');
const { listen } = require('../../lib/db/notify');
const { ALERT, ERROR, WARNING, NOTICE, INFO, DEBUG } = require('DOUGAL_ROOT/debug')(__filename);
const timeout = 30*1000;
async function init () {
INFO("Setting up task");
// Full list of channels in
// ../lib/db/channels
const channels = [
"project",
"preplot_lines", "preplot_points",
"raw_lines", "raw_shots",
"final_lines", "final_shots"
];
const throttlePeriod = 10*1000;
const projects = {};
listen (channels, (data) => {
// Something important has changed,
// set the dirty flag for the relevant project
const pid = data.payload?.pid ?? data.payload?.new?.pid ?? data.payload?.old?.pid;
if (pid) {
if (!pid in projects) {
projects[pid] = {
lastRefreshed: 0
};
}
if (!projects[pid].needsRefresh) {
projects[pid].needsRefresh = true;
DEBUG("Setting up refresh flag for %s: %j", pid, projects[pid]);
}
}
});
const task = async () => {
for (pid in projects) {
const project = projects[pid];
if (project.needsRefresh) {
const now = Date.now();
const lastRefreshAge = now - project.lastRefreshed;
if (lastRefreshAge > throttlePeriod) {
// Do the actual refresh
try {
DEBUG("Refreshing", pid);
await db.project.summary.refresh(pid);
} catch (err) {
if (err.status == 404) {
DEBUG("Project %s not found. Removing from refresh list", pid);
delete projects[pid];
} else {
ERROR(err);
}
}
project.needsRefresh = false;
project.lastRefreshed = now;
}
}
}
};
// Let us populate the project list and do a first refresh on startup
for (const project of await db.project.get()) {
projects[project.pid] = {
lastRefreshed: 0,
needsRefresh: true
}
}
task(); // No need to await
return task;
}
async function cleanup () {
}
module.exports = {
init,
timeout,
cleanup
};