diff --git a/lib/www/server/lib/qc/index.js b/lib/www/server/lib/qc/index.js index 1657f43..7b444bf 100644 --- a/lib/www/server/lib/qc/index.js +++ b/lib/www/server/lib/qc/index.js @@ -5,7 +5,7 @@ const Cursor = require('pg-cursor'); const { pool, setSurvey, transaction, fetchRow } = require('../db/connection') const { project, sequence, configuration, info } = require('../db') const flattenQCDefinitions = require('./flatten'); -const { projectLastModified, sequenceLastModified } = require('./last-modified'); +const { projectHash, sequenceHash } = require('./last-modified'); const { runShotsQC, saveShotsQC } = require('./shots'); const { runSequenceQCs, saveSequenceQCs } = require('./sequences'); @@ -35,15 +35,14 @@ async function main () { if (!project.archived) { const QCTstamp = new Date(); - const projectTstamp = await projectLastModified(projectId); - const updatedOn = await info.get(projectId, "qc/updatedOn"); - const lastQCTstamp = isNaN(new Date(updatedOn)) ? -Infinity : new Date(updatedOn); - console.log("QCTstamp", QCTstamp); - console.log("projectTstamp", projectTstamp); - console.log("lastQCTstamp", lastQCTstamp); - if (projectTstamp >= lastQCTstamp) { - console.log("projectTstamp >= lastQCTstamp", projectId, projectTstamp, lastQCTstamp, projectTstamp >= lastQCTstamp); + const currentQCHash = await projectHash(projectId); + const lastQCHash = await info.get(projectId, "qc/hash"); + console.log("projectHash", projectHash); + console.log("lastQCHash", lastQCHash); + + if (currentQCHash != lastQCHash) { + console.log("currentQCHash != lastQCHash", projectId, currentQCHash, lastQCHash); // Fetch definitions and parameters const { definitions, parameters } = await getProjectQCConfig(projectId); @@ -60,14 +59,17 @@ async function main () { // Run shot QCs for (const seq of sequences) { const sequenceNumber = seq.sequence; - const sequenceTstamp = await sequenceLastModified(projectId, sequenceNumber); + const sequenceCurrentHash = await sequenceHash(projectId, sequenceNumber); + const sequenceLastQCHash = seq.meta?.lastQCHash; + console.log("sequenceCurrentHash", sequenceCurrentHash); + console.log("sequenceLastQCHash", sequenceLastQCHash); - if (sequenceTstamp >= lastQCTstamp) { + if (sequenceCurrentHash != sequenceLastQCHash) { const results = await runShotsQC(projectId, sequenceNumber, shotQCs, parameters); await saveShotsQC(projectId, {[sequenceNumber]: results}); -// console.log("Saved", sequenceNumber); + await sequenceHash(projectId, sequenceNumber, sequenceCurrentHash); } else { console.log("NOT MODIFIED: SEQ", sequenceNumber); @@ -80,7 +82,7 @@ async function main () { // Run survey-wide QCs TODO maybe - await info.put(projectId, "qc", {updatedOn: QCTstamp}, {}, null); + await info.put(projectId, "qc", {updatedOn: QCTstamp, hash: currentQCHash}, {}, null); } } } diff --git a/lib/www/server/lib/qc/last-modified.js b/lib/www/server/lib/qc/last-modified.js index 05c3de9..b07c6fc 100644 --- a/lib/www/server/lib/qc/last-modified.js +++ b/lib/www/server/lib/qc/last-modified.js @@ -38,7 +38,98 @@ async function sequenceLastModified (projectId, sequence) { return res; } +/** Return or save a hash representing the state of the project. + * + * The hash is an MD5 digest from the concatenation of all + * file paths + file hashes known to Dougal for a given + * project. + * + * The idea is that this should change every time a new + * file is imported or when an existing file is changed + * or deleted. + * + * Going only by file timestamp does not work as we may + * be importing files with timestamps older than the last + * QC run. + * + * @a projectId The ID of the project to operate on. + * @a hash If present, sets info.qc->'hash' to this value, + * if absent, return a hash. + */ +async function projectHash (projectId, hash) { + const client = await setSurvey(projectId); + + if (hash) { + const text = ` + INSERT INTO info (key, value) + VALUES ('qc', json_build_object('hash', to_jsonb($1::text))) + ON CONFLICT (key) + DO UPDATE + SET value = jsonb_set(info.value, ARRAY['hash'], to_jsonb($1::text)); + `; + + const values = [hash]; + const res = await client.query(text, values); + await client.release(); + return res; + } else { + const text = ` + SELECT md5(text) hash FROM ( + SELECT string_agg(path || E'\t' || hash, E'\n') AS text + FROM files + ) AS t; + `; + + const res = ((await client.query(text))?.rows ?? [])[0]?.hash; + await client.release(); + return res; + } +} + +/** Return or save a hash representing the state of a sequence. + * + * Analogous to projectHash() but for a specific sequence. + */ +async function sequenceHash (projectId, sequence, hash) { + const client = await setSurvey(projectId); + + if (hash) { + const text = ` + UPDATE raw_lines + SET + meta = jsonb_set(meta, array['lastQCHash'], to_jsonb($2::text)) + WHERE sequence = $1; + `; + + const values = [ sequence, hash ]; + + const res = await client.query(text, values); + await client.release(); + return res; + } else { + const text = ` + SELECT sequence, md5(string_agg(hash, E'\t')) AS hash + FROM ( + SELECT * + FROM raw_lines_files + UNION SELECT * + FROM final_lines_files + ) AS t + GROUP BY sequence + HAVING sequence = $1; + `; + + const values = [ sequence ]; + + const res = ((await client.query(text, values))?.rows ?? [])[0]?.hash; + await client.release(); + return res; + } +} + module.exports = { projectLastModified, - sequenceLastModified + sequenceLastModified, + projectHash, + sequenceHash };