mirror of
https://gitlab.com/wgp/dougal/software.git
synced 2025-12-06 13:07:08 +00:00
Use digests rather than timestamps for QC execution.
Using timestamps does not work as we might be importing files with timestamps older than the last QC run. Those would not be detected by a timestamp-based method but would be by this digest-based approach. There is a project-wide digest and per-sequence digests. The former takes the paths and hashes of all files known to Dougal for this project (the `files` table), concatenates them and computes the MD5 checksum. Sequence digests do the same but only include the files related to that sequence.
This commit is contained in:
@@ -5,7 +5,7 @@ const Cursor = require('pg-cursor');
|
||||
const { pool, setSurvey, transaction, fetchRow } = require('../db/connection')
|
||||
const { project, sequence, configuration, info } = require('../db')
|
||||
const flattenQCDefinitions = require('./flatten');
|
||||
const { projectLastModified, sequenceLastModified } = require('./last-modified');
|
||||
const { projectHash, sequenceHash } = require('./last-modified');
|
||||
|
||||
const { runShotsQC, saveShotsQC } = require('./shots');
|
||||
const { runSequenceQCs, saveSequenceQCs } = require('./sequences');
|
||||
@@ -35,15 +35,14 @@ async function main () {
|
||||
|
||||
if (!project.archived) {
|
||||
const QCTstamp = new Date();
|
||||
const projectTstamp = await projectLastModified(projectId);
|
||||
const updatedOn = await info.get(projectId, "qc/updatedOn");
|
||||
const lastQCTstamp = isNaN(new Date(updatedOn)) ? -Infinity : new Date(updatedOn);
|
||||
console.log("QCTstamp", QCTstamp);
|
||||
console.log("projectTstamp", projectTstamp);
|
||||
console.log("lastQCTstamp", lastQCTstamp);
|
||||
|
||||
if (projectTstamp >= lastQCTstamp) {
|
||||
console.log("projectTstamp >= lastQCTstamp", projectId, projectTstamp, lastQCTstamp, projectTstamp >= lastQCTstamp);
|
||||
const currentQCHash = await projectHash(projectId);
|
||||
const lastQCHash = await info.get(projectId, "qc/hash");
|
||||
console.log("projectHash", projectHash);
|
||||
console.log("lastQCHash", lastQCHash);
|
||||
|
||||
if (currentQCHash != lastQCHash) {
|
||||
console.log("currentQCHash != lastQCHash", projectId, currentQCHash, lastQCHash);
|
||||
|
||||
// Fetch definitions and parameters
|
||||
const { definitions, parameters } = await getProjectQCConfig(projectId);
|
||||
@@ -60,14 +59,17 @@ async function main () {
|
||||
// Run shot QCs
|
||||
for (const seq of sequences) {
|
||||
const sequenceNumber = seq.sequence;
|
||||
const sequenceTstamp = await sequenceLastModified(projectId, sequenceNumber);
|
||||
const sequenceCurrentHash = await sequenceHash(projectId, sequenceNumber);
|
||||
const sequenceLastQCHash = seq.meta?.lastQCHash;
|
||||
console.log("sequenceCurrentHash", sequenceCurrentHash);
|
||||
console.log("sequenceLastQCHash", sequenceLastQCHash);
|
||||
|
||||
if (sequenceTstamp >= lastQCTstamp) {
|
||||
if (sequenceCurrentHash != sequenceLastQCHash) {
|
||||
|
||||
const results = await runShotsQC(projectId, sequenceNumber, shotQCs, parameters);
|
||||
|
||||
await saveShotsQC(projectId, {[sequenceNumber]: results});
|
||||
// console.log("Saved", sequenceNumber);
|
||||
await sequenceHash(projectId, sequenceNumber, sequenceCurrentHash);
|
||||
|
||||
} else {
|
||||
console.log("NOT MODIFIED: SEQ", sequenceNumber);
|
||||
@@ -80,7 +82,7 @@ async function main () {
|
||||
|
||||
// Run survey-wide QCs TODO maybe
|
||||
|
||||
await info.put(projectId, "qc", {updatedOn: QCTstamp}, {}, null);
|
||||
await info.put(projectId, "qc", {updatedOn: QCTstamp, hash: currentQCHash}, {}, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,98 @@ async function sequenceLastModified (projectId, sequence) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Return or save a hash representing the state of the project.
 *
 * The hash is an MD5 digest from the concatenation of all
 * file paths + file hashes known to Dougal for a given
 * project (the `files` table).
 *
 * The idea is that this should change every time a new
 * file is imported or when an existing file is changed
 * or deleted.
 *
 * Going only by file timestamp does not work as we may
 * be importing files with timestamps older than the last
 * QC run.
 *
 * @param projectId The ID of the project to operate on.
 * @param [hash] If present (truthy), sets info.qc->'hash' to this
 * value; if absent, a freshly computed hash is returned.
 *
 * @returns When `hash` is given, the result of the upsert query.
 * Otherwise the computed MD5 digest, or `undefined` if the
 * query returned no row.
 */
async function projectHash (projectId, hash) {
  const client = await setSurvey(projectId);

  // The client is released in `finally` so that a failing query
  // does not leak the connection obtained from setSurvey().
  try {
    if (hash) {
      const text = `
        INSERT INTO info (key, value)
        VALUES ('qc', json_build_object('hash', to_jsonb($1::text)))
        ON CONFLICT (key)
        DO UPDATE
        SET value = jsonb_set(info.value, ARRAY['hash'], to_jsonb($1::text));
      `;

      const values = [hash];
      return await client.query(text, values);
    }

    // ORDER BY inside the aggregate makes the digest independent of
    // the order in which the rows happen to be scanned; without it
    // the same set of files could produce different digests.
    const text = `
      SELECT md5(text) hash FROM (
        SELECT string_agg(path || E'\t' || hash, E'\n' ORDER BY path, hash) AS text
        FROM files
      ) AS t;
    `;

    const res = await client.query(text);
    return (res?.rows ?? [])[0]?.hash;
  } finally {
    client.release();
  }
}
|
||||
|
||||
/** Return or save a hash representing the state of a sequence.
 *
 * Analogous to projectHash() but for a specific sequence: the
 * digest is computed from the hashes of the rows for that sequence
 * in `raw_lines_files` and `final_lines_files`.
 *
 * @param projectId The ID of the project to operate on.
 * @param sequence The sequence number to operate on.
 * @param [hash] If present (truthy), stores this value under
 * meta->'lastQCHash' of the matching `raw_lines` row; if absent,
 * a freshly computed hash is returned.
 *
 * @returns When `hash` is given, the result of the update query.
 * Otherwise the computed MD5 digest, or `undefined` if the
 * query returned no row for this sequence.
 */
async function sequenceHash (projectId, sequence, hash) {
  const client = await setSurvey(projectId);

  // The client is released in `finally` so that a failing query
  // does not leak the connection obtained from setSurvey().
  try {
    if (hash) {
      // jsonb_set() returns NULL when its target is NULL, so a NULL
      // `meta` is treated as an empty object to make sure the hash
      // is actually stored.
      const text = `
        UPDATE raw_lines
        SET
          meta = jsonb_set(COALESCE(meta, '{}'::jsonb), array['lastQCHash'], to_jsonb($2::text))
        WHERE sequence = $1;
      `;

      const values = [ sequence, hash ];
      return await client.query(text, values);
    }

    // ORDER BY inside the aggregate makes the digest independent of
    // the order in which the rows happen to be scanned; without it
    // the same set of files could produce different digests.
    const text = `
      SELECT sequence, md5(string_agg(hash, E'\t' ORDER BY hash)) AS hash
      FROM (
        SELECT *
        FROM raw_lines_files
        UNION SELECT *
        FROM final_lines_files
      ) AS t
      GROUP BY sequence
      HAVING sequence = $1;
    `;

    const values = [ sequence ];
    const res = await client.query(text, values);
    return (res?.rows ?? [])[0]?.hash;
  } finally {
    client.release();
  }
}
|
||||
|
||||
module.exports = {
|
||||
projectLastModified,
|
||||
sequenceLastModified
|
||||
sequenceLastModified,
|
||||
projectHash,
|
||||
sequenceHash
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user