Merge branch '184-refactor-qcs' into devel

This commit is contained in:
D. Berge
2022-03-17 18:37:14 +01:00
2 changed files with 107 additions and 14 deletions

View File

@@ -5,7 +5,7 @@ const Cursor = require('pg-cursor');
const { pool, setSurvey, transaction, fetchRow } = require('../db/connection')
const { project, sequence, configuration, info } = require('../db')
const flattenQCDefinitions = require('./flatten');
const { projectLastModified, sequenceLastModified } = require('./last-modified');
const { projectHash, sequenceHash } = require('./last-modified');
const { runShotsQC, saveShotsQC } = require('./shots');
const { runSequenceQCs, saveSequenceQCs } = require('./sequences');
@@ -35,15 +35,14 @@ async function main () {
if (!project.archived) {
const QCTstamp = new Date();
const projectTstamp = await projectLastModified(projectId);
const updatedOn = await info.get(projectId, "qc/updatedOn");
const lastQCTstamp = isNaN(new Date(updatedOn)) ? -Infinity : new Date(updatedOn);
console.log("QCTstamp", QCTstamp);
console.log("projectTstamp", projectTstamp);
console.log("lastQCTstamp", lastQCTstamp);
if (projectTstamp >= lastQCTstamp) {
console.log("projectTstamp >= lastQCTstamp", projectId, projectTstamp, lastQCTstamp, projectTstamp >= lastQCTstamp);
const currentQCHash = await projectHash(projectId);
const lastQCHash = await info.get(projectId, "qc/hash");
console.log("projectHash", projectHash);
console.log("lastQCHash", lastQCHash);
if (currentQCHash != lastQCHash) {
console.log("currentQCHash != lastQCHash", projectId, currentQCHash, lastQCHash);
// Fetch definitions and parameters
const { definitions, parameters } = await getProjectQCConfig(projectId);
@@ -60,14 +59,17 @@ async function main () {
// Run shot QCs
for (const seq of sequences) {
const sequenceNumber = seq.sequence;
const sequenceTstamp = await sequenceLastModified(projectId, sequenceNumber);
const sequenceCurrentHash = await sequenceHash(projectId, sequenceNumber);
const sequenceLastQCHash = seq.meta?.lastQCHash;
console.log("sequenceCurrentHash", sequenceCurrentHash);
console.log("sequenceLastQCHash", sequenceLastQCHash);
if (sequenceTstamp >= lastQCTstamp) {
if (sequenceCurrentHash != sequenceLastQCHash) {
const results = await runShotsQC(projectId, sequenceNumber, shotQCs, parameters);
await saveShotsQC(projectId, {[sequenceNumber]: results});
// console.log("Saved", sequenceNumber);
await sequenceHash(projectId, sequenceNumber, sequenceCurrentHash);
} else {
console.log("NOT MODIFIED: SEQ", sequenceNumber);
@@ -80,7 +82,7 @@ async function main () {
// Run survey-wide QCs TODO maybe
await info.put(projectId, "qc", {updatedOn: QCTstamp}, {}, null);
await info.put(projectId, "qc", {updatedOn: QCTstamp, hash: currentQCHash}, {}, null);
}
}
}

View File

@@ -38,7 +38,98 @@ async function sequenceLastModified (projectId, sequence) {
return res;
}
/** Return or save a hash representing the state of the project.
 *
 * The hash is an MD5 digest from the concatenation of all
 * file paths + file hashes known to Dougal for a given
 * project.
 *
 * The idea is that this should change every time a new
 * file is imported or when an existing file is changed
 * or deleted.
 *
 * Going only by file timestamp does not work as we may
 * be importing files with timestamps older than the last
 * QC run.
 *
 * @a projectId The ID of the project to operate on.
 * @a hash If present, sets info.qc->'hash' to this value,
 * if absent, return a hash.
 */
async function projectHash (projectId, hash) {
  const client = await setSurvey(projectId);
  // Release the client even if the query throws, otherwise the
  // pool connection leaks on error.
  try {
    if (hash) {
      // Upsert the hash into info.qc->'hash'.
      const text = `
        INSERT INTO info (key, value)
        VALUES ('qc', json_build_object('hash', to_jsonb($1::text)))
        ON CONFLICT (key)
        DO UPDATE
        SET value = jsonb_set(info.value, ARRAY['hash'], to_jsonb($1::text));
      `;
      return await client.query(text, [hash]);
    } else {
      // Compute the current project-wide hash from all known files.
      const text = `
        SELECT md5(text) hash FROM (
          SELECT string_agg(path || E'\t' || hash, E'\n') AS text
          FROM files
        ) AS t;
      `;
      // May be undefined if the files table is empty.
      return ((await client.query(text))?.rows ?? [])[0]?.hash;
    }
  } finally {
    await client.release();
  }
}
/** Return or save a hash representing the state of a sequence.
 *
 * Analogous to projectHash() but for a specific sequence:
 * the hash is an MD5 digest over the file hashes of the
 * sequence's raw and final line files.
 *
 * @a projectId The ID of the project to operate on.
 * @a sequence The sequence number to operate on.
 * @a hash If present, sets raw_lines.meta->'lastQCHash' to this
 * value for the sequence; if absent, return the current hash.
 */
async function sequenceHash (projectId, sequence, hash) {
  const client = await setSurvey(projectId);
  // Release the client even if the query throws, otherwise the
  // pool connection leaks on error.
  try {
    if (hash) {
      // Persist the last-QC hash in the sequence's metadata.
      const text = `
        UPDATE raw_lines
        SET
          meta = jsonb_set(meta, array['lastQCHash'], to_jsonb($2::text))
        WHERE sequence = $1;
      `;
      return await client.query(text, [ sequence, hash ]);
    } else {
      // Compute the current hash over raw + final line files
      // for this sequence.
      const text = `
        SELECT sequence, md5(string_agg(hash, E'\t')) AS hash
        FROM (
          SELECT *
          FROM raw_lines_files
          UNION SELECT *
          FROM final_lines_files
        ) AS t
        GROUP BY sequence
        HAVING sequence = $1;
      `;
      // May be undefined if the sequence has no files.
      return ((await client.query(text, [ sequence ]))?.rows ?? [])[0]?.hash;
    }
  } finally {
    await client.release();
  }
}
module.exports = {
projectLastModified,
sequenceLastModified
sequenceLastModified,
projectHash,
sequenceHash
};