Mirror of https://gitlab.com/wgp/dougal/software.git (synced 2025-12-06 11:37:08 +00:00)
Add comparison functions to server/lib

563 lib/www/server/lib/comparisons/geometric-differences.js (Normal file)
@@ -0,0 +1,563 @@
const d3a = require('d3-array');
const { DougalBinaryBundle } = require('@dougal/binary');
const { pool, setSurvey } = require('../db/connection');
const db = require('../db');
const { bundle } = require('../binary/bundle');
const setops = require('../utils/setops');
const { ijRMS, combinations, computeSample } = require('./utils');
const { computePCA } = require('./pca');
const { ERROR, INFO, DEBUG } = require('DOUGAL_ROOT/debug')(__filename);


async function fetchErrors (pid) {

  const client = await setSurvey(pid);

  try {
    const text = `
      SELECT
        fs.line, fs.point,
        ij_error(fs.line::double precision, fs.point::double precision, fs.geometry)::json AS errorfinal
      FROM
        final_shots fs
      ORDER BY fs.line, fs.point;
    `;

    const res = await client.query(text);

    return res.rows.map( row =>
      [row.line, row.point, row.errorfinal.coordinates[0], row.errorfinal.coordinates[1]]
    );
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}
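
// Shape sketch (values hypothetical): fetchErrors() resolves to one numeric row per final shot,
//   await fetchErrors(pid)
//   // => [ [1001, 2045, 0.31, -0.12], [1001, 2046, 0.28, -0.07], ... ]
// i.e. [line, point, εi, εj], which is the shape geometric_differences() consumes below.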


async function groups () {
  const projects = await db.project.get();
  const groupNames = [
    ...projects
      .reduce( (acc, cur) => { cur.groups.forEach( g => acc.add(g) ); return acc; }, new Set() )
  ].filter( i => !!i );

  return Object.fromEntries(groupNames.map( g => [g, projects.filter( p => p.groups.includes(g) )] ));
}

/*
async function compare (baselineProjectID, monitorProjectID) {
  console.log("Getting baseline", baselineProjectID);
  const baselineData = await db.sequence.get(baselineProjectID);
  console.log("Getting monitor", monitorProjectID);
  const monitorData = await db.sequence.get(monitorProjectID);
  console.log("Comparing");

  const comparison = comparisonGeometricDifferences(baselineData, monitorData);
  return comparison;
}
*/

function geometric_differences (baseline, monitor) {

  if (!baseline || !baseline.length) {
    throw new Error("No baseline data");
  }

  if (!monitor || !monitor.length) {
    throw new Error("No monitor data");
  }

  const comparison = []; // An array of [ line, point, εi, εj ]; line + point may be repeated

  for (const bp of baseline) {
    const monitor_points = monitor.filter( mp => mp[0] === bp[0] && mp[1] === bp[1] );

    if (!monitor_points.length) {
      // console.log(`No match for L${bp[0]} P${bp[1]}`);
      continue;
    }

    for (const mp of monitor_points) {
      const εi = mp[2] - bp[2], εj = mp[3] - bp[3];
      comparison.push([bp[0], bp[1], εi, εj]);
    }

  }

  return comparison;
}
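
// Usage sketch (hypothetical rows): differences are monitor minus baseline for each matched (line, point),
//   const baseline = [ [1001, 2045, 0.10,  0.05] ];
//   const monitor  = [ [1001, 2045, 0.40, -0.05] ];
//   geometric_differences(baseline, monitor)
//   // => [ [1001, 2045, 0.30, -0.10] ]   (εi = 0.40 - 0.10, εj = -0.05 - 0.05)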

async function compare (baselineProjectID, monitorProjectID, infoObj) {
  console.log("Getting baseline", baselineProjectID);
  const baselineData = await fetchErrors(baselineProjectID);
  console.log("Getting monitor", monitorProjectID);
  const monitorData = await fetchErrors(monitorProjectID);
  console.log("Comparing");

  const comparison = geometric_differences(baselineData, monitorData);

  if (infoObj instanceof Object) {
    const baselineIJ = baselineData.map(i => i.slice(0,2));
    const monitorIJ = monitorData.map(i => i.slice(0,2));

    infoObj.compared = comparison.length;
    infoObj.baselineLength = baselineData.length;
    infoObj.monitorLength = monitorData.length;
    infoObj.baselineUniqueLength = setops.unique(baselineIJ).length;
    infoObj.monitorUniqueLength = setops.unique(monitorIJ).length;
    infoObj.common = setops.intersection(baselineIJ, monitorIJ).length;
  }

  return comparison;
}


async function save (baselineProjectID, monitorProjectID, bundle, meta) {
  const info = {};
  if (!bundle) {
    const comparison = await compare(baselineProjectID, monitorProjectID, info);
    if (comparison.length) {
      bundle = asBundle(comparison);
    } else {
      console.warn(`No matching points between ${baselineProjectID} and ${monitorProjectID}`);
      return;
    }
  } else if (!(bundle instanceof DougalBinaryBundle)) {
    throw new Error("Illegal data: `bundle` must be null or of type DougalBinaryBundle");
  }

  if (!bundle.byteLength) {
    console.warn(`Empty comparison results between ${baselineProjectID} and ${monitorProjectID}. Refusing to store`);
    return;
  }

  meta = {tstamp: (new Date()), ...info, ...stats(bundle), ...meta};

  console.log("Storing in database");
  const client = await pool.connect();

  try {
    const text = `
      INSERT INTO comparisons.comparisons
        (type, baseline_pid, monitor_pid, data, meta)
      VALUES ('geometric_difference', $1, $2, $3, $4)
      ON CONFLICT (type, baseline_pid, monitor_pid)
      DO UPDATE SET
        data = EXCLUDED.data,
        meta = EXCLUDED.meta;
    `;

    const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ];
    const res = await client.query(text, values);
    return res.rowCount;
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}

async function saveSample (baselineProjectID, monitorProjectID, opts = {}) {
  let sample = opts.sample;
  let populationStats = opts.populationStats;
  let sampleStats = opts.sampleStats;

  if (!sample?.length) {
    const sampleSize = opts.sampleSize ?? 2000;
    const record = await get(baselineProjectID, monitorProjectID);
    let data;

    if (record) {
      data = record.data;
    } else {
      console.log("Full data not found in database");
      data = asBundle(await compare(baselineProjectID, monitorProjectID));
    }

    sample = computeSample(data, opts);

    if (!populationStats) {
      populationStats = stats(data);
    }
  }

  const bundle = asBundle(sample, {type: 0x0c});

  if (!sampleStats) {
    sampleStats = stats(bundle);
  }

  const meta = {tstamp: (new Date()), populationStats, sampleStats, ...(opts.meta??{})};

  const client = await pool.connect();

  try {
    const text = `
      INSERT INTO comparisons.comparisons
        (type, baseline_pid, monitor_pid, data, meta)
      VALUES ('geometric_difference_sample', $1, $2, $3, $4)
      ON CONFLICT (type, baseline_pid, monitor_pid)
      DO UPDATE SET
        data = EXCLUDED.data,
        meta = EXCLUDED.meta;
    `;

    const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ];
    const res = await client.query(text, values);
    return res.rowCount;
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}

async function get (baselineProjectID, monitorProjectID, type = 'geometric_difference') {

  const client = await pool.connect();

  try {

    const text = `
      SELECT data, meta
      FROM comparisons.comparisons
      WHERE type = $3 AND baseline_pid = $1 AND monitor_pid = $2;
    `;

    const values = [ baselineProjectID, monitorProjectID, type ];
    const res = await client.query(text, values);
    if (!res.rows.length) {
      console.log("Comparison not found in database");
      return;
    }

    const { data, meta } = res.rows[0];
    return {
      data: DougalBinaryBundle.clone(data),
      meta
    };
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}


async function getSample (baselineProjectID, monitorProjectID) {
  return await get(baselineProjectID, monitorProjectID, 'geometric_difference_sample');
}


async function remove (baselineProjectID, monitorProjectID) {
  const client = await pool.connect();

  try {
    const text = `
      DELETE
      FROM comparisons.comparisons
      WHERE
        (type = 'geometric_difference' OR type = 'geometric_difference_sample')
        AND baseline_pid = $1
        AND monitor_pid = $2;
    `;

    const values = [ baselineProjectID, monitorProjectID ];

    const res = await client.query(text, values);
    return res.rowCount;
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}

function stats (comparison) {
  let i, j, δi, δj;

  if (comparison instanceof DougalBinaryBundle) {
    console.log("Computing stats");
    const udv = comparison.chunks()[0]?.udv;

    if (!udv) {
      console.error("Could not determine udv from first chunk");
      console.log(comparison.chunks());
      return;
    }

    let records;

    if (udv == 0xa) {
      records = comparison.records;

      // Transpose the records
      [ i, j, δi, δj ] = Array.from({ length: 4 }, (_, col) => records.map(row => row[col]));
    } else if (udv == 0xc) {
      records = comparison.records;
      let _;
      [ _, _, i, j, δi, δj ] = Array.from({ length: 6 }, (_, col) => records.map(row => row[col]));
    } else {
      throw new Error(`Unrecognised DougalBinaryBundle User Defined Value: ${udv}`);
    }

    return {
      length: records.length,
      μ: [ d3a.mean(δi), d3a.mean(δj) ],
      σ: [ d3a.deviation(δi), d3a.deviation(δj) ],
      //rms: ijRMS(δi, δj),
      ...computePCA(records)
    }
  } else if (Array.isArray(comparison)) {
    if (Array.isArray(comparison[0])) {
      return stats(asBundle(comparison, {type: 0xc}));
    } else {
      // Assume object
      return stats(asBundle(comparison));
    }
  }
}
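
// Usage sketch: stats() accepts either a DougalBinaryBundle or a plain array of comparison
// records (an array is re-bundled via asBundle() first). For a recognised bundle it returns
// an object of the form (field values hypothetical):
//   { length: 1234, μ: [0.02, -0.01], σ: [0.35, 0.28], ...computePCA(records) }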


/** Compare two projects' errorfinal quantities.
 *
 * Assumes that the preplots are the same.
 * It is not a terribly efficient way of doing it, but considering
 * that this is, by and large, only going to be done once every few
 * hours for an active prospect, and never for inactive ones, I
 * think and hope we can live with that.
 *
 * `baseline` and `monitor` are the result of calling
 * db.sequence.get(projectId) on each of the respective
 * projects.
 */
/*
function comparisonGeometricDifferences (baseline, monitor) {

  if (!baseline || !baseline.length) {
    throw new Error("No baseline data");
  }

  if (!monitor || !monitor.length) {
    throw new Error("No monitor data");
  }

  const comparison = []; // An array of { line, point, εi, εj, δts }; line + point may be repeated

  for (const bp of baseline) {
    if (!bp.errorfinal) {
      console.log(`No final data for baseline point L${bp.line} S${bp.sequence} P${bp.point}`);
      continue;
    }

    const monitor_points = monitor.filter( mp => mp.line === bp.line && mp.point === bp.point );
    for (const mp of monitor_points) {
      if (!mp.errorfinal) {
        console.log(`No final data for monitor point L${mp.line} S${mp.sequence} P${mp.point}`);
        continue;
      }

      const line = bp.line;
      const point = bp.point;
      const baseSeq = bp.sequence;
      const monSeq = mp.sequence;
      const baseTStamp = bp.tstamp;
      const monTStamp = mp.tstamp;
      const δi = bp.errorfinal.coordinates[0] - mp.errorfinal.coordinates[0];
      const δj = bp.errorfinal.coordinates[1] - mp.errorfinal.coordinates[1];

      const obj = {line, point, baseSeq, monSeq, baseTStamp, monTStamp, δi, δj};
      comparison.push(obj);
      // console.log(obj);
    }
  }

  return comparison.sort(sortFn);
}

function sortComparison (comparison) {
  return comparison.sort( (a, b) => {
    if (a.line == b.line) {
      if (a.point == b.point) {
        return a.baseTStamp - b.baseTStamp;
      } else {
        return a.point - b.point;
      }
    } else {
      return a.line - b.line;
    }
  })
}
*/

function sortFn (a, b) {
  if (a.line == b.line) {
    if (a.point == b.point) {
      return a.baseTStamp - b.baseTStamp;
    } else {
      return a.point - b.point;
    }
  } else {
    return a.line - b.line;
  }
}

function asBundle (comparison, opts = {type: 0x0a}) {
  return DougalBinaryBundle.clone(bundle(comparison, opts));
}

function fromBundle (bundle) {
  if (!(bundle instanceof DougalBinaryBundle)) {
    bundle = DougalBinaryBundle.clone(bundle);
  }

  const json = [];
  for (const record of bundle) {
    record.shift();
    json.push(record);
  }
  return json;
}

async function saveGroup (group, opts = {}) {
  if (group == null) {
    // Save everything
    const g = await groups();
    for (const group of Object.values(g)) {
      await saveGroup(group)
    }
  } else if (typeof group === "string") {
    // This is a group name
    const g = await groups();
    group = g[group];
  }

  if (Array.isArray(group)) {
    const pids = group.map( i => i.pid ).sort();

    for (const [ baselineProjectID, monitorProjectID ] of combinations(pids, 2)) {
      try {
        const isSaved = await save(baselineProjectID, monitorProjectID);
        if (isSaved) {
          await saveSample(baselineProjectID, monitorProjectID, opts.sampleOpts);
        } else {
          await remove(baselineProjectID, monitorProjectID);
        }
        DEBUG("Saved comparison between %s and %s", baselineProjectID, monitorProjectID);
      } catch (err) {
        console.error(err);
        ERROR("Error saving comparison between %s and %s", baselineProjectID, monitorProjectID);
      }
    }
  }
}

/*
async function getGroup (groupName, opts = {}) {

  const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort();

  if (!group?.length) return;

  const client = await pool.connect();

  try {

    const text = `
      -- SQL query goes here
    `;

    const values = combinations(group, 2);
    const res = await client.query(text, values);
    if (!res.rows.length) {
      console.log("Comparison not found in database");
      return;
    }

    if (opts.returnData) {
      return res.rows.map( row => ({
        data: DougalBinaryBundle.clone(row.data),
        meta: row.meta
      }));
    } else {
      return res.rows.map( row => row.meta );
    }
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}
*/


async function getGroup (groupName, opts = {}) {

  const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort();

  if (!group?.length) return;

  const client = await pool.connect();

  try {

    const pairs = combinations(group, 2);
    const flatValues = pairs.flat();
    const placeholders = [];
    for (let i = 0; i < pairs.length; i++) {
      placeholders.push(`($${i * 2 + 1}, $${i * 2 + 2})`);
    }
    const inClause = placeholders.join(',');
    const selectFields = opts.returnData ? 'data, meta' : 'meta';

    const text = `
      SELECT baseline_pid, monitor_pid, ${selectFields}
      FROM comparisons.comparisons
      WHERE type = 'geometric_difference'
        AND (baseline_pid, monitor_pid) IN (VALUES ${inClause})
      ORDER BY baseline_pid, monitor_pid
    `;

    console.log(text);
    console.log(flatValues);

    const res = await client.query(text, flatValues);
    if (!res.rows.length) {
      console.log("Comparison not found in database");
      return;
    }

    if (opts.returnData) {
      return res.rows.map( row => ({
        ...row,
        data: DougalBinaryBundle.clone(row.data),
      }));
    } else {
      return res.rows;
    }
  } catch (err) {
    console.error(err);
  } finally {
    client.release();
  }
}

module.exports = {
  groups,
  fetchErrors,
  compare,
  computeSample,
  get,
  save,
  getSample,
  saveSample,
  saveGroup,
  getGroup,
  remove,
  stats,
  // comparisonGeometricDifferences,
  asBundle,
  fromBundle
};

4 lib/www/server/lib/comparisons/index.js (Normal file)
@@ -0,0 +1,4 @@

module.exports = {
  ...require('./geometric-differences')
}

80 lib/www/server/lib/comparisons/pca.js (Normal file)
@@ -0,0 +1,80 @@
const math = require('mathjs');

/**
 * Compute PCA (eigenvectors and eigenvalues) for deviation data to assess geometric repeatability.
 * @param {Array<Array<number>>} deviations - Array of [point, line, i_deviation, j_deviation]
 * @returns {Object} - { eigenvalues, eigenvectors, rms, anisotropy, primaryDirection }
 */
function computePCA(deviations) {
  // Extract i_deviation and j_deviation
  const deviationMatrix = deviations.map(row => [row[2], row[3]]);

  // Convert to mathjs matrix
  const D = math.matrix(deviationMatrix);

  // Compute mean for centering (length-2 vector of column means)
  const mean = math.mean(D, 0);

  // Explicitly repeat the mean to match D's shape (n x 2)
  const n = deviationMatrix.length;
  const meanRow = mean.toArray ? mean.toArray() : mean;
  const meanRepeated = math.matrix(Array.from({ length: n }, () => [...meanRow]));

  // Center the data
  const centered = math.subtract(D, meanRepeated);

  // Compute covariance matrix: (1/(n-1)) * (D_centered^T * D_centered)
  const covMatrix = math.multiply(
    math.multiply(1 / (n - 1), math.transpose(centered)),
    centered
  );

  // Perform eigen decomposition
  const result = math.eigs(covMatrix);
  let eigenvalues = result.values;
  const evObjs = result.eigenvectors;

  // Convert eigenvalues to array if it's a matrix
  eigenvalues = Array.isArray(eigenvalues) ? eigenvalues : eigenvalues.toArray();

  // Create pairs and convert vector to array if necessary
  const pairs = eigenvalues.map((val, i) => {
    let vec = evObjs[i].vector;
    if (vec.toArray) vec = vec.toArray();
    return { val, vec };
  });

  // Sort by descending eigenvalues
  pairs.sort((a, b) => b.val - a.val);

  // Sorted eigenvalues
  const sortedEigenvalues = pairs.map(p => p.val);

  // Build eigenvector matrix: rows as components, columns as eigenvectors
  const dimension = pairs[0].vec.length; // e.g., 2
  const evecRows = [];
  for (let comp = 0; comp < dimension; comp++) {
    evecRows.push(pairs.map(p => p.vec[comp]));
  }
  const sortedEigenvectors = math.matrix(evecRows);

  // Compute RMS errors along principal axes
  const rms = sortedEigenvalues.map(val => Math.sqrt(Math.max(val, 0)));

  // Compute anisotropy (ratio of major to minor axis variance)
  const anisotropy = sortedEigenvalues[0] / (sortedEigenvalues[1] || 1); // Avoid division by zero

  // Primary direction (angle in degrees of major eigenvector)
  const primaryVector = sortedEigenvectors.subset(math.index([0, 1], 0)).toArray().flat();
  const primaryDirection = Math.atan2(primaryVector[1], primaryVector[0]) * 180 / Math.PI;

  return {
    eigenvalues: sortedEigenvalues,
    eigenvectors: sortedEigenvectors.toArray(),
    rms: rms, // RMS errors along major/minor axes
    anisotropy: anisotropy, // Ratio of variances
    primaryDirection: primaryDirection // Angle of major axis (degrees)
  };
}
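
// Example (hypothetical deviations): rows are [point, line, i_deviation, j_deviation],
//   computePCA([ [1, 1001, 0.2, 0.1], [2, 1001, -0.1, 0.3], [3, 1002, 0.0, -0.2] ])
//   // => { eigenvalues: [...], eigenvectors: [...], rms: [...], anisotropy: ..., primaryDirection: ... }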


module.exports = { computePCA };

310 lib/www/server/lib/comparisons/utils.js (Normal file)
@@ -0,0 +1,310 @@
const d3 = require('d3-array');

// Function to calculate the root mean square (RMS) of position deviations
// This computes the RMS of the Euclidean distances: sqrt( (1/n) * sum(δi² + δj²) )
// Assumes deviations are already centered (mean deviation ~0); if normalization by std dev or range is needed, adjust accordingly
function ijRMS(δi, δj) {
  if (!δi.length || !δj.length) return 0;

  if (δi.length != δj.length) {
    console.warn(`δi and δj have different lengths!`);
  }

  let sumSquares = 0;
  const n = Math.min(δi.length, δj.length);

  for (let i = 0; i < n; i++) {
    sumSquares += (δi[i] * δi[i]) + (δj[i] * δj[i]);
  }

  const meanSquare = sumSquares / n;
  const rms = Math.sqrt(meanSquare);

  return rms;
}
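
// Worked example: ijRMS([3, 0], [4, 0])
//   sumSquares = (3² + 4²) + (0² + 0²) = 25, n = 2, so rms = sqrt(25 / 2) ≈ 3.54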


/**
 * Performs stratified sampling on an array of [line, point, δi, δj] data points.
 * Groups by line and samples proportionally to preserve shape and spread.
 *
 * @param {Array<Array<number>>} data - Input data: [[line, point, δi, δj], ...]
 * @param {number} sampleSize - Target number of samples (e.g., 2000)
 * @returns {Array<Array<number>>} Sampled data in same format
 */
function old_stratifiedSample(data, sampleSize) {
  if (!Array.isArray(data) || data.length === 0) return [];
  if (!Number.isInteger(sampleSize) || sampleSize <= 0) {
    throw new Error('sampleSize must be a positive integer');
  }

  // Group data by line (first element)
  const grouped = d3.group(data, d => d[0]);
  const totalSize = data.length;
  const sampled = [];

  // Ensure sampleSize doesn't exceed data size
  const effectiveSampleSize = Math.min(sampleSize, totalSize);

  // Iterate over each line group
  for (const [line, group] of grouped) {
    // Calculate proportional sample size for this group
    const groupSize = group.length;
    const groupSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize));

    // Shuffle group and take first N elements
    const shuffled = d3.shuffle([...group]);
    sampled.push(...shuffled.slice(0, groupSampleSize));
  }

  // If sampled size is slightly off due to rounding, adjust
  if (sampled.length > effectiveSampleSize) {
    return d3.shuffle(sampled).slice(0, effectiveSampleSize);
  } else if (sampled.length < effectiveSampleSize) {
    // Pad with random samples from entire dataset if needed
    const remaining = effectiveSampleSize - sampled.length;
    const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining);
    sampled.push(...additional);
  }

  return sampled;
}

/**
 * Performs stratified sampling on an array of [line, point, δi, δj] data points.
 * Stratifies by line and δi quantiles to preserve shape and spread, with outlier control.
 *
 * @param {Array<Array<number>>} data - Input data: [[line, point, δi, δj], ...]
 * @param {number} sampleSize - Target number of samples (e.g., 2000)
 * @param {number} [binsPerLine=10] - Number of δi quantile bins per line
 * @returns {Array<Array<number>>} Sampled data in same format
 */
function stratifiedSample(data, sampleSize, binsPerLine = 10) {
  if (!Array.isArray(data) || data.length === 0) return [];
  if (!Number.isInteger(sampleSize) || sampleSize <= 0) {
    throw new Error('sampleSize must be a positive integer');
  }
  if (!Number.isInteger(binsPerLine) || binsPerLine <= 0) {
    throw new Error('binsPerLine must be a positive integer');
  }

  const totalSize = data.length;
  const effectiveSampleSize = Math.min(sampleSize, totalSize);
  const sampled = [];

  // Group by line
  const groupedByLine = d3.group(data, d => d[0]);

  // Compute population stats for validation
  const populationStats = computeStats(data);

  // Iterate over each line
  for (const [line, group] of groupedByLine) {
    const groupSize = group.length;
    const lineSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize));

    // Create quantile-based bins for δi
    const δiValues = group.map(d => d[2]).sort(d3.ascending);
    const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiValues, i / binsPerLine));
    const binnedData = group.map(d => {
      const δi = d[2];
      let binIndex = 0;
      for (let i = 0; i < binsPerLine; i++) {
        if (δi >= quantiles[i] && δi < quantiles[i + 1]) {
          binIndex = i;
          break;
        }
      }
      return { data: d, bin: binIndex };
    });
    const groupedByBin = d3.group(binnedData, d => d.bin);

    // Allocate samples across bins, inversely weighted by density to control outliers
    const binSampleSizes = new Map();
    let remainingLineSamples = lineSampleSize;
    const binCounts = Array(binsPerLine).fill(0);
    for (const [bin, binGroup] of groupedByBin) {
      binCounts[bin] = binGroup.length;
    }
    const maxBinCount = d3.max(binCounts);
    for (const [bin, binGroup] of groupedByBin) {
      const binSize = binGroup.length;
      // Inverse weighting: smaller bins (outliers) get fewer samples
      const weight = binSize > 0 ? Math.max(0.1, 1 - (binSize / maxBinCount) * 0.5) : 1;
      const binSampleSize = Math.max(1, Math.round(lineSampleSize * (binSize / groupSize) * weight));
      binSampleSizes.set(bin, Math.min(binSampleSize, binSize));
      remainingLineSamples -= binSampleSizes.get(bin);
    }

    // Distribute remaining samples
    if (remainingLineSamples > 0) {
      const nonEmptyBins = Array.from(groupedByBin.keys());
      for (let i = 0; i < remainingLineSamples && nonEmptyBins.length > 0; i++) {
        const bin = nonEmptyBins[i % nonEmptyBins.length];
        binSampleSizes.set(bin, binSampleSizes.get(bin) + 1);
      }
    }

    // Sample from each bin
    for (const [bin, binGroup] of groupedByBin) {
      const samples = d3.shuffle([...binGroup]).slice(0, binSampleSizes.get(bin)).map(s => s.data);
      sampled.push(...samples);
    }
  }

  // Adjust sample size
  let finalSample = sampled;
  if (sampled.length > effectiveSampleSize) {
    finalSample = d3.shuffle(sampled).slice(0, effectiveSampleSize);
  } else if (sampled.length < effectiveSampleSize) {
    const remaining = effectiveSampleSize - sampled.length;
    const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining);
    finalSample = [...sampled, ...additional];
  }

  // Validate and adjust if stats are off
  const sampleStats = computeStats(finalSample);
  const statTolerance = { μ: 0.1, σ: 0.2 }; // Allowable relative deviation
  const needsAdjustment =
    Math.abs(sampleStats.μ[0] - populationStats.μ[0]) / populationStats.μ[0] > statTolerance.μ ||
    Math.abs(sampleStats.μ[1] - populationStats.μ[1]) / populationStats.μ[1] > statTolerance.μ ||
    Math.abs(sampleStats.σ[0] - populationStats.σ[0]) / populationStats.σ[0] > statTolerance.σ ||
    Math.abs(sampleStats.σ[1] - populationStats.σ[1]) / populationStats.σ[1] > statTolerance.σ;

  if (needsAdjustment) {
    // Add points from underrepresented regions
    const δiSample = finalSample.map(d => d[2]);
    const δiPopulation = data.map(d => d[2]);
    const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiPopulation, i / binsPerLine));
    const sampleBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiSample);
    const populationBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiPopulation);
    const underSampledBins = sampleBins
      .map((b, i) => ({ bin: i, diff: populationBins[i].length / totalSize - b.length / finalSample.length }))
      .filter(b => b.diff > 0.1); // Significant under-sampling

    if (underSampledBins.length > 0) {
      const additionalSamples = [];
      for (const { bin } of underSampledBins) {
        const binData = data.filter(d => d[2] >= quantiles[bin] && d[2] < quantiles[bin + 1] && !finalSample.includes(d));
        const needed = Math.round((underSampledBins[0].diff * effectiveSampleSize) / 2);
        additionalSamples.push(...d3.shuffle(binData).slice(0, needed));
      }
      finalSample = d3.shuffle([...finalSample, ...additionalSamples]).slice(0, effectiveSampleSize);
    }
  }

  return finalSample;
}
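
// Usage sketch (hypothetical sizes): draw roughly 2000 rows from a full comparison while
// preserving per-line proportions and the δi distribution,
//   const sample = stratifiedSample(fullComparison, 2000);      // default: 10 δi bins per line
//   const coarse = stratifiedSample(fullComparison, 500, 5);    // fewer rows, coarser bins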

function decimate (data, decimationCount = 20) {
  return data.filter( (row, index) => (index % decimationCount) == 0 );
}

function computeSample (data, opts = {}) {
  const DEFAULT_SAMPLE_SIZE = 2000;
  let sample;

  if (opts.decimate === true) {
    if (opts.sampleSize > 0) {
      sample = decimate(data.records, Math.floor(data.records.length / opts.sampleSize));
    } else {
      sample = decimate(data.records, Math.floor(data.records.length / DEFAULT_SAMPLE_SIZE));
    }
  } else if (opts.decimate > 0) {
    sample = decimate(data.records, opts.decimate);
  } else if (opts.sampleSize) {
    sample = stratifiedSample(data.records, opts.sampleSize);
  } else {
    sample = stratifiedSample(data.records, DEFAULT_SAMPLE_SIZE);
  }

  return sample;
}

// Optional: Utility to compute stats for validation
function computeStats(data) {
  const δi = data.map(d => d[2]);
  const δj = data.map(d => d[3]);
  const rms = Math.sqrt(d3.mean(data, d => d[2] ** 2 + d[3] ** 2));
  return {
    l: data.length,
    μ: [d3.mean(δi), d3.mean(δj)],
    σ: [d3.deviation(δi), d3.deviation(δj)],
    rms
  };
}

function centre (data) {
  const stats = computeStats(data);

  return data.map( row => [row[0], row[1], row[2]-stats.μ[0], row[3]-stats.μ[1]] )
}

function outliers (data, sd=1.96) {
  const stats = computeStats(data);

  function fn ([l, p, i, j]) {
    return (i - stats.μ[0]) > stats.σ[0]*sd ||
           (j - stats.μ[1]) > stats.σ[1]*sd;
  }

  return data.filter(fn)
}

function inliers (data, sd=1.96) {
  const stats = computeStats(data);

  function fn ([l, p, i, j]) {
    return (i - stats.μ[0]) <= stats.σ[0]*sd &&
           (j - stats.μ[1]) <= stats.σ[1]*sd;
  }

  return data.filter(fn)
}

function difference (a, b) {
  const obj = Array.isArray(a) ? [] : {};
  for (const k in a) {
    const v0 = a[k];
    const v1 = b[k];
    if (v0 instanceof Object && v1 instanceof Object) {
      obj[k] = difference (v0, v1);
    } else if (!isNaN(Number(v0)) && !isNaN(Number(v1))) {
      obj[k] = v1 - v0;
    }
  }
  return obj;
}

function combinations (a, n) {
  const results = [];

  function combine(current, start) {
    if (current.length === n) {
      results.push([...current]);
      return;
    }
    for (let i = start; i < a.length; i++) {
      current.push(a[i]);
      combine(current, i + 1);
      current.pop();
    }
  }

  combine([], 0);
  return results;
}
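
// Example: all unordered pairs of project IDs, as iterated by saveGroup() in geometric-differences.js,
//   combinations(['p1', 'p2', 'p3'], 2)
//   // => [ ['p1', 'p2'], ['p1', 'p3'], ['p2', 'p3'] ]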

module.exports = {
  combinations,
  centre,
  ijRMS,
  computeStats,
  computeSample,
  stratifiedSample,
  old_stratifiedSample,
  decimate,
  difference,
  outliers,
  inliers
}
@@ -44,6 +44,7 @@
    "jsonwebtoken": "^9.0.2",
    "leaflet-headless": "git+https://git@gitlab.com/aaltronav/contrib/leaflet-headless.git#devel",
    "marked": "^4.0.12",
    "mathjs": "^14.6.0",
    "node-fetch": "^2.6.1",
    "nunjucks": "^3.2.3",
    "path-to-regexp": "^6.2.1",