Add comparison functions to server/lib

This commit is contained in:
D. Berge
2025-08-18 13:53:43 +02:00
parent 7b67b4afc9
commit fd94b3b6f4
6 changed files with 1050 additions and 32 deletions

View File

@@ -0,0 +1,563 @@
const d3a = require('d3-array');
const { DougalBinaryBundle } = require('@dougal/binary');
const { pool, setSurvey } = require('../db/connection');
const db = require('../db');
const { bundle } = require('../binary/bundle');
const setops = require('../utils/setops');
const { ijRMS, combinations, computeSample } = require('./utils');
const { computePCA } = require('./pca');
const { ERROR, INFO, DEBUG } = require('DOUGAL_ROOT/debug')(__filename);
async function fetchErrors (pid) {
const client = await setSurvey(pid);
try {
const text = `
SELECT
fs.line, fs.point,
ij_error(fs.line::double precision, fs.point::double precision, fs.geometry)::json AS errorfinal
FROM
final_shots fs
ORDER BY fs.line, fs.point;
`;
const res = await client.query(text);
return res.rows.map( row =>
[row.line, row.point, row.errorfinal.coordinates[0], row.errorfinal.coordinates[1]]
);
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
async function groups () {
const projects = await db.project.get();
const groupNames = [
...projects
.reduce( (acc, cur) => acc.add(...cur.groups), new Set() )
].filter( i => !!i );
return Object.fromEntries(groupNames.map( g => [g, projects.filter( p => p.groups.includes(g) )] ));
}
/*
async function compare (baselineProjectID, monitorProjectID) {
console.log("Getting baseline", baselineProjectID);
const baselineData = await db.sequence.get(baselineProjectID);
console.log("Getting monitor", monitorProjectID);
const monitorData = await db.sequence.get(monitorProjectID);
console.log("Comparing");
const comparison = comparisonGeometricDifferences(baselineData, monitorData);
return comparison;
}
*/
function geometric_differences (baseline, monitor) {
if (!baseline || !baseline.length) {
throw new Error("No baseline data");
}
if (!monitor || !monitor.length) {
throw new Error("No monitor data");
}
const comparison = []; // An array of { line, point, εi, εj }; line + point may be repeated
for (const bp of baseline) {
const monitor_points = monitor.filter( mp => mp[0] === bp[0] && mp[1] === bp[1] );
if (!monitor_points.length) {
// console.log(`No match for L${bp[0]} P${bp[1]}`);
continue;
}
for (const mp of monitor_points) {
const εi = mp[2] - bp[2], εj = mp[3] - bp[3];
comparison.push([bp[0], bp[1], εi, εj]);
}
}
return comparison;
}
async function compare (baselineProjectID, monitorProjectID, infoObj) {
console.log("Getting baseline", baselineProjectID);
const baselineData = await fetchErrors(baselineProjectID);
console.log("Getting monitor", monitorProjectID);
const monitorData = await fetchErrors(monitorProjectID);
console.log("Comparing");
const comparison = geometric_differences(baselineData, monitorData);
if (infoObj instanceof Object) {
const baselineIJ = baselineData.map(i => i.slice(0,2));
const monitorIJ = monitorData.map(i => i.slice(0,2));
infoObj.compared = comparison.length;
infoObj.baselineLength = baselineData.length;
infoObj.monitorLength = monitorData.length;
infoObj.baselineUniqueLength = setops.unique(baselineIJ).length;
infoObj.monitorUniqueLength = setops.unique(monitorIJ).length;
infoObj.common = setops.intersection(baselineIJ, monitorIJ).length;
}
return comparison;
}
async function save (baselineProjectID, monitorProjectID, bundle, meta) {
const info = {};
if (!bundle) {
const comparison = await compare(baselineProjectID, monitorProjectID, info);
if (comparison.length) {
bundle = asBundle(comparison);
} else {
console.warn(`No matching points between ${baselineProjectID} and ${monitorProjectID}`);
return;
}
} else if (!(bundle instanceof DougalBinaryBundle)) {
throw new Error("Illegal data: `bundle` must of null or of type DougalBinaryBundle");
}
if (!bundle.byteLength) {
console.warn(`Empty comparison results between ${baselineProjectID} and ${monitorProjectID}. Refusing to store`);
return;
}
meta = {tstamp: (new Date()), ...info, ...stats(bundle), ...meta};
console.log("Storing in database");
const client = await pool.connect();
try {
const text = `
INSERT INTO comparisons.comparisons
(type, baseline_pid, monitor_pid, data, meta)
VALUES ('geometric_difference', $1, $2, $3, $4)
ON CONFLICT (type, baseline_pid, monitor_pid)
DO UPDATE SET
data = EXCLUDED.data,
meta = EXCLUDED.meta;
`;
const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ];
const res = await client.query(text, values);
return res.rowCount;
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
async function saveSample (baselineProjectID, monitorProjectID, opts = {}) {
let sample = opts.sample;
let populationStats = opts.populationStats;
let sampleStats = opts.sampleStats;
if (!sample?.length) {
const sampleSize = opts.sampleSize ?? 2000;
const record = await get(baselineProjectID, monitorProjectID);
let data;
if (record) {
data = record.data;
} else {
console.log("Full data not found in database");
data = asBundle(await compare(baselineProjectID, monitorProjectID));
}
sample = computeSample(data, opts);
if (!populationStats) {
populationStats = stats(data);
}
}
const bundle = asBundle(sample, {type: 0x0c});
if (!sampleStats) {
sampleStats = stats(bundle);
}
meta = {tstamp: (new Date()), populationStats, sampleStats, ...(opts.meta??{})};
const client = await pool.connect();
try {
const text = `
INSERT INTO comparisons.comparisons
(type, baseline_pid, monitor_pid, data, meta)
VALUES ('geometric_difference_sample', $1, $2, $3, $4)
ON CONFLICT (type, baseline_pid, monitor_pid)
DO UPDATE SET
data = EXCLUDED.data,
meta = EXCLUDED.meta;
`;
const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ];
const res = await client.query(text, values);
return res.rowCount;
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
async function get (baselineProjectID, monitorProjectID, type = 'geometric_difference') {
const client = await pool.connect();
try {
const text = `
SELECT data, meta
FROM comparisons.comparisons
WHERE type = $3 AND baseline_pid = $1 AND monitor_pid = $2;
`;
const values = [ baselineProjectID, monitorProjectID, type ];
const res = await client.query(text, values);
if (!res.rows.length) {
console.log("Comparison not found in database");
return;
}
const { data, meta } = res.rows[0];
return {
data: DougalBinaryBundle.clone(data),
meta
};
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
async function getSample (baselineProjectID, monitorProjectID) {
return await get(baselineProjectID, monitorProjectID, 'geometric_difference_sample');
}
async function remove (baselineProjectID, monitorProjectID) {
const client = await pool.connect();
try {
const text = `
DELETE
FROM comparisons.comparisons
WHERE
(type = 'geometric_difference' OR type = 'geometric_difference_sample')
AND baseline_pid = $1
AND monitor_pid = $2;
`;
const values = [ baselineProjectID, monitorProjectID ];
const res = await client.query(text, values);
return res.rowCount;
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
function stats (comparison) {
let i, j, δi, δj;
if (comparison instanceof DougalBinaryBundle) {
console.log("Computing stats");
const udv = comparison.chunks()[0]?.udv;
if (!udv) {
console.error("Could not determine udv from first chunk");
console.log(comparison.chunks());
return;
}
let records;
if (udv == 0xa) {
records = comparison.records;
// Transpose the records
[ i, j, δi, δj ] = Array.from({ length: 4 }, (_, i) => records.map(row => row[i]));
} else if (udv == 0xc) {
records = comparison.records;
let _;
[ _, _, i, j, δi, δj ] = Array.from({ length: 6 }, (_, i) => records.map(row => row[i]));
} else {
throw new Error(`Unrecognised DougalBinaryBundle User Defined Value: ${udv}`);
}
return {
length: records.length,
μ: [ d3a.mean(δi), d3a.mean(δj) ],
σ: [ d3a.deviation(δi), d3a.deviation(δj) ],
//rms: ijRMS(δi, δj),
...computePCA(records)
}
} else if (Array.isArray(comparison)) {
if (Array.isArray(comparison[0])) {
return stats(asBundle(comparison, {type: 0xc}));
} else {
// Assume object
return stats(asBundle(comparison));
}
}
}
/** Compare two projects' errorfinal quantities.
*
* Assumes that the preplots are the same.
* It is not a terribly efficient way of doing it, but considering
* that this is, by and large only going to be done once every few
* hours for an active prospect, and never for inactive ones, I
* think and hope we can live with that.
*
* `baseline` and `monitor` are the result of calling
* db.sequence.get(projectId) on each of the respective
* projects.
*/
/*
function comparisonGeometricDifferences (baseline, monitor) {
if (!baseline || !baseline.length) {
throw new Error("No baseline data");
}
if (!monitor || !monitor.length) {
throw new Error("No monitor data");
}
const comparison = []; // An array of { line, point, εi, εj, δts }; line + point may be repeated
for (const bp of baseline) {
if (!bp.errorfinal) {
console.log(`No final data for baseline point L${bp.line} S${bp.sequence} P${bp.point}`);
continue;
}
const monitor_points = monitor.filter( mp => mp.line === bp.line && mp.point === bp.point );
for (const mp of monitor_points) {
if (!mp.errorfinal) {
console.log(`No final data for monitor point L${mp.line} S${mp.sequence} P${mp.point}`);
continue;
}
const line = bp.line;
const point = bp.point;
const baseSeq = bp.sequence;
const monSeq = mp.sequence;
const baseTStamp = bp.tstamp;
const monTStamp = mp.tstamp;
const δi = bp.errorfinal.coordinates[0] - mp.errorfinal.coordinates[0];
const δj = bp.errorfinal.coordinates[1] - mp.errorfinal.coordinates[1];
const obj = {line, point, baseSeq, monSeq, baseTStamp, monTStamp, δi, δj};
comparison.push(obj);
// console.log(obj);
}
}
return comparison.sort(sortFn);
}
function sortComparison (comparison) {
return comparison.sort( (a, b) => {
if (a.line == b.line) {
if (a.point == b.point) {
return a.baseTStamp - b.baseTStamp;
} else {
return a.point - b.point;
}
} else {
return a.line - b.line;
}
})
}
*/
function sortFn (a, b) {
if (a.line == b.line) {
if (a.point == b.point) {
return a.baseTStamp - b.baseTStamp;
} else {
return a.point - b.point;
}
} else {
return a.line - b.line;
}
}
function asBundle (comparison, opts = {type: 0x0a}) {
return DougalBinaryBundle.clone(bundle(comparison, opts));
}
function fromBundle (bundle) {
if (!(bundle instanceof DougalBinaryBundle)) {
bundle = DougalBinaryBundle.clone(bundle);
}
const json = [];
for (const record of bundle) {
record.shift();
json.push(record);
}
return json;
}
async function saveGroup (group, opts = {}) {
if (group == null) {
// Save everything
const g = await groups();
for (const group of Object.values(g)) {
await saveGroup(group)
}
} if (typeof group === "string") {
// This is a group name
const g = await groups();
group = groups[g];
}
if (Array.isArray(group)) {
const pids = group.map( i => i.pid ).sort();
for (const [ baselineProjectID, monitorProjectID ] of combinations(pids, 2)) {
try {
const isSaved = await save(baselineProjectID, monitorProjectID);
if (isSaved) {
await saveSample(baselineProjectID, monitorProjectID, opts.sampleOpts);
} else {
await remove(baselineProjectID, monitorProjectID);
}
DEBUG("Saved comparison between %s and %s", baselineProjectID, monitorProjectID);
} catch (err) {
console.error(err);
ERROR("Error saving comparison between %s and %s", baselineProjectID, monitorProjectID);
}
}
}
}
/*
async function getGroup (groupName, opts = {}) {
const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort();
if (!group?.length) return;
const client = await pool.connect();
try {
const text = `
-- SQL query goes here
`;
const values = combinations(group, 2);
const res = await client.query(text, values);
if (!res.rows.length) {
console.log("Comparison not found in database");
return;
}
if (opts.returnData) {
return res.rows.map( row => ({
data: DougalBinaryBundle.clone(row.data),
meta: row.meta
});
} else {
return res.rows.map( row => row.meta );
}
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
*/
async function getGroup (groupName, opts = {}) {
const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort();
if (!group?.length) return;
const client = await pool.connect();
try {
const pairs = combinations(group, 2);
const flatValues = pairs.flat();
const placeholders = [];
for (let i = 0; i < pairs.length; i++) {
placeholders.push(`($${i * 2 + 1}, $${i * 2 + 2})`);
}
const inClause = placeholders.join(',');
const selectFields = opts.returnData ? 'data, meta' : 'meta';
const text = `
SELECT baseline_pid, monitor_pid, ${selectFields}
FROM comparisons.comparisons
WHERE type = 'geometric_difference'
AND (baseline_pid, monitor_pid) IN (VALUES ${inClause})
ORDER BY baseline_pid, monitor_pid
`;
console.log(text);
console.log(flatValues);
const res = await client.query(text, flatValues);
if (!res.rows.length) {
console.log("Comparison not found in database");
return;
}
if (opts.returnData) {
return res.rows.map( row => ({
...row,
data: DougalBinaryBundle.clone(row.data),
}));
} else {
return res.rows;
}
} catch (err) {
console.error(err);
} finally {
client.release();
}
}
module.exports = {
groups,
fetchErrors,
compare,
computeSample,
get,
save,
getSample,
saveSample,
saveGroup,
getGroup,
remove,
stats,
// comparisonGeometricDifferences,
asBundle,
fromBundle
};

View File

@@ -0,0 +1,4 @@
module.exports = {
...require('./geometric-differences')
}

View File

@@ -0,0 +1,80 @@
const math = require('mathjs');
/**
* Compute PCA (eigenvectors and eigenvalues) for deviation data to assess geometric repeatability.
* @param {Array<Array<number>>} deviations - Array of [point, line, i_deviation, j_deviation]
* @returns {Object} - { eigenvalues, eigenvectors, rms, anisotropy, primaryDirection }
*/
function computePCA(deviations) {
// Extract i_deviation and j_deviation
const deviationMatrix = deviations.map(row => [row[2], row[3]]);
// Convert to mathjs matrix
const D = math.matrix(deviationMatrix);
// Compute mean for centering (1 x 2 matrix)
const mean = math.mean(D, 0);
// Explicitly repeat mean to match D's shape (n x 2)
const n = deviationMatrix.length;
const meanRepeated = math.repmat(mean, n, 1);
// Center the data
const centered = math.subtract(D, meanRepeated);
// Compute covariance matrix: (1/(n-1)) * (D_centered^T * D_centered)
const covMatrix = math.multiply(
math.multiply(1 / (n - 1), math.transpose(centered)),
centered
);
// Perform eigen decomposition
const result = math.eigs(covMatrix);
let eigenvalues = result.values;
const evObjs = result.eigenvectors;
// Convert eigenvalues to array if it's a matrix
eigenvalues = Array.isArray(eigenvalues) ? eigenvalues : eigenvalues.toArray();
// Create pairs and convert vector to array if necessary
const pairs = eigenvalues.map((val, i) => {
let vec = evObjs[i].vector;
if (vec.toArray) vec = vec.toArray();
return { val, vec };
});
// Sort by descending eigenvalues
pairs.sort((a, b) => b.val - a.val);
// Sorted eigenvalues
const sortedEigenvalues = pairs.map(p => p.val);
// Build eigenvector matrix: rows as components, columns as eigenvectors
const dimension = pairs[0].vec.length; // e.g., 2
const evecRows = [];
for (let comp = 0; comp < dimension; comp++) {
evecRows.push(pairs.map(p => p.vec[comp]));
}
const sortedEigenvectors = math.matrix(evecRows);
// Compute RMS errors along principal axes
const rms = sortedEigenvalues.map(val => Math.sqrt(Math.max(val, 0)));
// Compute anisotropy (ratio of major to minor axis variance)
const anisotropy = sortedEigenvalues[0] / (sortedEigenvalues[1] || 1); // Avoid division by zero
// Primary direction (angle in degrees of major eigenvector)
const primaryVector = sortedEigenvectors.subset(math.index([0, 1], 0)).toArray();
const primaryDirection = Math.atan2(primaryVector[1], primaryVector[0]) * 180 / Math.PI;
return {
eigenvalues: sortedEigenvalues,
eigenvectors: sortedEigenvectors.toArray(),
rms: rms, // RMS errors along major/minor axes
anisotropy: anisotropy, // Ratio of variances
primaryDirection: primaryDirection // Angle of major axis (degrees)
};
}
module.exports = { computePCA };

View File

@@ -0,0 +1,310 @@
const d3 = require('d3-array');
// Function to calculate the root mean square (RMS) of position deviations
// This computes the RMS of the Euclidean distances: sqrt( (1/n) * sum(δi² + δj²) )
// Assumes deviations are already centered (mean deviation ~0); if normalization by std dev or range is needed, adjust accordingly
function ijRMS(δi, δj) {
if (!δi.length || !δj.length) return 0;
if (δi.length != δj.length) {
console.warn(`δi and δj have different lengths!`);
}
let sumSquares = 0;
const n = Math.min(δi.length, δj.length);
for (let i=0; i < n; i++) {
sumSquares += (δi[i] * δi[i]) + (δj[i] * δj[i]);
}
const meanSquare = sumSquares / n;
const rms = Math.sqrt(meanSquare);
return rms;
}
/**
* Performs stratified sampling on an array of [line, point, δi, δj] data points.
* Groups by line and samples proportionally to preserve shape and spread.
*
* @param {Array<Array<number>>} data - Input data: [[line, point, δi, δj], ...]
* @param {number} sampleSize - Target number of samples (e.g., 2000)
* @returns {Array<Array<number>>} Sampled data in same format
*/
function old_stratifiedSample(data, sampleSize) {
if (!Array.isArray(data) || data.length === 0) return [];
if (!Number.isInteger(sampleSize) || sampleSize <= 0) {
throw new Error('sampleSize must be a positive integer');
}
// Group data by line (first element)
const grouped = d3.group(data, d => d[0]);
const totalSize = data.length;
const sampled = [];
// Ensure sampleSize doesn't exceed data size
const effectiveSampleSize = Math.min(sampleSize, totalSize);
// Iterate over each line group
for (const [line, group] of grouped) {
// Calculate proportional sample size for this group
const groupSize = group.length;
const groupSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize));
// Shuffle group and take first N elements
const shuffled = d3.shuffle([...group]);
sampled.push(...shuffled.slice(0, groupSampleSize));
}
// If sampled size is slightly off due to rounding, adjust
if (sampled.length > effectiveSampleSize) {
return d3.shuffle(sampled).slice(0, effectiveSampleSize);
} else if (sampled.length < effectiveSampleSize) {
// Pad with random samples from entire dataset if needed
const remaining = effectiveSampleSize - sampled.length;
const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining);
sampled.push(...additional);
}
return sampled;
}
/**
* Performs stratified sampling on an array of [line, point, δi, δj] data points.
* Stratifies by line and δi quantiles to preserve shape and spread, with outlier control.
*
* @param {Array<Array<number>>} data - Input data: [[line, point, δi, δj], ...]
* @param {number} sampleSize - Target number of samples (e.g., 2000)
* @param {number} [binsPerLine=10] - Number of δi quantile bins per line
* @returns {Array<Array<number>>} Sampled data in same format
*/
function stratifiedSample(data, sampleSize, binsPerLine = 10) {
if (!Array.isArray(data) || data.length === 0) return [];
if (!Number.isInteger(sampleSize) || sampleSize <= 0) {
throw new Error('sampleSize must be a positive integer');
}
if (!Number.isInteger(binsPerLine) || binsPerLine <= 0) {
throw new Error('binsPerLine must be a positive integer');
}
const totalSize = data.length;
const effectiveSampleSize = Math.min(sampleSize, totalSize);
const sampled = [];
// Group by line
const groupedByLine = d3.group(data, d => d[0]);
// Compute population stats for validation
const populationStats = computeStats(data);
// Iterate over each line
for (const [line, group] of groupedByLine) {
const groupSize = group.length;
const lineSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize));
// Create quantile-based bins for δi
const δiValues = group.map(d => d[2]).sort(d3.ascending);
const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiValues, i / binsPerLine));
const binnedData = group.map(d => {
const δi = d[2];
let binIndex = 0;
for (let i = 0; i < binsPerLine; i++) {
if (δi >= quantiles[i] && δi < quantiles[i + 1]) {
binIndex = i;
break;
}
}
return { data: d, bin: binIndex };
});
const groupedByBin = d3.group(binnedData, d => d.bin);
// Allocate samples across bins, inversely weighted by density to control outliers
const binSampleSizes = new Map();
let remainingLineSamples = lineSampleSize;
const binCounts = Array(binsPerLine).fill(0);
for (const [bin, binGroup] of groupedByBin) {
binCounts[bin] = binGroup.length;
}
const maxBinCount = d3.max(binCounts);
for (const [bin, binGroup] of groupedByBin) {
const binSize = binGroup.length;
// Inverse weighting: smaller bins (outliers) get fewer samples
const weight = binSize > 0 ? Math.max(0.1, 1 - (binSize / maxBinCount) * 0.5) : 1;
const binSampleSize = Math.max(1, Math.round(lineSampleSize * (binSize / groupSize) * weight));
binSampleSizes.set(bin, Math.min(binSampleSize, binSize));
remainingLineSamples -= binSampleSizes.get(bin);
}
// Distribute remaining samples
if (remainingLineSamples > 0) {
const nonEmptyBins = Array.from(groupedByBin.keys());
for (let i = 0; i < remainingLineSamples && nonEmptyBins.length > 0; i++) {
const bin = nonEmptyBins[i % nonEmptyBins.length];
binSampleSizes.set(bin, binSampleSizes.get(bin) + 1);
}
}
// Sample from each bin
for (const [bin, binGroup] of groupedByBin) {
const samples = d3.shuffle([...binGroup]).slice(0, binSampleSizes.get(bin)).map(s => s.data);
sampled.push(...samples);
}
}
// Adjust sample size
let finalSample = sampled;
if (sampled.length > effectiveSampleSize) {
finalSample = d3.shuffle(sampled).slice(0, effectiveSampleSize);
} else if (sampled.length < effectiveSampleSize) {
const remaining = effectiveSampleSize - sampled.length;
const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining);
finalSample = [...sampled, ...additional];
}
// Validate and adjust if stats are off
const sampleStats = computeStats(finalSample);
const statTolerance = { μ: 0.1, σ: 0.2 }; // Allowable relative deviation
const needsAdjustment =
Math.abs(sampleStats.μ[0] - populationStats.μ[0]) / populationStats.μ[0] > statTolerance.μ ||
Math.abs(sampleStats.μ[1] - populationStats.μ[1]) / populationStats.μ[1] > statTolerance.μ ||
Math.abs(sampleStats.σ[0] - populationStats.σ[0]) / populationStats.σ[0] > statTolerance.σ ||
Math.abs(sampleStats.σ[1] - populationStats.σ[1]) / populationStats.σ[1] > statTolerance.σ;
if (needsAdjustment) {
// Add points from underrepresented regions
const δiSample = finalSample.map(d => d[2]);
const δiPopulation = data.map(d => d[2]);
const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiPopulation, i / binsPerLine));
const sampleBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiSample);
const populationBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiPopulation);
const underSampledBins = sampleBins
.map((b, i) => ({ bin: i, diff: populationBins[i].length / totalSize - b.length / finalSample.length }))
.filter(b => b.diff > 0.1); // Significant under-sampling
if (underSampledBins.length > 0) {
const additionalSamples = [];
for (const { bin } of underSampledBins) {
const binData = data.filter(d => d[2] >= quantiles[bin] && d[2] < quantiles[bin + 1] && !finalSample.includes(d));
const needed = Math.round((underSampledBins[0].diff * effectiveSampleSize) / 2);
additionalSamples.push(...d3.shuffle(binData).slice(0, needed));
}
finalSample = d3.shuffle([...finalSample, ...additionalSamples]).slice(0, effectiveSampleSize);
}
}
return finalSample;
}
function decimate (data, decimationCount = 20) {
return data.filter( (row, index) => (index % decimationCount) == 0 );
}
function computeSample (data, opts = {}) {
const DEFAULT_SAMPLE_SIZE = 2000;
let sample;
if (opts.decimate === true) {
if (opts.sampleSize > 0) {
sample = decimate(data.records, Math.floor(data.records.length / opts.sampleSize));
} else {
sample = decimate(data.records, Math.floor(data.records.length / DEFAULT_SAMPLE_SIZE));
}
} else if (opts.decimate > 0) {
sample = decimate(data.records, opts.decimate);
} else if (opts.sampleSize) {
sample = stratifiedSample(data.records, opt.sampleSize);
} else {
sample = stratifiedSample(data.records, DEFAULT_SAMPLE_SIZE);
}
return sample;
}
// Optional: Utility to compute stats for validation
function computeStats(data) {
const δi = data.map(d => d[2]);
const δj = data.map(d => d[3]);
const rms = Math.sqrt(d3.mean(data, d => d[2] ** 2 + d[3] ** 2));
return {
l: data.length,
μ: [d3.mean(δi), d3.mean(δj)],
σ: [d3.deviation(δi), d3.deviation(δj)],
rms
};
}
function centre (data) {
const stats = computeStats(data);
return data.map( row => [row[0], row[1], row[2]-stats.μ[0], row[3]-stats.μ[1]] )
}
function outliers (data, sd=1.96) {
const stats = computeStats(data);
function fn ([l, p, i, j]) {
return (i - stats.μ[0]) > stats.σ[0]*sd ||
(j - stats.μ[1]) > stats.σ[1]*sd;
}
return data.filter(fn)
}
function inliers (data, sd=1.96) {
const stats = computeStats(data);
function fn ([l, p, i, j]) {
return (i - stats.μ[0]) <= stats.σ[0]*sd &&
(j - stats.μ[1]) <= stats.σ[1]*sd;
}
return data.filter(fn)
}
function difference (a, b) {
const obj = Array.isArray(a) ? [] : {};
for (const k in a) {
const v0 = a[k];
const v1 = b[k]
if (v0 instanceof Object && v1 instanceof Object) {
obj[k] = difference (v0, v1);
} else if (!isNaN(Number(v0)) && !isNaN(Number(v1))) {
obj[k] = v1 - v0;
}
}
return obj;
}
function combinations (a, n) {
const results = [];
function combine(current, start) {
if (current.length === n) {
results.push([...current]);
return;
}
for (let i = start; i < a.length; i++) {
current.push(a[i]);
combine(current, i + 1);
current.pop();
}
}
combine([], 0);
return results;
}
module.exports = {
combinations,
centre,
ijRMS,
computeStats,
computeSample,
stratifiedSample,
old_stratifiedSample,
decimate,
difference,
outliers,
inliers
}

View File

@@ -44,6 +44,7 @@
"jsonwebtoken": "^9.0.2",
"leaflet-headless": "git+https://git@gitlab.com/aaltronav/contrib/leaflet-headless.git#devel",
"marked": "^4.0.12",
"mathjs": "^14.6.0",
"node-fetch": "^2.6.1",
"nunjucks": "^3.2.3",
"path-to-regexp": "^6.2.1",

124
package-lock.json generated
View File

@@ -1678,17 +1678,6 @@
"dev": true,
"license": "MIT"
},
"lib/www/client/source/node_modules/@babel/runtime": {
"version": "7.23.2",
"dev": true,
"license": "MIT",
"dependencies": {
"regenerator-runtime": "^0.14.0"
},
"engines": {
"node": ">=6.9.0"
}
},
"lib/www/client/source/node_modules/@babel/template": {
"version": "7.27.2",
"dev": true,
@@ -7524,11 +7513,6 @@
"node": ">=4"
}
},
"lib/www/client/source/node_modules/regenerator-runtime": {
"version": "0.14.0",
"dev": true,
"license": "MIT"
},
"lib/www/client/source/node_modules/regenerator-transform": {
"version": "0.15.2",
"dev": true,
@@ -9374,6 +9358,7 @@
"jsonwebtoken": "^9.0.2",
"leaflet-headless": "git+https://git@gitlab.com/aaltronav/contrib/leaflet-headless.git#devel",
"marked": "^4.0.12",
"mathjs": "^14.6.0",
"node-fetch": "^2.6.1",
"nunjucks": "^3.2.3",
"path-to-regexp": "^6.2.1",
@@ -10527,17 +10512,6 @@
"node": ">=6.0.0"
}
},
"lib/www/server/node_modules/redoc-cli/node_modules/@babel/runtime": {
"version": "7.16.7",
"dev": true,
"license": "MIT",
"dependencies": {
"regenerator-runtime": "^0.13.4"
},
"engines": {
"node": ">=6.9.0"
}
},
"lib/www/server/node_modules/redoc-cli/node_modules/@babel/template": {
"version": "7.12.13",
"dev": true,
@@ -12532,11 +12506,6 @@
"url": "https://github.com/Mermade/oas-kit?sponsor=1"
}
},
"lib/www/server/node_modules/redoc-cli/node_modules/regenerator-runtime": {
"version": "0.13.9",
"dev": true,
"license": "MIT"
},
"lib/www/server/node_modules/redoc-cli/node_modules/require-directory": {
"version": "2.1.1",
"dev": true,
@@ -13297,6 +13266,15 @@
"node": ">=0.4"
}
},
"node_modules/@babel/runtime": {
"version": "7.28.3",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.3.tgz",
"integrity": "sha512-9uIQ10o0WGdpP6GDhXcdOJPJuDgFtIDtN/9+ArJQ2NAfAmiuhTQdzkaTGR33v43GYS2UrSA0eX2pPPHoFVvpxA==",
"license": "MIT",
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@deck.gl/aggregation-layers": {
"version": "9.1.13",
"resolved": "https://registry.npmjs.org/@deck.gl/aggregation-layers/-/aggregation-layers-9.1.13.tgz",
@@ -14389,6 +14367,19 @@
"node": ">= 10"
}
},
"node_modules/complex.js": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/complex.js/-/complex.js-2.4.2.tgz",
"integrity": "sha512-qtx7HRhPGSCBtGiST4/WGHuW+zeaND/6Ld+db6PbrulIB1i2Ev/2UPiqcmpQNPSyfBKraC0EOvOKCB5dGZKt3g==",
"license": "MIT",
"engines": {
"node": "*"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/rawify"
}
},
"node_modules/compressible": {
"version": "2.0.18",
"resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz",
@@ -15089,6 +15080,12 @@
"node": ">= 0.4"
}
},
"node_modules/escape-latex": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/escape-latex/-/escape-latex-1.2.0.tgz",
"integrity": "sha512-nV5aVWW1K0wEiUIEdZ4erkGGH8mDxGyxSeqPzRNtWP7ataw+/olFObw7hujFWlVjNsaDFw5VZ5NzVSIqRgfTiw==",
"license": "MIT"
},
"node_modules/escodegen": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz",
@@ -15205,6 +15202,19 @@
"node": ">= 6"
}
},
"node_modules/fraction.js": {
"version": "5.3.1",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.1.tgz",
"integrity": "sha512-PhqCuhSKIGbbkJ+cojHv47eEWClU71FIOhiUsYdZYTwhIzCeIN8rXeEjserTvPat5JLJChumn8chHz64WkZgTw==",
"license": "MIT",
"engines": {
"node": "*"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/rawify"
}
},
"node_modules/fs-minipass": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz",
@@ -15635,6 +15645,12 @@
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
"integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
},
"node_modules/javascript-natural-sort": {
"version": "0.7.1",
"resolved": "https://registry.npmjs.org/javascript-natural-sort/-/javascript-natural-sort-0.7.1.tgz",
"integrity": "sha512-nO6jcEfZWQXDhOiBtG2KvKyEptz7RVbpGP4vTD2hLBdmNQSsCiicO2Ioinv6UI4y9ukqnBpy+XZ9H6uLNgJTlw==",
"license": "MIT"
},
"node_modules/jsbn": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
@@ -15848,6 +15864,29 @@
"node": ">= 0.4"
}
},
"node_modules/mathjs": {
"version": "14.6.0",
"resolved": "https://registry.npmjs.org/mathjs/-/mathjs-14.6.0.tgz",
"integrity": "sha512-5vI2BLB5GKQmiSK9BH6hVkZ+GgqpdnOgEfmHl7mqVmdQObLynr63KueyYYLCQMzj66q69mV2XZZGQqqxeftQbA==",
"license": "Apache-2.0",
"dependencies": {
"@babel/runtime": "^7.26.10",
"complex.js": "^2.2.5",
"decimal.js": "^10.4.3",
"escape-latex": "^1.2.0",
"fraction.js": "^5.2.1",
"javascript-natural-sort": "^0.7.1",
"seedrandom": "^3.0.5",
"tiny-emitter": "^2.1.0",
"typed-function": "^4.2.1"
},
"bin": {
"mathjs": "bin/cli.js"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/md5": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz",
@@ -16343,6 +16382,12 @@
"node": ">=10"
}
},
"node_modules/seedrandom": {
"version": "3.0.5",
"resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz",
"integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==",
"license": "MIT"
},
"node_modules/semver": {
"version": "7.7.2",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
@@ -16606,6 +16651,12 @@
"texture-compressor": "bin/texture-compressor.js"
}
},
"node_modules/tiny-emitter": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/tiny-emitter/-/tiny-emitter-2.1.0.tgz",
"integrity": "sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q==",
"license": "MIT"
},
"node_modules/toidentifier": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
@@ -16667,6 +16718,15 @@
"node": ">= 0.6"
}
},
"node_modules/typed-function": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/typed-function/-/typed-function-4.2.1.tgz",
"integrity": "sha512-EGjWssW7Tsk4DGfE+5yluuljS1OGYWiI1J6e8puZz9nTMM51Oug8CD5Zo4gWMsOhq5BI+1bF+rWTm4Vbj3ivRA==",
"license": "MIT",
"engines": {
"node": ">= 18"
}
},
"node_modules/undici-types": {
"version": "7.8.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",