diff --git a/lib/www/server/lib/comparisons/geometric-differences.js b/lib/www/server/lib/comparisons/geometric-differences.js new file mode 100644 index 0000000..bed21d1 --- /dev/null +++ b/lib/www/server/lib/comparisons/geometric-differences.js @@ -0,0 +1,563 @@ +const d3a = require('d3-array'); +const { DougalBinaryBundle } = require('@dougal/binary'); +const { pool, setSurvey } = require('../db/connection'); +const db = require('../db'); +const { bundle } = require('../binary/bundle'); +const setops = require('../utils/setops'); +const { ijRMS, combinations, computeSample } = require('./utils'); +const { computePCA } = require('./pca'); +const { ERROR, INFO, DEBUG } = require('DOUGAL_ROOT/debug')(__filename); + + +async function fetchErrors (pid) { + + const client = await setSurvey(pid); + + try { + const text = ` + SELECT + fs.line, fs.point, + ij_error(fs.line::double precision, fs.point::double precision, fs.geometry)::json AS errorfinal + FROM + final_shots fs + ORDER BY fs.line, fs.point; + `; + + const res = await client.query(text); + + return res.rows.map( row => + [row.line, row.point, row.errorfinal.coordinates[0], row.errorfinal.coordinates[1]] + ); + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + + +async function groups () { + const projects = await db.project.get(); + const groupNames = [ + ...projects + .reduce( (acc, cur) => acc.add(...cur.groups), new Set() ) + ].filter( i => !!i ); + + return Object.fromEntries(groupNames.map( g => [g, projects.filter( p => p.groups.includes(g) )] )); +} + +/* +async function compare (baselineProjectID, monitorProjectID) { + console.log("Getting baseline", baselineProjectID); + const baselineData = await db.sequence.get(baselineProjectID); + console.log("Getting monitor", monitorProjectID); + const monitorData = await db.sequence.get(monitorProjectID); + console.log("Comparing"); + + const comparison = comparisonGeometricDifferences(baselineData, monitorData); + return comparison; +} +*/ + +function geometric_differences (baseline, monitor) { + + if (!baseline || !baseline.length) { + throw new Error("No baseline data"); + } + + if (!monitor || !monitor.length) { + throw new Error("No monitor data"); + } + + const comparison = []; // An array of { line, point, εi, εj }; line + point may be repeated + + for (const bp of baseline) { + const monitor_points = monitor.filter( mp => mp[0] === bp[0] && mp[1] === bp[1] ); + + if (!monitor_points.length) { + // console.log(`No match for L${bp[0]} P${bp[1]}`); + continue; + } + + for (const mp of monitor_points) { + const εi = mp[2] - bp[2], εj = mp[3] - bp[3]; + comparison.push([bp[0], bp[1], εi, εj]); + } + + } + + return comparison; +} + +async function compare (baselineProjectID, monitorProjectID, infoObj) { + console.log("Getting baseline", baselineProjectID); + const baselineData = await fetchErrors(baselineProjectID); + console.log("Getting monitor", monitorProjectID); + const monitorData = await fetchErrors(monitorProjectID); + console.log("Comparing"); + + const comparison = geometric_differences(baselineData, monitorData); + + if (infoObj instanceof Object) { + const baselineIJ = baselineData.map(i => i.slice(0,2)); + const monitorIJ = monitorData.map(i => i.slice(0,2)); + + infoObj.compared = comparison.length; + infoObj.baselineLength = baselineData.length; + infoObj.monitorLength = monitorData.length; + infoObj.baselineUniqueLength = setops.unique(baselineIJ).length; + infoObj.monitorUniqueLength = setops.unique(monitorIJ).length; + infoObj.common = setops.intersection(baselineIJ, monitorIJ).length; + } + + return comparison; +} + + +async function save (baselineProjectID, monitorProjectID, bundle, meta) { + const info = {}; + if (!bundle) { + const comparison = await compare(baselineProjectID, monitorProjectID, info); + if (comparison.length) { + bundle = asBundle(comparison); + } else { + console.warn(`No matching points between ${baselineProjectID} and ${monitorProjectID}`); + return; + } + } else if (!(bundle instanceof DougalBinaryBundle)) { + throw new Error("Illegal data: `bundle` must of null or of type DougalBinaryBundle"); + } + + if (!bundle.byteLength) { + console.warn(`Empty comparison results between ${baselineProjectID} and ${monitorProjectID}. Refusing to store`); + return; + } + + meta = {tstamp: (new Date()), ...info, ...stats(bundle), ...meta}; + + console.log("Storing in database"); + const client = await pool.connect(); + + try { + const text = ` + INSERT INTO comparisons.comparisons + (type, baseline_pid, monitor_pid, data, meta) + VALUES ('geometric_difference', $1, $2, $3, $4) + ON CONFLICT (type, baseline_pid, monitor_pid) + DO UPDATE SET + data = EXCLUDED.data, + meta = EXCLUDED.meta; + `; + + const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ]; + const res = await client.query(text, values); + return res.rowCount; + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + +async function saveSample (baselineProjectID, monitorProjectID, opts = {}) { + let sample = opts.sample; + let populationStats = opts.populationStats; + let sampleStats = opts.sampleStats; + + if (!sample?.length) { + const sampleSize = opts.sampleSize ?? 2000; + const record = await get(baselineProjectID, monitorProjectID); + let data; + + if (record) { + data = record.data; + } else { + console.log("Full data not found in database"); + data = asBundle(await compare(baselineProjectID, monitorProjectID)); + } + + sample = computeSample(data, opts); + + if (!populationStats) { + populationStats = stats(data); + } + } + + const bundle = asBundle(sample, {type: 0x0c}); + + if (!sampleStats) { + sampleStats = stats(bundle); + } + + meta = {tstamp: (new Date()), populationStats, sampleStats, ...(opts.meta??{})}; + + const client = await pool.connect(); + + try { + const text = ` + INSERT INTO comparisons.comparisons + (type, baseline_pid, monitor_pid, data, meta) + VALUES ('geometric_difference_sample', $1, $2, $3, $4) + ON CONFLICT (type, baseline_pid, monitor_pid) + DO UPDATE SET + data = EXCLUDED.data, + meta = EXCLUDED.meta; + `; + + const values = [ baselineProjectID, monitorProjectID, Buffer.from(bundle), meta ]; + const res = await client.query(text, values); + return res.rowCount; + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + +async function get (baselineProjectID, monitorProjectID, type = 'geometric_difference') { + + const client = await pool.connect(); + + try { + + const text = ` + SELECT data, meta + FROM comparisons.comparisons + WHERE type = $3 AND baseline_pid = $1 AND monitor_pid = $2; + `; + + const values = [ baselineProjectID, monitorProjectID, type ]; + const res = await client.query(text, values); + if (!res.rows.length) { + console.log("Comparison not found in database"); + return; + } + + const { data, meta } = res.rows[0]; + return { + data: DougalBinaryBundle.clone(data), + meta + }; + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + + +async function getSample (baselineProjectID, monitorProjectID) { + return await get(baselineProjectID, monitorProjectID, 'geometric_difference_sample'); +} + + +async function remove (baselineProjectID, monitorProjectID) { + const client = await pool.connect(); + + try { + const text = ` + DELETE + FROM comparisons.comparisons + WHERE + (type = 'geometric_difference' OR type = 'geometric_difference_sample') + AND baseline_pid = $1 + AND monitor_pid = $2; + `; + + const values = [ baselineProjectID, monitorProjectID ]; + + const res = await client.query(text, values); + return res.rowCount; + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + +function stats (comparison) { + let i, j, δi, δj; + + if (comparison instanceof DougalBinaryBundle) { + console.log("Computing stats"); + const udv = comparison.chunks()[0]?.udv; + + if (!udv) { + console.error("Could not determine udv from first chunk"); + console.log(comparison.chunks()); + return; + } + + let records; + + if (udv == 0xa) { + records = comparison.records; + + // Transpose the records + [ i, j, δi, δj ] = Array.from({ length: 4 }, (_, i) => records.map(row => row[i])); + } else if (udv == 0xc) { + records = comparison.records; + let _; + [ _, _, i, j, δi, δj ] = Array.from({ length: 6 }, (_, i) => records.map(row => row[i])); + } else { + throw new Error(`Unrecognised DougalBinaryBundle User Defined Value: ${udv}`); + } + + return { + length: records.length, + μ: [ d3a.mean(δi), d3a.mean(δj) ], + σ: [ d3a.deviation(δi), d3a.deviation(δj) ], + //rms: ijRMS(δi, δj), + ...computePCA(records) + } + } else if (Array.isArray(comparison)) { + if (Array.isArray(comparison[0])) { + return stats(asBundle(comparison, {type: 0xc})); + } else { + // Assume object + return stats(asBundle(comparison)); + } + } +} + + +/** Compare two projects' errorfinal quantities. + * + * Assumes that the preplots are the same. + * It is not a terribly efficient way of doing it, but considering + * that this is, by and large only going to be done once every few + * hours for an active prospect, and never for inactive ones, I + * think and hope we can live with that. + * + * `baseline` and `monitor` are the result of calling + * db.sequence.get(projectId) on each of the respective + * projects. + */ +/* +function comparisonGeometricDifferences (baseline, monitor) { + + if (!baseline || !baseline.length) { + throw new Error("No baseline data"); + } + + if (!monitor || !monitor.length) { + throw new Error("No monitor data"); + } + + const comparison = []; // An array of { line, point, εi, εj, δts }; line + point may be repeated + + for (const bp of baseline) { + if (!bp.errorfinal) { + console.log(`No final data for baseline point L${bp.line} S${bp.sequence} P${bp.point}`); + continue; + } + + const monitor_points = monitor.filter( mp => mp.line === bp.line && mp.point === bp.point ); + for (const mp of monitor_points) { + if (!mp.errorfinal) { + console.log(`No final data for monitor point L${mp.line} S${mp.sequence} P${mp.point}`); + continue; + } + + const line = bp.line; + const point = bp.point; + const baseSeq = bp.sequence; + const monSeq = mp.sequence; + const baseTStamp = bp.tstamp; + const monTStamp = mp.tstamp; + const δi = bp.errorfinal.coordinates[0] - mp.errorfinal.coordinates[0]; + const δj = bp.errorfinal.coordinates[1] - mp.errorfinal.coordinates[1]; + + const obj = {line, point, baseSeq, monSeq, baseTStamp, monTStamp, δi, δj}; + comparison.push(obj); + // console.log(obj); + } + } + + return comparison.sort(sortFn); +} + +function sortComparison (comparison) { + return comparison.sort( (a, b) => { + if (a.line == b.line) { + if (a.point == b.point) { + return a.baseTStamp - b.baseTStamp; + } else { + return a.point - b.point; + } + } else { + return a.line - b.line; + } + }) +} +*/ + +function sortFn (a, b) { + if (a.line == b.line) { + if (a.point == b.point) { + return a.baseTStamp - b.baseTStamp; + } else { + return a.point - b.point; + } + } else { + return a.line - b.line; + } +} + +function asBundle (comparison, opts = {type: 0x0a}) { + return DougalBinaryBundle.clone(bundle(comparison, opts)); +} + +function fromBundle (bundle) { + if (!(bundle instanceof DougalBinaryBundle)) { + bundle = DougalBinaryBundle.clone(bundle); + } + + const json = []; + for (const record of bundle) { + record.shift(); + json.push(record); + } + return json; +} + +async function saveGroup (group, opts = {}) { + if (group == null) { + // Save everything + const g = await groups(); + for (const group of Object.values(g)) { + await saveGroup(group) + } + } if (typeof group === "string") { + // This is a group name + const g = await groups(); + group = groups[g]; + } + + if (Array.isArray(group)) { + const pids = group.map( i => i.pid ).sort(); + + for (const [ baselineProjectID, monitorProjectID ] of combinations(pids, 2)) { + try { + const isSaved = await save(baselineProjectID, monitorProjectID); + if (isSaved) { + await saveSample(baselineProjectID, monitorProjectID, opts.sampleOpts); + } else { + await remove(baselineProjectID, monitorProjectID); + } + DEBUG("Saved comparison between %s and %s", baselineProjectID, monitorProjectID); + } catch (err) { + console.error(err); + ERROR("Error saving comparison between %s and %s", baselineProjectID, monitorProjectID); + } + } + } +} + +/* +async function getGroup (groupName, opts = {}) { + + const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort(); + + if (!group?.length) return; + + const client = await pool.connect(); + + try { + + const text = ` + -- SQL query goes here + `; + + const values = combinations(group, 2); + const res = await client.query(text, values); + if (!res.rows.length) { + console.log("Comparison not found in database"); + return; + } + + if (opts.returnData) { + return res.rows.map( row => ({ + data: DougalBinaryBundle.clone(row.data), + meta: row.meta + }); + } else { + return res.rows.map( row => row.meta ); + } + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} +*/ + + +async function getGroup (groupName, opts = {}) { + + const group = (await groups())?.[groupName]?.map( i => i.pid)?.sort(); + + if (!group?.length) return; + + const client = await pool.connect(); + + try { + + const pairs = combinations(group, 2); + const flatValues = pairs.flat(); + const placeholders = []; + for (let i = 0; i < pairs.length; i++) { + placeholders.push(`($${i * 2 + 1}, $${i * 2 + 2})`); + } + const inClause = placeholders.join(','); + const selectFields = opts.returnData ? 'data, meta' : 'meta'; + + const text = ` + SELECT baseline_pid, monitor_pid, ${selectFields} + FROM comparisons.comparisons + WHERE type = 'geometric_difference' + AND (baseline_pid, monitor_pid) IN (VALUES ${inClause}) + ORDER BY baseline_pid, monitor_pid + `; + + console.log(text); + console.log(flatValues); + + const res = await client.query(text, flatValues); + if (!res.rows.length) { + console.log("Comparison not found in database"); + return; + } + + if (opts.returnData) { + return res.rows.map( row => ({ + ...row, + data: DougalBinaryBundle.clone(row.data), + })); + } else { + return res.rows; + } + } catch (err) { + console.error(err); + } finally { + client.release(); + } +} + +module.exports = { + groups, + fetchErrors, + compare, + computeSample, + get, + save, + getSample, + saveSample, + saveGroup, + getGroup, + remove, + stats, + // comparisonGeometricDifferences, + asBundle, + fromBundle +}; diff --git a/lib/www/server/lib/comparisons/index.js b/lib/www/server/lib/comparisons/index.js new file mode 100644 index 0000000..53e05d4 --- /dev/null +++ b/lib/www/server/lib/comparisons/index.js @@ -0,0 +1,4 @@ + +module.exports = { + ...require('./geometric-differences') +} diff --git a/lib/www/server/lib/comparisons/pca.js b/lib/www/server/lib/comparisons/pca.js new file mode 100644 index 0000000..f4303b0 --- /dev/null +++ b/lib/www/server/lib/comparisons/pca.js @@ -0,0 +1,80 @@ +const math = require('mathjs'); + +/** + * Compute PCA (eigenvectors and eigenvalues) for deviation data to assess geometric repeatability. + * @param {Array>} deviations - Array of [point, line, i_deviation, j_deviation] + * @returns {Object} - { eigenvalues, eigenvectors, rms, anisotropy, primaryDirection } + */ +function computePCA(deviations) { + // Extract i_deviation and j_deviation + const deviationMatrix = deviations.map(row => [row[2], row[3]]); + + // Convert to mathjs matrix + const D = math.matrix(deviationMatrix); + + // Compute mean for centering (1 x 2 matrix) + const mean = math.mean(D, 0); + + // Explicitly repeat mean to match D's shape (n x 2) + const n = deviationMatrix.length; + const meanRepeated = math.repmat(mean, n, 1); + + // Center the data + const centered = math.subtract(D, meanRepeated); + + // Compute covariance matrix: (1/(n-1)) * (D_centered^T * D_centered) + const covMatrix = math.multiply( + math.multiply(1 / (n - 1), math.transpose(centered)), + centered + ); + + // Perform eigen decomposition + const result = math.eigs(covMatrix); + let eigenvalues = result.values; + const evObjs = result.eigenvectors; + + // Convert eigenvalues to array if it's a matrix + eigenvalues = Array.isArray(eigenvalues) ? eigenvalues : eigenvalues.toArray(); + + // Create pairs and convert vector to array if necessary + const pairs = eigenvalues.map((val, i) => { + let vec = evObjs[i].vector; + if (vec.toArray) vec = vec.toArray(); + return { val, vec }; + }); + + // Sort by descending eigenvalues + pairs.sort((a, b) => b.val - a.val); + + // Sorted eigenvalues + const sortedEigenvalues = pairs.map(p => p.val); + + // Build eigenvector matrix: rows as components, columns as eigenvectors + const dimension = pairs[0].vec.length; // e.g., 2 + const evecRows = []; + for (let comp = 0; comp < dimension; comp++) { + evecRows.push(pairs.map(p => p.vec[comp])); + } + const sortedEigenvectors = math.matrix(evecRows); + + // Compute RMS errors along principal axes + const rms = sortedEigenvalues.map(val => Math.sqrt(Math.max(val, 0))); + + // Compute anisotropy (ratio of major to minor axis variance) + const anisotropy = sortedEigenvalues[0] / (sortedEigenvalues[1] || 1); // Avoid division by zero + + // Primary direction (angle in degrees of major eigenvector) + const primaryVector = sortedEigenvectors.subset(math.index([0, 1], 0)).toArray(); + const primaryDirection = Math.atan2(primaryVector[1], primaryVector[0]) * 180 / Math.PI; + + return { + eigenvalues: sortedEigenvalues, + eigenvectors: sortedEigenvectors.toArray(), + rms: rms, // RMS errors along major/minor axes + anisotropy: anisotropy, // Ratio of variances + primaryDirection: primaryDirection // Angle of major axis (degrees) + }; +} + + +module.exports = { computePCA }; diff --git a/lib/www/server/lib/comparisons/utils.js b/lib/www/server/lib/comparisons/utils.js new file mode 100644 index 0000000..64f6276 --- /dev/null +++ b/lib/www/server/lib/comparisons/utils.js @@ -0,0 +1,310 @@ +const d3 = require('d3-array'); + +// Function to calculate the root mean square (RMS) of position deviations +// This computes the RMS of the Euclidean distances: sqrt( (1/n) * sum(δi² + δj²) ) +// Assumes deviations are already centered (mean deviation ~0); if normalization by std dev or range is needed, adjust accordingly +function ijRMS(δi, δj) { + if (!δi.length || !δj.length) return 0; + + if (δi.length != δj.length) { + console.warn(`δi and δj have different lengths!`); + } + + let sumSquares = 0; + const n = Math.min(δi.length, δj.length); + + for (let i=0; i < n; i++) { + sumSquares += (δi[i] * δi[i]) + (δj[i] * δj[i]); + } + + const meanSquare = sumSquares / n; + const rms = Math.sqrt(meanSquare); + + return rms; +} + + +/** + * Performs stratified sampling on an array of [line, point, δi, δj] data points. + * Groups by line and samples proportionally to preserve shape and spread. + * + * @param {Array>} data - Input data: [[line, point, δi, δj], ...] + * @param {number} sampleSize - Target number of samples (e.g., 2000) + * @returns {Array>} Sampled data in same format + */ +function old_stratifiedSample(data, sampleSize) { + if (!Array.isArray(data) || data.length === 0) return []; + if (!Number.isInteger(sampleSize) || sampleSize <= 0) { + throw new Error('sampleSize must be a positive integer'); + } + + // Group data by line (first element) + const grouped = d3.group(data, d => d[0]); + const totalSize = data.length; + const sampled = []; + + // Ensure sampleSize doesn't exceed data size + const effectiveSampleSize = Math.min(sampleSize, totalSize); + + // Iterate over each line group + for (const [line, group] of grouped) { + // Calculate proportional sample size for this group + const groupSize = group.length; + const groupSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize)); + + // Shuffle group and take first N elements + const shuffled = d3.shuffle([...group]); + sampled.push(...shuffled.slice(0, groupSampleSize)); + } + + // If sampled size is slightly off due to rounding, adjust + if (sampled.length > effectiveSampleSize) { + return d3.shuffle(sampled).slice(0, effectiveSampleSize); + } else if (sampled.length < effectiveSampleSize) { + // Pad with random samples from entire dataset if needed + const remaining = effectiveSampleSize - sampled.length; + const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining); + sampled.push(...additional); + } + + return sampled; +} + +/** + * Performs stratified sampling on an array of [line, point, δi, δj] data points. + * Stratifies by line and δi quantiles to preserve shape and spread, with outlier control. + * + * @param {Array>} data - Input data: [[line, point, δi, δj], ...] + * @param {number} sampleSize - Target number of samples (e.g., 2000) + * @param {number} [binsPerLine=10] - Number of δi quantile bins per line + * @returns {Array>} Sampled data in same format + */ +function stratifiedSample(data, sampleSize, binsPerLine = 10) { + if (!Array.isArray(data) || data.length === 0) return []; + if (!Number.isInteger(sampleSize) || sampleSize <= 0) { + throw new Error('sampleSize must be a positive integer'); + } + if (!Number.isInteger(binsPerLine) || binsPerLine <= 0) { + throw new Error('binsPerLine must be a positive integer'); + } + + const totalSize = data.length; + const effectiveSampleSize = Math.min(sampleSize, totalSize); + const sampled = []; + + // Group by line + const groupedByLine = d3.group(data, d => d[0]); + + // Compute population stats for validation + const populationStats = computeStats(data); + + // Iterate over each line + for (const [line, group] of groupedByLine) { + const groupSize = group.length; + const lineSampleSize = Math.max(1, Math.round((groupSize / totalSize) * effectiveSampleSize)); + + // Create quantile-based bins for δi + const δiValues = group.map(d => d[2]).sort(d3.ascending); + const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiValues, i / binsPerLine)); + const binnedData = group.map(d => { + const δi = d[2]; + let binIndex = 0; + for (let i = 0; i < binsPerLine; i++) { + if (δi >= quantiles[i] && δi < quantiles[i + 1]) { + binIndex = i; + break; + } + } + return { data: d, bin: binIndex }; + }); + const groupedByBin = d3.group(binnedData, d => d.bin); + + // Allocate samples across bins, inversely weighted by density to control outliers + const binSampleSizes = new Map(); + let remainingLineSamples = lineSampleSize; + const binCounts = Array(binsPerLine).fill(0); + for (const [bin, binGroup] of groupedByBin) { + binCounts[bin] = binGroup.length; + } + const maxBinCount = d3.max(binCounts); + for (const [bin, binGroup] of groupedByBin) { + const binSize = binGroup.length; + // Inverse weighting: smaller bins (outliers) get fewer samples + const weight = binSize > 0 ? Math.max(0.1, 1 - (binSize / maxBinCount) * 0.5) : 1; + const binSampleSize = Math.max(1, Math.round(lineSampleSize * (binSize / groupSize) * weight)); + binSampleSizes.set(bin, Math.min(binSampleSize, binSize)); + remainingLineSamples -= binSampleSizes.get(bin); + } + + // Distribute remaining samples + if (remainingLineSamples > 0) { + const nonEmptyBins = Array.from(groupedByBin.keys()); + for (let i = 0; i < remainingLineSamples && nonEmptyBins.length > 0; i++) { + const bin = nonEmptyBins[i % nonEmptyBins.length]; + binSampleSizes.set(bin, binSampleSizes.get(bin) + 1); + } + } + + // Sample from each bin + for (const [bin, binGroup] of groupedByBin) { + const samples = d3.shuffle([...binGroup]).slice(0, binSampleSizes.get(bin)).map(s => s.data); + sampled.push(...samples); + } + } + + // Adjust sample size + let finalSample = sampled; + if (sampled.length > effectiveSampleSize) { + finalSample = d3.shuffle(sampled).slice(0, effectiveSampleSize); + } else if (sampled.length < effectiveSampleSize) { + const remaining = effectiveSampleSize - sampled.length; + const additional = d3.shuffle(data.filter(d => !sampled.includes(d))).slice(0, remaining); + finalSample = [...sampled, ...additional]; + } + + // Validate and adjust if stats are off + const sampleStats = computeStats(finalSample); + const statTolerance = { μ: 0.1, σ: 0.2 }; // Allowable relative deviation + const needsAdjustment = + Math.abs(sampleStats.μ[0] - populationStats.μ[0]) / populationStats.μ[0] > statTolerance.μ || + Math.abs(sampleStats.μ[1] - populationStats.μ[1]) / populationStats.μ[1] > statTolerance.μ || + Math.abs(sampleStats.σ[0] - populationStats.σ[0]) / populationStats.σ[0] > statTolerance.σ || + Math.abs(sampleStats.σ[1] - populationStats.σ[1]) / populationStats.σ[1] > statTolerance.σ; + + if (needsAdjustment) { + // Add points from underrepresented regions + const δiSample = finalSample.map(d => d[2]); + const δiPopulation = data.map(d => d[2]); + const quantiles = d3.range(0, binsPerLine + 1).map(i => d3.quantile(δiPopulation, i / binsPerLine)); + const sampleBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiSample); + const populationBins = d3.histogram().domain(d3.extent(δiPopulation)).thresholds(quantiles)(δiPopulation); + const underSampledBins = sampleBins + .map((b, i) => ({ bin: i, diff: populationBins[i].length / totalSize - b.length / finalSample.length })) + .filter(b => b.diff > 0.1); // Significant under-sampling + + if (underSampledBins.length > 0) { + const additionalSamples = []; + for (const { bin } of underSampledBins) { + const binData = data.filter(d => d[2] >= quantiles[bin] && d[2] < quantiles[bin + 1] && !finalSample.includes(d)); + const needed = Math.round((underSampledBins[0].diff * effectiveSampleSize) / 2); + additionalSamples.push(...d3.shuffle(binData).slice(0, needed)); + } + finalSample = d3.shuffle([...finalSample, ...additionalSamples]).slice(0, effectiveSampleSize); + } + } + + return finalSample; +} + +function decimate (data, decimationCount = 20) { + return data.filter( (row, index) => (index % decimationCount) == 0 ); +} + +function computeSample (data, opts = {}) { + const DEFAULT_SAMPLE_SIZE = 2000; + let sample; + + if (opts.decimate === true) { + if (opts.sampleSize > 0) { + sample = decimate(data.records, Math.floor(data.records.length / opts.sampleSize)); + } else { + sample = decimate(data.records, Math.floor(data.records.length / DEFAULT_SAMPLE_SIZE)); + } + } else if (opts.decimate > 0) { + sample = decimate(data.records, opts.decimate); + } else if (opts.sampleSize) { + sample = stratifiedSample(data.records, opt.sampleSize); + } else { + sample = stratifiedSample(data.records, DEFAULT_SAMPLE_SIZE); + } + + return sample; +} + +// Optional: Utility to compute stats for validation +function computeStats(data) { + const δi = data.map(d => d[2]); + const δj = data.map(d => d[3]); + const rms = Math.sqrt(d3.mean(data, d => d[2] ** 2 + d[3] ** 2)); + return { + l: data.length, + μ: [d3.mean(δi), d3.mean(δj)], + σ: [d3.deviation(δi), d3.deviation(δj)], + rms + }; +} + +function centre (data) { + const stats = computeStats(data); + + return data.map( row => [row[0], row[1], row[2]-stats.μ[0], row[3]-stats.μ[1]] ) +} + +function outliers (data, sd=1.96) { + const stats = computeStats(data); + + function fn ([l, p, i, j]) { + return (i - stats.μ[0]) > stats.σ[0]*sd || + (j - stats.μ[1]) > stats.σ[1]*sd; + } + + return data.filter(fn) +} + +function inliers (data, sd=1.96) { + const stats = computeStats(data); + + function fn ([l, p, i, j]) { + return (i - stats.μ[0]) <= stats.σ[0]*sd && + (j - stats.μ[1]) <= stats.σ[1]*sd; + } + + return data.filter(fn) +} + +function difference (a, b) { + const obj = Array.isArray(a) ? [] : {}; + for (const k in a) { + const v0 = a[k]; + const v1 = b[k] + if (v0 instanceof Object && v1 instanceof Object) { + obj[k] = difference (v0, v1); + } else if (!isNaN(Number(v0)) && !isNaN(Number(v1))) { + obj[k] = v1 - v0; + } + } + return obj; +} + +function combinations (a, n) { + const results = []; + + function combine(current, start) { + if (current.length === n) { + results.push([...current]); + return; + } + for (let i = start; i < a.length; i++) { + current.push(a[i]); + combine(current, i + 1); + current.pop(); + } + } + + combine([], 0); + return results; +} + +module.exports = { + combinations, + centre, + ijRMS, + computeStats, + computeSample, + stratifiedSample, + old_stratifiedSample, + decimate, + difference, + outliers, + inliers +} diff --git a/lib/www/server/package.json b/lib/www/server/package.json index 9a7a7dd..e850b4d 100644 --- a/lib/www/server/package.json +++ b/lib/www/server/package.json @@ -44,6 +44,7 @@ "jsonwebtoken": "^9.0.2", "leaflet-headless": "git+https://git@gitlab.com/aaltronav/contrib/leaflet-headless.git#devel", "marked": "^4.0.12", + "mathjs": "^14.6.0", "node-fetch": "^2.6.1", "nunjucks": "^3.2.3", "path-to-regexp": "^6.2.1", diff --git a/package-lock.json b/package-lock.json index 66072dd..b31979b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1678,17 +1678,6 @@ "dev": true, "license": "MIT" }, - "lib/www/client/source/node_modules/@babel/runtime": { - "version": "7.23.2", - "dev": true, - "license": "MIT", - "dependencies": { - "regenerator-runtime": "^0.14.0" - }, - "engines": { - "node": ">=6.9.0" - } - }, "lib/www/client/source/node_modules/@babel/template": { "version": "7.27.2", "dev": true, @@ -7524,11 +7513,6 @@ "node": ">=4" } }, - "lib/www/client/source/node_modules/regenerator-runtime": { - "version": "0.14.0", - "dev": true, - "license": "MIT" - }, "lib/www/client/source/node_modules/regenerator-transform": { "version": "0.15.2", "dev": true, @@ -9374,6 +9358,7 @@ "jsonwebtoken": "^9.0.2", "leaflet-headless": "git+https://git@gitlab.com/aaltronav/contrib/leaflet-headless.git#devel", "marked": "^4.0.12", + "mathjs": "^14.6.0", "node-fetch": "^2.6.1", "nunjucks": "^3.2.3", "path-to-regexp": "^6.2.1", @@ -10527,17 +10512,6 @@ "node": ">=6.0.0" } }, - "lib/www/server/node_modules/redoc-cli/node_modules/@babel/runtime": { - "version": "7.16.7", - "dev": true, - "license": "MIT", - "dependencies": { - "regenerator-runtime": "^0.13.4" - }, - "engines": { - "node": ">=6.9.0" - } - }, "lib/www/server/node_modules/redoc-cli/node_modules/@babel/template": { "version": "7.12.13", "dev": true, @@ -12532,11 +12506,6 @@ "url": "https://github.com/Mermade/oas-kit?sponsor=1" } }, - "lib/www/server/node_modules/redoc-cli/node_modules/regenerator-runtime": { - "version": "0.13.9", - "dev": true, - "license": "MIT" - }, "lib/www/server/node_modules/redoc-cli/node_modules/require-directory": { "version": "2.1.1", "dev": true, @@ -13297,6 +13266,15 @@ "node": ">=0.4" } }, + "node_modules/@babel/runtime": { + "version": "7.28.3", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.3.tgz", + "integrity": "sha512-9uIQ10o0WGdpP6GDhXcdOJPJuDgFtIDtN/9+ArJQ2NAfAmiuhTQdzkaTGR33v43GYS2UrSA0eX2pPPHoFVvpxA==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@deck.gl/aggregation-layers": { "version": "9.1.13", "resolved": "https://registry.npmjs.org/@deck.gl/aggregation-layers/-/aggregation-layers-9.1.13.tgz", @@ -14389,6 +14367,19 @@ "node": ">= 10" } }, + "node_modules/complex.js": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/complex.js/-/complex.js-2.4.2.tgz", + "integrity": "sha512-qtx7HRhPGSCBtGiST4/WGHuW+zeaND/6Ld+db6PbrulIB1i2Ev/2UPiqcmpQNPSyfBKraC0EOvOKCB5dGZKt3g==", + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/rawify" + } + }, "node_modules/compressible": { "version": "2.0.18", "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz", @@ -15089,6 +15080,12 @@ "node": ">= 0.4" } }, + "node_modules/escape-latex": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/escape-latex/-/escape-latex-1.2.0.tgz", + "integrity": "sha512-nV5aVWW1K0wEiUIEdZ4erkGGH8mDxGyxSeqPzRNtWP7ataw+/olFObw7hujFWlVjNsaDFw5VZ5NzVSIqRgfTiw==", + "license": "MIT" + }, "node_modules/escodegen": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", @@ -15205,6 +15202,19 @@ "node": ">= 6" } }, + "node_modules/fraction.js": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.1.tgz", + "integrity": "sha512-PhqCuhSKIGbbkJ+cojHv47eEWClU71FIOhiUsYdZYTwhIzCeIN8rXeEjserTvPat5JLJChumn8chHz64WkZgTw==", + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/rawify" + } + }, "node_modules/fs-minipass": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", @@ -15635,6 +15645,12 @@ "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g==" }, + "node_modules/javascript-natural-sort": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/javascript-natural-sort/-/javascript-natural-sort-0.7.1.tgz", + "integrity": "sha512-nO6jcEfZWQXDhOiBtG2KvKyEptz7RVbpGP4vTD2hLBdmNQSsCiicO2Ioinv6UI4y9ukqnBpy+XZ9H6uLNgJTlw==", + "license": "MIT" + }, "node_modules/jsbn": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", @@ -15848,6 +15864,29 @@ "node": ">= 0.4" } }, + "node_modules/mathjs": { + "version": "14.6.0", + "resolved": "https://registry.npmjs.org/mathjs/-/mathjs-14.6.0.tgz", + "integrity": "sha512-5vI2BLB5GKQmiSK9BH6hVkZ+GgqpdnOgEfmHl7mqVmdQObLynr63KueyYYLCQMzj66q69mV2XZZGQqqxeftQbA==", + "license": "Apache-2.0", + "dependencies": { + "@babel/runtime": "^7.26.10", + "complex.js": "^2.2.5", + "decimal.js": "^10.4.3", + "escape-latex": "^1.2.0", + "fraction.js": "^5.2.1", + "javascript-natural-sort": "^0.7.1", + "seedrandom": "^3.0.5", + "tiny-emitter": "^2.1.0", + "typed-function": "^4.2.1" + }, + "bin": { + "mathjs": "bin/cli.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/md5": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", @@ -16343,6 +16382,12 @@ "node": ">=10" } }, + "node_modules/seedrandom": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz", + "integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==", + "license": "MIT" + }, "node_modules/semver": { "version": "7.7.2", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", @@ -16606,6 +16651,12 @@ "texture-compressor": "bin/texture-compressor.js" } }, + "node_modules/tiny-emitter": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/tiny-emitter/-/tiny-emitter-2.1.0.tgz", + "integrity": "sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q==", + "license": "MIT" + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -16667,6 +16718,15 @@ "node": ">= 0.6" } }, + "node_modules/typed-function": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/typed-function/-/typed-function-4.2.1.tgz", + "integrity": "sha512-EGjWssW7Tsk4DGfE+5yluuljS1OGYWiI1J6e8puZz9nTMM51Oug8CD5Zo4gWMsOhq5BI+1bF+rWTm4Vbj3ivRA==", + "license": "MIT", + "engines": { + "node": ">= 18" + } + }, "node_modules/undici-types": { "version": "7.8.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",