From 47358a6b9ac94a1ab15b33d9e766545b28bcffb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20=C5=A0pa=C4=8Dek?= Date: Fri, 6 Dec 2024 01:19:35 +0100 Subject: [PATCH] Automatic testing and bug fixes --- index.js | 89 ++++++++++---- m-tree/mtree.js | 122 ++++++++----------- m-tree/nodes.js | 31 +++-- m-tree/utils.js | 65 +++++++++- public/index.html | 3 + public/performance.html | 254 ++++++++++++++++++++++++++++++++++++++++ public/script/main.js | 4 +- public/style/index.css | 4 + 8 files changed, 457 insertions(+), 115 deletions(-) create mode 100644 public/performance.html diff --git a/index.js b/index.js index a21445f..1caefd4 100644 --- a/index.js +++ b/index.js @@ -23,6 +23,9 @@ app.get('/favicon.ico', (req, res) => { res.status(204).end(); }); // Serve the index.html file app.get('/', (req, res) => { res.sendFile(__dirname + '/public/index.html'); }); +// Performance testing +app.get('/performance', (req, res) => { res.sendFile(__dirname + '/public/performance.html'); }); + // Return the MTree app.get('/tree', (req, res) => { const tree = JSON.parse(JSON.stringify(mtree, (key, value) => { @@ -72,31 +75,45 @@ app.get('/rangeQuery', (req, res) => { if (parsedQueryPoint.length !== mtree.dimension) return res.status(400).send('Query point has the wrong dimension'); - /*console.time('mtreeRangeQuery'); - const result = mtree.rangeQuery(parsedQueryPoint, parsedRadius); - console.timeEnd('mtreeRangeQuery'); - //console.log(result.length); - - console.time('sequentialSearch'); - const sequentialResult = points.filter(point => mtree.distanceFunction(point, parsedQueryPoint) <= parsedRadius); - console.timeEnd('sequentialSearch'); - //console.log(sequentialResult.length); - - res.send(result);*/ const start = performance.now(); const result = mtree.rangeQuery(parsedQueryPoint, parsedRadius); const RangeQueryTime = performance.now() - start; const start2 = performance.now(); - const sequentialResult = points.filter(point => mtree.distanceFunction(point, parsedQueryPoint) <= parsedRadius); + let sequentialFunctionCalls = 0; + const sequentialResult = points.filter(point => { + sequentialFunctionCalls++; + return mtree.distanceFunction(point, parsedQueryPoint) <= parsedRadius; + }); const sequentialSearchTime = performance.now() - start2; + /*let resultCopy = JSON.parse(JSON.stringify(result)); + let sequentialResultCopy = JSON.parse(JSON.stringify(sequentialResult)); + + resultCopy = resultCopy.points.map(point => point.point).sort((a, b) => mtree.distanceFunction(a, parsedQueryPoint) - mtree.distanceFunction(b, parsedQueryPoint)); + sequentialResultCopy = sequentialResultCopy.map(point => point).sort((a, b) => mtree.distanceFunction(a, parsedQueryPoint) - mtree.distanceFunction(b, parsedQueryPoint)); + + if (JSON.stringify(resultCopy) !== JSON.stringify(sequentialResultCopy)) { + console.log('Mismatch between MTree and sequential range query results'); + console.log(JSON.stringify(resultCopy)); + console.log(JSON.stringify(sequentialResultCopy)); + + console.log(`range query: query point ${JSON.stringify(parsedQueryPoint)}, radius ${parsedRadius}`); + + return res.status(500).send('Mismatch between MTree and sequential range query results'); + }*/ + const timingResult = { mtreeRangeQuery: RangeQueryTime, sequentialSearch: sequentialSearchTime }; + + const functionCalls = { + mtreeRangeQuery: result.dstFnCalls, + sequentialSearch: sequentialFunctionCalls + }; - res.send({ values: result, timingResult }); + res.send({ values: result.points, timingResult, functionCalls }); }); // Perform a k-NN query on the MTree @@ -121,15 +138,33 @@ app.get('/kNNQuery', (req, res) => { const mtreeKNNQueryTime = performance.now() - start; const start2 = performance.now(); - const sequentialResult = points.sort((a, b) => mtree.distanceFunction(a, parsedQueryPoint) - mtree.distanceFunction(b, parsedQueryPoint)).slice(0, kInt); + let sequentialFunctionCalls = 0; + const sequentialResult = points.sort((a, b) => { + sequentialFunctionCalls++; + return mtree.distanceFunction(a, parsedQueryPoint) - mtree.distanceFunction(b, parsedQueryPoint); + }).slice(0, kInt); const sequentialSearchTime = performance.now() - start2; + /*if (JSON.stringify(result.points.map(point => point.point)) !== JSON.stringify(sequentialResult)) { + console.log('Mismatch between MTree and sequential KNN query results'); + console.log(JSON.stringify(result.points.map(point => point.point))); + console.log(JSON.stringify(sequentialResult)); + + console.log(`range query: query point ${JSON.stringify(parsedQueryPoint)}, k ${kInt}`); + + return res.status(500).send('Mismatch between MTree and sequential KNN query results'); + }*/ + const timingResult = { mtreeKNNQuery: mtreeKNNQueryTime, sequentialSearch: sequentialSearchTime }; + const functionCalls = { + mtreeKNNQuery: result.dstFnCalls, + sequentialSearch: sequentialFunctionCalls + }; - res.send({ values: result, timingResult }); + res.send({ values: result.points, timingResult, functionCalls }); }); // Recreate the MTree with the given dimensions @@ -138,7 +173,7 @@ app.post('/recreate', (req, res) => { dimensions = parseInt(dimensions); pointCount = parseInt(pointCount); capacity = parseInt(capacity); - console.log(dimensions, pointCount, capacity, distanceFunction); + if (isNaN(dimensions) || dimensions <= 0) return res.status(400).send('Invalid dimensions'); if (isNaN(pointCount) || pointCount <= 0) @@ -155,20 +190,24 @@ app.post('/recreate', (req, res) => { mtree = new MTree(dimensions, capacity, chosenDistanceFunction); points = generator.generateMany(pointCount); - // points.forEach(point => mtree.insert(point)); - console.time('tree creation') - points.forEach((point, i) => { - if (i % 1000 === 0) { + const start = performance.now(); + console.log('Creating tree:', { dimensions, pointCount, capacity, distanceFunction: distanceFunctionName }); + console.time('Tree created'); + points.forEach(point => mtree.insert(point)); + /*points.forEach((point, i) => { + /*if (i % 1000 === 0) { console.time(`insertion of next 1000 points`); console.log(`inserted ${i} points`); } - mtree.insert(point); if (i % 1000 === 999) console.timeEnd(`insertion of next 1000 points`); - }) - - res.send('MTree recreated'); - console.timeEnd('tree creation') + + mtree.insert(point); + console.log(`inserted ${i} points`); + })*/ + const treeCreationTime = performance.now() - start; + res.send({ treeCreationTime }); + console.timeEnd('Tree created'); }); // Load the MTree from the posted JSON diff --git a/m-tree/mtree.js b/m-tree/mtree.js index a6516b2..63e03d8 100644 --- a/m-tree/mtree.js +++ b/m-tree/mtree.js @@ -1,5 +1,5 @@ const { Node, GroundEntry } = require('./nodes'); -const calculateCentroid = require("./utils").calculateCentroid; +const calculateRadius = require("./utils").calculateRadius; const distanceFunctions = require('./distance-functions'); class MTree { @@ -16,7 +16,6 @@ class MTree { this.root = new Node([], true, null, this.distanceFunction); } - /** * Constructs a new MTree from a given JSON representation of the MTree. * @param {Object} tree - The JSON representation of the MTree @@ -24,7 +23,6 @@ class MTree { */ static fromJSON(tree) { const mtree = new MTree(tree.dimension, tree.capacity, distanceFunctions[tree.distanceFunctionName]); - console.log(tree); function assignParents(node, parent) { node.parent = parent; @@ -62,16 +60,17 @@ class MTree { // Add the ground entry to the leaf node's entries leafNode.insert(groundEntry, this.distanceFunction); + leafNode.updateCentroid(this.distanceFunction); + leafNode.updateRadius(this.distanceFunction); + // If the leaf node now has more than the capacity amount of entries, split it if (leafNode.entries.length > this.capacity) this.splitNode(leafNode); - // Update the routing entries of ancestors of the leaf node let currentNode = leafNode; - while (currentNode.parent !== null) { - currentNode.parent.updateCentroid(this.distanceFunction); - currentNode.parent.updateRadiusIfNeeded(groundEntry, this.distanceFunction); - currentNode = currentNode.parent; + while (currentNode = currentNode.parent) { + currentNode.updateCentroid(this.distanceFunction); + currentNode.updateRadiusIfNeeded(groundEntry, this.distanceFunction); } } @@ -88,7 +87,7 @@ class MTree { if (node.isLeaf) return node; - const closestEntry = this.findClosestEntry(node.entries, point); + const closestEntry = this.findBestEntry(node.entries, point); if (!closestEntry) return node; @@ -96,40 +95,34 @@ class MTree { } /** - * Finds the entry in the given array of entries that is closest to the given point. + * Finds the entry in the given array of entries for which adding a new point + * would require the smallest increase in radius, or no increase at all. + * * @param {NodeEntry[]} entries - The array of entries to search through - * @param {number[]} point - The point to find the closest entry to - * @returns {NodeEntry} The closest entry to the given point + * @param {number[]} point - The point to find the best entry for + * @returns {NodeEntry} The best entry for the given point */ - findClosestEntry(entries, point) { - let closestEntry = null; + findBestEntry(entries, point) { + let bestEntry = null; + let minRadiusIncrease = Infinity; let minDistance = Infinity; for (const entry of entries) { - const distance = this.distanceFunction(point, entry.point); - if (distance < minDistance) { - minDistance = distance; - closestEntry = entry; + const currentDistance = this.distanceFunction(entry.point, point); + const radiusIncrease = Math.max(0, currentDistance - entry.radius); + + if (radiusIncrease < minRadiusIncrease) { + minRadiusIncrease = radiusIncrease; + minDistance = currentDistance; + bestEntry = entry; + } + else if (radiusIncrease === minRadiusIncrease && currentDistance < minDistance) { + minDistance = currentDistance; + bestEntry = entry; } } - return closestEntry; - } - - /** - * Calculates the radius of the given entries. - * The radius is the maximum distance from the centroid of the entries to any point in the entries. - * - * @param {Object[]} entries - The entries to calculate the radius from. - * @returns {number} The radius. - */ - calculateRadius(entries) { - const centroidPoint = calculateCentroid(entries, this.distanceFunction); - // Calculate the maximum distance from the centroid to any point in the entries, - // taking into account the radius of each node - const maxDistance = Math.max(...entries.map(entry => this.distanceFunction(entry.point, centroidPoint)/* + entry.radius*/)); - - return maxDistance; + return bestEntry; } /** @@ -174,15 +167,15 @@ class MTree { * the best split or null if no valid split is found. */ findBestSplit(entries) { - if (entries.length < 2) return null; + if (entries.length < 2) return { leftEntries: entries, rightEntries: [] }; - const halfSize = Math.floor(entries.length / 2); + //const halfSize = Math.floor(entries.length / 2); let minTotalRadius = Infinity; let bestSplit = null; // Precompute distances to reduce redundant calculations - const distances = entries.map((entry, index) => - entries.map((otherEntry, otherIndex) => + const distances = entries.map((entry, index) => + entries.map((otherEntry, otherIndex) => index !== otherIndex ? this.distanceFunction(entry.point, otherEntry.point) : Infinity ) ); @@ -209,9 +202,10 @@ class MTree { leftEntries.push(entries[i]); rightEntries.push(entries[j]); + // Ensure the partitions have similar number of entries if (Math.abs(leftEntries.length - rightEntries.length) <= 1) { - const leftRadius = this.calculateRadius(leftEntries); - const rightRadius = this.calculateRadius(rightEntries); + const leftRadius = calculateRadius(leftEntries, this.distanceFunction); + const rightRadius = calculateRadius(rightEntries, this.distanceFunction); const totalRadius = leftRadius + rightRadius; if (totalRadius < minTotalRadius) { @@ -225,31 +219,6 @@ class MTree { return bestSplit; } - /** - * Gets all combinations of size 'size' from the given array 'arr'. - * @param {any[]} arr - The array to get combinations from - * @param {number} size - The size of the combinations to get - * @returns {any[][]} An array of combinations of the given size - */ - /*getCombinations(arr, size) { - if (size === 0) - return [[]]; - const result = []; - const helper = (offset, partialCombination) => { - if (partialCombination.length === size) { - result.push(partialCombination.slice()); - return; - } - for (let i = offset; i <= arr.length - (size - partialCombination.length); i++) { - partialCombination.push(arr[i]); - helper(i + 1, partialCombination); - partialCombination.pop(); - } - }; - helper(0, []); - return result; - }*/ - /** * Returns all points in the tree that are within the given radius of the given query point. * @param {number[]} queryPoint - The point to search around. @@ -258,8 +227,9 @@ class MTree { */ rangeQuery(queryPoint, radius) { const result = []; + this.dstFnCalls = 0; this.rangeQueryRecursive(this.root, queryPoint, radius, result); - return result; + return { points: result, dstFnCalls: this.dstFnCalls }; } /** @@ -270,15 +240,14 @@ class MTree { * @param {number[][]} resultArray - An array to store the result in */ rangeQueryRecursive(currentNode, queryPoint, searchRadius, resultArray) { - if (currentNode.isLeaf) { - for (const entry of currentNode.entries) { - const distance = this.distanceFunction(queryPoint, entry.point); + for (const entry of currentNode.entries) { + const distance = this.distanceFunction(queryPoint, entry.point); + this.dstFnCalls++; + + if (currentNode.isLeaf) { if (distance <= searchRadius) resultArray.push({ point: entry.point, distance: distance }); - } - } else { - for (const entry of currentNode.entries) { - const distance = this.distanceFunction(queryPoint, entry.point); + } else { if (distance <= searchRadius + entry.radius) this.rangeQueryRecursive(entry, queryPoint, searchRadius, resultArray); } @@ -292,9 +261,10 @@ class MTree { * @returns {Object[]} An array of objects with 'point' and 'distance' properties, sorted by distance. */ kNNQuery(queryPoint, k) { + this.dstFnCalls = 0; const result = Array(k).fill({ distance: Infinity }); this.kNNQueryRecursive(this.root, queryPoint, k, result); - return result; + return { points: result, dstFnCalls: this.dstFnCalls }; } /** @@ -309,6 +279,7 @@ class MTree { if (node.isLeaf) { for (const entry of node.entries) { const distance = this.distanceFunction(queryPoint, entry.point); + this.dstFnCalls++; if (distance < result[k - 1].distance) { result[k - 1] = { point: entry.point, distance: distance }; result.sort((a, b) => a.distance - b.distance); @@ -317,6 +288,7 @@ class MTree { } else { for (const entry of node.entries) { const distance = this.distanceFunction(queryPoint, entry.point); + this.dstFnCalls++; if (distance <= result[k - 1].distance + entry.radius) { this.kNNQueryRecursive(entry, queryPoint, k, result); } diff --git a/m-tree/nodes.js b/m-tree/nodes.js index c446fa7..2facc1a 100644 --- a/m-tree/nodes.js +++ b/m-tree/nodes.js @@ -16,14 +16,13 @@ class GroundEntry { class Node { static idCounter = 0; // Initialize a static counter - -/** - * Constructs a new Node instance. - * @param {Node[]|GroundEntry[]} entries - The node entries, either GroundEntries or other Nodes. - * @param {boolean} isLeaf - Indicates if the node is a leaf node. - * @param {Node|null} parent - The parent node, or null if this is the root node. - * @param {function} distanceFunction - The function used to calculate distances between points. - */ + /** + * Constructs a new Node instance. + * @param {Node[]|GroundEntry[]} entries - The node entries, either GroundEntries or other Nodes. + * @param {boolean} isLeaf - Indicates if the node is a leaf node. + * @param {Node|null} parent - The parent node, or null if this is the root node. + * @param {function} distanceFunction - The function used to calculate distances between points. + */ constructor(entries, isLeaf, parent, distanceFunction) { this.id = Node.idCounter++; // Assign a unique ID to the node this.entries = entries; @@ -46,7 +45,8 @@ class Node { insert(entry, distanceFunction) { this.entries.push(entry); this.updateCentroid(distanceFunction); - this.updateRadiusIfNeeded(entry, distanceFunction); + this.updateRadius(distanceFunction); + //this.updateRadiusIfNeeded(entry, distanceFunction); } /** @@ -66,7 +66,14 @@ class Node { */ updateRadius(distanceFunction) { const groundEntries = Node.findGroundEntries(this); - this.radius = Math.max(...groundEntries.map(entry => distanceFunction(entry.point, this.point))); + + let maxDistance = 0; + for (const entry of groundEntries) { + const distance = distanceFunction(entry.point, this.point); + if (distance > maxDistance) + maxDistance = distance; + } + this.radius = maxDistance; } /** @@ -87,6 +94,10 @@ class Node { } }; + findGroundEntries() { + return Node.findGroundEntries(this); + } + /** * Updates the radius of the node if the distance from the node's centroid * to the new entry is greater than the current radius. diff --git a/m-tree/utils.js b/m-tree/utils.js index 01d6cba..85433f5 100644 --- a/m-tree/utils.js +++ b/m-tree/utils.js @@ -7,16 +7,17 @@ * @returns {number[]} The centroid of the array of entries. */ function calculateCentroid(entries, distanceFunction) { -if (entries.length === 0) + if (entries.length === 0) return; - + const length = entries[0].point.length; const sum = new Array(length).fill(0); let totalWeight = 0; for (const entry of entries) { - const weight = entry.radius || 1; + //const weight = entry.radius || 1; //const weight = distanceFunction(entry.point, this.point); + const weight = 1; for (let i = 0; i < length; i++) { sum[i] += entry.point[i] * weight; } @@ -26,4 +27,60 @@ if (entries.length === 0) return sum.map(val => val / totalWeight); } -module.exports = { calculateCentroid }; \ No newline at end of file +/** + * Calculates the radius of the given entries. + * The radius is the maximum distance from the centroid of the entries to any point in the entries. + * + * @param {Object[]} entries - The entries to calculate the radius from. + * @param {function} [distanceFunction] - A distance function to use to calculate the distance between two points. + * @returns {number} The radius. + */ +function calculateRadius(entries, distanceFunction) { + const centroidPoint = calculateCentroid(entries, distanceFunction); + + // Calculate the maximum distance from the centroid to any point in the entries, + // taking into account the radius of each node + //const maxDistance = Math.max(...entries.map(entry => distanceFunction(entry.point, centroidPoint) + (entry.radius || 0))); + let maxDistance = 0; + for (const entry of entries) { + const distance = distanceFunction(entry.point, centroidPoint); + if (distance > maxDistance) + maxDistance = distance; + } + this.radius = maxDistance; + + return maxDistance; +} +/** + * Calculates the radius of the given entries. + * The radius is the maximum distance from the centroid of the entries to any point in the entries. + * + * @param {Object[]} entries - The entries to calculate the radius from. + * @param {function} [distanceFunction] - A distance function to use to calculate the distance between two points. + * @returns {number} The radius. + */ +/*function calculateRadius(entries, distanceFunction) { + const centroidPoint = calculateCentroid(entries, distanceFunction); + + // Calculate the maximum distance from the centroid to any point in the entries, + // taking into account the radius of each node + let maxDistance = 0; + for (const entry of entries) { + if (entry.entries) { + const groundEntries = entry.findGroundEntries(); + for (const groundEntry of groundEntries) { + const distance = distanceFunction(groundEntry.point, centroidPoint); + if (distance > maxDistance) + maxDistance = distance; + } + } else { + const distance = distanceFunction(entry.point, centroidPoint); + if (distance > maxDistance) + maxDistance = distance; + } + } + + return maxDistance; +}*/ + +module.exports = { calculateRadius, calculateCentroid }; \ No newline at end of file diff --git a/public/index.html b/public/index.html index 33997c7..cd6ff74 100644 --- a/public/index.html +++ b/public/index.html @@ -18,6 +18,7 @@
+

Data control

@@ -44,6 +45,7 @@
+

Queries

@@ -61,6 +63,7 @@
+

Save/Load Tree

diff --git a/public/performance.html b/public/performance.html new file mode 100644 index 0000000..03ebb34 --- /dev/null +++ b/public/performance.html @@ -0,0 +1,254 @@ + + + + + M-Tree vs Sequential Search Comparison + + + + +

M-Tree vs Sequential Search Comparison

+
+ +
+ +
+ +
+ +
+
+
+ + + + + + + + \ No newline at end of file diff --git a/public/script/main.js b/public/script/main.js index 1e33225..b57a7b9 100644 --- a/public/script/main.js +++ b/public/script/main.js @@ -68,6 +68,7 @@ async function performKNNQuery() { setStatus(response.ok); if (response.ok) { const result = await response.json(); + const table = createTable(result.values); document.getElementById('result').innerHTML = ''; document.getElementById('result').appendChild(table); @@ -119,7 +120,8 @@ function createTable(result) { th2.innerHTML = 'Distance'; headerRow.appendChild(th2); - result.forEach(row => { + //result.forEach(row => { + result.slice(0, 100).forEach(row => { const rowElement = table.insertRow(); rowElement.insertCell().innerText = `[${row.point.map(x => x.toFixed(5)).join(', ')}]`; rowElement.insertCell().innerText = row.distance.toFixed(5); diff --git a/public/style/index.css b/public/style/index.css index 13567ce..94cd5e2 100644 --- a/public/style/index.css +++ b/public/style/index.css @@ -35,6 +35,9 @@ h1 { text-align: center; } +h2 { + margin-top: 0; +} .form-group { margin-bottom: 0.5rem; } @@ -124,6 +127,7 @@ section { display: flex; width: auto; column-gap: 1rem; + padding-bottom: 0; } #result-section {