From 41846d1275c7dd8431a14c1b9b51b83ce21fce2f Mon Sep 17 00:00:00 2001 From: gagik Date: Wed, 2 Oct 2024 16:05:26 +0200 Subject: [PATCH 01/20] shard WIP --- packages/shell-api/src/collection.ts | 4 +- packages/shell-api/src/shard.spec.ts | 57 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 7c8128b91..994bd5a4c 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2326,7 +2326,7 @@ export default class Collection extends ShellApiWithMongoClass { return await this._mongo._serviceProvider.getSearchIndexes( this._database._name, this._name, - indexName as string | undefined, + indexName, { ...(await this._database._baseOptions()), ...options } ); } @@ -2355,7 +2355,7 @@ export default class Collection extends ShellApiWithMongoClass { this._name, [ { - name: (indexName as string | undefined) ?? 'default', + name: indexName ?? 'default', // Omitting type when it is 'search' for compat with older servers ...(type && type !== 'search' && { type: type as 'search' | 'vectorSearch' }), diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index 9193ae66d..a9b913e28 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2554,6 +2554,63 @@ describe('Shard', function () { }); }); }); + + describe('collection.getShardDistribution()', function () { + let db: Database; + const dbName = 'shard-stats-test'; + const ns = `${dbName}.test`; + + beforeEach(async function () { + db = sh._database.getSiblingDB(dbName); + await db.getCollection('test').insertOne({ key: 1 }); + await db.getCollection('test').createIndex({ key: 1 }); + }); + + afterEach(async function () { + await db.dropDatabase(); + }); + + context('unsharded collections', function () { + it('throws an error', async function () { + const caughtError = await db + .getCollection('test') + .getShardDistribution() + .catch((e) => e); + expect(caughtError.message).includes( + 'Collection test is not sharded' + ); + }); + }); + + context('sharded collections', function () { + beforeEach(async function () { + expect((await sh.enableSharding(dbName)).ok).to.equal(1); + expect( + (await sh.shardCollection(ns, { key: 1 })).collectionsharded + ).to.equal(ns); + }); + + it('returns the correct StatsResult', async function () { + const result = await db.getCollection('test').getShardDistribution(); + const shardDistributionValue = result.value as Document; + + expect(result.type).to.equal('StatsResult'); + + const shardFields = Object.keys(shardDistributionValue).filter( + (field) => field !== 'Totals' + ); + expect(shardFields.length).to.equal(1); + const shardField = shardFields[0]; + expect( + shardDistributionValue[shardField]['estimated docs per chunk'] + ).to.equal(1); + + expect(shardDistributionValue.Totals.docs).to.equal(1); + expect(shardDistributionValue.Totals.chunks).to.equal(1); + }); + }); + }); + describe('collection.stats()', function () { let db: Database; let hasTotalSize: boolean; From 2cf6ff255819e5cb0e8ca939bf1fd76d8d560a6f Mon Sep 17 00:00:00 2001 From: gagik Date: Thu, 3 Oct 2024 15:56:46 +0200 Subject: [PATCH 02/20] First fix attempt --- packages/shell-api/src/collection.ts | 75 ++++++++++++++++++---------- packages/shell-api/src/shard.spec.ts | 57 +++++++++++++++++++++ 2 files changed, 105 insertions(+), 27 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 994bd5a4c..90d6939e1 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2086,20 +2086,6 @@ export default class Collection extends ShellApiWithMongoClass { const result = {} as Document; const config = this._mongo.getDB('config'); - const ns = `${this._database._name}.${this._name}`; - - const configCollectionsInfo = await config - .getCollection('collections') - .findOne({ - _id: ns, - ...onlyShardedCollectionsInConfigFilter, - }); - if (!configCollectionsInfo) { - throw new MongoshInvalidInputError( - `Collection ${this._name} is not sharded`, - ShellApiErrors.NotConnectedToShardedCluster - ); - } const collStats = await ( await this.aggregate({ $collStats: { storageStats: {} } }) @@ -2115,12 +2101,43 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: number; }[] = []; + let configCollectionsInfo: Document | null; + let timeseriesBucketCount: number | undefined; + await Promise.all( - collStats.map((extShardStats) => + collStats.map((extractedShardStats) => (async (): Promise => { // Extract and store only the relevant subset of the stats for this shard - const { shard } = extShardStats; + if (!configCollectionsInfo) { + const { storageStats } = extractedShardStats; + let timeseriesBucketNs: string | undefined; + const timeseries: Document | undefined = storageStats['timeseries']; + + if (typeof timeseries !== 'undefined') { + timeseriesBucketCount = timeseries['bucketCount']; + timeseriesBucketNs = timeseries['bucketsNs']; + } + + const ns = + timeseriesBucketNs ?? `${this._database._name}.${this._name}`; + + configCollectionsInfo = await config + .getCollection('collections') + .findOne({ + _id: ns, + ...onlyShardedCollectionsInConfigFilter, + }); + + if (!configCollectionsInfo) { + throw new MongoshInvalidInputError( + `Collection ${this._name} is not sharded`, + ShellApiErrors.NotConnectedToShardedCluster + ); + } + } + + const { shard } = extractedShardStats; // If we have an UUID, use that for lookups. If we have only the ns, // use that. (On 5.0+ servers, config.chunk has uses the UUID, before // that it had the ns). @@ -2131,39 +2148,43 @@ export default class Collection extends ShellApiWithMongoClass { const [host, numChunks] = await Promise.all([ config .getCollection('shards') - .findOne({ _id: extShardStats.shard }), + .findOne({ _id: extractedShardStats.shard }), config.getCollection('chunks').countDocuments(countChunksQuery), ]); const shardStats = { shardId: shard, host: host !== null ? host.host : null, - size: extShardStats.storageStats.size, - count: extShardStats.storageStats.count, + size: extractedShardStats.storageStats.size, + count: extractedShardStats.storageStats.count, numChunks: numChunks, - avgObjSize: extShardStats.storageStats.avgObjSize, + avgObjSize: extractedShardStats.storageStats.avgObjSize, }; const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - const estChunkData = + // In sharded timeseries collections, we use the bucket count + const shardStatsCount = + timeseriesBucketCount ?? shardStats.count ?? 0; + + const estimatedChunkDataPerChunk = shardStats.numChunks === 0 ? 0 : shardStats.size / shardStats.numChunks; - const estChunkCount = + const estimatedDocsPerChunk = shardStats.numChunks === 0 ? 0 - : Math.floor(shardStats.count / shardStats.numChunks); + : Math.floor(shardStatsCount / shardStats.numChunks); result[key] = { data: dataFormat(coerceToJSNumber(shardStats.size)), - docs: shardStats.count, + docs: shardStatsCount, chunks: shardStats.numChunks, - 'estimated data per chunk': dataFormat(estChunkData), - 'estimated docs per chunk': estChunkCount, + 'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk), + 'estimated docs per chunk': estimatedDocsPerChunk, }; totals.size += coerceToJSNumber(shardStats.size); - totals.count += coerceToJSNumber(shardStats.count); + totals.count += coerceToJSNumber(shardStatsCount); totals.numChunks += coerceToJSNumber(shardStats.numChunks); conciseShardsStats.push(shardStats); diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index a9b913e28..8e75fa4e1 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2609,6 +2609,63 @@ describe('Shard', function () { expect(shardDistributionValue.Totals.chunks).to.equal(1); }); }); + + // We explicitly test sharded time series collections as it fallbacks to the bucket information + context('sharded timeseries collections', function () { + skipIfServerVersion(mongos, '< 5.1'); + + const timeseriesCollectionName = 'testTS'; + const timeseriesNS = `${dbName}.${timeseriesCollectionName}`; + + beforeEach(async function () { + expect((await sh.enableSharding(dbName)).ok).to.equal(1); + + expect( + ( + await sh.shardCollection( + timeseriesNS, + { 'metadata.bucketId': 1 }, + { + timeseries: { + timeField: 'timestamp', + metaField: 'metadata', + granularity: 'hours', + }, + } + ) + ).collectionsharded + ).to.equal(timeseriesNS); + await db.getCollection(timeseriesCollectionName).insertOne({ + metadata: { + bucketId: 1, + type: 'temperature', + }, + timestamp: new Date('2021-05-18T00:00:00.000Z'), + temp: 12, + }); + }); + + it('returns the correct StatsResult', async function () { + const result = await db + .getCollection('testTS') + .getShardDistribution(); + const shardDistributionValue = result.value as Document; + + expect(result.type).to.equal('StatsResult'); + + const shardFields = Object.keys(shardDistributionValue).filter( + (field) => field !== 'Totals' + ); + expect(shardFields.length).to.equal(1); + const shardField = shardFields[0]; + expect( + shardDistributionValue[shardField]['estimated docs per chunk'] + ).to.equal(1); + + expect(shardDistributionValue.Totals.docs).to.equal(1); + expect(shardDistributionValue.Totals.chunks).to.equal(1); + }); + }); }); describe('collection.stats()', function () { From bdc0ed20641f7ca96d787ee73e0a8fd5114c1c16 Mon Sep 17 00:00:00 2001 From: gagik Date: Thu, 3 Oct 2024 20:23:50 +0200 Subject: [PATCH 03/20] Avoid doing it inside the promise --- packages/shell-api/src/collection.ts | 61 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 90d6939e1..87f9506af 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2101,42 +2101,43 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: number; }[] = []; - let configCollectionsInfo: Document | null; + const timeseriesShardStats = collStats.find( + (extractedShardStats) => + typeof extractedShardStats.storageStats.timeseries !== 'undefined' + ); + let timeseriesBucketCount: number | undefined; + let timeseriesBucketNs: string | undefined; + if (typeof timeseriesShardStats !== 'undefined') { + const { storageStats } = timeseriesShardStats; - await Promise.all( - collStats.map((extractedShardStats) => - (async (): Promise => { - // Extract and store only the relevant subset of the stats for this shard - if (!configCollectionsInfo) { - const { storageStats } = extractedShardStats; - let timeseriesBucketNs: string | undefined; + const timeseries: Document | undefined = storageStats['timeseries']; - const timeseries: Document | undefined = storageStats['timeseries']; + if (typeof timeseries !== 'undefined') { + timeseriesBucketCount = timeseries['bucketCount']; + timeseriesBucketNs = timeseries['bucketsNs']; + } + } - if (typeof timeseries !== 'undefined') { - timeseriesBucketCount = timeseries['bucketCount']; - timeseriesBucketNs = timeseries['bucketsNs']; - } + const ns = timeseriesBucketNs ?? `${this._database._name}.${this._name}`; - const ns = - timeseriesBucketNs ?? `${this._database._name}.${this._name}`; - - configCollectionsInfo = await config - .getCollection('collections') - .findOne({ - _id: ns, - ...onlyShardedCollectionsInConfigFilter, - }); - - if (!configCollectionsInfo) { - throw new MongoshInvalidInputError( - `Collection ${this._name} is not sharded`, - ShellApiErrors.NotConnectedToShardedCluster - ); - } - } + const configCollectionsInfo = await config + .getCollection('collections') + .findOne({ + _id: ns, + ...onlyShardedCollectionsInConfigFilter, + }); + if (!configCollectionsInfo) { + throw new MongoshInvalidInputError( + `Collection ${this._name} is not sharded`, + ShellApiErrors.NotConnectedToShardedCluster + ); + } + + await Promise.all( + collStats.map((extractedShardStats) => + (async (): Promise => { const { shard } = extractedShardStats; // If we have an UUID, use that for lookups. If we have only the ns, // use that. (On 5.0+ servers, config.chunk has uses the UUID, before From a93ee79065b81aa263519e91df59de0f137e6705 Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 4 Oct 2024 10:41:12 +0200 Subject: [PATCH 04/20] Use timeseries as a fallback --- packages/shell-api/src/collection.ts | 84 +++++++++++++++++++--------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 87f9506af..566207985 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2076,6 +2076,47 @@ export default class Collection extends ShellApiWithMongoClass { }); } + /** + * Helper for getting collection info of sharded timeseries collections + * @returns the bucket count and collection info based on given collStats + */ + async _getShardedTimeseriesCollectionInfo( + config: Database, + collStats: Document[] + ): Promise<{ + timeseriesBucketCount: number | null; + timeseriesCollectionInfo: Document | null; + }> { + const timeseriesShardStats = collStats.find( + (extractedShardStats) => + typeof extractedShardStats.storageStats.timeseries !== 'undefined' + ); + + if (!timeseriesShardStats) { + return { + timeseriesCollectionInfo: null, + timeseriesBucketCount: null, + }; + } + + const { storageStats } = timeseriesShardStats; + + const timeseries: Document = storageStats['timeseries']; + + const timeseriesBucketCount: number | null = + timeseries['bucketCount'] ?? null; + const timeseriesBucketNs: string | undefined = timeseries['bucketsNs']; + + const timeseriesCollectionInfo = await config + .getCollection('collections') + .findOne({ + _id: timeseriesBucketNs, + ...onlyShardedCollectionsInConfigFilter, + }); + + return { timeseriesBucketCount, timeseriesCollectionInfo }; + } + @returnsPromise @topologies([Topologies.Sharded]) @apiVersions([]) @@ -2101,38 +2142,29 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: number; }[] = []; - const timeseriesShardStats = collStats.find( - (extractedShardStats) => - typeof extractedShardStats.storageStats.timeseries !== 'undefined' - ); - - let timeseriesBucketCount: number | undefined; - let timeseriesBucketNs: string | undefined; - if (typeof timeseriesShardStats !== 'undefined') { - const { storageStats } = timeseriesShardStats; - - const timeseries: Document | undefined = storageStats['timeseries']; - - if (typeof timeseries !== 'undefined') { - timeseriesBucketCount = timeseries['bucketCount']; - timeseriesBucketNs = timeseries['bucketsNs']; - } - } - - const ns = timeseriesBucketNs ?? `${this._database._name}.${this._name}`; - - const configCollectionsInfo = await config + const ns = `${this._database._name}.${this._name}`; + let configCollectionsInfo = await config .getCollection('collections') .findOne({ _id: ns, ...onlyShardedCollectionsInConfigFilter, }); + let fallbackTimeseriesBucketCount: number | undefined; + if (!configCollectionsInfo) { - throw new MongoshInvalidInputError( - `Collection ${this._name} is not sharded`, - ShellApiErrors.NotConnectedToShardedCluster - ); + const { timeseriesCollectionInfo, timeseriesBucketCount } = + await this._getShardedTimeseriesCollectionInfo(config, collStats); + + if (!timeseriesCollectionInfo || !timeseriesBucketCount) { + throw new MongoshInvalidInputError( + `Collection ${this._name} is not sharded`, + ShellApiErrors.NotConnectedToShardedCluster + ); + } + + fallbackTimeseriesBucketCount = timeseriesBucketCount; + configCollectionsInfo = timeseriesCollectionInfo; } await Promise.all( @@ -2165,7 +2197,7 @@ export default class Collection extends ShellApiWithMongoClass { // In sharded timeseries collections, we use the bucket count const shardStatsCount = - timeseriesBucketCount ?? shardStats.count ?? 0; + fallbackTimeseriesBucketCount ?? shardStats.count ?? 0; const estimatedChunkDataPerChunk = shardStats.numChunks === 0 From a108d218e53337b0eab8e771007c97e26f4b48f0 Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 4 Oct 2024 10:59:50 +0200 Subject: [PATCH 05/20] Use 0 count for timeseries --- packages/shell-api/src/collection.ts | 30 ++++++++++------------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 566207985..b2f87e305 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2083,30 +2083,26 @@ export default class Collection extends ShellApiWithMongoClass { async _getShardedTimeseriesCollectionInfo( config: Database, collStats: Document[] - ): Promise<{ - timeseriesBucketCount: number | null; - timeseriesCollectionInfo: Document | null; - }> { + ): Promise { const timeseriesShardStats = collStats.find( (extractedShardStats) => typeof extractedShardStats.storageStats.timeseries !== 'undefined' ); if (!timeseriesShardStats) { - return { - timeseriesCollectionInfo: null, - timeseriesBucketCount: null, - }; + return null; } const { storageStats } = timeseriesShardStats; const timeseries: Document = storageStats['timeseries']; - const timeseriesBucketCount: number | null = - timeseries['bucketCount'] ?? null; const timeseriesBucketNs: string | undefined = timeseries['bucketsNs']; + if (!timeseriesBucketNs) { + return null; + } + const timeseriesCollectionInfo = await config .getCollection('collections') .findOne({ @@ -2114,7 +2110,7 @@ export default class Collection extends ShellApiWithMongoClass { ...onlyShardedCollectionsInConfigFilter, }); - return { timeseriesBucketCount, timeseriesCollectionInfo }; + return timeseriesCollectionInfo; } @returnsPromise @@ -2150,20 +2146,17 @@ export default class Collection extends ShellApiWithMongoClass { ...onlyShardedCollectionsInConfigFilter, }); - let fallbackTimeseriesBucketCount: number | undefined; - if (!configCollectionsInfo) { - const { timeseriesCollectionInfo, timeseriesBucketCount } = + const timeseriesCollectionInfo = await this._getShardedTimeseriesCollectionInfo(config, collStats); - if (!timeseriesCollectionInfo || !timeseriesBucketCount) { + if (!timeseriesCollectionInfo) { throw new MongoshInvalidInputError( `Collection ${this._name} is not sharded`, ShellApiErrors.NotConnectedToShardedCluster ); } - fallbackTimeseriesBucketCount = timeseriesBucketCount; configCollectionsInfo = timeseriesCollectionInfo; } @@ -2195,9 +2188,8 @@ export default class Collection extends ShellApiWithMongoClass { const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - // In sharded timeseries collections, we use the bucket count - const shardStatsCount = - fallbackTimeseriesBucketCount ?? shardStats.count ?? 0; + // In sharded timeseries collections, count is 0. + const shardStatsCount = shardStats.count ?? 0; const estimatedChunkDataPerChunk = shardStats.numChunks === 0 From a2fa6b458187ed32547e002d11d1492c7d7c016a Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 4 Oct 2024 11:06:44 +0200 Subject: [PATCH 06/20] Fix type error and tests --- packages/shell-api/src/collection.ts | 7 +++++-- packages/shell-api/src/shard.spec.ts | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index b2f87e305..612ed3f6c 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2139,14 +2139,15 @@ export default class Collection extends ShellApiWithMongoClass { }[] = []; const ns = `${this._database._name}.${this._name}`; - let configCollectionsInfo = await config + let configCollectionsInfo: Document; + const existingConfigCollectionsInfo = await config .getCollection('collections') .findOne({ _id: ns, ...onlyShardedCollectionsInConfigFilter, }); - if (!configCollectionsInfo) { + if (!existingConfigCollectionsInfo) { const timeseriesCollectionInfo = await this._getShardedTimeseriesCollectionInfo(config, collStats); @@ -2158,6 +2159,8 @@ export default class Collection extends ShellApiWithMongoClass { } configCollectionsInfo = timeseriesCollectionInfo; + } else { + configCollectionsInfo = existingConfigCollectionsInfo; } await Promise.all( diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index 8e75fa4e1..422e32de0 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2658,11 +2658,13 @@ describe('Shard', function () { ); expect(shardFields.length).to.equal(1); const shardField = shardFields[0]; + + // Timeseries will have count 0 expect( shardDistributionValue[shardField]['estimated docs per chunk'] - ).to.equal(1); + ).to.equal(0); - expect(shardDistributionValue.Totals.docs).to.equal(1); + expect(shardDistributionValue.Totals.docs).to.equal(0); expect(shardDistributionValue.Totals.chunks).to.equal(1); }); }); From 86081e4e6e112811d35c0d342097f6ef5cb5f76e Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 4 Oct 2024 11:23:29 +0200 Subject: [PATCH 07/20] Fix comment --- packages/shell-api/src/collection.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 612ed3f6c..3e50ab91c 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2078,7 +2078,8 @@ export default class Collection extends ShellApiWithMongoClass { /** * Helper for getting collection info of sharded timeseries collections - * @returns the bucket count and collection info based on given collStats + * @returns collection info based on given collStats, null if the information is not found or + * if the collection is not timeseries. */ async _getShardedTimeseriesCollectionInfo( config: Database, From 20a81bc010167370c2f49cba15e7e0d15e2c43de Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 7 Oct 2024 12:12:30 +0200 Subject: [PATCH 08/20] WIP --- .../e2e-tests/test/e2e-current-op.spec.ts | 82 +++++++++++++++++++ packages/shell-api/src/collection.ts | 15 +++- 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 packages/e2e-tests/test/e2e-current-op.spec.ts diff --git a/packages/e2e-tests/test/e2e-current-op.spec.ts b/packages/e2e-tests/test/e2e-current-op.spec.ts new file mode 100644 index 000000000..df82b9a00 --- /dev/null +++ b/packages/e2e-tests/test/e2e-current-op.spec.ts @@ -0,0 +1,82 @@ +import { expect } from 'chai'; +import { + skipIfApiStrict, + startSharedTestServer, +} from '../../../testing/integration-testing-hooks'; +import type { TestShell } from './test-shell'; + +describe('e2e currentOp', function () { + skipIfApiStrict(); + + const testServer = startSharedTestServer(); + + context('with 2 shells', function () { + let helperShell: TestShell; + let currentOpShell: TestShell; + + const OPERATION_TIME = 1_000; + const CURRENT_OP_WAIT_TIME = 100; + this.timeout(OPERATION_TIME * 5); + + beforeEach(async function () { + helperShell = this.startTestShell({ + args: [await testServer.connectionString()], + }); + currentOpShell = this.startTestShell({ + args: [await testServer.connectionString()], + }); + await helperShell.waitForPrompt(); + await currentOpShell.waitForPrompt(); + + // Insert a dummy object so find commands will actually run with the delay. + await helperShell.executeLine('db.coll.insertOne({})'); + }); + + it('should return the correct operation', async function () { + const regexOperation = helperShell.executeLine( + `db.coll.find({$where: function() { sleep(${OPERATION_TIME}) }}).projection({test: 1})` + ); + helperShell.assertNoErrors(); + await currentOpShell.executeLine(`sleep(${CURRENT_OP_WAIT_TIME})`); + let currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); + + currentOpShell.assertNoErrors(); + expect(currentOpCall).to.include("find: 'coll'"); + expect(currentOpCall).to.include( + `filter: { '$where': Code('function() { sleep(${OPERATION_TIME}) }') }` + ); + expect(currentOpCall).to.include('projection: { test: 1 }'); + + await regexOperation; + + currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); + + currentOpShell.assertNoErrors(); + expect(currentOpCall).not.to.include("find: 'coll'"); + expect(currentOpCall).not.to.include( + `filter: { '$where': Code('function() { sleep(${OPERATION_TIME}) }') }` + ); + expect(currentOpCall).not.to.include('projection: { test: 1 }'); + }); + + it('should work when the operation contains regex', async function () { + const regExpString = '^(?i)\\\\Qchho0842\\E'; + // The values from currentOp removes redundant escapes such as \E + const simplifiedRegExpString = '^(?i)\\\\Qchho0842E'; + + void helperShell.executeLine( + `db.coll.find({$where: function() { sleep(${OPERATION_TIME}) }}).projection({re: BSONRegExp('${regExpString}')})` + ); + helperShell.assertNoErrors(); + + await currentOpShell.executeLine(`sleep(${CURRENT_OP_WAIT_TIME})`); + + const currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); + currentOpShell.assertNoErrors(); + + expect(currentOpCall).to.include( + `projection: { re: BSONRegExp('${simplifiedRegExpString}', '') }` + ); + }); + }); +}); diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 3e50ab91c..e68319e7c 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2181,10 +2181,21 @@ export default class Collection extends ShellApiWithMongoClass { .findOne({ _id: extractedShardStats.shard }), config.getCollection('chunks').countDocuments(countChunksQuery), ]); + + // Since 6.0, there can be orphan documents indicated by numOrphanDocs. + // These orphan documents need to be accounted for in the size calculation. + const orphanDocumentsSize = !extractedShardStats.storageStats + .numOrphanDocs + ? 0 + : extractedShardStats.storageStats.numOrphanDocs * + extractedShardStats.storageStats.avgObjSize; + const adjustedSize = + extractedShardStats.storageStats.size - orphanDocumentsSize; + const shardStats = { shardId: shard, host: host !== null ? host.host : null, - size: extractedShardStats.storageStats.size, + size: adjustedSize, count: extractedShardStats.storageStats.count, numChunks: numChunks, avgObjSize: extractedShardStats.storageStats.avgObjSize, @@ -2212,7 +2223,7 @@ export default class Collection extends ShellApiWithMongoClass { 'estimated docs per chunk': estimatedDocsPerChunk, }; - totals.size += coerceToJSNumber(shardStats.size); + totals.size += coerceToJSNumber(adjustedSize); totals.count += coerceToJSNumber(shardStatsCount); totals.numChunks += coerceToJSNumber(shardStats.numChunks); From f6c2ad2957f2f1e41a7f22a89dc00246b4e7e0f5 Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 7 Oct 2024 14:05:04 +0200 Subject: [PATCH 09/20] Better comment --- packages/shell-api/src/collection.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 3e50ab91c..8447524ed 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2192,7 +2192,8 @@ export default class Collection extends ShellApiWithMongoClass { const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - // In sharded timeseries collections, count is 0. + // In sharded timeseries collections, count is undefined + // and will be presented as 0. const shardStatsCount = shardStats.count ?? 0; const estimatedChunkDataPerChunk = From da8641f86caf920908e3abf2243cbe3c0a1e1781 Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 7 Oct 2024 14:49:00 +0200 Subject: [PATCH 10/20] Combine helpers --- packages/shell-api/src/collection.ts | 72 ++++++++++++++-------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 8447524ed..2a3a280db 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2077,32 +2077,43 @@ export default class Collection extends ShellApiWithMongoClass { } /** - * Helper for getting collection info of sharded timeseries collections - * @returns collection info based on given collStats, null if the information is not found or - * if the collection is not timeseries. + * Helper for getting collection info for sharded collections. + * @throws If the collection is not sharded. + * @returns collection info based on given collStats. */ - async _getShardedTimeseriesCollectionInfo( + async _getShardedCollectionInfo( config: Database, collStats: Document[] - ): Promise { + ): Promise { + const ns = `${this._database._name}.${this._name}`; + const existingConfigCollectionsInfo = await config + .getCollection('collections') + .findOne({ + _id: ns, + ...onlyShardedCollectionsInConfigFilter, + }); + + if (existingConfigCollectionsInfo !== null) { + return existingConfigCollectionsInfo; + } + + // If the collection info is not found, check if it is timeseries and use the bucket const timeseriesShardStats = collStats.find( (extractedShardStats) => typeof extractedShardStats.storageStats.timeseries !== 'undefined' ); if (!timeseriesShardStats) { - return null; + throw new MongoshInvalidInputError( + `Collection ${this._name} is not sharded`, + ShellApiErrors.NotConnectedToShardedCluster + ); } const { storageStats } = timeseriesShardStats; - const timeseries: Document = storageStats['timeseries']; - - const timeseriesBucketNs: string | undefined = timeseries['bucketsNs']; - - if (!timeseriesBucketNs) { - return null; - } + const timeseries: Document = storageStats.timeseries; + const timeseriesBucketNs: string = timeseries.bucketsNs; const timeseriesCollectionInfo = await config .getCollection('collections') @@ -2111,6 +2122,13 @@ export default class Collection extends ShellApiWithMongoClass { ...onlyShardedCollectionsInConfigFilter, }); + if (!timeseriesCollectionInfo) { + throw new MongoshRuntimeError( + `Timeseries collection bucket info not found`, + CommonErrors.CommandFailed + ); + } + return timeseriesCollectionInfo; } @@ -2139,30 +2157,10 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: number; }[] = []; - const ns = `${this._database._name}.${this._name}`; - let configCollectionsInfo: Document; - const existingConfigCollectionsInfo = await config - .getCollection('collections') - .findOne({ - _id: ns, - ...onlyShardedCollectionsInConfigFilter, - }); - - if (!existingConfigCollectionsInfo) { - const timeseriesCollectionInfo = - await this._getShardedTimeseriesCollectionInfo(config, collStats); - - if (!timeseriesCollectionInfo) { - throw new MongoshInvalidInputError( - `Collection ${this._name} is not sharded`, - ShellApiErrors.NotConnectedToShardedCluster - ); - } - - configCollectionsInfo = timeseriesCollectionInfo; - } else { - configCollectionsInfo = existingConfigCollectionsInfo; - } + const configCollectionsInfo = await this._getShardedCollectionInfo( + config, + collStats + ); await Promise.all( collStats.map((extractedShardStats) => From 09647d08b16d782e5e062b3481f3e0acbd5651b8 Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 7 Oct 2024 15:14:02 +0200 Subject: [PATCH 11/20] Switch to NaN as a result --- packages/shell-api/src/collection.ts | 6 +++--- packages/shell-api/src/shard.spec.ts | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 2a3a280db..3b99d5326 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2190,9 +2190,9 @@ export default class Collection extends ShellApiWithMongoClass { const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - // In sharded timeseries collections, count is undefined - // and will be presented as 0. - const shardStatsCount = shardStats.count ?? 0; + // In sharded timeseries collections we do not have a count + // so we intentionally pass NaN as a result to the client. + const shardStatsCount: number = shardStats.count ?? NaN; const estimatedChunkDataPerChunk = shardStats.numChunks === 0 diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index 422e32de0..bcd394040 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2659,12 +2659,12 @@ describe('Shard', function () { expect(shardFields.length).to.equal(1); const shardField = shardFields[0]; - // Timeseries will have count 0 + // Timeseries will have count NaN expect( shardDistributionValue[shardField]['estimated docs per chunk'] - ).to.equal(0); + ).to.be.NaN; - expect(shardDistributionValue.Totals.docs).to.equal(0); + expect(shardDistributionValue.Totals.docs).to.be.NaN; expect(shardDistributionValue.Totals.chunks).to.equal(1); }); }); From d693ab298d0ecb24fa7b32efe99065173cd20959 Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 7 Oct 2024 15:17:46 +0200 Subject: [PATCH 12/20] Nicer error message --- packages/shell-api/src/collection.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 3b99d5326..cacfdd8f6 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2124,7 +2124,7 @@ export default class Collection extends ShellApiWithMongoClass { if (!timeseriesCollectionInfo) { throw new MongoshRuntimeError( - `Timeseries collection bucket info not found`, + `Error finding collection information for ${timeseriesBucketNs}`, CommonErrors.CommandFailed ); } From e895d65947a5a9cb0e5b557e7823b02490057506 Mon Sep 17 00:00:00 2001 From: gagik Date: Tue, 8 Oct 2024 11:16:41 +0200 Subject: [PATCH 13/20] Fix old unit test --- packages/shell-api/src/collection.spec.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/shell-api/src/collection.spec.ts b/packages/shell-api/src/collection.spec.ts index a9cf11da0..205da41f3 100644 --- a/packages/shell-api/src/collection.spec.ts +++ b/packages/shell-api/src/collection.spec.ts @@ -2258,8 +2258,16 @@ describe('Collection', function () { it('throws when collection is not sharded', async function () { const serviceProviderCursor = stubInterface(); serviceProviderCursor.limit.returns(serviceProviderCursor); - serviceProviderCursor.tryNext.resolves(null); - serviceProvider.find.returns(serviceProviderCursor as any); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProviderCursor.tryNext.returns(null as any); + serviceProvider.find.returns(serviceProviderCursor); + + const tryNext = sinon.stub(); + tryNext.onCall(0).resolves({ storageStats: {} }); + tryNext.onCall(1).resolves(null); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProvider.aggregate.returns({ tryNext } as any); + const error = await collection.getShardDistribution().catch((e) => e); expect(error).to.be.instanceOf(MongoshInvalidInputError); From bf1f643a634610b15d2e9074a7a330d4057c09c2 Mon Sep 17 00:00:00 2001 From: gagik Date: Tue, 8 Oct 2024 11:49:21 +0200 Subject: [PATCH 14/20] Use different DB and collection names --- packages/shell-api/src/shard.spec.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index bcd394040..c8ec6a3cd 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2557,7 +2557,7 @@ describe('Shard', function () { describe('collection.getShardDistribution()', function () { let db: Database; - const dbName = 'shard-stats-test'; + const dbName = 'get-shard-distribution-test'; const ns = `${dbName}.test`; beforeEach(async function () { @@ -2614,7 +2614,7 @@ describe('Shard', function () { context('sharded timeseries collections', function () { skipIfServerVersion(mongos, '< 5.1'); - const timeseriesCollectionName = 'testTS'; + const timeseriesCollectionName = 'getShardDistributionTS'; const timeseriesNS = `${dbName}.${timeseriesCollectionName}`; beforeEach(async function () { @@ -2647,7 +2647,7 @@ describe('Shard', function () { it('returns the correct StatsResult', async function () { const result = await db - .getCollection('testTS') + .getCollection(timeseriesCollectionName) .getShardDistribution(); const shardDistributionValue = result.value as Document; From 59e6f2552bbe9a933ab427ffe500aec8581069f9 Mon Sep 17 00:00:00 2001 From: gagik Date: Tue, 8 Oct 2024 15:18:18 +0200 Subject: [PATCH 15/20] update collection.spec.ts, collection.ts and result.ts --- packages/shell-api/src/collection.spec.ts | 64 +++++++++++++++++++++++ packages/shell-api/src/collection.ts | 11 ++-- packages/shell-api/src/result.ts | 18 +++++++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/packages/shell-api/src/collection.spec.ts b/packages/shell-api/src/collection.spec.ts index 205da41f3..f29729eec 100644 --- a/packages/shell-api/src/collection.spec.ts +++ b/packages/shell-api/src/collection.spec.ts @@ -2276,6 +2276,70 @@ describe('Collection', function () { ShellApiErrors.NotConnectedToShardedCluster ); }); + + describe('with orphan documents', function () { + const mockedNumChunks = 2; + const mockedCollectionConfigInfo = {}; + const mockedShardStats = { + shard: 'test-shard', + storageStats: { + size: 1000, + numOrphanDocs: 10, + avgObjSize: 7, + count: 15, + }, + }; + const mockedShardInfo = { + host: 'dummy-host', + }; + + beforeEach(function () { + const serviceProviderCursor = stubInterface(); + + // Make find and limit have no effect so the value of findOne is determined by tryNext. + serviceProviderCursor.limit.returns(serviceProviderCursor); + serviceProvider.find.returns(serviceProviderCursor); + + // Mock according to the order of findOne calls getShardDistribution uses. + serviceProviderCursor.tryNext + .onCall(0) + .resolves(mockedCollectionConfigInfo); + serviceProviderCursor.tryNext.onCall(1).resolves(mockedShardInfo); + serviceProvider.countDocuments.returns( + Promise.resolve(mockedNumChunks) + ); + + const aggregateTryNext = sinon.stub(); + aggregateTryNext.onCall(0).resolves(mockedShardStats); + aggregateTryNext.onCall(1).resolves(null); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProvider.aggregate.returns({ + tryNext: aggregateTryNext, + } as any); + }); + + it('should account for numOrphanDocs when calculating size', async function () { + const shardDistribution = await collection.getShardDistribution(); + + const { storageStats } = mockedShardStats; + expect(shardDistribution.type).equals('StatsResult'); + const adjustedSize = + storageStats.size - + storageStats.numOrphanDocs * storageStats.avgObjSize; + expect(shardDistribution.value.Totals.data).equals( + `${adjustedSize}B` + ); + const shardField = Object.keys(shardDistribution.value).find( + (field) => field !== 'Totals' + ) as string; + + expect(shardField).not.undefined; + expect( + shardDistribution.value[shardField]['estimated data per chunk'] + ).equals(`${adjustedSize / mockedNumChunks}B`); + }); + }); }); describe('analyzeShardKey', function () { diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 51f85a846..ee16a22bc 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -90,6 +90,7 @@ import { HIDDEN_COMMANDS } from '@mongosh/history'; import PlanCache from './plan-cache'; import ChangeStreamCursor from './change-stream-cursor'; import { ShellApiErrors } from './error-codes'; +import type { GetShardDistributionResult } from './result'; @shellApiClassDefault @addSourceToResults @@ -2135,12 +2136,14 @@ export default class Collection extends ShellApiWithMongoClass { @returnsPromise @topologies([Topologies.Sharded]) @apiVersions([]) - async getShardDistribution(): Promise { + async getShardDistribution(): Promise< + CommandResult + > { this._emitCollectionApiCall('getShardDistribution', {}); await getConfigDB(this._database); // Warns if not connected to mongos - const result = {} as Document; + const result = {} as GetShardDistributionResult; const config = this._mongo.getDB('config'); const collStats = await ( @@ -2235,7 +2238,7 @@ export default class Collection extends ShellApiWithMongoClass { data: dataFormat(totals.size), docs: totals.count, chunks: totals.numChunks, - } as Document; + } as GetShardDistributionResult['Totals']; for (const shardStats of conciseShardsStats) { const estDataPercent = @@ -2254,7 +2257,7 @@ export default class Collection extends ShellApiWithMongoClass { ]; } result.Totals = totalValue; - return new CommandResult('StatsResult', result); + return new CommandResult('StatsResult', result); } @serverVersions(['3.1.0', ServerVersions.latest]) diff --git a/packages/shell-api/src/result.ts b/packages/shell-api/src/result.ts index f4d2b3162..9500a6d32 100644 --- a/packages/shell-api/src/result.ts +++ b/packages/shell-api/src/result.ts @@ -127,3 +127,21 @@ export class CursorIterationResult extends ShellApiValueClass { this.documents = []; } } + +export type GetShardDistributionResult = { + Totals: { + data: string; + docs: number; + chunks: number; + } & { + [individualShardDistribution: string]: string[]; + }; +} & { + [individualShardResult: string]: { + data: string; + docs: number; + chunks: number; + 'estimated data per chunk': string; + 'estimated docs per chunk': number; + }; +}; From b6eda2c6c8453b8fa595a7f952376c75eb1a3b0e Mon Sep 17 00:00:00 2001 From: gagik Date: Tue, 8 Oct 2024 16:36:15 +0200 Subject: [PATCH 16/20] Remove old op spec --- .../e2e-tests/test/e2e-current-op.spec.ts | 82 ------------------- 1 file changed, 82 deletions(-) delete mode 100644 packages/e2e-tests/test/e2e-current-op.spec.ts diff --git a/packages/e2e-tests/test/e2e-current-op.spec.ts b/packages/e2e-tests/test/e2e-current-op.spec.ts deleted file mode 100644 index df82b9a00..000000000 --- a/packages/e2e-tests/test/e2e-current-op.spec.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { expect } from 'chai'; -import { - skipIfApiStrict, - startSharedTestServer, -} from '../../../testing/integration-testing-hooks'; -import type { TestShell } from './test-shell'; - -describe('e2e currentOp', function () { - skipIfApiStrict(); - - const testServer = startSharedTestServer(); - - context('with 2 shells', function () { - let helperShell: TestShell; - let currentOpShell: TestShell; - - const OPERATION_TIME = 1_000; - const CURRENT_OP_WAIT_TIME = 100; - this.timeout(OPERATION_TIME * 5); - - beforeEach(async function () { - helperShell = this.startTestShell({ - args: [await testServer.connectionString()], - }); - currentOpShell = this.startTestShell({ - args: [await testServer.connectionString()], - }); - await helperShell.waitForPrompt(); - await currentOpShell.waitForPrompt(); - - // Insert a dummy object so find commands will actually run with the delay. - await helperShell.executeLine('db.coll.insertOne({})'); - }); - - it('should return the correct operation', async function () { - const regexOperation = helperShell.executeLine( - `db.coll.find({$where: function() { sleep(${OPERATION_TIME}) }}).projection({test: 1})` - ); - helperShell.assertNoErrors(); - await currentOpShell.executeLine(`sleep(${CURRENT_OP_WAIT_TIME})`); - let currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); - - currentOpShell.assertNoErrors(); - expect(currentOpCall).to.include("find: 'coll'"); - expect(currentOpCall).to.include( - `filter: { '$where': Code('function() { sleep(${OPERATION_TIME}) }') }` - ); - expect(currentOpCall).to.include('projection: { test: 1 }'); - - await regexOperation; - - currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); - - currentOpShell.assertNoErrors(); - expect(currentOpCall).not.to.include("find: 'coll'"); - expect(currentOpCall).not.to.include( - `filter: { '$where': Code('function() { sleep(${OPERATION_TIME}) }') }` - ); - expect(currentOpCall).not.to.include('projection: { test: 1 }'); - }); - - it('should work when the operation contains regex', async function () { - const regExpString = '^(?i)\\\\Qchho0842\\E'; - // The values from currentOp removes redundant escapes such as \E - const simplifiedRegExpString = '^(?i)\\\\Qchho0842E'; - - void helperShell.executeLine( - `db.coll.find({$where: function() { sleep(${OPERATION_TIME}) }}).projection({re: BSONRegExp('${regExpString}')})` - ); - helperShell.assertNoErrors(); - - await currentOpShell.executeLine(`sleep(${CURRENT_OP_WAIT_TIME})`); - - const currentOpCall = await currentOpShell.executeLine(`db.currentOp()`); - currentOpShell.assertNoErrors(); - - expect(currentOpCall).to.include( - `projection: { re: BSONRegExp('${simplifiedRegExpString}', '') }` - ); - }); - }); -}); From af1d9a01fa3fd955711806a27a4676023f36a95d Mon Sep 17 00:00:00 2001 From: gagik Date: Wed, 9 Oct 2024 10:01:03 +0200 Subject: [PATCH 17/20] Cleaner calculation --- packages/shell-api/src/collection.ts | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index ee16a22bc..dd28d5f84 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2185,18 +2185,16 @@ export default class Collection extends ShellApiWithMongoClass { // Since 6.0, there can be orphan documents indicated by numOrphanDocs. // These orphan documents need to be accounted for in the size calculation. - const orphanDocumentsSize = !extractedShardStats.storageStats - .numOrphanDocs - ? 0 - : extractedShardStats.storageStats.numOrphanDocs * - extractedShardStats.storageStats.avgObjSize; - const adjustedSize = - extractedShardStats.storageStats.size - orphanDocumentsSize; + const orphanDocumentsCount = + extractedShardStats.storageStats.numOrphanDocs ?? 0; + const ownedSize = + (extractedShardStats.storageStats.count - orphanDocumentsCount) * + extractedShardStats.storageStats.avgObjSize; const shardStats = { shardId: shard, host: host !== null ? host.host : null, - size: adjustedSize, + size: ownedSize, count: extractedShardStats.storageStats.count, numChunks: numChunks, avgObjSize: extractedShardStats.storageStats.avgObjSize, @@ -2225,7 +2223,7 @@ export default class Collection extends ShellApiWithMongoClass { 'estimated docs per chunk': estimatedDocsPerChunk, }; - totals.size += coerceToJSNumber(adjustedSize); + totals.size += coerceToJSNumber(ownedSize); totals.count += coerceToJSNumber(shardStatsCount); totals.numChunks += coerceToJSNumber(shardStats.numChunks); From c2037758f61c142b38b50353fee2b52bc121a8ea Mon Sep 17 00:00:00 2001 From: gagik Date: Wed, 9 Oct 2024 10:28:21 +0200 Subject: [PATCH 18/20] Update to match test --- packages/shell-api/src/collection.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index dd28d5f84..3c5d18699 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2188,8 +2188,8 @@ export default class Collection extends ShellApiWithMongoClass { const orphanDocumentsCount = extractedShardStats.storageStats.numOrphanDocs ?? 0; const ownedSize = - (extractedShardStats.storageStats.count - orphanDocumentsCount) * - extractedShardStats.storageStats.avgObjSize; + extractedShardStats.storageStats.size - + orphanDocumentsCount * extractedShardStats.storageStats.avgObjSize; const shardStats = { shardId: shard, From bdff836473e10ae84b38ef09b8cf1713d40a966b Mon Sep 17 00:00:00 2001 From: gagik Date: Wed, 9 Oct 2024 10:32:21 +0200 Subject: [PATCH 19/20] Account for invalid avgObjSize --- packages/shell-api/src/collection.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 3c5d18699..b0967a728 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2186,10 +2186,13 @@ export default class Collection extends ShellApiWithMongoClass { // Since 6.0, there can be orphan documents indicated by numOrphanDocs. // These orphan documents need to be accounted for in the size calculation. const orphanDocumentsCount = - extractedShardStats.storageStats.numOrphanDocs ?? 0; + typeof extractedShardStats.storageStats.numOrphanDocs === 'number' + ? extractedShardStats.storageStats.numOrphanDocs + : 0; const ownedSize = extractedShardStats.storageStats.size - - orphanDocumentsCount * extractedShardStats.storageStats.avgObjSize; + orphanDocumentsCount * + (extractedShardStats.storageStats.avgObjSize ?? 0); const shardStats = { shardId: shard, From 9edd6e7e55b4212c7f20989a6f4b4ebe71984f9e Mon Sep 17 00:00:00 2001 From: gagik Date: Wed, 9 Oct 2024 14:07:06 +0200 Subject: [PATCH 20/20] Use template literals and cleaner size calculation --- packages/shell-api/src/collection.spec.ts | 2 +- packages/shell-api/src/collection.ts | 38 ++++++++++++++++------- packages/shell-api/src/result.ts | 18 ----------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/packages/shell-api/src/collection.spec.ts b/packages/shell-api/src/collection.spec.ts index f29729eec..3f218d662 100644 --- a/packages/shell-api/src/collection.spec.ts +++ b/packages/shell-api/src/collection.spec.ts @@ -2332,7 +2332,7 @@ describe('Collection', function () { ); const shardField = Object.keys(shardDistribution.value).find( (field) => field !== 'Totals' - ) as string; + ) as `Shard ${string} at ${string}`; expect(shardField).not.undefined; expect( diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index b0967a728..1723425bb 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -90,7 +90,6 @@ import { HIDDEN_COMMANDS } from '@mongosh/history'; import PlanCache from './plan-cache'; import ChangeStreamCursor from './change-stream-cursor'; import { ShellApiErrors } from './error-codes'; -import type { GetShardDistributionResult } from './result'; @shellApiClassDefault @addSourceToResults @@ -2185,14 +2184,11 @@ export default class Collection extends ShellApiWithMongoClass { // Since 6.0, there can be orphan documents indicated by numOrphanDocs. // These orphan documents need to be accounted for in the size calculation. - const orphanDocumentsCount = - typeof extractedShardStats.storageStats.numOrphanDocs === 'number' - ? extractedShardStats.storageStats.numOrphanDocs - : 0; + const orphanDocumentsSize = + (extractedShardStats.storageStats.numOrphanDocs ?? 0) * + (extractedShardStats.storageStats.avgObjSize ?? 0); const ownedSize = - extractedShardStats.storageStats.size - - orphanDocumentsCount * - (extractedShardStats.storageStats.avgObjSize ?? 0); + extractedShardStats.storageStats.size - orphanDocumentsSize; const shardStats = { shardId: shard, @@ -2203,8 +2199,6 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: extractedShardStats.storageStats.avgObjSize, }; - const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - // In sharded timeseries collections we do not have a count // so we intentionally pass NaN as a result to the client. const shardStatsCount: number = shardStats.count ?? NaN; @@ -2218,7 +2212,7 @@ export default class Collection extends ShellApiWithMongoClass { ? 0 : Math.floor(shardStatsCount / shardStats.numChunks); - result[key] = { + result[`Shard ${shardStats.shardId} at ${shardStats.host}`] = { data: dataFormat(coerceToJSNumber(shardStats.size)), docs: shardStatsCount, chunks: shardStats.numChunks, @@ -2258,6 +2252,7 @@ export default class Collection extends ShellApiWithMongoClass { ]; } result.Totals = totalValue; + return new CommandResult('StatsResult', result); } @@ -2482,3 +2477,24 @@ export default class Collection extends ShellApiWithMongoClass { ); } } + +export type GetShardDistributionResult = { + Totals: { + data: string; + docs: number; + chunks: number; + } & { + [individualShardDistribution: `Shard ${string}`]: [ + `${number} % data`, + `${number} % docs in cluster`, + `${string} avg obj size on shard` + ]; + }; + [individualShardResult: `Shard ${string} at ${string}`]: { + data: string; + docs: number; + chunks: number; + 'estimated data per chunk': string; + 'estimated docs per chunk': number; + }; +}; diff --git a/packages/shell-api/src/result.ts b/packages/shell-api/src/result.ts index 9500a6d32..f4d2b3162 100644 --- a/packages/shell-api/src/result.ts +++ b/packages/shell-api/src/result.ts @@ -127,21 +127,3 @@ export class CursorIterationResult extends ShellApiValueClass { this.documents = []; } } - -export type GetShardDistributionResult = { - Totals: { - data: string; - docs: number; - chunks: number; - } & { - [individualShardDistribution: string]: string[]; - }; -} & { - [individualShardResult: string]: { - data: string; - docs: number; - chunks: number; - 'estimated data per chunk': string; - 'estimated docs per chunk': number; - }; -};