Skip to content

Commit

Permalink
feat(shell-api): Account for orphan documents in getShardDistribution…
Browse files Browse the repository at this point in the history
…() helper MONGOSH-1838 (#2203)

getShardDistribution() should correctly account for orphan documents (reported by servers >= 6.0) when calculating size statistics. It does so by subtracting the estimated size of the orphan documents (numOrphanDocs * avgObjSize) from each shard's reported size.
  • Loading branch information
gagik authored Oct 10, 2024
1 parent 664380b commit 2a3bb25
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 9 deletions.
64 changes: 64 additions & 0 deletions packages/shell-api/src/collection.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2276,6 +2276,70 @@ describe('Collection', function () {
ShellApiErrors.NotConnectedToShardedCluster
);
});

describe('with orphan documents', function () {
  // Fixtures: one shard reporting 10 orphan documents of average size 7
  // out of a total reported size of 1000.
  const chunkCount = 2;
  const configCollectionDoc = {};
  const shardHostDoc = { host: 'dummy-host' };
  const shardStatsDoc = {
    shard: 'test-shard',
    storageStats: {
      size: 1000,
      numOrphanDocs: 10,
      avgObjSize: 7,
      count: 15,
    },
  };

  beforeEach(function () {
    // find() and limit() hand back the same stubbed cursor, so every
    // findOne() result is driven purely by what tryNext resolves to.
    const findCursor = stubInterface<ServiceProviderCursor>();
    findCursor.limit.returns(findCursor);
    serviceProvider.find.returns(findCursor);

    // The resolved values follow the order of the findOne calls issued by
    // getShardDistribution: the collection config first, then the shard info.
    findCursor.tryNext.onFirstCall().resolves(configCollectionDoc);
    findCursor.tryNext.onSecondCall().resolves(shardHostDoc);

    serviceProvider.countDocuments.resolves(chunkCount);

    // The aggregate cursor yields a single shard stats document and is then
    // exhausted.
    const aggregateTryNext = sinon.stub();
    aggregateTryNext.onFirstCall().resolves(shardStatsDoc);
    aggregateTryNext.onSecondCall().resolves(null);

    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    serviceProvider.aggregate.returns({ tryNext: aggregateTryNext } as any);
  });

  it('should account for numOrphanDocs when calculating size', async function () {
    // Expected size once the orphan documents have been subtracted.
    const { size, numOrphanDocs, avgObjSize } = shardStatsDoc.storageStats;
    const expectedOwnedSize = size - numOrphanDocs * avgObjSize;

    const distribution = await collection.getShardDistribution();

    expect(distribution.type).equals('StatsResult');
    expect(distribution.value.Totals.data).equals(`${expectedOwnedSize}B`);

    // The only key besides 'Totals' is the per-shard entry.
    const perShardKey = Object.keys(distribution.value).find(
      (key) => key !== 'Totals'
    ) as `Shard ${string} at ${string}`;

    expect(perShardKey).not.undefined;
    expect(
      distribution.value[perShardKey]['estimated data per chunk']
    ).equals(`${expectedOwnedSize / chunkCount}B`);
  });
});
});

describe('analyzeShardKey', function () {
Expand Down
49 changes: 40 additions & 9 deletions packages/shell-api/src/collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2135,12 +2135,14 @@ export default class Collection extends ShellApiWithMongoClass {
@returnsPromise
@topologies([Topologies.Sharded])
@apiVersions([])
async getShardDistribution(): Promise<CommandResult> {
async getShardDistribution(): Promise<
CommandResult<GetShardDistributionResult>
> {
this._emitCollectionApiCall('getShardDistribution', {});

await getConfigDB(this._database); // Warns if not connected to mongos

const result = {} as Document;
const result = {} as GetShardDistributionResult;
const config = this._mongo.getDB('config');

const collStats = await (
Expand Down Expand Up @@ -2179,17 +2181,24 @@ export default class Collection extends ShellApiWithMongoClass {
.findOne({ _id: extractedShardStats.shard }),
config.getCollection('chunks').countDocuments(countChunksQuery),
]);

// Since 6.0, there can be orphan documents indicated by numOrphanDocs.
// These orphan documents need to be accounted for in the size calculation.
const orphanDocumentsSize =
(extractedShardStats.storageStats.numOrphanDocs ?? 0) *
(extractedShardStats.storageStats.avgObjSize ?? 0);
const ownedSize =
extractedShardStats.storageStats.size - orphanDocumentsSize;

const shardStats = {
shardId: shard,
host: host !== null ? host.host : null,
size: extractedShardStats.storageStats.size,
size: ownedSize,
count: extractedShardStats.storageStats.count,
numChunks: numChunks,
avgObjSize: extractedShardStats.storageStats.avgObjSize,
};

const key = `Shard ${shardStats.shardId} at ${shardStats.host}`;

// In sharded timeseries collections we do not have a count
// so we intentionally pass NaN as a result to the client.
const shardStatsCount: number = shardStats.count ?? NaN;
Expand All @@ -2203,15 +2212,15 @@ export default class Collection extends ShellApiWithMongoClass {
? 0
: Math.floor(shardStatsCount / shardStats.numChunks);

result[key] = {
result[`Shard ${shardStats.shardId} at ${shardStats.host}`] = {
data: dataFormat(coerceToJSNumber(shardStats.size)),
docs: shardStatsCount,
chunks: shardStats.numChunks,
'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk),
'estimated docs per chunk': estimatedDocsPerChunk,
};

totals.size += coerceToJSNumber(shardStats.size);
totals.size += coerceToJSNumber(ownedSize);
totals.count += coerceToJSNumber(shardStatsCount);
totals.numChunks += coerceToJSNumber(shardStats.numChunks);

Expand All @@ -2224,7 +2233,7 @@ export default class Collection extends ShellApiWithMongoClass {
data: dataFormat(totals.size),
docs: totals.count,
chunks: totals.numChunks,
} as Document;
} as GetShardDistributionResult['Totals'];

for (const shardStats of conciseShardsStats) {
const estDataPercent =
Expand All @@ -2243,7 +2252,8 @@ export default class Collection extends ShellApiWithMongoClass {
];
}
result.Totals = totalValue;
return new CommandResult('StatsResult', result);

return new CommandResult<GetShardDistributionResult>('StatsResult', result);
}

@serverVersions(['3.1.0', ServerVersions.latest])
Expand Down Expand Up @@ -2467,3 +2477,24 @@ export default class Collection extends ShellApiWithMongoClass {
);
}
}

/**
 * Shape of the value carried by the `CommandResult` that
 * `getShardDistribution()` returns.
 */
export type GetShardDistributionResult = {
  /** Statistics of a single shard, keyed by `Shard <shardId> at <host>`. */
  [shardAtHostKey: `Shard ${string} at ${string}`]: {
    /** Formatted data size of the shard. */
    data: string;
    /** Document count of the shard. */
    docs: number;
    /** Number of chunks on the shard. */
    chunks: number;
    'estimated data per chunk': string;
    'estimated docs per chunk': number;
  };
  /**
   * Summed statistics, plus one `Shard <shardId>` entry per shard holding
   * that shard's share of the totals.
   */
  Totals: {
    /** Formatted total data size. */
    data: string;
    /** Total document count. */
    docs: number;
    /** Total number of chunks. */
    chunks: number;
  } & Record<
    `Shard ${string}`,
    [
      dataShare: `${number} % data`,
      docsShare: `${number} % docs in cluster`,
      avgObjSize: `${string} avg obj size on shard`
    ]
  >;
};

0 comments on commit 2a3bb25

Please sign in to comment.