我有一个使用mongoDB (运行猫鼬)查询的NodeJS。
在这个特殊的例子中,我将查询一组集合,并将数据作为CSV传输到archiverjs中,以创建一个zip文件。因此,我有一个传入的请求,数据会使用mongoose和mongo游标进行查询,然后通过管道连接到一个管道中,该管道将分别以archiverjs结束-- http响应将zip文件传递给用户。
async function getSortedQueryCursor(...) {
...
const query = MODEL_LOOKUP[fileType]
.find(reducer)
.sort({ [idString]: 'asc' });
return query.cursor();
}
async function getData(...) {
const cursor = await getSortedQueryCursor(...);
return cursor
.pipe(filter1Stream)
.pipe(filter2Stream)
.pipe(filter3Stream)
.pipe(csvStringifyStream);
}
router.post('/:scenarioId', async (request, response) => {
...
const archive = Archiver(...);
archive.pipe(response);
const result = await getData(...);
archive.append(stream, { name: filepath });
return archive.finalize();
}一旦一个特定的集合在游戏中(这个集合包含了大约4,000万个文档),查询就会持续很长时间(>15s),在这段时间里,我可以在100%的CPU上看到mongo进程。更令人惊讶的是,结果集是空的(没有与查询匹配的文档)。
这是一个相当简单的查询:
items.find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' }, { sort: { dataId: 1 }我有scenarioId和dataId的索引。如果在shell上运行查询,它将以30 on 格式返回。
explain()的结果是:
[
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "data.items",
"indexFilterSet": false,
"parsedQuery": {
"scenarioId": {
"$eq": "ckqf5ulg38gu208eecxlf95fc"
}
},
"winningPlan": {
"stage": "SORT",
"sortPattern": {
"itemId": 1
},
"memLimit": 104857600,
"type": "simple",
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
}
}
}
},
"rejectedPlans": [
...
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 0,
"executionTimeMillis": 0,
"totalKeysExamined": 0,
"totalDocsExamined": 0,
"executionStages": {
"stage": "SORT",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 3,
"advanced": 0,
"needTime": 1,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"sortPattern": {
"dataId": 1
},
"memLimit": 104857600,
"type": "simple",
"totalDataSizeSorted": 0,
"usedDisk": false,
"inputStage": {
"stage": "FETCH",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
},
"keysExamined": 0,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
},
...
},
"serverInfo": {
...
"version": "4.4.6",
"gitVersion": "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
...
}
]它告诉我(我在解释这些结果方面不是很有经验),这个查询非常便宜:"executionTimeMillisEstimate": 0,,因为它没有运行文档扫描"docsExamined": 0,。
接下来,我连接到mongo服务器并运行db.currentOp({"secs_running": {$gte: 5}})从这边获取一些信息:
{
"type" : "op",
...
"clientMetadata" : {
"driver" : {
"name" : "nodejs|Mongoose",
"version" : "3.6.5"
},
"os" : {
"type" : "Linux",
"name" : "linux",
"architecture" : "x64",
"version" : "5.8.0-50-generic"
},
"platform" : "'Node.js v14.17.0, LE (unified)",
"version" : "3.6.5|5.12.3"
},
"active" : true,
"secs_running" : NumberLong(16),
"microsecs_running" : NumberLong(16661409),
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"itemId" : 1
},
"projection" : {
},
"returnKey" : false,
"showRecordId" : false,
"lsid" : {
"id" : UUID("be3ce18b-5365-4680-b734-543d06418301")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1625498044, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : 0
}
},
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"numYields" : 14701,
"locks" : {
"ReplicationStateTransition" : "w",
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(14702)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
}有什么想法可以提高我的应用程序的性能或者找到瓶颈吗?由于mongo端的负载很大,并且没有找到/传递给应用程序的文档,所以我猜是mongo有问题.
编辑:我已经使用db.setProfilingLevel(2)和db.system.profile.find().pretty()从DB记录了整个过程。在这里我们可以看到整个集合(还是我误解了"docsExamined" : 39612167?)被质疑:
{
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"dataId" : 1
},
"projection" : {
},
...
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"keysExamined" : 39612167,
"docsExamined" : 39612167,
"cursorExhausted" : true,
"numYield" : 39613,
"nreturned" : 0,
"queryHash" : "B7F40289",
"planCacheKey" : "BADED068",
"locks" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(39615)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(39615)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"flowControl" : {
},
"storage" : {
},
"responseLength" : 242,
"protocol" : "op_msg",
"millis" : 48401,
"planSummary" : "IXSCAN { dataId: 1 }",
"execStats" : {
"stage" : "CACHED_PLAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 48401,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"scenarioId" : {
"$eq" : "ckqf5ulg38gu208eecxlf95fc"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 6270,
"works" : 39612168,
"advanced" : 0,
"needTime" : 39612167,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"docsExamined" : 39612167,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 39612167,
"executionTimeMillisEstimate" : 2151,
"works" : 39612168,
"advanced" : 39612167,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"keyPattern" : {
"dataId" : 1
},
"indexName" : "dataId_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"dataId" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"dataId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 39612167,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}发布于 2021-07-06 12:00:21
(和往常一样)指数似乎没有被正确设定。我创造了一个新的(次要的?)索引:
{
"dataId" : 1,
"scenarioId": 1
}现在查询在毫秒内返回..。
编辑:我仍然想知道的是,shell查询以毫秒为单位返回,而猫鼬查询花费了很长时间。尽管在我看来,查询似乎是相同的(从我的角度来看),但mongo对它们的处理方式不同。
https://stackoverflow.com/questions/68267153
复制相似问题