文章/答案/技术大牛

发布

社区首页 >问答首页 >简单Mongo查找的高负荷指数法

问简单Mongo查找的高负荷指数法
EN

Stack Overflow用户

提问于 2021-07-06 08:33:42

回答 1查看 144关注 0票数 0

我有一个使用mongoDB (运行猫鼬)查询的NodeJS。

在这个特殊的例子中，我将查询一组集合，并将数据作为CSV传输到archiverjs中，以创建一个zip文件。因此，我有一个传入的请求，数据会使用mongoose和mongo游标进行查询，然后通过管道连接到一个管道中，该管道将分别以archiverjs结束-- http响应将zip文件传递给用户。

async function getSortedQueryCursor(...) {
  ...

  const query = MODEL_LOOKUP[fileType]
    .find(reducer)
    .sort({ [idString]: 'asc' });

  return query.cursor();
}


async function getData(...) {
    const cursor = await getSortedQueryCursor(...);

    return cursor
      .pipe(filter1Stream)
      .pipe(filter2Stream)
      .pipe(filter3Stream)
      .pipe(csvStringifyStream);
}

router.post('/:scenarioId', async (request, response) => {
    ...

    const archive = Archiver(...);
    
    archive.pipe(response);

    const result = await getData(...);

    archive.append(stream, { name: filepath });

    return archive.finalize();
}

一旦一个特定的集合在游戏中(这个集合包含了大约4,000万个文档)，查询就会持续很长时间(>15s)，在这段时间里，我可以在100%的CPU上看到mongo进程。更令人惊讶的是，结果集是空的(没有与查询匹配的文档)。

这是一个相当简单的查询：

items.find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' }, { sort: { dataId: 1 }

我有scenarioId和dataId的索引。如果在shell上运行查询，它将以30 on 格式返回。

explain()的结果是：

[
  {
    "queryPlanner": {
      "plannerVersion": 1,
      "namespace": "data.items",
      "indexFilterSet": false,
      "parsedQuery": {
        "scenarioId": {
          "$eq": "ckqf5ulg38gu208eecxlf95fc"
        }
      },
      "winningPlan": {
        "stage": "SORT",
        "sortPattern": {
          "itemId": 1
        },
        "memLimit": 104857600,
        "type": "simple",
        "inputStage": {
          "stage": "FETCH",
          "inputStage": {
            "stage": "IXSCAN",
            "keyPattern": {
              "scenarioId": 1
            },
            "indexName": "scenarioId_1",
            "isMultiKey": false,
            "multiKeyPaths": {
              "scenarioId": []
            },
            "isUnique": false,
            "isSparse": false,
            "isPartial": false,
            "indexVersion": 2,
            "direction": "forward",
            "indexBounds": {
              "scenarioId": [
                "[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
              ]
            }
          }
        }
      },
      "rejectedPlans": [
        ...
      ]
    },
    "executionStats": {
      "executionSuccess": true,
      "nReturned": 0,
      "executionTimeMillis": 0,
      "totalKeysExamined": 0,
      "totalDocsExamined": 0,
      "executionStages": {
        "stage": "SORT",
        "nReturned": 0,
        "executionTimeMillisEstimate": 0,
        "works": 3,
        "advanced": 0,
        "needTime": 1,
        "needYield": 0,
        "saveState": 0,
        "restoreState": 0,
        "isEOF": 1,
        "sortPattern": {
          "dataId": 1
        },
        "memLimit": 104857600,
        "type": "simple",
        "totalDataSizeSorted": 0,
        "usedDisk": false,
        "inputStage": {
          "stage": "FETCH",
          "nReturned": 0,
          "executionTimeMillisEstimate": 0,
          "works": 1,
          "advanced": 0,
          "needTime": 0,
          "needYield": 0,
          "saveState": 0,
          "restoreState": 0,
          "isEOF": 1,
          "docsExamined": 0,
          "alreadyHasObj": 0,
          "inputStage": {
            "stage": "IXSCAN",
            "nReturned": 0,
            "executionTimeMillisEstimate": 0,
            "works": 1,
            "advanced": 0,
            "needTime": 0,
            "needYield": 0,
            "saveState": 0,
            "restoreState": 0,
            "isEOF": 1,
            "keyPattern": {
              "scenarioId": 1
            },
            "indexName": "scenarioId_1",
            "isMultiKey": false,
            "multiKeyPaths": {
              "scenarioId": []
            },
            "isUnique": false,
            "isSparse": false,
            "isPartial": false,
            "indexVersion": 2,
            "direction": "forward",
            "indexBounds": {
              "scenarioId": [
                "[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
              ]
            },
            "keysExamined": 0,
            "seeks": 1,
            "dupsTested": 0,
            "dupsDropped": 0
          }
        }
      },
     ...
    },
    "serverInfo": {
      ...
      "version": "4.4.6",
      "gitVersion": "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
    },
    ...
  }
]

它告诉我(我在解释这些结果方面不是很有经验)，这个查询非常便宜："executionTimeMillisEstimate": 0,，因为它没有运行文档扫描"docsExamined": 0,。

接下来，我连接到mongo服务器并运行db.currentOp({"secs_running": {$gte: 5}})从这边获取一些信息：

{
    "type" : "op",
    ...
    "clientMetadata" : {
        "driver" : {
            "name" : "nodejs|Mongoose",
            "version" : "3.6.5"
        },
        "os" : {
            "type" : "Linux",
            "name" : "linux",
            "architecture" : "x64",
            "version" : "5.8.0-50-generic"
        },
        "platform" : "'Node.js v14.17.0, LE (unified)",
        "version" : "3.6.5|5.12.3"
    },
    "active" : true,
    "secs_running" : NumberLong(16),
    "microsecs_running" : NumberLong(16661409),
    "op" : "query",
    "ns" : "data.items",
    "command" : {
        "find" : "items",
        "filter" : {
            "scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
        },
        "sort" : {
            "itemId" : 1
        },
        "projection" : {
            
        },
        "returnKey" : false,
        "showRecordId" : false,
        "lsid" : {
            "id" : UUID("be3ce18b-5365-4680-b734-543d06418301")
        },
        "$clusterTime" : {
            "clusterTime" : Timestamp(1625498044, 1),
            "signature" : {
                "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
                "keyId" : 0
            }
        },
        "$db" : "data",
        "$readPreference" : {
            "mode" : "primaryPreferred"
        }
    },
    "numYields" : 14701,
    "locks" : {
        "ReplicationStateTransition" : "w",
        "Global" : "r",
        "Database" : "r",
        "Collection" : "r"
    },
    "waitingForLock" : false,
    "lockStats" : {
        "ReplicationStateTransition" : {
            "acquireCount" : {
                "w" : NumberLong(14702)
            }
        },
        "Global" : {
            "acquireCount" : {
                "r" : NumberLong(14702)
            }
        },
        "Database" : {
            "acquireCount" : {
                "r" : NumberLong(14702)
            }
        },
        "Collection" : {
            "acquireCount" : {
                "r" : NumberLong(14702)
            }
        },
        "Mutex" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        }
    },
    "waitingForFlowControl" : false,
    "flowControlStats" : {
    }
}

有什么想法可以提高我的应用程序的性能或者找到瓶颈吗？由于mongo端的负载很大，并且没有找到/传递给应用程序的文档，所以我猜是mongo有问题.

编辑:我已经使用db.setProfilingLevel(2)和db.system.profile.find().pretty()从DB记录了整个过程。在这里我们可以看到整个集合(还是我误解了"docsExamined" : 39612167?)被质疑：

{
    "op" : "query",
    "ns" : "data.items",
    "command" : {
        "find" : "items",
        "filter" : {
            "scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
        },
        "sort" : {
            "dataId" : 1
        },
        "projection" : {

        },
        ...
        "$db" : "data",
        "$readPreference" : {
            "mode" : "primaryPreferred"
        }
    },
    "keysExamined" : 39612167,
    "docsExamined" : 39612167,
    "cursorExhausted" : true,
    "numYield" : 39613,
    "nreturned" : 0,
    "queryHash" : "B7F40289",
    "planCacheKey" : "BADED068",
    "locks" : {
        "ReplicationStateTransition" : {
            "acquireCount" : {
                "w" : NumberLong(39615)
            }
        },
        "Global" : {
            "acquireCount" : {
                "r" : NumberLong(39615)
            }
        },
        "Database" : {
            "acquireCount" : {
                "r" : NumberLong(39614)
            }
        },
        "Collection" : {
            "acquireCount" : {
                "r" : NumberLong(39614)
            }
        },
        "Mutex" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        }
    },
    "flowControl" : {

    },
    "storage" : {

    },
    "responseLength" : 242,
    "protocol" : "op_msg",
    "millis" : 48401,
    "planSummary" : "IXSCAN { dataId: 1 }",
    "execStats" : {
        "stage" : "CACHED_PLAN",
        "nReturned" : 0,
        "executionTimeMillisEstimate" : 48401,
        "works" : 1,
        "advanced" : 0,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 39613,
        "restoreState" : 39613,
        "isEOF" : 1,
        "inputStage" : {
            "stage" : "FETCH",
            "filter" : {
                "scenarioId" : {
                    "$eq" : "ckqf5ulg38gu208eecxlf95fc"
                }
            },
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 6270,
            "works" : 39612168,
            "advanced" : 0,
            "needTime" : 39612167,
            "needYield" : 0,
            "saveState" : 39613,
            "restoreState" : 39613,
            "isEOF" : 1,
            "docsExamined" : 39612167,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 39612167,
                "executionTimeMillisEstimate" : 2151,
                "works" : 39612168,
                "advanced" : 39612167,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 39613,
                "restoreState" : 39613,
                "isEOF" : 1,
                "keyPattern" : {
                    "dataId" : 1
                },
                "indexName" : "dataId_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "dataId" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "dataId" : [
                        "[MinKey, MaxKey]"
                    ]
                },
                "keysExamined" : 39612167,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0
            }
        }
    }

mongoose

stream

database-cursor

node.js

mongodb

回答 1

Stack Overflow用户

回答已采纳

发布于 2021-07-06 12:00:21

(和往常一样)指数似乎没有被正确设定。我创造了一个新的(次要的？)索引：

{
    "dataId" : 1,
    "scenarioId": 1
}

现在查询在毫秒内返回..。

编辑:我仍然想知道的是，shell查询以毫秒为单位返回，而猫鼬查询花费了很长时间。尽管在我看来，查询似乎是相同的(从我的角度来看)，但mongo对它们的处理方式不同。

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/68267153

复制

相似问题

问简单Mongo查找的高负荷指数法
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问简单Mongo查找的高负荷指数法EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问简单Mongo查找的高负荷指数法
EN