mongoDB集合中的输入数据如下所示:
{
"_id" : ObjectId("dummyObjectID"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address",
},
{
"switchId" : 1,
"egress_timeStamp" : et1,
"hop_latency" : someValue,
"ingress_timeStamp" : it1
},
{
"switchId" : 2,
"egress_timeStamp" : et2,
"hop_latency" : someValue,
"ingress_timeStamp" : it2
},
{
"switchId" : 3,
"egress_timeStamp" : et3,
"hop_latency" : 1122,
"ingress_timeStamp" : it3
},
{
"switchId" : 4,
"egress_timeStamp" : et4,
"hop_latency" : someValue,
"ingress_timeStamp" : it4
},
{
"switchId" : 5,
"egress_timeStamp" : et5,
"hop_latency" : someValue,
"ingress_timeStamp" : it5
}
],
"time" : dummyTime
}
我期望的输出如下:
{
"_id" : ObjectId("dummyObjectID"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address",
},
{
"switchId" : 1,
"egress_timeStamp" : et1,
"link_latency" : 0, # Here 0 because there is no switch before this switch
"hop_latency" : someValue,
"ingress_timeStamp" : it1
},
{
"switchId" : 2,
"egress_timeStamp" : et2,
"link_latency" : it2 - et1,
"hop_latency" : someValue,
"ingress_timeStamp" : it2
},
{
"switchId" : 3,
"egress_timeStamp" : et3,
"link_latency" : it3 - et2,
"hop_latency" : someValue,
"ingress_timeStamp" : it3
},
{
"switchId" : 4,
"egress_timeStamp" : et4,
"link_latency" : it4 - et3,
"hop_latency" : someValue,
"ingress_timeStamp" : it4
},
{
"switchId" : 5,
"egress_timeStamp" : et5,
"link_latency" : it5 - et4,
"hop_latency" : someValue,
"ingress_timeStamp" : it5
}
],
"time" : dummyTime
}
我想为每个 switchId 计算链路延迟,即“当前交换机的 ingress_timeStamp”减去“前一个交换机的 egress_timeStamp”。对于第一个 switchId,link_latency 必须为 0。我面临的问题是,数组中的交换机数据没有各自的键(key),因此我无法执行此操作。
我是mongodb的新手。我可以使用pyMongo和Python pandas dataframe获得所需的输出,但这需要很多时间。我认为mongoDB本身一定有一些很好的方法来获得所需的输出。
发布于 2019-12-28 08:44:27
不需要用 pandas。您可以基于当前的列表条目构建新的列表条目;希望这样效率更高(注意:它不会为第一条交换机记录创建值为零的 link_latency 条目;如果确实需要,可以相应地修改代码):
# Add a "link_latency" field to every switch entry of each document: the
# entry's ingress timestamp minus the previous entry's egress timestamp,
# expressed in seconds (assumes the timestamps are stored as datetimes).
# The first switch entry gets no "link_latency" because nothing precedes it.
for record in db.mycollection.find({}):
    # Sort on "switchId", the key the documents actually use.  Entries
    # without a switchId (the src_ip/dst_ip header) sort first, and the
    # tuple key avoids comparing a missing value against an int.
    entries = sorted(
        record['data'],
        key=lambda o: ('switchId' in o, o.get('switchId', 0)),
    )
    prev_et = None
    for x in entries:
        ingress = x.get('ingress_timeStamp')
        # Skip entries that have no predecessor egress time or no ingress
        # time of their own (e.g. the header entry).
        if prev_et is not None and ingress is not None:
            x['link_latency'] = (ingress - prev_et).total_seconds()
        prev_et = x.get('egress_timeStamp')
    record['data'] = entries
    db.mycollection.replace_one({'_id': record['_id']}, record, upsert=True)
# Complete data setup example:
import pymongo
import datetime
from random import randint
# Connect to the default local MongoDB instance and select the database.
db = pymongo.MongoClient()['mydatabase']

# Build ten increasing timestamps, each 100-1000 ms after the previous one.
# datetime.utcnow() is deprecated since Python 3.12, so use an aware UTC time.
t = [datetime.datetime.now(datetime.timezone.utc)]
for i in range(1, 10):
    t.append(t[i - 1] + datetime.timedelta(milliseconds=randint(100, 1000)))

# Insert one sample document: a src/dst header entry followed by five
# switch entries with random hop latencies.
db.mycollection.insert_one({
    "data": [
        {
            "src_ip": "Source IP address",
            "dst_ip": "Destination IP address",
        },
        {
            "switchId": 1,
            "egress_timeStamp": t[0],
            "hop_latency": randint(100, 1000),
            "ingress_timeStamp": t[1]
        },
        {
            "switchId": 2,
            "egress_timeStamp": t[2],
            "hop_latency": randint(100, 1000),
            "ingress_timeStamp": t[3],
        },
        {
            "switchId": 3,
            "egress_timeStamp": t[4],
            "hop_latency": randint(100, 1000),
            "ingress_timeStamp": t[5],
        },
        {
            "switchId": 4,
            "egress_timeStamp": t[6],
            "hop_latency": randint(100, 1000),
            "ingress_timeStamp": t[7],
        },
        {
            "switchId": 5,
            "egress_timeStamp": t[8],
            "hop_latency": randint(100, 1000),
            "ingress_timeStamp": t[9],
        }
    ],
    "time": datetime.datetime.now(datetime.timezone.utc)
})
# Add a "link_latency" field to every switch entry of each document: the
# entry's ingress timestamp minus the previous entry's egress timestamp,
# expressed in seconds (assumes the timestamps are stored as datetimes).
# The first switch entry gets no "link_latency" because nothing precedes it.
for record in db.mycollection.find({}):
    # Sort on "switchId", the key the documents actually use.  Entries
    # without a switchId (the src_ip/dst_ip header) sort first, and the
    # tuple key avoids comparing a missing value against an int.
    entries = sorted(
        record['data'],
        key=lambda o: ('switchId' in o, o.get('switchId', 0)),
    )
    prev_et = None
    for x in entries:
        ingress = x.get('ingress_timeStamp')
        # Skip entries that have no predecessor egress time or no ingress
        # time of their own (e.g. the header entry).
        if prev_et is not None and ingress is not None:
            x['link_latency'] = (ingress - prev_et).total_seconds()
        prev_et = x.get('egress_timeStamp')
    record['data'] = entries
    db.mycollection.replace_one({'_id': record['_id']}, record, upsert=True)
# Produces:
> db.mycollection.findOne()
{
"_id" : ObjectId("5e06a4bf54e0e497307e43d8"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address"
},
{
"switchId" : 1,
"egress_timeStamp" : ISODate("2019-12-28T00:41:35.615Z"),
"hop_latency" : 949,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:36.303Z")
},
{
"switchId" : 2,
"egress_timeStamp" : ISODate("2019-12-28T00:41:36.955Z"),
"hop_latency" : 953,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:37.162Z"),
"link_latency" : 1.547
},
{
"switchId" : 3,
"egress_timeStamp" : ISODate("2019-12-28T00:41:37.475Z"),
"hop_latency" : 751,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:38.032Z"),
"link_latency" : 1.077
},
{
"switchId" : 4,
"egress_timeStamp" : ISODate("2019-12-28T00:41:38.878Z"),
"hop_latency" : 961,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:39.682Z"),
"link_latency" : 2.207
},
{
"switchId" : 5,
"egress_timeStamp" : ISODate("2019-12-28T00:41:40.096Z"),
"hop_latency" : 728,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:40.561Z"),
"link_latency" : 1.683
}
],
"time" : ISODate("2019-12-28T00:41:35.615Z")
}
https://stackoverflow.com/questions/59459369
复制相似问题