在下面的句子中,单词 Asteroid 的 A 位于位置 0,d 位于位置 7,其后的空格计为位置 8。为了更清楚地说明,下面列出了每个单词的偏移范围。
// 0-7, 9-10, 12, 14-18, 20-26, 28-31, 33-37, 39-41, 43-46
// Asteroid is a rocky objects that orbit the Sun
现在,我这里有一个偏移范围为 12 - 19 的对象,其中第 19 位是空格。
{
"start_offset": 12,
"end": 19,
"text": "a rocky",
"entity_type": "adjective",
},
现在,我需要找出落在这个 start_offset 和 end 范围之内的所有单词,并把它们放入上面对象的 splits 键下,如下所示。
{
"start_offset": 12,
"end": 19,
"text": "a rocky",
"entity_type": "adjective",
"splits": [
{
"start_offset": 14,
"end": 19,
"text": "rocky",
"entity_type": "adjective",
},
]
},
这个过程需要重复 n 次,最后把所有元素分组后得到输出。
我已经尝试了下面的做法,得到了接近的结果,但仍有很多需要改进的地方。有人能指点一下吗?
// Buckets every item of `arr` under the widest range in `arr` that fully
// contains it (every item contains itself), keyed by "start_offset-end".
const res = arr.reduce((groups, item) => {
  // All ranges that completely cover the current item.
  const enclosing = arr.filter(
    (candidate) => candidate.start_offset <= item.start_offset && candidate.end >= item.end,
  );
  // Longest span first, so the first entry is the outermost container.
  enclosing.sort(
    (left, right) => (right.end - right.start_offset) - (left.end - left.start_offset),
  );
  const { start_offset, end } = enclosing[0];
  const hash = `${start_offset}-${end}`;
  const existing = groups[hash];
  if (existing) {
    // Append to a fresh copy of the bucket rather than mutating it.
    groups[hash] = { ...existing, splits: [...existing.splits, item] };
  } else {
    groups[hash] = { start_offset, end, splits: [item] };
  }
  return groups;
}, {});
// One entry per distinct outermost range, in first-seen order.
const result = Object.values(res);
console.log(result)
给定输入:
let arr = [
{
"start_offset": 0,
"end": 38,
"text": "Asteroid is a rocky objects that orbit",
"entity_type": "adjective",
},
{
"start_offset": 12,
"end": 19,
"text": "a rocky",
"entity_type": "adjective",
},
{
"start_offset": 14,
"end": 27,
"text": "rocky objects",
"entity_type": "adjective",
},
{
"start_offset": 20,
"end": 32,
"text": "objects that",
"entity_type": "adjective",
},
{
"start_offset": 14,
"end": 19,
"text": "rocky",
"entity_type": "adjective",
},
{
"start_offset": 20,
"end": 27,
"text": "objects",
"entity_type": "adjective",
},
{
"start_offset": 33,
"end": 47,
"text": "orbit the Sun",
"entity_type": "adjective",
},
{
"start_offset": 43,
"end": 47,
"text": "Sun",
"entity_type": "adjective",
}
]
预期输出:
let output = [
{
"start_offset": 0,
"end": 38,
"text": "Asteroid is a rocky objects that orbit",
"entity_type": "adjective",
"splits": [
{
"start_offset": 12,
"end": 19,
"text": "a rocky",
"entity_type": "adjective",
"splits": [
{
"start_offset": 14,
"end": 19,
"text": "rocky",
"entity_type": "adjective",
},
]
},
{
"start_offset": 14,
"end": 27,
"text": "rocky objects",
"entity_type": "adjective",
"splits": [
{
"start_offset": 20,
"end": 27,
"text": "objects",
"entity_type": "adjective",
},
]
},
{
"start_offset": 20,
"end": 32,
"text": "objects that",
"entity_type": "adjective",
},
]
},
{
"start_offset": 33,
"end": 47,
"text": "orbit the Sun",
"entity_type": "adjective",
},
{
"start_offset": 43,
"end": 47,
"text": "Sun",
"entity_type": "adjective",
}
]
发布于 2022-07-21 11:19:02
您可以先对数组排序,然后对其执行 reduce:对每个元素逐层向下查找起止范围能够包含它的节点,直到找不到这样的子节点为止,再把该元素放入最后找到的那一层。
// Sample annotations: character ranges (`start_offset`..`end`) over one sentence.
const data = [
  { start_offset: 0, end: 38, text: "Asteroid is a rocky objects that orbit", entity_type: "adjective" },
  { start_offset: 12, end: 19, text: "a rocky", entity_type: "adjective" },
  { start_offset: 14, end: 27, text: "rocky objects", entity_type: "adjective" },
  { start_offset: 20, end: 32, text: "objects that", entity_type: "adjective" },
  { start_offset: 14, end: 19, text: "rocky", entity_type: "adjective" },
  { start_offset: 20, end: 27, text: "objects", entity_type: "adjective" },
  { start_offset: 33, end: 47, text: "orbit the Sun", entity_type: "adjective" },
  { start_offset: 43, end: 47, text: "Sun", entity_type: "adjective" },
];

// Nests each range under the first already-placed range that fully contains
// it, descending level by level. Children are stored under `splits`, the key
// the requested output structure uses.
const result = [...data] // copy first: Array.prototype.sort reorders in place
  // Earlier start first; for equal starts the longer range first, so a
  // container is always placed before anything it can contain.
  .sort((a, b) => a.start_offset - b.start_offset || b.end - a.end)
  // `{ ...node }` shallow-clones each item so the objects in `data` never
  // receive a `splits` property.
  .reduce((roots, { ...node }) => {
    let siblings = roots;
    for (;;) {
      const parent = siblings.find(
        (q) => q.start_offset <= node.start_offset && q.end >= node.end,
      );
      if (!parent) {
        break;
      }
      // Create the child list lazily so leaf nodes carry no `splits` key.
      if (!parent.splits) {
        parent.splits = [];
      }
      siblings = parent.splits;
    }
    siblings.push(node);
    return roots;
  }, []);
console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
https://stackoverflow.com/questions/73064805
复制相似问题