当我上传文档进行表单识别时,得到的结果似乎与文档的结构不一致,而且识别效果时好时坏,很不稳定。在我的示例中,有一个包含产品描述、数量、部件号和价格的表格。只有描述的第一行与数量、部件号和价格相关联。描述的其余各行出现在响应的其他位置,与第一行没有任何关联。
以下是报价单布局的示例:

我已经上传过填写完整的文档和空白表单,其中既有简单的文档,也有复杂的多页文档。以下是缺少数据的结果的一部分:
{
"header": [
{
"text": "ITEM",
"boundingBox": [
43.8,
527.5,
65.4,
527.5,
65.4,
519.7,
43.8,
519.7
]
}
],
"entries": [
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.7,
512.1,
278.7,
512.1,
278.7,
504.3,
43.7,
504.3
],
"confidence": 1.0
}
],
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.7,
438.7,
278.7,
438.7,
278.7,
430.9,
43.7,
430.9
],
"confidence": 1.0
}
],
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.7,
357.7,
278.7,
357.7,
278.7,
349.9,
43.7,
349.9
],
"confidence": 1.0
}
],
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.7,
276.4,
278.7,
276.4,
278.7,
268.7,
43.7,
268.7
],
"confidence": 1.0
}
],
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.7,
195.4,
278.7,
195.4,
278.7,
187.7,
43.7,
187.7
],
"confidence": 1.0
}
],
[
{
"text": "F5 BIG-IP Service Premium (Level 1-3) - technical support",
"boundingBox": [
43.8,
114.5,
278.8,
114.5,
278.8,
106.8,
43.8,
106.8
],
"confidence": 1.0
}
]
]
},
{
"header": [
{
"text": "QTY",
"boundingBox": [
304.9,
527.5,
320.6,
527.5,
320.6,
519.7,
304.9,
519.7
]
}
],
"entries": [
[
{
"text": "1",
"boundingBox": [
310.7,
512.8,
315.1,
512.8,
315.1,
504.3,
310.7,
504.3
],
"confidence": 1.0
}
],
[
{
"text": "1",
"boundingBox": [
310.7,
439.3,
315.1,
439.3,
315.1,
430.9,
310.7,
430.9
],
"confidence": 1.0
}
],
[
{
"text": "1",
"boundingBox": [
310.7,
358.3,
315.1,
358.3,
315.1,
349.9,
310.7,
349.9
],
"confidence": 1.0
}
],
[
{
"text": "1",
"boundingBox": [
310.7,
277.1,
315.1,
277.1,
315.1,
268.7,
310.7,
268.7
],
"confidence": 1.0
}
],
[
{
"text": "1",
"boundingBox": [
310.7,
196.1,
315.1,
196.1,
315.1,
187.7,
310.7,
187.7
],
"confidence": 1.0
}
],
[
{
"text": "1",
"boundingBox": [
310.7,
115.2,
315.1,
115.2,
315.1,
106.8,
310.7,
106.8
],
"confidence": 1.0
}
]
]
},
{
"header": [
{
"text": "Part#",
"boundingBox": [
356.2,
527.5,
381.9,
527.5,
381.9,
519.7,
356.2,
519.7
]
}
],
"entries": [
[
{
"text": "3158598",
"boundingBox": [
353.6,
512.8,
384.6,
512.8,
384.6,
504.3,
353.6,
504.3
],
"confidence": 1.0
}
],
[
{
"text": "3158598",
"boundingBox": [
353.6,
439.3,
384.6,
439.3,
384.6,
430.9,
353.6,
430.9
],
"confidence": 1.0
}
],
[
{
"text": "3158598",
"boundingBox": [
353.6,
358.3,
384.6,
358.3,
384.6,
349.9,
353.6,
349.9
],
"confidence": 1.0
}
],
[
{
"text": "3158598",
"boundingBox": [
353.6,
277.1,
384.6,
277.1,
384.6,
268.7,
353.6,
268.7
],
"confidence": 1.0
}
],
[
{
"text": "3158598",
"boundingBox": [
353.6,
196.1,
384.6,
196.1,
384.6,
187.7,
353.6,
187.7
],
"confidence": 1.0
}
],
[
{
"text": "3158598",
"boundingBox": [
353.6,
115.2,
384.6,
115.2,
384.6,
106.8,
353.6,
106.8
],
"confidence": 1.0
}
]
]
},
{
"header": [
{
"text": "UNIT PRICE",
"boundingBox": [
431.0,
527.5,
480.5,
527.5,
480.5,
519.7,
431.0,
519.7
]
}
],
"entries": [
[
{
"text": "$10,003.53",
"boundingBox": [
439.7,
512.8,
480.1,
512.8,
480.1,
504.3,
439.7,
504.3
],
"confidence": 1.0
}
],
[
{
"text": "$30,815.03",
"boundingBox": [
439.7,
439.3,
480.1,
439.3,
480.1,
430.9,
439.7,
430.9
],
"confidence": 1.0
}
],
[
{
"text": "$6,401.54",
"boundingBox": [
444.2,
358.3,
480.3,
358.3,
480.3,
349.9,
444.2,
349.9
],
"confidence": 1.0
}
],
[
{
"text": "$10,003.53",
"boundingBox": [
439.7,
277.1,
480.1,
277.1,
480.1,
268.7,
439.7,
268.7
],
"confidence": 1.0
}
],
[
{
"text": "$16,006.85",
"boundingBox": [
439.7,
196.1,
480.1,
196.1,
480.1,
187.7,
439.7,
187.7
],
"confidence": 1.0
}
],
[
{
"text": "$2,399.33",
"boundingBox": [
444.2,
115.2,
480.3,
115.2,
480.3,
106.8,
444.2,
106.8
],
"confidence": 1.0
}
]
]
},
{
"header": [
{
"text": "EXT. PRICE",
"boundingBox": [
513.7,
527.5,
558.2,
527.5,
558.2,
519.7,
513.7,
519.7
]
}
],
"entries": [
[
{
"text": "$10,003.53",
"boundingBox": [
517.8,
512.8,
558.3,
512.8,
558.3,
504.3,
517.8,
504.3
],
"confidence": 1.0
}
],
[
{
"text": "$30,815.03",
"boundingBox": [
517.8,
439.3,
558.3,
439.3,
558.3,
430.9,
517.8,
430.9
],
"confidence": 1.0
}
],
[
{
"text": "$6,401.54",
"boundingBox": [
522.4,
358.3,
558.4,
358.3,
558.4,
349.9,
522.4,
349.9
],
"confidence": 1.0
}
],
[
{
"text": "$10,003.53",
"boundingBox": [
517.8,
277.1,
558.3,
277.1,
558.3,
268.7,
517.8,
268.7
],
"confidence": 1.0
}
],
[
{
"text": "$16,006.85",
"boundingBox": [
517.8,
196.1,
558.3,
196.1,
558.3,
187.7,
517.8,
187.7
],
"confidence": 1.0
}
],
[
{
"text": "$2,399.33",
"boundingBox": [
522.4,
115.2,
558.4,
115.2,
558.4,
106.8,
522.4,
106.8
],
"confidence": 1.0
}
]
]
}

发布于 2020-01-28 05:40:17
Form Recognizer V2 最近发布了公共预览版。它现在支持带标签的训练功能,您可以尝试在训练模型之前先对一些样本表单进行标记。通过这种方式,您可以更好地控制表单中您感兴趣的数据。详见:https://docs.microsoft.com/en-us/azure/cognitive-services/form-recognizer/overview
https://stackoverflow.com/questions/58174773
复制相似问题