我在 Lambda 中用 JavaScript 调用 Amazon Textract 分析 PDF 文档时遇到了困难。非常希望能在这里得到一些帮助。
这是我的代码:
const AWS = require("aws-sdk");
// Configure the SDK region from the AWS_REGION environment variable
// (the logged request further down in this document shows 'eu-west-1').
AWS.config.update({ region: process.env.AWS_REGION });
// Textract client created once at module scope and used by the handler below.
const textract = new AWS.Textract();
/**
 * Lambda entry point: reads the bucket and object key from the first record
 * of the incoming event, starts a Textract document analysis (FORMS) for that
 * object, and then immediately asks for the analysis result and logs it.
 *
 * NOTE(review): this is the code as posted in the question and it does not
 * work as intended; see the inline notes on the two Textract calls.
 *
 * @param {object} event - Expected to carry `Records[0].s3.bucket.name` and
 *   `Records[0].s3.object.key` (presumably an S3 notification — confirm the trigger).
 * @param {object} context - Lambda context; not used in this handler.
 * @returns {Promise<void>} Resolves with no value; errors are logged, not rethrown.
 */
exports.handler = async (event, context) => {
// Only the first record of the event is processed.
const bucket = event.Records[0].s3.bucket.name;
// Turn "+" back into spaces, then URI-decode the object key.
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, " ")
);
// Request for startDocumentAnalysis: the S3 object to analyze and the
// feature set to extract (form data only).
var textractParams = {
DocumentLocation: {
S3Object: {
Bucket: bucket,
Name: key,
},
},
FeatureTypes: ["FORMS"]
};
try {
// NOTE(review): no `.promise()` here, so `await` yields the SDK request
// object rather than the API response. `textractAnalysis.JobId` is then
// undefined, which matches `params: { JobId: undefined }` in the logged output.
const textractAnalysis = await textract.startDocumentAnalysis(textractParams);
var analysisParams = {
JobId: textractAnalysis.JobId
};
// NOTE(review): same missing `.promise()`; `data` is the request object, which
// is what the dump following this code shows. Even with `.promise()` the job is
// asynchronous: the question reports `JobStatus: 'IN_PROGRESS'` when the result
// is fetched right after starting, so the job must finish before results exist.
const data = await textract.getDocumentAnalysis(analysisParams);
console.log(data);
} catch (e) {
// Errors are only logged; the invocation still completes without rethrowing.
console.log(e);
}
};这是我得到的回应:
domain: null,
service: Service {
config: Config {
credentials: [EnvironmentCredentials],
credentialProvider: [CredentialProviderChain],
region: 'eu-west-1',
logger: null,
apiVersions: {},
apiVersion: null,
endpoint: 'textract.eu-west-1.amazonaws.com',
httpOptions: [Object],
maxRetries: undefined,
maxRedirects: 10,
paramValidation: true,
sslEnabled: true,
s3ForcePathStyle: false,
s3BucketEndpoint: false,
s3DisableBodySigning: true,
s3UsEast1RegionalEndpoint: 'legacy',
s3UseArnRegion: undefined,
computeChecksums: true,
convertResponseTypes: true,
correctClockSkew: false,
customUserAgent: null,
dynamoDbCrc32: true,
systemClockOffset: 0,
signatureVersion: 'v4',
signatureCache: true,
retryDelayOptions: {},
useAccelerateEndpoint: false,
clientSideMonitoring: false,
endpointDiscoveryEnabled: undefined,
endpointCacheSize: 1000,
hostPrefixEnabled: true,
stsRegionalEndpoints: 'legacy'
},
isGlobalEndpoint: false,
endpoint: Endpoint {
protocol: 'https:',
host: 'textract.eu-west-1.amazonaws.com',
port: 443,
hostname: 'textract.eu-west-1.amazonaws.com',
pathname: '/',
path: '/',
href: 'https://textract.eu-west-1.amazonaws.com/'
},
_events: { apiCallAttempt: [Array], apiCall: [Array] },
MONITOR_EVENTS_BUBBLE: [Function: EVENTS_BUBBLE],
CALL_EVENTS_BUBBLE: [Function: CALL_EVENTS_BUBBLE],
_clientId: 3
},
operation: 'getDocumentAnalysis',
params: { JobId: undefined },
httpRequest: HttpRequest {
method: 'POST',
path: '/',
headers: {
'User-Agent': 'aws-sdk-nodejs/2.1001.0 linux/v14.18.1 exec-env/AWS_Lambda_nodejs14.x'
},
body: '',
endpoint: {
protocol: 'https:',
host: 'textract.eu-west-1.amazonaws.com',
port: 443,
hostname: 'textract.eu-west-1.amazonaws.com',
pathname: '/',
path: '/',
href: 'https://textract.eu-west-1.amazonaws.com/',
constructor: [Function]
},
region: 'eu-west-1',
_userAgent: 'aws-sdk-nodejs/2.1001.0 linux/v14.18.1 exec-env/AWS_Lambda_nodejs14.x'
},
startTime: 2022-01-05T22:37:00.269Z,
response: Response {
request: [Circular *1],
data: null,
error: null,
retryCount: 0,
redirectCount: 0,
httpResponse: HttpResponse {
statusCode: undefined,
headers: {},
body: undefined,
streaming: false,
stream: null
},
maxRetries: 3,
maxRedirects: 10
},
_asm: AcceptorStateMachine {
currentState: 'validate',
states: {
validate: [Object],
build: [Object],
afterBuild: [Object],
sign: [Object],
retry: [Object],
afterRetry: [Object],
send: [Object],
validateResponse: [Object],
extractError: [Object],
extractData: [Object],
restart: [Object],
success: [Object],
error: [Object],
complete: [Object]
}
},
_haltHandlersOnError: false,
_events: {
validate: [
[Function (anonymous)],
[Function],
[Function: VALIDATE_REGION],
[Function: BUILD_IDEMPOTENCY_TOKENS],
[Function: VALIDATE_PARAMETERS]
],
afterBuild: [
[Function: COMPUTE_CHECKSUM],
[Function],
[Function: SET_CONTENT_LENGTH],
[Function: SET_HTTP_HOST]
],
restart: [ [Function: RESTART] ],
sign: [ [Function (anonymous)], [Function], [Function] ],
validateResponse: [ [Function: VALIDATE_RESPONSE], [Function (anonymous)] ],
send: [ [Function] ],
httpHeaders: [ [Function: HTTP_HEADERS] ],
httpData: [ [Function: HTTP_DATA] ],
httpDone: [ [Function: HTTP_DONE] ],
retry: [
[Function: FINALIZE_ERROR],
[Function: INVALIDATE_CREDENTIALS],
[Function: EXPIRED_SIGNATURE],
[Function: CLOCK_SKEWED],
[Function: REDIRECT],
[Function: RETRY_CHECK],
[Function: API_CALL_ATTEMPT_RETRY]
],
afterRetry: [ [Function] ],
build: [ [Function: buildRequest] ],
extractData: [ [Function: extractData], [Function: extractRequestId] ],
extractError: [ [Function: extractError], [Function: extractRequestId] ],
httpError: [ [Function: ENOTFOUND_ERROR] ],
success: [ [Function: API_CALL_ATTEMPT] ],
complete: [ [Function: API_CALL] ]
},
emit: [Function: emit],
API_CALL_ATTEMPT: [Function: API_CALL_ATTEMPT],
API_CALL_ATTEMPT_RETRY: [Function: API_CALL_ATTEMPT_RETRY],
API_CALL: [Function: API_CALL]
}但是,当我修改这两个 Textract 函数调用,并像下面这样加上 .promise() 时:
const textractAnalysis = await textract.startDocumentAnalysis(textractParams).promise();
const data = await textract.getDocumentAnalysis(analysisParams).promise();然后我得到了这样的回应:
{ JobStatus: 'IN_PROGRESS', AnalyzeDocumentModelVersion: '1.0' }我也尝试过使用Textract,但这更糟糕,因为我根本无法导入它。
提前感谢!
发布于 2022-01-05 23:31:49
.startDocumentAnalysis 的文档说明,该调用的响应中只会返回 JobId。
文本分析完成后,Amazon Textract 会将完成状态发布到您在 NotificationChannel 中指定的 Amazon Simple Notification Service (Amazon SNS) 主题。要获取文本分析操作的结果,请先检查发布到 Amazon SNS 主题的状态值是否为 SUCCEEDED。如果是,请调用 GetDocumentAnalysis,并传入最初调用 StartDocumentAnalysis 时返回的作业标识符 (JobId)。有关更多信息,请参阅“文档文本分析”。
如果您想要一种或多或少同步的方式来完成它,您可以使用.analyzeDocument。
https://stackoverflow.com/questions/70599966
复制相似问题