IO-3515 resolve issues on search selects not updating, improve confidence scoring.
This commit is contained in:
@@ -62,27 +62,12 @@ async function handleBillOcr(request, response) {
|
||||
|
||||
// The uploaded file is available in request.file
|
||||
const uploadedFile = request.file;
|
||||
const { jobid, bodyshopid, partsorderid, skipTextract } = request.body;
|
||||
|
||||
|
||||
if (skipTextract === 'true') {
|
||||
console.log('Skipping Textract processing as per request');
|
||||
response.status(200).send({
|
||||
success: true,
|
||||
status: 'COMPLETED',
|
||||
data: await generateBillFormData({ processedData: null, jobid, bodyshopid, partsorderid, req: request }), //This is broken if the processedData is not overwritten in the function for testing.
|
||||
message: 'Invoice processing completed'
|
||||
});
|
||||
return;
|
||||
}
|
||||
const { jobid, bodyshopid, partsorderid } = request.body;
|
||||
|
||||
try {
|
||||
const fileType = getFileType(uploadedFile);
|
||||
console.log(`Processing file type: ${fileType}`);
|
||||
|
||||
// Images are always processed synchronously (single page)
|
||||
if (fileType === 'image') {
|
||||
console.log('Image => 1 page, processing synchronously');
|
||||
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
||||
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
|
||||
response.status(200).send({
|
||||
@@ -94,11 +79,9 @@ async function handleBillOcr(request, response) {
|
||||
} else if (fileType === 'pdf') {
|
||||
// Check the number of pages in the PDF
|
||||
const pageCount = await getPdfPageCount(uploadedFile.buffer);
|
||||
console.log(`PDF has ${pageCount} page(s)`);
|
||||
|
||||
if (pageCount === 1) {
|
||||
// Process synchronously for single-page documents
|
||||
console.log('PDF => 1 page, processing synchronously');
|
||||
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
||||
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
|
||||
//const billResult = await generateBillFormData({ result, });
|
||||
@@ -110,12 +93,11 @@ async function handleBillOcr(request, response) {
|
||||
});
|
||||
} else {
|
||||
// Start the Textract job (non-blocking) for multi-page documents
|
||||
console.log('PDF => 2+ pages, processing asynchronously');
|
||||
const jobInfo = await startTextractJob(uploadedFile.buffer, { jobid, bodyshopid, partsorderid });
|
||||
|
||||
response.status(202).send({
|
||||
success: true,
|
||||
jobId: jobInfo.jobId,
|
||||
textractJobId: jobInfo.jobId,
|
||||
message: 'Invoice processing started',
|
||||
statusUrl: `/ai/bill-ocr/status/${jobInfo.jobId}`
|
||||
});
|
||||
@@ -136,17 +118,14 @@ async function handleBillOcr(request, response) {
|
||||
}
|
||||
|
||||
async function handleBillOcrStatus(request, response) {
|
||||
const { jobId: textractJobId } = request.params;
|
||||
const { textractJobId } = request.params;
|
||||
|
||||
if (!textractJobId) {
|
||||
console.log('No textractJobId found in params');
|
||||
response.status(400).send({ error: 'Job ID is required' });
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('Looking for job:', textractJobId);
|
||||
const jobStatus = await getTextractJob({ redisPubClient, textractJobId });
|
||||
console.log('Job status:', jobStatus);
|
||||
|
||||
if (!jobStatus) {
|
||||
response.status(404).send({ error: 'Job not found' });
|
||||
@@ -156,18 +135,17 @@ async function handleBillOcrStatus(request, response) {
|
||||
if (jobStatus.status === 'COMPLETED') {
|
||||
// Generate billForm on-demand if not already generated
|
||||
let billForm = jobStatus.data?.billForm;
|
||||
|
||||
|
||||
if (!billForm && jobStatus.context) {
|
||||
try {
|
||||
console.log('Generating bill form data on-demand...');
|
||||
billForm = await generateBillFormData({
|
||||
processedData: jobStatus.data,
|
||||
billForm = await generateBillFormData({
|
||||
processedData: jobStatus.data,
|
||||
jobid: jobStatus.context.jobid,
|
||||
bodyshopid: jobStatus.context.bodyshopid,
|
||||
partsorderid: jobStatus.context.partsorderid,
|
||||
req: request // Now we have request context!
|
||||
});
|
||||
|
||||
|
||||
// Cache the billForm back to Redis for future requests
|
||||
await setTextractJob({
|
||||
redisPubClient,
|
||||
@@ -181,7 +159,6 @@ async function handleBillOcrStatus(request, response) {
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error generating bill form data:', error);
|
||||
response.status(500).send({
|
||||
status: 'COMPLETED',
|
||||
error: 'Data processed but failed to generate bill form',
|
||||
@@ -191,7 +168,7 @@ async function handleBillOcrStatus(request, response) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
response.status(200).send({
|
||||
status: 'COMPLETED',
|
||||
data: {
|
||||
@@ -211,9 +188,6 @@ async function handleBillOcrStatus(request, response) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Process a single-page document synchronously using AnalyzeExpenseCommand
|
||||
* @param {Buffer} pdfBuffer
|
||||
@@ -238,6 +212,7 @@ async function processSinglePageDocument(pdfBuffer) {
|
||||
|
||||
async function startTextractJob(pdfBuffer, context = {}) {
|
||||
// Upload PDF to S3 temporarily for Textract async processing
|
||||
const { bodyshopid, jobid } = context;
|
||||
const s3Bucket = process.env.AWS_AI_BUCKET;
|
||||
const snsTopicArn = process.env.AWS_TEXTRACT_SNS_TOPIC_ARN;
|
||||
const snsRoleArn = process.env.AWS_TEXTRACT_SNS_ROLE_ARN;
|
||||
@@ -253,7 +228,7 @@ async function startTextractJob(pdfBuffer, context = {}) {
|
||||
}
|
||||
|
||||
const uploadId = uuidv4();
|
||||
const s3Key = `textract-temp/${uploadId}.pdf`; //TODO Update Keys structure to something better.
|
||||
const s3Key = `textract-temp/${bodyshopid}/${jobid}/${uploadId}.pdf`; //TODO Update Keys structure to something better.
|
||||
|
||||
// Upload to S3
|
||||
const uploadCommand = new PutObjectCommand({
|
||||
@@ -319,7 +294,6 @@ async function processSQSMessages() {
|
||||
}
|
||||
|
||||
try {
|
||||
console.log('Polling SQS queue:', queueUrl);
|
||||
const receiveCommand = new ReceiveMessageCommand({
|
||||
QueueUrl: queueUrl,
|
||||
MaxNumberOfMessages: 10,
|
||||
@@ -328,13 +302,12 @@ async function processSQSMessages() {
|
||||
});
|
||||
|
||||
const result = await sqsClient.send(receiveCommand);
|
||||
console.log('SQS poll result:', result.Messages ? `${result.Messages.length} messages` : 'no messages');
|
||||
|
||||
if (result.Messages && result.Messages.length > 0) {
|
||||
console.log('Processing', result.Messages.length, 'messages from SQS');
|
||||
for (const message of result.Messages) {
|
||||
try {
|
||||
console.log("Processing message:", message);
|
||||
//TODO: Add environment level filtering here.
|
||||
await handleTextractNotification(message);
|
||||
|
||||
// Delete message after successful processing
|
||||
|
||||
Reference in New Issue
Block a user