IO-3515 Resolve issues with search selects not updating; improve confidence scoring.

This commit is contained in:
Patrick Fic
2026-02-19 12:22:35 -08:00
parent 5d53d09af9
commit ae1408012f
11 changed files with 410 additions and 26552 deletions

View File

@@ -62,27 +62,12 @@ async function handleBillOcr(request, response) {
// The uploaded file is available in request.file
const uploadedFile = request.file;
const { jobid, bodyshopid, partsorderid, skipTextract } = request.body;
if (skipTextract === 'true') {
console.log('Skipping Textract processing as per request');
response.status(200).send({
success: true,
status: 'COMPLETED',
data: await generateBillFormData({ processedData: null, jobid, bodyshopid, partsorderid, req: request }), //This is broken if the processedData is not overwritten in the function for testing.
message: 'Invoice processing completed'
});
return;
}
const { jobid, bodyshopid, partsorderid } = request.body;
try {
const fileType = getFileType(uploadedFile);
console.log(`Processing file type: ${fileType}`);
// Images are always processed synchronously (single page)
if (fileType === 'image') {
console.log('Image => 1 page, processing synchronously');
const processedData = await processSinglePageDocument(uploadedFile.buffer);
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
response.status(200).send({
@@ -94,11 +79,9 @@ async function handleBillOcr(request, response) {
} else if (fileType === 'pdf') {
// Check the number of pages in the PDF
const pageCount = await getPdfPageCount(uploadedFile.buffer);
console.log(`PDF has ${pageCount} page(s)`);
if (pageCount === 1) {
// Process synchronously for single-page documents
console.log('PDF => 1 page, processing synchronously');
const processedData = await processSinglePageDocument(uploadedFile.buffer);
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
//const billResult = await generateBillFormData({ result, });
@@ -110,12 +93,11 @@ async function handleBillOcr(request, response) {
});
} else {
// Start the Textract job (non-blocking) for multi-page documents
console.log('PDF => 2+ pages, processing asynchronously');
const jobInfo = await startTextractJob(uploadedFile.buffer, { jobid, bodyshopid, partsorderid });
response.status(202).send({
success: true,
jobId: jobInfo.jobId,
textractJobId: jobInfo.jobId,
message: 'Invoice processing started',
statusUrl: `/ai/bill-ocr/status/${jobInfo.jobId}`
});
@@ -136,17 +118,14 @@ async function handleBillOcr(request, response) {
}
async function handleBillOcrStatus(request, response) {
const { jobId: textractJobId } = request.params;
const { textractJobId } = request.params;
if (!textractJobId) {
console.log('No textractJobId found in params');
response.status(400).send({ error: 'Job ID is required' });
return;
}
console.log('Looking for job:', textractJobId);
const jobStatus = await getTextractJob({ redisPubClient, textractJobId });
console.log('Job status:', jobStatus);
if (!jobStatus) {
response.status(404).send({ error: 'Job not found' });
@@ -156,18 +135,17 @@ async function handleBillOcrStatus(request, response) {
if (jobStatus.status === 'COMPLETED') {
// Generate billForm on-demand if not already generated
let billForm = jobStatus.data?.billForm;
if (!billForm && jobStatus.context) {
try {
console.log('Generating bill form data on-demand...');
billForm = await generateBillFormData({
processedData: jobStatus.data,
billForm = await generateBillFormData({
processedData: jobStatus.data,
jobid: jobStatus.context.jobid,
bodyshopid: jobStatus.context.bodyshopid,
partsorderid: jobStatus.context.partsorderid,
req: request // Now we have request context!
});
// Cache the billForm back to Redis for future requests
await setTextractJob({
redisPubClient,
@@ -181,7 +159,6 @@ async function handleBillOcrStatus(request, response) {
}
});
} catch (error) {
console.error('Error generating bill form data:', error);
response.status(500).send({
status: 'COMPLETED',
error: 'Data processed but failed to generate bill form',
@@ -191,7 +168,7 @@ async function handleBillOcrStatus(request, response) {
return;
}
}
response.status(200).send({
status: 'COMPLETED',
data: {
@@ -211,9 +188,6 @@ async function handleBillOcrStatus(request, response) {
}
}
/**
* Process a single-page document synchronously using AnalyzeExpenseCommand
* @param {Buffer} pdfBuffer
@@ -238,6 +212,7 @@ async function processSinglePageDocument(pdfBuffer) {
async function startTextractJob(pdfBuffer, context = {}) {
// Upload PDF to S3 temporarily for Textract async processing
const { bodyshopid, jobid } = context;
const s3Bucket = process.env.AWS_AI_BUCKET;
const snsTopicArn = process.env.AWS_TEXTRACT_SNS_TOPIC_ARN;
const snsRoleArn = process.env.AWS_TEXTRACT_SNS_ROLE_ARN;
@@ -253,7 +228,7 @@ async function startTextractJob(pdfBuffer, context = {}) {
}
const uploadId = uuidv4();
const s3Key = `textract-temp/${uploadId}.pdf`; //TODO Update Keys structure to something better.
const s3Key = `textract-temp/${bodyshopid}/${jobid}/${uploadId}.pdf`; //TODO Update Keys structure to something better.
// Upload to S3
const uploadCommand = new PutObjectCommand({
@@ -319,7 +294,6 @@ async function processSQSMessages() {
}
try {
console.log('Polling SQS queue:', queueUrl);
const receiveCommand = new ReceiveMessageCommand({
QueueUrl: queueUrl,
MaxNumberOfMessages: 10,
@@ -328,13 +302,12 @@ async function processSQSMessages() {
});
const result = await sqsClient.send(receiveCommand);
console.log('SQS poll result:', result.Messages ? `${result.Messages.length} messages` : 'no messages');
if (result.Messages && result.Messages.length > 0) {
console.log('Processing', result.Messages.length, 'messages from SQS');
for (const message of result.Messages) {
try {
console.log("Processing message:", message);
//TODO: Add environment level filtering here.
await handleTextractNotification(message);
// Delete message after successful processing