IO-3515 WIP - bulk calls functioning. Further refinement required.
This commit is contained in:
@@ -441,8 +441,11 @@ const main = async () => {
|
|||||||
await server.listen(port);
|
await server.listen(port);
|
||||||
logger.log(`Server started on port ${port}`, "INFO", "api");
|
logger.log(`Server started on port ${port}`, "INFO", "api");
|
||||||
|
|
||||||
|
// Initialize bill-ocr with Redis client
|
||||||
|
const { initializeBillOcr, startSQSPolling } = require("./server/ai/bill-ocr/bill-ocr");
|
||||||
|
initializeBillOcr(pubClient);
|
||||||
|
|
||||||
// Start SQS polling for Textract notifications
|
// Start SQS polling for Textract notifications
|
||||||
const { startSQSPolling } = require("./server/ai/bill-ocr/bill-ocr");
|
|
||||||
startSQSPolling();
|
startSQSPolling();
|
||||||
logger.log(`Started SQS polling for Textract notifications`, "INFO", "api");
|
logger.log(`Started SQS polling for Textract notifications`, "INFO", "api");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
@@ -16,8 +16,76 @@ const textractClient = new TextractClient(awsConfig);
|
|||||||
const s3Client = new S3Client(awsConfig);
|
const s3Client = new S3Client(awsConfig);
|
||||||
const sqsClient = new SQSClient(awsConfig);
|
const sqsClient = new SQSClient(awsConfig);
|
||||||
|
|
||||||
// In-memory job storage (consider using Redis or a database for production)
|
let redisPubClient = null;
|
||||||
const jobStore = new Map();
|
const TEXTRACT_JOB_TTL = 3600;
|
||||||
|
|
||||||
|
/**
 * Initialize the bill-ocr module with the Redis client used for Textract job storage.
 *
 * Saves the client in the module-level `redisPubClient` variable. The Redis-backed
 * job helpers in this module throw "Redis client not initialized" until this has
 * been called with a usable client.
 *
 * @param {Object} pubClient - Redis cluster client
 * @returns {void}
 */
function initializeBillOcr(pubClient) {
  redisPubClient = pubClient;
}
|
||||||
|
|
||||||
|
/**
 * Generate the Redis key for a Textract job from its Textract job ID.
 *
 * The key has the form `textract:job:<textractJobId>`.
 *
 * @param {string} textractJobId - Job ID to embed in the key
 * @returns {string} Redis key for the job
 */
function getTextractJobKey(textractJobId) {
  return `textract:job:${textractJobId}`;
}
|
||||||
|
|
||||||
|
/**
 * Store Textract job data in Redis.
 *
 * The data is serialized with `JSON.stringify` and written under the key produced
 * by `getTextractJobKey`. The key's expiry is then set to `TEXTRACT_JOB_TTL`
 * seconds, so every call restarts the TTL for that job.
 *
 * @param {string} textractJobId - Textract job ID used to build the Redis key
 * @param {Object} jobData - JSON-serializable job record
 * @returns {Promise<void>}
 * @throws {Error} If the Redis client has not been set via `initializeBillOcr`
 */
async function setTextractJob(textractJobId, jobData) {
  if (!redisPubClient) {
    throw new Error('Redis client not initialized. Call initializeBillOcr first.');
  }

  const key = getTextractJobKey(textractJobId);

  // NOTE(review): SET and EXPIRE are issued as two separate commands. If the
  // second call fails, the key is left without a TTL. Consider a single SET
  // with an expiry option once the client library in use is confirmed.
  await redisPubClient.set(key, JSON.stringify(jobData));
  await redisPubClient.expire(key, TEXTRACT_JOB_TTL);
}
|
||||||
|
|
||||||
|
/**
 * Retrieve Textract job data from Redis.
 *
 * Reads the value stored under the key produced by `getTextractJobKey` and parses
 * it as JSON. A falsy reply from Redis (for example, a missing key) yields `null`.
 *
 * @param {string} textractJobId - Textract job ID used to build the Redis key
 * @returns {Promise<Object|null>} Parsed job record, or `null` when nothing is stored
 * @throws {Error} If the Redis client has not been set via `initializeBillOcr`
 */
async function getTextractJob(textractJobId) {
  if (!redisPubClient) {
    throw new Error('Redis client not initialized. Call initializeBillOcr first.');
  }

  const serialized = await redisPubClient.get(getTextractJobKey(textractJobId));
  if (!serialized) {
    return null;
  }
  return JSON.parse(serialized);
}
|
||||||
|
|
||||||
|
/**
 * Check whether a job is stored in Redis for the given Textract job ID.
 *
 * Calls `exists` on the key produced by `getTextractJobKey`; any truthy reply is
 * treated as "present". Progress is written to the console at each step.
 *
 * @param {string} textractJobId - Textract job ID used to build the Redis key
 * @returns {Promise<boolean>} `true` when the key exists, otherwise `false`
 * @throws {Error} If the Redis client has not been set via `initializeBillOcr`
 */
async function jobExists(textractJobId) {
  if (!redisPubClient) {
    throw new Error('Redis client not initialized. Call initializeBillOcr first.');
  }

  console.log('Checking if job exists for Textract job ID:', textractJobId);
  const found = await redisPubClient.exists(getTextractJobKey(textractJobId));

  if (!found) {
    console.log('No matching job found in Redis');
    return false;
  }

  console.log(`Job found: ${textractJobId}`);
  return true;
}
|
||||||
|
|
||||||
async function handleBillOcr(request, response) {
|
async function handleBillOcr(request, response) {
|
||||||
// Check if file was uploaded
|
// Check if file was uploaded
|
||||||
@@ -37,7 +105,7 @@ async function handleBillOcr(request, response) {
|
|||||||
success: true,
|
success: true,
|
||||||
jobId: jobInfo.jobId,
|
jobId: jobInfo.jobId,
|
||||||
message: 'Invoice processing started',
|
message: 'Invoice processing started',
|
||||||
statusUrl: `/api/bill-ocr/status/${jobInfo.jobId}`
|
statusUrl: `/ai/bill-ocr/status/${jobInfo.jobId}`
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error starting invoice processing:', error);
|
console.error('Error starting invoice processing:', error);
|
||||||
@@ -53,16 +121,21 @@ async function handleBillOcrStatus(request, response) {
|
|||||||
console.log('request.params:', request.params);
|
console.log('request.params:', request.params);
|
||||||
console.log('request.query:', request.query);
|
console.log('request.query:', request.query);
|
||||||
|
|
||||||
const { jobId } = request.params;
|
|
||||||
|
|
||||||
if (!jobId) {
|
|
||||||
console.log('No jobId found in params');
|
|
||||||
|
const { jobId: textractJobId } = request.params;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (!textractJobId) {
|
||||||
|
console.log('No textractJobId found in params');
|
||||||
response.status(400).send({ error: 'Job ID is required' });
|
response.status(400).send({ error: 'Job ID is required' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Looking for job:', jobId);
|
console.log('Looking for job:', textractJobId);
|
||||||
const jobStatus = jobStore.get(jobId);
|
const jobStatus = await getTextractJob(textractJobId);
|
||||||
console.log('Job status:', jobStatus);
|
console.log('Job status:', jobStatus);
|
||||||
|
|
||||||
if (!jobStatus) {
|
if (!jobStatus) {
|
||||||
@@ -103,8 +176,8 @@ async function startTextractJob(pdfBuffer) {
|
|||||||
throw new Error('AWS_TEXTRACT_SNS_ROLE_ARN environment variable is required');
|
throw new Error('AWS_TEXTRACT_SNS_ROLE_ARN environment variable is required');
|
||||||
}
|
}
|
||||||
|
|
||||||
const jobId = uuidv4();
|
const uploadId = uuidv4();
|
||||||
const s3Key = `textract-temp/${jobId}.pdf`;
|
const s3Key = `textract-temp/${uploadId}.pdf`;
|
||||||
|
|
||||||
// Upload to S3
|
// Upload to S3
|
||||||
const uploadCommand = new PutObjectCommand({
|
const uploadCommand = new PutObjectCommand({
|
||||||
@@ -123,30 +196,26 @@ async function startTextractJob(pdfBuffer) {
|
|||||||
Name: s3Key
|
Name: s3Key
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
OutputConfig: {
|
|
||||||
S3Bucket: s3Bucket,
|
|
||||||
S3Prefix: `textract-output/${jobId}/`
|
|
||||||
},
|
|
||||||
NotificationChannel: {
|
NotificationChannel: {
|
||||||
SNSTopicArn: snsTopicArn,
|
SNSTopicArn: snsTopicArn,
|
||||||
RoleArn: snsRoleArn
|
RoleArn: snsRoleArn
|
||||||
},
|
},
|
||||||
ClientRequestToken: jobId
|
ClientRequestToken: uploadId
|
||||||
});
|
});
|
||||||
|
|
||||||
const startResult = await textractClient.send(startCommand);
|
const startResult = await textractClient.send(startCommand);
|
||||||
|
const textractJobId = startResult.JobId;
|
||||||
|
|
||||||
// Store job info
|
// Store job info in Redis using textractJobId as the key
|
||||||
jobStore.set(jobId, {
|
await setTextractJob(textractJobId, {
|
||||||
status: 'IN_PROGRESS',
|
status: 'IN_PROGRESS',
|
||||||
textractJobId: startResult.JobId,
|
|
||||||
s3Key: s3Key,
|
s3Key: s3Key,
|
||||||
|
uploadId: uploadId,
|
||||||
startedAt: new Date().toISOString()
|
startedAt: new Date().toISOString()
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
jobId: jobId,
|
jobId: textractJobId
|
||||||
textractJobId: startResult.JobId
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,40 +265,48 @@ async function processSQSMessages() {
|
|||||||
|
|
||||||
async function handleTextractNotification(message) {
|
async function handleTextractNotification(message) {
|
||||||
const body = JSON.parse(message.Body);
|
const body = JSON.parse(message.Body);
|
||||||
const snsMessage = JSON.parse(body.Message);
|
let snsMessage
|
||||||
|
try {
|
||||||
|
|
||||||
|
snsMessage = JSON.parse(body.Message);
|
||||||
|
} catch (error) {
|
||||||
|
//Delete the message so it doesn't clog the queue
|
||||||
|
const deleteCommand = new DeleteMessageCommand({
|
||||||
|
QueueUrl: process.env.AWS_TEXTRACT_SQS_QUEUE_URL,
|
||||||
|
ReceiptHandle: message.ReceiptHandle
|
||||||
|
});
|
||||||
|
await sqsClient.send(deleteCommand);
|
||||||
|
console.error('Error parsing SNS message:', error);
|
||||||
|
console.log('Message Deleted:', body);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const textractJobId = snsMessage.JobId;
|
const textractJobId = snsMessage.JobId;
|
||||||
const status = snsMessage.Status;
|
const status = snsMessage.Status;
|
||||||
|
|
||||||
// Find our job by Textract job ID
|
// Check if job exists in Redis
|
||||||
let ourJobId = null;
|
const exists = await jobExists(textractJobId);
|
||||||
for (const [key, value] of jobStore.entries()) {
|
|
||||||
if (value.textractJobId === textractJobId) {
|
|
||||||
ourJobId = key;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ourJobId) {
|
if (!exists) {
|
||||||
console.warn(`Job not found for Textract job ID: ${textractJobId}`);
|
console.warn(`Job not found for Textract job ID: ${textractJobId}`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const jobInfo = jobStore.get(ourJobId);
|
const jobInfo = await getTextractJob(textractJobId);
|
||||||
|
|
||||||
if (status === 'SUCCEEDED') {
|
if (status === 'SUCCEEDED') {
|
||||||
// Retrieve the results
|
// Retrieve the results
|
||||||
const invoiceData = await retrieveTextractResults(textractJobId);
|
const invoiceData = await retrieveTextractResults(textractJobId);
|
||||||
const processedData = processScanData(invoiceData);
|
const processedData = processScanData(invoiceData);
|
||||||
|
|
||||||
jobStore.set(ourJobId, {
|
await setTextractJob(textractJobId, {
|
||||||
...jobInfo,
|
...jobInfo,
|
||||||
status: 'COMPLETED',
|
status: 'COMPLETED',
|
||||||
data: processedData,
|
data: processedData,
|
||||||
completedAt: new Date().toISOString()
|
completedAt: new Date().toISOString()
|
||||||
});
|
});
|
||||||
} else if (status === 'FAILED') {
|
} else if (status === 'FAILED') {
|
||||||
jobStore.set(ourJobId, {
|
await setTextractJob(textractJobId, {
|
||||||
...jobInfo,
|
...jobInfo,
|
||||||
status: 'FAILED',
|
status: 'FAILED',
|
||||||
error: snsMessage.StatusMessage || 'Textract job failed',
|
error: snsMessage.StatusMessage || 'Textract job failed',
|
||||||
@@ -268,7 +345,7 @@ function startSQSPolling() {
|
|||||||
processSQSMessages().catch(error => {
|
processSQSMessages().catch(error => {
|
||||||
console.error('SQS polling error:', error);
|
console.error('SQS polling error:', error);
|
||||||
});
|
});
|
||||||
}, 5000); // Poll every 5 seconds
|
}, 10000); // Poll every 10 seconds
|
||||||
|
|
||||||
return pollInterval;
|
return pollInterval;
|
||||||
}
|
}
|
||||||
@@ -339,7 +416,7 @@ function extractInvoiceData(textractResponse) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function normalizeFieldName(fieldType) {
|
function normalizeFieldName(fieldType) {
|
||||||
// Convert Textract field types to more readable names
|
//Placeholder normalization for now.
|
||||||
const fieldMap = {
|
const fieldMap = {
|
||||||
'ITEM': 'description',
|
'ITEM': 'description',
|
||||||
'QUANTITY': 'quantity',
|
'QUANTITY': 'quantity',
|
||||||
@@ -398,6 +475,7 @@ function processScanData(invoiceData) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
initializeBillOcr,
|
||||||
handleBillOcr,
|
handleBillOcr,
|
||||||
handleBillOcrStatus,
|
handleBillOcrStatus,
|
||||||
startSQSPolling
|
startSQSPolling
|
||||||
|
|||||||
Reference in New Issue
Block a user