IO-3515 Additional packages and initial route &n simple queue polling.
This commit is contained in:
141
package-lock.json
generated
141
package-lock.json
generated
@@ -14,6 +14,8 @@
|
|||||||
"@aws-sdk/client-s3": "^3.974.0",
|
"@aws-sdk/client-s3": "^3.974.0",
|
||||||
"@aws-sdk/client-secrets-manager": "^3.974.0",
|
"@aws-sdk/client-secrets-manager": "^3.974.0",
|
||||||
"@aws-sdk/client-ses": "^3.974.0",
|
"@aws-sdk/client-ses": "^3.974.0",
|
||||||
|
"@aws-sdk/client-sqs": "^3.975.0",
|
||||||
|
"@aws-sdk/client-textract": "^3.975.0",
|
||||||
"@aws-sdk/credential-provider-node": "^3.972.1",
|
"@aws-sdk/credential-provider-node": "^3.972.1",
|
||||||
"@aws-sdk/lib-storage": "^3.974.0",
|
"@aws-sdk/lib-storage": "^3.974.0",
|
||||||
"@aws-sdk/s3-request-presigner": "^3.974.0",
|
"@aws-sdk/s3-request-presigner": "^3.974.0",
|
||||||
@@ -558,6 +560,58 @@
|
|||||||
"node": ">=20.0.0"
|
"node": ">=20.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@aws-sdk/client-sqs": {
|
||||||
|
"version": "3.975.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@aws-sdk/client-sqs/-/client-sqs-3.975.0.tgz",
|
||||||
|
"integrity": "sha512-6KS8T24LkEp2QZ/529SKVHQTgvCDUWXE8NtrILUBxZ9e3BiprjC9JSEdMqgh82BUD8s8yv4nnoa4Faiz7lRFpw==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@aws-crypto/sha256-browser": "5.2.0",
|
||||||
|
"@aws-crypto/sha256-js": "5.2.0",
|
||||||
|
"@aws-sdk/core": "^3.973.1",
|
||||||
|
"@aws-sdk/credential-provider-node": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-host-header": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-logger": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-recursion-detection": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-sdk-sqs": "^3.972.2",
|
||||||
|
"@aws-sdk/middleware-user-agent": "^3.972.2",
|
||||||
|
"@aws-sdk/region-config-resolver": "^3.972.1",
|
||||||
|
"@aws-sdk/types": "^3.973.0",
|
||||||
|
"@aws-sdk/util-endpoints": "3.972.0",
|
||||||
|
"@aws-sdk/util-user-agent-browser": "^3.972.1",
|
||||||
|
"@aws-sdk/util-user-agent-node": "^3.972.1",
|
||||||
|
"@smithy/config-resolver": "^4.4.6",
|
||||||
|
"@smithy/core": "^3.21.1",
|
||||||
|
"@smithy/fetch-http-handler": "^5.3.9",
|
||||||
|
"@smithy/hash-node": "^4.2.8",
|
||||||
|
"@smithy/invalid-dependency": "^4.2.8",
|
||||||
|
"@smithy/md5-js": "^4.2.8",
|
||||||
|
"@smithy/middleware-content-length": "^4.2.8",
|
||||||
|
"@smithy/middleware-endpoint": "^4.4.11",
|
||||||
|
"@smithy/middleware-retry": "^4.4.27",
|
||||||
|
"@smithy/middleware-serde": "^4.2.9",
|
||||||
|
"@smithy/middleware-stack": "^4.2.8",
|
||||||
|
"@smithy/node-config-provider": "^4.3.8",
|
||||||
|
"@smithy/node-http-handler": "^4.4.8",
|
||||||
|
"@smithy/protocol-http": "^5.3.8",
|
||||||
|
"@smithy/smithy-client": "^4.10.12",
|
||||||
|
"@smithy/types": "^4.12.0",
|
||||||
|
"@smithy/url-parser": "^4.2.8",
|
||||||
|
"@smithy/util-base64": "^4.3.0",
|
||||||
|
"@smithy/util-body-length-browser": "^4.2.0",
|
||||||
|
"@smithy/util-body-length-node": "^4.2.1",
|
||||||
|
"@smithy/util-defaults-mode-browser": "^4.3.26",
|
||||||
|
"@smithy/util-defaults-mode-node": "^4.2.29",
|
||||||
|
"@smithy/util-endpoints": "^3.2.8",
|
||||||
|
"@smithy/util-middleware": "^4.2.8",
|
||||||
|
"@smithy/util-retry": "^4.2.8",
|
||||||
|
"@smithy/util-utf8": "^4.2.0",
|
||||||
|
"tslib": "^2.6.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@aws-sdk/client-sso": {
|
"node_modules/@aws-sdk/client-sso": {
|
||||||
"version": "3.974.0",
|
"version": "3.974.0",
|
||||||
"resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.974.0.tgz",
|
"resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.974.0.tgz",
|
||||||
@@ -607,20 +661,70 @@
|
|||||||
"node": ">=20.0.0"
|
"node": ">=20.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@aws-sdk/client-textract": {
|
||||||
|
"version": "3.975.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@aws-sdk/client-textract/-/client-textract-3.975.0.tgz",
|
||||||
|
"integrity": "sha512-7Br+8Zkqm1UYf4poSynK8srQOj9iHi3HxsGDjfj+9D5Vg8USxQTugKZ9W70LFwKTPmreLhkPHpfOQWCdj3V2dA==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@aws-crypto/sha256-browser": "5.2.0",
|
||||||
|
"@aws-crypto/sha256-js": "5.2.0",
|
||||||
|
"@aws-sdk/core": "^3.973.1",
|
||||||
|
"@aws-sdk/credential-provider-node": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-host-header": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-logger": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-recursion-detection": "^3.972.1",
|
||||||
|
"@aws-sdk/middleware-user-agent": "^3.972.2",
|
||||||
|
"@aws-sdk/region-config-resolver": "^3.972.1",
|
||||||
|
"@aws-sdk/types": "^3.973.0",
|
||||||
|
"@aws-sdk/util-endpoints": "3.972.0",
|
||||||
|
"@aws-sdk/util-user-agent-browser": "^3.972.1",
|
||||||
|
"@aws-sdk/util-user-agent-node": "^3.972.1",
|
||||||
|
"@smithy/config-resolver": "^4.4.6",
|
||||||
|
"@smithy/core": "^3.21.1",
|
||||||
|
"@smithy/fetch-http-handler": "^5.3.9",
|
||||||
|
"@smithy/hash-node": "^4.2.8",
|
||||||
|
"@smithy/invalid-dependency": "^4.2.8",
|
||||||
|
"@smithy/middleware-content-length": "^4.2.8",
|
||||||
|
"@smithy/middleware-endpoint": "^4.4.11",
|
||||||
|
"@smithy/middleware-retry": "^4.4.27",
|
||||||
|
"@smithy/middleware-serde": "^4.2.9",
|
||||||
|
"@smithy/middleware-stack": "^4.2.8",
|
||||||
|
"@smithy/node-config-provider": "^4.3.8",
|
||||||
|
"@smithy/node-http-handler": "^4.4.8",
|
||||||
|
"@smithy/protocol-http": "^5.3.8",
|
||||||
|
"@smithy/smithy-client": "^4.10.12",
|
||||||
|
"@smithy/types": "^4.12.0",
|
||||||
|
"@smithy/url-parser": "^4.2.8",
|
||||||
|
"@smithy/util-base64": "^4.3.0",
|
||||||
|
"@smithy/util-body-length-browser": "^4.2.0",
|
||||||
|
"@smithy/util-body-length-node": "^4.2.1",
|
||||||
|
"@smithy/util-defaults-mode-browser": "^4.3.26",
|
||||||
|
"@smithy/util-defaults-mode-node": "^4.2.29",
|
||||||
|
"@smithy/util-endpoints": "^3.2.8",
|
||||||
|
"@smithy/util-middleware": "^4.2.8",
|
||||||
|
"@smithy/util-retry": "^4.2.8",
|
||||||
|
"@smithy/util-utf8": "^4.2.0",
|
||||||
|
"tslib": "^2.6.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@aws-sdk/core": {
|
"node_modules/@aws-sdk/core": {
|
||||||
"version": "3.973.0",
|
"version": "3.973.1",
|
||||||
"resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.973.0.tgz",
|
"resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.973.1.tgz",
|
||||||
"integrity": "sha512-qy3Fmt8z4PRInM3ZqJmHihQ2tfCdj/MzbGaZpuHjYjgl1/Gcar4Pyp/zzHXh9hGEb61WNbWgsJcDUhnGIiX1TA==",
|
"integrity": "sha512-Ocubx42QsMyVs9ANSmFpRm0S+hubWljpPLjOi9UFrtcnVJjrVJTzQ51sN0e5g4e8i8QZ7uY73zosLmgYL7kZTQ==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/types": "^3.973.0",
|
"@aws-sdk/types": "^3.973.0",
|
||||||
"@aws-sdk/xml-builder": "^3.972.1",
|
"@aws-sdk/xml-builder": "^3.972.1",
|
||||||
"@smithy/core": "^3.21.0",
|
"@smithy/core": "^3.21.1",
|
||||||
"@smithy/node-config-provider": "^4.3.8",
|
"@smithy/node-config-provider": "^4.3.8",
|
||||||
"@smithy/property-provider": "^4.2.8",
|
"@smithy/property-provider": "^4.2.8",
|
||||||
"@smithy/protocol-http": "^5.3.8",
|
"@smithy/protocol-http": "^5.3.8",
|
||||||
"@smithy/signature-v4": "^5.3.8",
|
"@smithy/signature-v4": "^5.3.8",
|
||||||
"@smithy/smithy-client": "^4.10.11",
|
"@smithy/smithy-client": "^4.10.12",
|
||||||
"@smithy/types": "^4.12.0",
|
"@smithy/types": "^4.12.0",
|
||||||
"@smithy/util-base64": "^4.3.0",
|
"@smithy/util-base64": "^4.3.0",
|
||||||
"@smithy/util-middleware": "^4.2.8",
|
"@smithy/util-middleware": "^4.2.8",
|
||||||
@@ -965,6 +1069,23 @@
|
|||||||
"node": ">=20.0.0"
|
"node": ">=20.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@aws-sdk/middleware-sdk-sqs": {
|
||||||
|
"version": "3.972.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-sqs/-/middleware-sdk-sqs-3.972.2.tgz",
|
||||||
|
"integrity": "sha512-LPKsfqdoei7kBJo7JqGKbIM05W0bbcnJNfFtoOPgjXOJa7OpEs0pYj5OHiqbykgUFzkygD22f9sBmEfZkFoZ0g==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@aws-sdk/types": "^3.973.0",
|
||||||
|
"@smithy/smithy-client": "^4.10.12",
|
||||||
|
"@smithy/types": "^4.12.0",
|
||||||
|
"@smithy/util-hex-encoding": "^4.2.0",
|
||||||
|
"@smithy/util-utf8": "^4.2.0",
|
||||||
|
"tslib": "^2.6.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@aws-sdk/middleware-ssec": {
|
"node_modules/@aws-sdk/middleware-ssec": {
|
||||||
"version": "3.972.1",
|
"version": "3.972.1",
|
||||||
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-ssec/-/middleware-ssec-3.972.1.tgz",
|
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-ssec/-/middleware-ssec-3.972.1.tgz",
|
||||||
@@ -980,15 +1101,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@aws-sdk/middleware-user-agent": {
|
"node_modules/@aws-sdk/middleware-user-agent": {
|
||||||
"version": "3.972.1",
|
"version": "3.972.2",
|
||||||
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.972.1.tgz",
|
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.972.2.tgz",
|
||||||
"integrity": "sha512-6SVg4pY/9Oq9MLzO48xuM3lsOb8Rxg55qprEtFRpkUmuvKij31f5SQHEGxuiZ4RqIKrfjr2WMuIgXvqJ0eJsPA==",
|
"integrity": "sha512-d+Exq074wy0X6wvShg/kmZVtkah+28vMuqCtuY3cydg8LUZOJBtbAolCpEJizSyb8mJJZF9BjWaTANXL4OYnkg==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/core": "^3.973.0",
|
"@aws-sdk/core": "^3.973.1",
|
||||||
"@aws-sdk/types": "^3.973.0",
|
"@aws-sdk/types": "^3.973.0",
|
||||||
"@aws-sdk/util-endpoints": "3.972.0",
|
"@aws-sdk/util-endpoints": "3.972.0",
|
||||||
"@smithy/core": "^3.21.0",
|
"@smithy/core": "^3.21.1",
|
||||||
"@smithy/protocol-http": "^5.3.8",
|
"@smithy/protocol-http": "^5.3.8",
|
||||||
"@smithy/types": "^4.12.0",
|
"@smithy/types": "^4.12.0",
|
||||||
"tslib": "^2.6.2"
|
"tslib": "^2.6.2"
|
||||||
|
|||||||
@@ -23,6 +23,8 @@
|
|||||||
"@aws-sdk/client-s3": "^3.974.0",
|
"@aws-sdk/client-s3": "^3.974.0",
|
||||||
"@aws-sdk/client-secrets-manager": "^3.974.0",
|
"@aws-sdk/client-secrets-manager": "^3.974.0",
|
||||||
"@aws-sdk/client-ses": "^3.974.0",
|
"@aws-sdk/client-ses": "^3.974.0",
|
||||||
|
"@aws-sdk/client-sqs": "^3.975.0",
|
||||||
|
"@aws-sdk/client-textract": "^3.975.0",
|
||||||
"@aws-sdk/credential-provider-node": "^3.972.1",
|
"@aws-sdk/credential-provider-node": "^3.972.1",
|
||||||
"@aws-sdk/lib-storage": "^3.974.0",
|
"@aws-sdk/lib-storage": "^3.974.0",
|
||||||
"@aws-sdk/s3-request-presigner": "^3.974.0",
|
"@aws-sdk/s3-request-presigner": "^3.974.0",
|
||||||
|
|||||||
@@ -125,6 +125,8 @@ const applyRoutes = ({ app }) => {
|
|||||||
app.use("/payroll", require("./server/routes/payrollRoutes"));
|
app.use("/payroll", require("./server/routes/payrollRoutes"));
|
||||||
app.use("/sso", require("./server/routes/ssoRoutes"));
|
app.use("/sso", require("./server/routes/ssoRoutes"));
|
||||||
app.use("/integrations", require("./server/routes/intergrationRoutes"));
|
app.use("/integrations", require("./server/routes/intergrationRoutes"));
|
||||||
|
app.use("/ai", require("./server/routes/aiRoutes"));
|
||||||
|
|
||||||
|
|
||||||
// Default route for forbidden access
|
// Default route for forbidden access
|
||||||
app.get("/", (req, res) => {
|
app.get("/", (req, res) => {
|
||||||
@@ -438,6 +440,11 @@ const main = async () => {
|
|||||||
try {
|
try {
|
||||||
await server.listen(port);
|
await server.listen(port);
|
||||||
logger.log(`Server started on port ${port}`, "INFO", "api");
|
logger.log(`Server started on port ${port}`, "INFO", "api");
|
||||||
|
|
||||||
|
// Start SQS polling for Textract notifications
|
||||||
|
const { startSQSPolling } = require("./server/ai/bill-ocr/bill-ocr");
|
||||||
|
startSQSPolling();
|
||||||
|
logger.log(`Started SQS polling for Textract notifications`, "INFO", "api");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.log(`Server failed to start on port ${port}`, "ERROR", "api", null, { error: error.message });
|
logger.log(`Server failed to start on port ${port}`, "ERROR", "api", null, { error: error.message });
|
||||||
}
|
}
|
||||||
|
|||||||
7
server/ai/bill-ocr/bill-ocr-readme.md
Normal file
7
server/ai/bill-ocr/bill-ocr-readme.md
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
Required Infrastructure setup
|
||||||
|
1. Create an AI user that has access to the required S3 buckets and textract permissions.
|
||||||
|
2. Had to create a queue and an SNS topic. Also had to create the role that has `sns:Publish`, and had to add `sqs:ReceiveMessage` and `sqs:DeleteMessage` to the profile.
|
||||||
|
3. Created 2 roles for SNS. The Textract role is the right one; the other was created manually based on incorrect instructions.
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
* Create a rome bucket for uploads, or move to the regular spot.
|
||||||
404
server/ai/bill-ocr/bill-ocr.js
Normal file
404
server/ai/bill-ocr/bill-ocr.js
Normal file
@@ -0,0 +1,404 @@
|
|||||||
|
const { TextractClient, StartExpenseAnalysisCommand, GetExpenseAnalysisCommand } = require("@aws-sdk/client-textract");
|
||||||
|
const { S3Client, PutObjectCommand } = require("@aws-sdk/client-s3");
|
||||||
|
const { SQSClient, ReceiveMessageCommand, DeleteMessageCommand } = require("@aws-sdk/client-sqs");
|
||||||
|
const { v4: uuidv4 } = require('uuid');
|
||||||
|
|
||||||
|
// Initialize AWS clients
|
||||||
|
// Static credentials read from the AWS_AI_* environment variables.
const aiCredentials = {
  accessKeyId: process.env.AWS_AI_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_AI_SECRET_ACCESS_KEY
};

// Configuration shared by every client below; the region falls back to
// ca-central-1 when AWS_AI_REGION is not set.
const awsConfig = {
  region: process.env.AWS_AI_REGION || "ca-central-1",
  credentials: aiCredentials
};

// One client per AWS service used by the bill OCR flow.
const textractClient = new TextractClient(awsConfig);
const s3Client = new S3Client(awsConfig);
const sqsClient = new SQSClient(awsConfig);

// In-memory job storage keyed by our own job ID
// (consider using Redis or a database for production).
const jobStore = new Map();
|
||||||
|
|
||||||
|
/**
 * Express handler that accepts an uploaded bill scan and starts OCR on it.
 *
 * Responds with:
 *  - 400 when the request carries no uploaded file,
 *  - 202 with the job ID and a status URL once the Textract job has been started,
 *  - 500 when starting the job throws.
 *
 * @param {object} request - Express request; the upload is read from `request.file`.
 * @param {object} response - Express response used to send the JSON reply.
 * @returns {Promise<void>}
 */
async function handleBillOcr(request, response) {
  // The uploaded file is made available on request.file by the upload middleware.
  const uploadedFile = request.file;
  if (!uploadedFile) {
    response.status(400).send({ error: 'No file uploaded' });
    return;
  }

  try {
    // Only starts the Textract job; the result is picked up later and exposed
    // through the status endpoint.
    const jobInfo = await startTextractJob(uploadedFile.buffer);

    // Derive the status URL from the path this router is mounted on, so it
    // always matches the status route registered next to this handler instead
    // of a hard-coded prefix that can drift from the real mount point.
    const mountPath = request.baseUrl || '';

    response.status(202).send({
      success: true,
      jobId: jobInfo.jobId,
      message: 'Invoice processing started',
      statusUrl: `${mountPath}/bill-ocr/status/${jobInfo.jobId}`
    });
  } catch (error) {
    console.error('Error starting invoice processing:', error);
    response.status(500).send({
      error: 'Failed to start invoice processing',
      message: error.message
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Express handler that reports the state of a previously started OCR job.
 *
 * Looks the job up in the in-memory job store by the `jobId` route parameter and
 * responds with 400 (no job ID), 404 (unknown job), 200 + data (COMPLETED),
 * 500 + error (FAILED), or 200 + the current status for anything else.
 *
 * @param {object} request - Express request; reads `request.params.jobId`.
 * @param {object} response - Express response used to send the JSON reply.
 * @returns {Promise<void>}
 */
async function handleBillOcrStatus(request, response) {
  console.log('handleBillOcrStatus called');
  console.log('request.params:', request.params);
  console.log('request.query:', request.query);

  const jobId = request.params.jobId;
  if (!jobId) {
    console.log('No jobId found in params');
    response.status(400).send({ error: 'Job ID is required' });
    return;
  }

  console.log('Looking for job:', jobId);
  const job = jobStore.get(jobId);
  console.log('Job status:', job);

  if (!job) {
    response.status(404).send({ error: 'Job not found' });
    return;
  }

  switch (job.status) {
    case 'COMPLETED':
      response.status(200).send({ status: 'COMPLETED', data: job.data });
      break;
    case 'FAILED':
      response.status(500).send({ status: 'FAILED', error: job.error });
      break;
    default:
      // Still running (or any other state): report the raw status only.
      response.status(200).send({ status: job.status });
  }
}
|
||||||
|
|
||||||
|
/**
 * Uploads the document to S3 and starts an asynchronous Textract expense analysis
 * whose completion is announced on the configured SNS topic.
 *
 * The new job is recorded in the in-memory job store as IN_PROGRESS.
 *
 * @param {Buffer} pdfBuffer - Raw bytes of the uploaded document.
 * @returns {Promise<{jobId: string, textractJobId: string}>} Our job ID and Textract's job ID.
 * @throws {Error} When AWS_AI_BUCKET, AWS_TEXTRACT_SNS_TOPIC_ARN or
 *   AWS_TEXTRACT_SNS_ROLE_ARN is not set, or when an AWS call fails.
 */
async function startTextractJob(pdfBuffer) {
  const s3Bucket = process.env.AWS_AI_BUCKET;
  const snsTopicArn = process.env.AWS_TEXTRACT_SNS_TOPIC_ARN;
  const snsRoleArn = process.env.AWS_TEXTRACT_SNS_ROLE_ARN;

  // Fail fast, in a fixed order, on the first missing setting.
  const requiredSettings = [
    ['AWS_AI_BUCKET', s3Bucket],
    ['AWS_TEXTRACT_SNS_TOPIC_ARN', snsTopicArn],
    ['AWS_TEXTRACT_SNS_ROLE_ARN', snsRoleArn]
  ];
  for (const [name, value] of requiredSettings) {
    if (!value) {
      throw new Error(`${name} environment variable is required`);
    }
  }

  const jobId = uuidv4();
  const s3Key = `textract-temp/${jobId}.pdf`;

  // Textract's async API reads its input from S3, so upload the document first.
  await s3Client.send(
    new PutObjectCommand({
      Bucket: s3Bucket,
      Key: s3Key,
      Body: pdfBuffer,
      ContentType: 'application/pdf'
    })
  );

  // Start the async expense analysis with SNS notification on completion.
  const startResult = await textractClient.send(
    new StartExpenseAnalysisCommand({
      DocumentLocation: {
        S3Object: { Bucket: s3Bucket, Name: s3Key }
      },
      OutputConfig: {
        S3Bucket: s3Bucket,
        S3Prefix: `textract-output/${jobId}/`
      },
      NotificationChannel: {
        SNSTopicArn: snsTopicArn,
        RoleArn: snsRoleArn
      },
      ClientRequestToken: jobId
    })
  );

  const textractJobId = startResult.JobId;

  // Remember the job so the completion notification can be matched back to it.
  jobStore.set(jobId, {
    status: 'IN_PROGRESS',
    textractJobId,
    s3Key,
    startedAt: new Date().toISOString()
  });

  return { jobId, textractJobId };
}
|
||||||
|
|
||||||
|
// Process SQS messages from Textract completion notifications
|
||||||
|
/**
 * Performs one receive cycle against the Textract notification queue.
 *
 * Each received message is handed to handleTextractNotification and deleted from
 * the queue only when that handler completes without throwing. Errors are logged
 * and never propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function processSQSMessages() {
  const queueUrl = process.env.AWS_TEXTRACT_SQS_QUEUE_URL;
  if (!queueUrl) {
    console.error('AWS_TEXTRACT_SQS_QUEUE_URL not configured');
    return;
  }

  try {
    console.log('Polling SQS queue:', queueUrl);
    const result = await sqsClient.send(
      new ReceiveMessageCommand({
        QueueUrl: queueUrl,
        MaxNumberOfMessages: 10,
        WaitTimeSeconds: 20,
        MessageAttributeNames: ['All']
      })
    );

    const messages = result.Messages;
    console.log('SQS poll result:', messages ? `${messages.length} messages` : 'no messages');

    if (!messages || !(messages.length > 0)) {
      return;
    }

    console.log('Processing', messages.length, 'messages from SQS');
    for (const message of messages) {
      try {
        console.log("Processing message:", message);
        await handleTextractNotification(message);

        // Only reached when the handler succeeded; a failed message stays on the queue.
        await sqsClient.send(
          new DeleteMessageCommand({
            QueueUrl: queueUrl,
            ReceiptHandle: message.ReceiptHandle
          })
        );
      } catch (error) {
        console.error('Error processing message:', error);
      }
    }
  } catch (error) {
    console.error('Error receiving SQS messages:', error);
  }
}
|
||||||
|
|
||||||
|
/**
 * Applies a Textract completion notification (delivered through SNS to SQS) to
 * the matching entry in the in-memory job store.
 *
 * - SUCCEEDED: the results are retrieved, cleaned, and stored; the job becomes COMPLETED.
 * - FAILED / ERROR: the job becomes FAILED with an error message.
 * - Unknown Textract job IDs and unrecognised statuses are logged and ignored.
 *
 * @param {object} message - SQS message; `message.Body` is a JSON string.
 * @returns {Promise<void>}
 * @throws {SyntaxError} When the message body is not valid JSON.
 * @throws {Error} When retrieving the Textract results fails.
 */
async function handleTextractNotification(message) {
  const body = JSON.parse(message.Body);

  // SNS normally wraps the payload as a JSON string under `Message`. If the
  // subscription uses raw message delivery the body already is the payload, so
  // fall back to it instead of failing on JSON.parse(undefined).
  const snsMessage = typeof body.Message === 'string' ? JSON.parse(body.Message) : body;

  const textractJobId = snsMessage.JobId;
  const status = snsMessage.Status;

  // The store is keyed by our own job ID, so find the entry by Textract job ID.
  const match = [...jobStore.entries()].find(([, job]) => job.textractJobId === textractJobId);
  if (!match) {
    console.warn(`Job not found for Textract job ID: ${textractJobId}`);
    return;
  }

  const [ourJobId, jobInfo] = match;

  if (status === 'SUCCEEDED') {
    const invoiceData = await retrieveTextractResults(textractJobId);
    const processedData = processScanData(invoiceData);

    jobStore.set(ourJobId, {
      ...jobInfo,
      status: 'COMPLETED',
      data: processedData,
      completedAt: new Date().toISOString()
    });
  } else if (status === 'FAILED' || status === 'ERROR') {
    // ERROR is a terminal status too; without handling it the job would be
    // reported as IN_PROGRESS forever.
    jobStore.set(ourJobId, {
      ...jobInfo,
      status: 'FAILED',
      error: snsMessage.StatusMessage || 'Textract job failed',
      completedAt: new Date().toISOString()
    });
  } else {
    console.warn(`Ignoring unrecognised Textract status "${status}" for job ${ourJobId}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Fetches every result page of a finished Textract expense analysis and converts
 * the combined expense documents into invoice data.
 *
 * @param {string} textractJobId - Job ID returned by Textract when the analysis was started.
 * @returns {Promise<object>} Whatever extractInvoiceData returns for the collected documents.
 */
async function retrieveTextractResults(textractJobId) {
  const collectedDocuments = [];
  let pageToken = null;

  // Keep requesting pages until Textract stops handing back a continuation token.
  do {
    const page = await textractClient.send(
      new GetExpenseAnalysisCommand({
        JobId: textractJobId,
        NextToken: pageToken
      })
    );

    if (page.ExpenseDocuments) {
      for (const expenseDocument of page.ExpenseDocuments) {
        collectedDocuments.push(expenseDocument);
      }
    }

    pageToken = page.NextToken;
  } while (pageToken);

  return extractInvoiceData({ ExpenseDocuments: collectedDocuments });
}
|
||||||
|
|
||||||
|
// Start SQS polling (call this when server starts)
|
||||||
|
/**
 * Starts the background loop that polls SQS for Textract notifications
 * (call this when the server starts).
 *
 * The timer fires every 5 seconds, but a single receive cycle can last longer
 * than that because it long-polls the queue. A tick that arrives while the
 * previous cycle is still running is therefore skipped, so at most one poll is
 * in flight at any time.
 *
 * @returns {NodeJS.Timeout} The interval handle; pass it to clearInterval to stop polling.
 */
function startSQSPolling() {
  let pollInFlight = false;

  const pollInterval = setInterval(() => {
    if (pollInFlight) {
      return;
    }
    pollInFlight = true;

    processSQSMessages()
      .catch(error => {
        console.error('SQS polling error:', error);
      })
      .finally(() => {
        pollInFlight = false;
      });
  }, 5000); // Check every 5 seconds whether a new poll may start

  return pollInterval;
}
|
||||||
|
|
||||||
|
/**
 * Flattens a Textract expense-analysis response into summary fields and line items.
 *
 * Every field is kept as `{ value, confidence }`. Fields lacking a type or a value
 * are dropped, and line items that end up with no fields are omitted. Summary
 * fields are keyed by their raw Textract type; a later document overwrites an
 * earlier one that has the same type. Line item fields are keyed by the
 * normalized field name.
 *
 * @param {object} textractResponse - Object with an optional `ExpenseDocuments` array.
 * @returns {{summary: object, lineItems: object[]}} The extracted invoice data.
 */
function extractInvoiceData(textractResponse) {
  const summary = {};
  const lineItems = [];
  const invoiceData = { summary, lineItems };

  const documents = textractResponse.ExpenseDocuments;
  if (!documents || documents.length === 0) {
    return invoiceData;
  }

  // Pull the three pieces of a Textract field, defaulting whatever is missing.
  const readField = (field) => ({
    type: field.Type?.Text || '',
    value: field.ValueDetection?.Text || '',
    confidence: field.ValueDetection?.Confidence || 0
  });

  for (const expenseDoc of documents) {
    // Summary fields (vendor, invoice number, date, total, etc.)
    for (const rawField of expenseDoc.SummaryFields || []) {
      const { type, value, confidence } = readField(rawField);
      if (type && value) {
        summary[type] = { value, confidence };
      }
    }

    // Line items, grouped by Textract into line item groups
    for (const group of expenseDoc.LineItemGroups || []) {
      for (const lineItem of group.LineItems || []) {
        const item = {};

        for (const rawField of lineItem.LineItemExpenseFields || []) {
          const { type, value, confidence } = readField(rawField);
          if (type && value) {
            item[normalizeFieldName(type)] = { value, confidence };
          }
        }

        if (Object.keys(item).length > 0) {
          lineItems.push(item);
        }
      }
    }
  }

  return invoiceData;
}
|
||||||
|
|
||||||
|
/**
 * Converts a Textract line item field type into a more readable property name.
 *
 * Known types are mapped explicitly; any other type is lower-cased with its
 * underscores removed.
 *
 * @param {string} fieldType - Textract field type, e.g. `UNIT_PRICE`.
 * @returns {string} The normalized field name, e.g. `unitPrice`.
 */
function normalizeFieldName(fieldType) {
  const fieldMap = {
    'ITEM': 'description',
    'QUANTITY': 'quantity',
    'UNIT_PRICE': 'unitPrice',
    'PRICE': 'price',
    'PRODUCT_CODE': 'productCode',
    'EXPENSE_ROW': 'row'
  };

  // Own-property check: a plain `fieldMap[fieldType]` lookup would also find
  // members inherited from Object.prototype (e.g. "constructor") and return a
  // function instead of a name.
  if (Object.hasOwn(fieldMap, fieldType)) {
    return fieldMap[fieldType];
  }

  return fieldType.toLowerCase().replace(/_/g, '');
}
|
||||||
|
|
||||||
|
/**
 * Cleans extracted invoice data for consumers.
 *
 * Fields whose confidence is not above 50 are dropped and the remaining fields
 * are reduced to their bare values. In line items, `quantity`, `unitPrice` and
 * `price` are parsed into numbers (0 when unparseable). Line items are kept only
 * when they have a description and their quantity is absent, zero, or positive.
 *
 * @param {{summary: object, lineItems: object[]}} invoiceData - Fields as `{ value, confidence }`.
 * @returns {{summary: object, lineItems: object[]}} The cleaned data.
 */
function processScanData(invoiceData) {
  // Only fields with > 50% confidence are trusted.
  const isConfident = (field) => field.confidence > 50;

  const summary = {};
  for (const [name, field] of Object.entries(invoiceData.summary)) {
    if (isConfident(field)) {
      summary[name] = field.value;
    }
  }

  const lineItems = [];
  for (const rawItem of invoiceData.lineItems) {
    const cleanedItem = {};

    for (const [name, field] of Object.entries(rawItem)) {
      if (!isConfident(field)) {
        continue;
      }

      switch (name) {
        case 'quantity':
          cleanedItem[name] = parseFloat(field.value) || 0;
          break;
        case 'unitPrice':
        case 'price':
          // Strip currency symbols and separators before parsing.
          cleanedItem[name] = parseFloat(field.value.replace(/[^0-9.-]/g, '')) || 0;
          break;
        default:
          cleanedItem[name] = field.value;
      }
    }

    // Drop items without a description or with a negative quantity.
    const quantityAcceptable = !cleanedItem.quantity || cleanedItem.quantity > 0;
    if (cleanedItem.description && quantityAcceptable) {
      lineItems.push(cleanedItem);
    }
  }

  return { summary, lineItems };
}
|
||||||
|
|
||||||
|
// Public surface of this module: the two route handlers and the poller start-up hook.
module.exports = { handleBillOcr, handleBillOcrStatus, startSQSPolling };
|
||||||
15
server/routes/aiRoutes.js
Normal file
15
server/routes/aiRoutes.js
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
const express = require("express");
const multer = require("multer");
const validateFirebaseIdTokenMiddleware = require("../middleware/validateFirebaseIdTokenMiddleware");
const { handleBillOcr, handleBillOcrStatus } = require("../ai/bill-ocr/bill-ocr");

const router = express.Router();

// Multipart form parser for the upload route; the handler reads the file from request.file.
const upload = multer();

// FIXME(review): token validation is switched off here, so both routes below are
// reachable without authentication - confirm that is intended before release.
//router.use(validateFirebaseIdTokenMiddleware);

// Upload a bill scan (form field "billScan") and start OCR on it.
router.post("/bill-ocr", upload.single('billScan'), handleBillOcr);

// Report the status of a previously started OCR job.
router.get("/bill-ocr/status/:jobId", handleBillOcrStatus);

module.exports = router;
|
||||||
Reference in New Issue
Block a user