IO-3515 additional cleanup, translations

This commit is contained in:
Patrick Fic
2026-02-19 14:15:57 -08:00
parent b2bc19c5c9
commit 21f43285bc
10 changed files with 159 additions and 88 deletions

View File

@@ -19,93 +19,97 @@ const normalizePrice = (str) => {
if (typeof str !== 'string') return str;
return str.replace(/[^0-9.-]+/g, "");
};
const normalizePriceFinal = (str) => {
if (typeof str !== 'string') {
// If it's already a number, format to 2 decimals
const num = parseFloat(str);
return isNaN(num) ? 0 : num;
}
// First, try to extract valid decimal number patterns (e.g., "123.45")
const decimalPattern = /\d+\.\d{1,2}/g;
const decimalMatches = str.match(decimalPattern);
//More complex function. Not necessary at the moment, keeping for reference.
// const normalizePriceFinal = (str) => {
// if (typeof str !== 'string') {
// // If it's already a number, format to 2 decimals
// const num = parseFloat(str);
// return isNaN(num) ? 0 : num;
// }
if (decimalMatches && decimalMatches.length > 0) {
// Found valid decimal number(s)
const numbers = decimalMatches.map(m => parseFloat(m)).filter(n => !isNaN(n) && n > 0);
// // First, try to extract valid decimal number patterns (e.g., "123.45")
// const decimalPattern = /\d+\.\d{1,2}/g;
// const decimalMatches = str.match(decimalPattern);
if (numbers.length === 1) {
return numbers[0];
}
// if (decimalMatches && decimalMatches.length > 0) {
// // Found valid decimal number(s)
// const numbers = decimalMatches.map(m => parseFloat(m)).filter(n => !isNaN(n) && n > 0);
if (numbers.length > 1) {
// Check if all numbers are the same (e.g., "47.57.47.57" -> [47.57, 47.57])
const uniqueNumbers = [...new Set(numbers)];
if (uniqueNumbers.length === 1) {
return uniqueNumbers[0];
}
// if (numbers.length === 1) {
// return numbers[0];
// }
// Check if numbers are very close (within 1% tolerance)
const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);
// if (numbers.length > 1) {
// // Check if all numbers are the same (e.g., "47.57.47.57" -> [47.57, 47.57])
// const uniqueNumbers = [...new Set(numbers)];
// if (uniqueNumbers.length === 1) {
// return uniqueNumbers[0];
// }
if (allClose) {
return avg;
}
// // Check if numbers are very close (within 1% tolerance)
// const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
// const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);
// Return the first number (most likely correct)
return numbers[0];
}
}
// if (allClose) {
// return avg;
// }
// Fallback: Split on common delimiters and extract all potential numbers
const parts = str.split(/[\/|\\,;]/).map(part => part.trim()).filter(part => part.length > 0);
// // Return the first number (most likely correct)
// return numbers[0];
// }
// }
if (parts.length > 1) {
// Multiple values detected - extract and parse all valid numbers
const numbers = parts
.map(part => {
const cleaned = part.replace(/[^0-9.-]+/g, "");
const parsed = parseFloat(cleaned);
return isNaN(parsed) ? null : parsed;
})
.filter(num => num !== null && num > 0);
// // Fallback: Split on common delimiters and extract all potential numbers
// const parts = str.split(/[\/|\\,;]/).map(part => part.trim()).filter(part => part.length > 0);
if (numbers.length === 0) {
// No valid numbers found, try fallback to basic cleaning
const cleaned = str.replace(/[^0-9.-]+/g, "");
const parsed = parseFloat(cleaned);
return isNaN(parsed) ? 0 : parsed;
}
// if (parts.length > 1) {
// // Multiple values detected - extract and parse all valid numbers
// const numbers = parts
// .map(part => {
// const cleaned = part.replace(/[^0-9.-]+/g, "");
// const parsed = parseFloat(cleaned);
// return isNaN(parsed) ? null : parsed;
// })
// .filter(num => num !== null && num > 0);
if (numbers.length === 1) {
return numbers[0];
}
// if (numbers.length === 0) {
// // No valid numbers found, try fallback to basic cleaning
// const cleaned = str.replace(/[^0-9.-]+/g, "");
// const parsed = parseFloat(cleaned);
// return isNaN(parsed) ? 0 : parsed;
// }
// Multiple valid numbers
const uniqueNumbers = [...new Set(numbers)];
// if (numbers.length === 1) {
// return numbers[0];
// }
if (uniqueNumbers.length === 1) {
return uniqueNumbers[0];
}
// // Multiple valid numbers
// const uniqueNumbers = [...new Set(numbers)];
// Check if numbers are very close (within 1% tolerance)
const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);
// if (uniqueNumbers.length === 1) {
// return uniqueNumbers[0];
// }
if (allClose) {
return avg;
}
// // Check if numbers are very close (within 1% tolerance)
// const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
// const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);
// if (allClose) {
// return avg;
// }
// // Return the first valid number
// return numbers[0];
// }
// // Single value or no delimiters, clean normally
// const cleaned = str.replace(/[^0-9.-]+/g, "");
// const parsed = parseFloat(cleaned);
// return isNaN(parsed) ? 0 : parsed;
// };
// Return the first valid number
return numbers[0];
}
// Single value or no delimiters, clean normally
const cleaned = str.replace(/[^0-9.-]+/g, "");
const parsed = parseFloat(cleaned);
return isNaN(parsed) ? 0 : parsed;
};
// Helper function to calculate Textract OCR confidence (0-100%)
const calculateTextractConfidence = (textractLineItem) => {
@@ -149,6 +153,7 @@ const calculateTextractConfidence = (textractLineItem) => {
else if (field.normalizedLabel === standardizedFieldsnames.quantity) {
weight = 3.5;
}
// We generally ignore the key from textract. Keeping for future reference.
// else if (key === 'ITEM' || key === 'PRODUCT_CODE') {
// weight = 3; // Description and part number are most important
// } else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
@@ -179,7 +184,6 @@ const calculateTextractConfidence = (textractLineItem) => {
return Math.round(avgConfidence * 100) / 100; // Round to 2 decimal places
};
// Helper function to calculate match confidence score (0-100%)
const calculateMatchConfidence = (matches, bestMatch) => {
if (!matches || matches.length === 0 || !bestMatch) {
return 0; // No match = 0% confidence
@@ -217,7 +221,6 @@ const calculateMatchConfidence = (matches, bestMatch) => {
return Math.max(matchConfidence, 1);
};
// Helper function to calculate overall confidence combining OCR and match confidence
const calculateOverallConfidence = (ocrConfidence, matchConfidence) => {
// If there's no match, OCR confidence doesn't matter much
if (matchConfidence === 0) {
@@ -318,7 +321,7 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
}
`, {
jobid, // TODO: Refactor back in parts orders
jobid, // TODO: Parts order IDs are currently ignored. If receiving a parts order, it could be used to more precisely match to joblines.
});
//Create fuses of line descriptions for matching.
@@ -378,10 +381,8 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
if (!job) {
throw new Error('Job not found for bill form data generation.');
}
//Figure out which lines have a match and which don't.
//TODO: How do we handle freight lines and core charges?
//Create the form data structure for the bill posting screen.
const billFormData = {
"jobid": jobid,
@@ -392,10 +393,10 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
"total": normalizePrice(processedData.summary?.INVOICE_TOTAL?.value || processedData.summary?.TOTAL?.value),
"billlines": joblineMatches.map(jlMatchLine => {
const { matches, textractLineItem, } = jlMatchLine
//Matches should be prioritized, take the first one.
//Matches should be pre-sorted, take the first one.
const matchToUse = matches.length > 0 ? matches[0] : null;
// Calculate confidence scores (0-100%)
// Calculate confidence scores
const ocrConfidence = calculateTextractConfidence(textractLineItem);
const matchConfidence = calculateMatchConfidence(matches, matchToUse);
const overallConfidence = calculateOverallConfidence(ocrConfidence, matchConfidence);
@@ -452,7 +453,7 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
//TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)?
const lineObject = {
"line_desc": matchToUse?.item?.line_desc || textractLineItem.ITEM?.value || "NO DESCRIPTION",
"quantity": textractLineItem.QUANTITY?.value, // convert to integer?
"quantity": textractLineItem.QUANTITY?.value,
"actual_price": normalizePrice(actualPrice),
"actual_cost": normalizePrice(actualCost),
"cost_center": matchToUse?.item?.part_type
@@ -470,7 +471,6 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
},
"joblineid": matchToUse?.item?.id || "noline",
"confidence": `T${overallConfidence} - O${ocrConfidence} - J${matchConfidence}`
}
return lineObject
})

View File

@@ -1,5 +1,5 @@
const PDFDocument = require('pdf-lib').PDFDocument;
const TEXTRACT_REDIS_PREFIX = `textract:${process.env?.NODE_ENV === "production" ? "PROD" : "TEST"}`
const TEXTRACT_REDIS_PREFIX = `textract:${process.env?.NODE_ENV}`
const TEXTRACT_JOB_TTL = 10 * 60;

View File

@@ -5,6 +5,4 @@ Required Infrastructure setup
TODO:
* Create a rome bucket for uploads, or move to the regular spot.
* How to implement this across environments.
* How to prevent polling for a job that may have errored.
* Handling of HEIC files on upload.
* Add environment variables.