IO-3515 additional cleanup, translations
This commit is contained in:
@@ -19,93 +19,97 @@ const normalizePrice = (str) => {
|
||||
if (typeof str !== 'string') return str;
|
||||
return str.replace(/[^0-9.-]+/g, "");
|
||||
};
|
||||
/**
 * Strips everything except digits, "." and "-" from `text` and parses the rest.
 *
 * @param {string} text - Raw text that may contain currency symbols or labels.
 * @returns {number} The parsed number, or NaN when nothing numeric remains.
 */
const parseCleanedPrice = (text) => Number.parseFloat(text.replace(/[^0-9.-]+/g, ""));

/**
 * Chooses one value from a non-empty list of positive candidate prices.
 *
 * @param {number[]} candidates - Non-empty list of positive numbers.
 * @returns {number} The shared value when all candidates are identical, their
 *   average when every candidate is within 1% of that average, otherwise the
 *   first candidate.
 */
const resolvePriceCandidates = (candidates) => {
  // Covers both the single-candidate case and repeated readings ("47.57.47.57").
  if (new Set(candidates).size === 1) {
    return candidates[0];
  }

  const avg = candidates.reduce((sum, value) => sum + value, 0) / candidates.length;
  const allClose = candidates.every((value) => Math.abs(value - avg) / avg < 0.01);

  // When the readings disagree, the first one is taken as the most likely value.
  return allClose ? avg : candidates[0];
};

/**
 * Extracts a single numeric price from a raw value that may contain several
 * readings, currency symbols, or delimiters.
 *
 * Resolution order for strings:
 *   1. Comma-grouped thousands ("1,234.56", "$1,234") are collapsed first so
 *      they are read as one number rather than split into pieces.
 *   2. Decimal patterns ("123.45") are extracted; positive matches are resolved
 *      to a single value.
 *   3. Otherwise the text is split on / | \ , ; and each positive part is
 *      parsed and resolved the same way.
 *   4. Otherwise the whole text is stripped of non-numeric characters and parsed.
 *
 * NOTE(review): steps 2 and 3 only consider positive values, while step 4 can
 * return a negative number ("-45" -> -45). Confirm how credits should be handled.
 *
 * @param {*} str - Raw price; non-strings are passed through parseFloat.
 * @returns {number} The resolved price, or 0 when nothing numeric is found.
 */
const normalizePriceFinal = (str) => {
  if (typeof str !== 'string') {
    // Numbers pass through unchanged; null/undefined/objects become 0.
    const num = Number.parseFloat(str);
    return Number.isNaN(num) ? 0 : num;
  }

  // Collapse well-formed thousands groups (1-3 digits, then ",ddd" groups) that
  // are not part of a longer digit run or a decimal fraction.
  const text = str.replace(
    /(?<![\d.])\d{1,3}(?:,\d{3})+(?!\d)/g,
    (grouped) => grouped.replace(/,/g, "")
  );

  // First, try to extract valid decimal number patterns (e.g., "123.45").
  const decimalValues = (text.match(/\d+\.\d{1,2}/g) || [])
    .map((match) => Number.parseFloat(match))
    .filter((value) => !Number.isNaN(value) && value > 0);

  if (decimalValues.length > 0) {
    return resolvePriceCandidates(decimalValues);
  }

  // Fallback: split on common delimiters and extract all potential numbers.
  const parts = text
    .split(/[\/|\\,;]/)
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  if (parts.length > 1) {
    const partValues = parts
      .map((part) => parseCleanedPrice(part))
      .filter((value) => !Number.isNaN(value) && value > 0);

    if (partValues.length > 0) {
      return resolvePriceCandidates(partValues);
    }
  }

  // Single value, no delimiters, or no usable parts: clean the whole text.
  const parsed = parseCleanedPrice(text);
  return Number.isNaN(parsed) ? 0 : parsed;
};
|
||||
|
||||
// Helper function to calculate Textract OCR confidence (0-100%)
|
||||
const calculateTextractConfidence = (textractLineItem) => {
|
||||
@@ -149,6 +153,7 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
else if (field.normalizedLabel === standardizedFieldsnames.quantity) {
|
||||
weight = 3.5;
|
||||
}
|
||||
// We generally ignore the key from textract. Keeping for future reference.
|
||||
// else if (key === 'ITEM' || key === 'PRODUCT_CODE') {
|
||||
// weight = 3; // Description and part number are most important
|
||||
// } else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
|
||||
@@ -179,7 +184,6 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
return Math.round(avgConfidence * 100) / 100; // Round to 2 decimal places
|
||||
};
|
||||
|
||||
// Helper function to calculate match confidence score (0-100%)
|
||||
const calculateMatchConfidence = (matches, bestMatch) => {
|
||||
if (!matches || matches.length === 0 || !bestMatch) {
|
||||
return 0; // No match = 0% confidence
|
||||
@@ -217,7 +221,6 @@ const calculateMatchConfidence = (matches, bestMatch) => {
|
||||
return Math.max(matchConfidence, 1);
|
||||
};
|
||||
|
||||
// Helper function to calculate overall confidence combining OCR and match confidence
|
||||
const calculateOverallConfidence = (ocrConfidence, matchConfidence) => {
|
||||
// If there's no match, OCR confidence doesn't matter much
|
||||
if (matchConfidence === 0) {
|
||||
@@ -318,7 +321,7 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
|
||||
}
|
||||
`, {
|
||||
jobid, // TODO: Refactor back in parts orders
|
||||
jobid, // TODO: Parts order IDs are currently ignored. If receiving a parts order, its ID could be used to match joblines more precisely.
|
||||
});
|
||||
|
||||
//Create fuses of line descriptions for matching.
|
||||
@@ -378,10 +381,8 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
if (!job) {
|
||||
throw new Error('Job not found for bill form data generation.');
|
||||
}
|
||||
//Figure out which lines have a match and which don't.
|
||||
|
||||
//TODO: How do we handle freight lines and core charges?
|
||||
|
||||
//Create the form data structure for the bill posting screen.
|
||||
const billFormData = {
|
||||
"jobid": jobid,
|
||||
@@ -392,10 +393,10 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
"total": normalizePrice(processedData.summary?.INVOICE_TOTAL?.value || processedData.summary?.TOTAL?.value),
|
||||
"billlines": joblineMatches.map(jlMatchLine => {
|
||||
const { matches, textractLineItem, } = jlMatchLine
|
||||
//Matches should be prioritized, take the first one.
|
||||
//Matches should be pre-sorted, take the first one.
|
||||
const matchToUse = matches.length > 0 ? matches[0] : null;
|
||||
|
||||
// Calculate confidence scores (0-100%)
|
||||
// Calculate confidence scores
|
||||
const ocrConfidence = calculateTextractConfidence(textractLineItem);
|
||||
const matchConfidence = calculateMatchConfidence(matches, matchToUse);
|
||||
const overallConfidence = calculateOverallConfidence(ocrConfidence, matchConfidence);
|
||||
@@ -452,7 +453,7 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
//TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)
|
||||
const lineObject = {
|
||||
"line_desc": matchToUse?.item?.line_desc || textractLineItem.ITEM?.value || "NO DESCRIPTION",
|
||||
"quantity": textractLineItem.QUANTITY?.value, // convert to integer?
|
||||
"quantity": textractLineItem.QUANTITY?.value,
|
||||
"actual_price": normalizePrice(actualPrice),
|
||||
"actual_cost": normalizePrice(actualCost),
|
||||
"cost_center": matchToUse?.item?.part_type
|
||||
@@ -470,7 +471,6 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
},
|
||||
"joblineid": matchToUse?.item?.id || "noline",
|
||||
"confidence": `T${overallConfidence} - O${ocrConfidence} - J${matchConfidence}`
|
||||
|
||||
}
|
||||
return lineObject
|
||||
})
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const PDFDocument = require('pdf-lib').PDFDocument;
|
||||
const TEXTRACT_REDIS_PREFIX = `textract:${process.env?.NODE_ENV === "production" ? "PROD" : "TEST"}`
|
||||
const TEXTRACT_REDIS_PREFIX = `textract:${process.env?.NODE_ENV}`
|
||||
const TEXTRACT_JOB_TTL = 10 * 60;
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,4 @@ Required Infrastructure setup
|
||||
|
||||
TODO:
|
||||
* Create a rome bucket for uploads, or move to the regular spot.
|
||||
* How to implement this across environments.
|
||||
* How to prevent polling for a job that may have errored.
|
||||
* Handling of HEIC files on upload.
|
||||
* Add environment variables.
|
||||
Reference in New Issue
Block a user