IO-3515 Minor improvements to Bill AI.
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
|
||||
|
||||
const Fuse = require('fuse.js');
|
||||
const { has } = require("lodash");
|
||||
|
||||
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
|
||||
const InstanceManager = require("../../utils/instanceMgr").default;
|
||||
|
||||
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
|
||||
|
||||
const PRICE_QUANTITY_MARGIN_TOLERANCE = 0.03; //Used to make sure that if there is a quantity, the price is likely a unit price.
|
||||
// Helper function to normalize fields
|
||||
const normalizePartNumber = (str) => {
|
||||
return str.replace(/[^a-zA-Z0-9]/g, '').toUpperCase();
|
||||
@@ -17,7 +17,38 @@ const normalizeText = (str) => {
|
||||
};
|
||||
const normalizePrice = (str) => {
|
||||
if (typeof str !== 'string') return str;
|
||||
return str.replace(/[^0-9.-]+/g, "");
|
||||
|
||||
let value = str.trim();
|
||||
|
||||
// Handle European-style decimal comma like "292,37".
|
||||
// Only treat the *last* comma as a decimal separator when:
|
||||
// - there's no '.' anywhere (so we don't fight normal US formatting like "1,234.56")
|
||||
// - and the suffix after the last comma is 1-2 digits (so "1,234" stays 1234)
|
||||
if (!value.includes('.') && value.includes(',')) {
|
||||
const lastCommaIndex = value.lastIndexOf(',');
|
||||
const decimalSuffix = value.slice(lastCommaIndex + 1).trim();
|
||||
|
||||
if (/^\d{1,2}$/.test(decimalSuffix)) {
|
||||
const before = value.slice(0, lastCommaIndex).replace(/,/g, '');
|
||||
value = `${before}.${decimalSuffix}`;
|
||||
} else {
|
||||
// Treat commas as thousands separators (or noise) and drop them.
|
||||
value = value.replace(/,/g, '');
|
||||
}
|
||||
}
|
||||
|
||||
return value.replace(/[^0-9.-]+/g, "");
|
||||
};
|
||||
|
||||
const roundToIncrement = (value, increment) => {
|
||||
if (typeof value !== 'number' || !isFinite(value) || typeof increment !== 'number' || !isFinite(increment) || increment <= 0) {
|
||||
return value;
|
||||
}
|
||||
|
||||
const rounded = Math.round((value + Number.EPSILON) / increment) * increment;
|
||||
// Prevent float artifacts (e.g. 0.20500000000000002)
|
||||
const decimals = Math.max(0, Math.ceil(-Math.log10(increment)));
|
||||
return parseFloat(rounded.toFixed(decimals));
|
||||
};
|
||||
|
||||
//More complex function. Not necessary at the moment, keeping for reference.
|
||||
@@ -134,6 +165,7 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost);
|
||||
const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price);
|
||||
const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc);
|
||||
const hasQuantity = textractLineItem?.QUANTITY?.value; //We don't normalize quantity, we just use what textract gives us.
|
||||
|
||||
// Calculate weighted average, giving more weight to important fields
|
||||
// If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher
|
||||
@@ -173,10 +205,11 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
if (!hasActualCost) missingCount++;
|
||||
if (!hasActualPrice) missingCount++;
|
||||
if (!hasLineDesc) missingCount++;
|
||||
if (!hasQuantity) missingCount++;
|
||||
|
||||
// Each missing field reduces confidence by 15%
|
||||
// Each missing field reduces confidence by 20%
|
||||
if (missingCount > 0) {
|
||||
missingFieldsPenalty = 1.0 - (missingCount * 0.15);
|
||||
missingFieldsPenalty = 1.0 - (missingCount * 0.20);
|
||||
}
|
||||
|
||||
avgConfidence = avgConfidence * missingFieldsPenalty;
|
||||
@@ -361,16 +394,16 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
|
||||
|
||||
const vendorFuse = new Fuse(
|
||||
jobData.vendors,
|
||||
jobData.vendors.map(v => ({ ...v, name_normalized: normalizeText(v.name) })),
|
||||
{
|
||||
keys: ['name'],
|
||||
threshold: 0.4, //Adjust as needed for matching sensitivity,
|
||||
keys: [{ name: "name", weight: 3 }, { name: 'name_normalized', weight: 2 }],
|
||||
threshold: 0.4,
|
||||
includeScore: true,
|
||||
},
|
||||
|
||||
}
|
||||
);
|
||||
|
||||
const vendorMatches = vendorFuse.search(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value);
|
||||
const vendorMatches = vendorFuse.search(normalizeText(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value));
|
||||
|
||||
let vendorid;
|
||||
if (vendorMatches.length > 0) {
|
||||
@@ -381,6 +414,21 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
throw new Error('Job not found for bill form data generation.');
|
||||
}
|
||||
|
||||
|
||||
//Is there a subtotal level discount? If there is, we need to figure out what the percentage is, and apply that to the actual cost as a reduction
|
||||
const subtotalDiscountValueRaw = processedData.summary?.DISCOUNT?.value || processedData.summary?.SUBTOTAL_DISCOUNT?.value || 0;
|
||||
let discountPercentageDecimal = 0;
|
||||
if (subtotalDiscountValueRaw) {
|
||||
const subtotal = parseFloat(normalizePrice(processedData.summary?.SUBTOTAL?.value || 0)) || 0;
|
||||
const subtotalDiscountValue = parseFloat(normalizePrice(subtotalDiscountValueRaw)) || 0;
|
||||
if (subtotal > 0 && subtotalDiscountValue) {
|
||||
// Store discount percentage as a decimal (e.g. 20.5% => 0.205),
|
||||
// but only allow half-percent increments (0.005 steps).
|
||||
discountPercentageDecimal = Math.abs(subtotalDiscountValue / subtotal);
|
||||
discountPercentageDecimal = roundToIncrement(discountPercentageDecimal, 0.005);
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: How do we handle freight lines and core charges?
|
||||
//Create the form data structure for the bill posting screen.
|
||||
const billFormData = {
|
||||
@@ -448,6 +496,31 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
||||
}
|
||||
}
|
||||
|
||||
//If there's nothing, just fall back to seeing if there's a price object from textract.
|
||||
|
||||
if (!actualPrice && textractLineItem.PRICE) {
|
||||
actualPrice = textractLineItem.PRICE.value;
|
||||
}
|
||||
if (!actualCost && textractLineItem.PRICE) {
|
||||
actualCost = textractLineItem.PRICE.value;
|
||||
}
|
||||
|
||||
//If quantity greater than 1, check if the actual cost is a multiple of the actual price, if so, divide it out to get the unit price.
|
||||
const quantity = parseInt(textractLineItem?.QUANTITY?.value);
|
||||
if (quantity && quantity > 1) {
|
||||
if (actualPrice && quantity && Math.abs((actualPrice / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) {
|
||||
actualPrice = actualPrice / quantity;
|
||||
}
|
||||
if (actualCost && quantity && Math.abs((actualCost / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) {
|
||||
actualCost = actualCost / quantity;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (discountPercentageDecimal > 0) {
|
||||
actualCost = actualCost * (1 - discountPercentageDecimal);
|
||||
}
|
||||
|
||||
const responsibilityCenters = job.bodyshop.md_responsibility_centers
|
||||
//TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)
|
||||
const lineObject = {
|
||||
@@ -714,5 +787,6 @@ const bodyshopHasDmsKey = (bodyshop) =>
|
||||
|
||||
|
||||
module.exports = {
|
||||
generateBillFormData
|
||||
generateBillFormData,
|
||||
normalizePrice
|
||||
}
|
||||
|
||||
@@ -50,10 +50,12 @@ function normalizeLabelName(labelText) {
|
||||
'unit_price': standardizedFieldsnames.actual_price,
|
||||
'list': standardizedFieldsnames.actual_price,
|
||||
'retail_price': standardizedFieldsnames.actual_price,
|
||||
'retail': standardizedFieldsnames.actual_price,
|
||||
'net': standardizedFieldsnames.actual_cost,
|
||||
'selling_price': standardizedFieldsnames.actual_cost,
|
||||
'net_price': standardizedFieldsnames.actual_cost,
|
||||
'net_cost': standardizedFieldsnames.actual_cost,
|
||||
'total': standardizedFieldsnames.actual_cost,
|
||||
'po_no': standardizedFieldsnames.ro_number,
|
||||
'customer_po_no': standardizedFieldsnames.ro_number,
|
||||
'customer_po_no_': standardizedFieldsnames.ro_number
|
||||
|
||||
@@ -83,7 +83,7 @@ async function handleBillOcr(req, res) {
|
||||
// Process synchronously for single-page documents
|
||||
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
||||
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: req });
|
||||
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ...processedData, billForm });
|
||||
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ..._.omit(processedData, "originalTextractResponse"), billForm });
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
status: 'COMPLETED',
|
||||
|
||||
Reference in New Issue
Block a user