Merged in feature/IO-3515-ocr-bill-posting (pull request #3077)
Feature/IO-3515 ocr bill posting
This commit is contained in:
@@ -108,7 +108,7 @@ function BillEnterAiScan({
|
|||||||
setIsAiScan(true);
|
setIsAiScan(true);
|
||||||
const formdata = new FormData();
|
const formdata = new FormData();
|
||||||
formdata.append("billScan", file);
|
formdata.append("billScan", file);
|
||||||
formdata.append("jobid", billEnterModal.context.job?.id);
|
formdata.append("jobid", form.getFieldValue("jobid") || billEnterModal.context.job?.id);
|
||||||
formdata.append("bodyshopid", bodyshop.id);
|
formdata.append("bodyshopid", bodyshop.id);
|
||||||
formdata.append("partsorderid", billEnterModal.context.parts_order?.id);
|
formdata.append("partsorderid", billEnterModal.context.parts_order?.id);
|
||||||
|
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
|
|
||||||
|
|
||||||
const Fuse = require('fuse.js');
|
const Fuse = require('fuse.js');
|
||||||
const { has } = require("lodash");
|
|
||||||
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
|
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
|
||||||
const InstanceManager = require("../../utils/instanceMgr").default;
|
const InstanceManager = require("../../utils/instanceMgr").default;
|
||||||
|
|
||||||
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
|
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
|
||||||
|
const PRICE_QUANTITY_MARGIN_TOLERANCE = 0.03; //Used to make sure that if there is a quantity, the price is likely a unit price.
|
||||||
// Helper function to normalize fields
|
// Helper function to normalize fields
|
||||||
const normalizePartNumber = (str) => {
|
const normalizePartNumber = (str) => {
|
||||||
return str.replace(/[^a-zA-Z0-9]/g, '').toUpperCase();
|
return str.replace(/[^a-zA-Z0-9]/g, '').toUpperCase();
|
||||||
@@ -17,7 +17,38 @@ const normalizeText = (str) => {
|
|||||||
};
|
};
|
||||||
/**
 * Normalizes an OCR'd price into a plain numeric string suitable for parseFloat.
 *
 * Handles both US formatting ("1,234.56") and European formatting
 * ("292,37", "1.234,56"), and ignores currency symbols, units and whitespace.
 *
 * @param {*} str - Raw price text. Non-string values are returned untouched.
 * @returns {*} A string containing only digits, '.', and '-' (or the original
 *   value when it was not a string).
 */
const normalizePrice = (str) => {
  if (typeof str !== 'string') return str;

  // Strip currency symbols, units and whitespace up front so trailing noise
  // (e.g. "292,37 €") cannot hide a decimal suffix from the check below.
  let value = str.replace(/[^0-9.,-]+/g, '');

  const lastCommaIndex = value.lastIndexOf(',');
  if (lastCommaIndex !== -1) {
    const lastDotIndex = value.lastIndexOf('.');
    const decimalSuffix = value.slice(lastCommaIndex + 1);

    // The last comma is a decimal separator only when:
    // - no '.' follows it (so US formatting like "1,234.56" is left alone)
    // - and it is followed by exactly 1-2 digits (so "1,234" stays 1234)
    const commaIsDecimal = lastDotIndex < lastCommaIndex && /^\d{1,2}$/.test(decimalSuffix);

    if (commaIsDecimal) {
      // Everything before the decimal comma is the integer part; any '.' or ','
      // left in it is a thousands separator ("1.234,56" => "1234.56").
      const before = value.slice(0, lastCommaIndex).replace(/[.,]/g, '');
      value = `${before}.${decimalSuffix}`;
    } else {
      // Treat commas as thousands separators (or noise) and drop them.
      value = value.replace(/,/g, '');
    }
  }

  return value;
};
|
||||||
|
|
||||||
|
/**
 * Rounds `value` to the nearest multiple of `increment`.
 *
 * @param {number} value - Number to round.
 * @param {number} increment - Positive step size (e.g. 0.005 for half-percent steps).
 * @returns {*} The rounded number, or `value` unchanged when either argument is
 *   not a finite number or `increment` is not positive.
 */
const roundToIncrement = (value, increment) => {
  const isUsableNumber = (n) => typeof n === 'number' && Number.isFinite(n);
  if (!isUsableNumber(value) || !isUsableNumber(increment) || increment <= 0) {
    return value;
  }

  const rounded = Math.round((value + Number.EPSILON) / increment) * increment;

  // Prevent float artifacts (e.g. 0.20500000000000002) by trimming to the number
  // of decimal places the increment itself has. This is read from the increment's
  // string form (including exponent notation such as "5e-7") because a log10
  // estimate under-counts for steps like 0.25 or 0.025 and would re-round the result.
  const [mantissa, exponent = '0'] = String(increment).split('e');
  const fractionDigits = (mantissa.split('.')[1] || '').length;
  // toFixed only accepts 0-100 digits.
  const decimals = Math.min(100, Math.max(0, fractionDigits - Number(exponent)));

  return parseFloat(rounded.toFixed(decimals));
};
|
||||||
|
|
||||||
//More complex function. Not necessary at the moment, keeping for reference.
|
//More complex function. Not necessary at the moment, keeping for reference.
|
||||||
@@ -134,6 +165,7 @@ const calculateTextractConfidence = (textractLineItem) => {
|
|||||||
const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost);
|
const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost);
|
||||||
const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price);
|
const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price);
|
||||||
const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc);
|
const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc);
|
||||||
|
const hasQuantity = textractLineItem?.QUANTITY?.value; //We don't normalize quantity, we just use what textract gives us.
|
||||||
|
|
||||||
// Calculate weighted average, giving more weight to important fields
|
// Calculate weighted average, giving more weight to important fields
|
||||||
// If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher
|
// If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher
|
||||||
@@ -173,10 +205,11 @@ const calculateTextractConfidence = (textractLineItem) => {
|
|||||||
if (!hasActualCost) missingCount++;
|
if (!hasActualCost) missingCount++;
|
||||||
if (!hasActualPrice) missingCount++;
|
if (!hasActualPrice) missingCount++;
|
||||||
if (!hasLineDesc) missingCount++;
|
if (!hasLineDesc) missingCount++;
|
||||||
|
if (!hasQuantity) missingCount++;
|
||||||
|
|
||||||
// Each missing field reduces confidence by 15%
|
// Each missing field reduces confidence by 20%
|
||||||
if (missingCount > 0) {
|
if (missingCount > 0) {
|
||||||
missingFieldsPenalty = 1.0 - (missingCount * 0.15);
|
missingFieldsPenalty = 1.0 - (missingCount * 0.20);
|
||||||
}
|
}
|
||||||
|
|
||||||
avgConfidence = avgConfidence * missingFieldsPenalty;
|
avgConfidence = avgConfidence * missingFieldsPenalty;
|
||||||
@@ -361,16 +394,16 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
|||||||
const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
|
const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
|
||||||
|
|
||||||
const vendorFuse = new Fuse(
|
const vendorFuse = new Fuse(
|
||||||
jobData.vendors,
|
jobData.vendors.map(v => ({ ...v, name_normalized: normalizeText(v.name) })),
|
||||||
{
|
{
|
||||||
keys: ['name'],
|
keys: [{ name: "name", weight: 3 }, { name: 'name_normalized', weight: 2 }],
|
||||||
threshold: 0.4, //Adjust as needed for matching sensitivity,
|
threshold: 0.4,
|
||||||
includeScore: true,
|
includeScore: true,
|
||||||
|
},
|
||||||
|
|
||||||
}
|
|
||||||
);
|
);
|
||||||
|
|
||||||
const vendorMatches = vendorFuse.search(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value);
|
const vendorMatches = vendorFuse.search(normalizeText(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value));
|
||||||
|
|
||||||
let vendorid;
|
let vendorid;
|
||||||
if (vendorMatches.length > 0) {
|
if (vendorMatches.length > 0) {
|
||||||
@@ -381,6 +414,21 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
|||||||
throw new Error('Job not found for bill form data generation.');
|
throw new Error('Job not found for bill form data generation.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Is there a subtotal level discount? If there is, we need to figure out what the percentage is, and apply that to the actual cost as a reduction
|
||||||
|
const subtotalDiscountValueRaw = processedData.summary?.DISCOUNT?.value || processedData.summary?.SUBTOTAL_DISCOUNT?.value || 0;
|
||||||
|
let discountPercentageDecimal = 0;
|
||||||
|
if (subtotalDiscountValueRaw) {
|
||||||
|
const subtotal = parseFloat(normalizePrice(processedData.summary?.SUBTOTAL?.value || 0)) || 0;
|
||||||
|
const subtotalDiscountValue = parseFloat(normalizePrice(subtotalDiscountValueRaw)) || 0;
|
||||||
|
if (subtotal > 0 && subtotalDiscountValue) {
|
||||||
|
// Store discount percentage as a decimal (e.g. 20.5% => 0.205),
|
||||||
|
// but only allow half-percent increments (0.005 steps).
|
||||||
|
discountPercentageDecimal = Math.abs(subtotalDiscountValue / subtotal);
|
||||||
|
discountPercentageDecimal = roundToIncrement(discountPercentageDecimal, 0.005);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//TODO: How do we handle freight lines and core charges?
|
//TODO: How do we handle freight lines and core charges?
|
||||||
//Create the form data structure for the bill posting screen.
|
//Create the form data structure for the bill posting screen.
|
||||||
const billFormData = {
|
const billFormData = {
|
||||||
@@ -448,6 +496,31 @@ async function generateBillFormData({ processedData, jobid: jobidFromProps, body
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//If there's nothing, just fall back to seeing if there's a price object from textract.
|
||||||
|
|
||||||
|
if (!actualPrice && textractLineItem.PRICE) {
|
||||||
|
actualPrice = textractLineItem.PRICE.value;
|
||||||
|
}
|
||||||
|
if (!actualCost && textractLineItem.PRICE) {
|
||||||
|
actualCost = textractLineItem.PRICE.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
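//If quantity is greater than 1, the scanned amount may be an extended total (quantity * unit price). If dividing it by the quantity lands within PRICE_QUANTITY_MARGIN_TOLERANCE of the matched job line's act_price, divide it out to get the unit price.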
|
||||||
|
const quantity = parseInt(textractLineItem?.QUANTITY?.value);
|
||||||
|
if (quantity && quantity > 1) {
|
||||||
|
if (actualPrice && quantity && Math.abs((actualPrice / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) {
|
||||||
|
actualPrice = actualPrice / quantity;
|
||||||
|
}
|
||||||
|
if (actualCost && quantity && Math.abs((actualCost / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) {
|
||||||
|
actualCost = actualCost / quantity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (discountPercentageDecimal > 0) {
|
||||||
|
actualCost = actualCost * (1 - discountPercentageDecimal);
|
||||||
|
}
|
||||||
|
|
||||||
const responsibilityCenters = job.bodyshop.md_responsibility_centers
|
const responsibilityCenters = job.bodyshop.md_responsibility_centers
|
||||||
//TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)
|
//TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)
|
||||||
const lineObject = {
|
const lineObject = {
|
||||||
@@ -714,5 +787,6 @@ const bodyshopHasDmsKey = (bodyshop) =>
|
|||||||
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
generateBillFormData
|
generateBillFormData,
|
||||||
|
normalizePrice
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,10 +50,12 @@ function normalizeLabelName(labelText) {
|
|||||||
'unit_price': standardizedFieldsnames.actual_price,
|
'unit_price': standardizedFieldsnames.actual_price,
|
||||||
'list': standardizedFieldsnames.actual_price,
|
'list': standardizedFieldsnames.actual_price,
|
||||||
'retail_price': standardizedFieldsnames.actual_price,
|
'retail_price': standardizedFieldsnames.actual_price,
|
||||||
|
'retail': standardizedFieldsnames.actual_price,
|
||||||
'net': standardizedFieldsnames.actual_cost,
|
'net': standardizedFieldsnames.actual_cost,
|
||||||
'selling_price': standardizedFieldsnames.actual_cost,
|
'selling_price': standardizedFieldsnames.actual_cost,
|
||||||
'net_price': standardizedFieldsnames.actual_cost,
|
'net_price': standardizedFieldsnames.actual_cost,
|
||||||
'net_cost': standardizedFieldsnames.actual_cost,
|
'net_cost': standardizedFieldsnames.actual_cost,
|
||||||
|
'total': standardizedFieldsnames.actual_cost,
|
||||||
'po_no': standardizedFieldsnames.ro_number,
|
'po_no': standardizedFieldsnames.ro_number,
|
||||||
'customer_po_no': standardizedFieldsnames.ro_number,
|
'customer_po_no': standardizedFieldsnames.ro_number,
|
||||||
'customer_po_no_': standardizedFieldsnames.ro_number
|
'customer_po_no_': standardizedFieldsnames.ro_number
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ const { getTextractJobKey, setTextractJob, getTextractJob, getFileType, getPdfPa
|
|||||||
const { extractInvoiceData, processScanData } = require("./bill-ocr-normalize");
|
const { extractInvoiceData, processScanData } = require("./bill-ocr-normalize");
|
||||||
const { generateBillFormData } = require("./bill-ocr-generator");
|
const { generateBillFormData } = require("./bill-ocr-generator");
|
||||||
const logger = require("../../utils/logger");
|
const logger = require("../../utils/logger");
|
||||||
|
const _ = require("lodash");
|
||||||
|
|
||||||
// Initialize AWS clients
|
// Initialize AWS clients
|
||||||
const awsConfig = {
|
const awsConfig = {
|
||||||
@@ -66,7 +67,7 @@ async function handleBillOcr(req, res) {
|
|||||||
if (fileType === 'image') {
|
if (fileType === 'image') {
|
||||||
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
||||||
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: req });
|
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: req });
|
||||||
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ...processedData, billForm });
|
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ..._.omit(processedData, "originalTextractResponse"), billForm });
|
||||||
|
|
||||||
return res.status(200).json({
|
return res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
@@ -82,7 +83,7 @@ async function handleBillOcr(req, res) {
|
|||||||
// Process synchronously for single-page documents
|
// Process synchronously for single-page documents
|
||||||
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
const processedData = await processSinglePageDocument(uploadedFile.buffer);
|
||||||
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: req });
|
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: req });
|
||||||
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ...processedData, billForm });
|
logger.log("bill-ocr-single-complete", "DEBUG", req.user.email, jobid, { ..._.omit(processedData, "originalTextractResponse"), billForm });
|
||||||
return res.status(200).json({
|
return res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
status: 'COMPLETED',
|
status: 'COMPLETED',
|
||||||
|
|||||||
Reference in New Issue
Block a user