IO-3515: Resolve issues with search selects not updating; improve confidence scoring.
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
|
||||
const Fuse = require('fuse.js');
|
||||
const { has } = require("lodash");
|
||||
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
|
||||
const InstanceManager = require("../../utils/instanceMgr").default;
|
||||
|
||||
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
|
||||
|
||||
@@ -13,11 +15,97 @@ const normalizePartNumber = (str) => {
|
||||
/**
 * Normalize free text for comparison: strip every character that is not an
 * ASCII letter, digit or whitespace, collapse whitespace runs to a single
 * space, trim, and upper-case the result.
 *
 * @param {*} str - Text to normalize. `null`/`undefined` yield an empty
 *   string; any other non-string value is converted with `String()` first.
 * @returns {string} The normalized, upper-cased text.
 */
const normalizeText = (str) => {
  // Guard like the sibling price normalizers do, so a missing value does not
  // throw a TypeError on `.replace`.
  if (str === null || str === undefined) {
    return '';
  }

  return String(str)
    .replace(/[^a-zA-Z0-9\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim()
    .toUpperCase();
};
|
||||
|
||||
/**
 * Strip everything except digits, dots and hyphens from a price string.
 *
 * @param {*} str - Raw price value. Non-string values are returned untouched.
 * @returns {*} The cleaned string, or the original value when it is not a string.
 */
const normalizePrice = (str) => {
  const isText = typeof str === 'string';
  if (!isText) {
    return str;
  }

  // Runs of characters that cannot be part of a numeric literal.
  const nonNumericRuns = /[^0-9.-]+/g;
  const digitsOnly = str.replace(nonNumericRuns, "");
  return digitsOnly;
};
|
||||
/**
 * Extract a single numeric price from a raw value.
 *
 * Strategy for string input:
 *   1. Remove thousands separators ("1,234.56" -> "1234.56").
 *   2. Prefer explicit decimal numbers ("123.45"); if several are found,
 *      collapse them to one value (see `pickRepresentativePrice`).
 *   3. Otherwise split on `/ | \ , ;` and parse each part.
 *   4. Otherwise strip non-numeric characters and parse what is left.
 *
 * NOTE(review): the decimal path (step 2) does not capture a leading "-" and
 * both multi-value paths keep only values > 0, so "-12.50" yields 12.5 while
 * "-12" yields -12. Confirm with callers whether negative amounts matter.
 *
 * @param {*} str - Raw price. Non-string values are passed through
 *   `parseFloat` without any further cleaning.
 * @returns {number} The parsed price, or 0 when nothing numeric is found.
 */
const normalizePriceFinal = (str) => {
  // Collapse several candidate prices into one: a lone or repeated value wins
  // outright, values within 1% of their mean are averaged, otherwise the
  // first candidate is taken.
  const pickRepresentativePrice = (numbers) => {
    if (numbers.length === 1) {
      return numbers[0];
    }

    // e.g. "47.57.47.57" -> [47.57, 47.57]
    const uniqueNumbers = [...new Set(numbers)];
    if (uniqueNumbers.length === 1) {
      return uniqueNumbers[0];
    }

    const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
    const allClose = numbers.every((num) => Math.abs(num - avg) / avg < 0.01);
    return allClose ? avg : numbers[0];
  };

  // Last-resort parse: drop everything but digits, dots and hyphens.
  const parseLoosePrice = (text) => {
    const parsed = parseFloat(text.replace(/[^0-9.-]+/g, ""));
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  if (typeof str !== 'string') {
    // Numbers come back unchanged; anything unparseable becomes 0.
    const num = parseFloat(str);
    return Number.isNaN(num) ? 0 : num;
  }

  // Remove thousands separators before any matching or splitting. Without
  // this, "1,234.56" parses as 234.56 and "1,234" as 1. A group only counts
  // when it starts a number (not preceded by a digit or dot) and every comma
  // is followed by exactly three digits, so lists like "12.50,13.00" are
  // left for the delimiter split below.
  const text = str.replace(
    /(?<![\d.])\d{1,3}(?:,\d{3})+(?!\d)/g,
    (group) => group.replace(/,/g, "")
  );

  // First, look for explicit decimal numbers (e.g. "123.45").
  const decimalMatches = text.match(/\d+\.\d{1,2}/g);
  if (decimalMatches) {
    const numbers = decimalMatches
      .map((match) => parseFloat(match))
      .filter((n) => !Number.isNaN(n) && n > 0);

    if (numbers.length > 0) {
      return pickRepresentativePrice(numbers);
    }
  }

  // Fallback: split on common delimiters and parse each part.
  const parts = text
    .split(/[\/|\\,;]/)
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  if (parts.length > 1) {
    const numbers = parts
      .map((part) => parseFloat(part.replace(/[^0-9.-]+/g, "")))
      .filter((num) => !Number.isNaN(num) && num > 0);

    // No usable part: fall back to cleaning the whole string.
    return numbers.length === 0
      ? parseLoosePrice(text)
      : pickRepresentativePrice(numbers);
  }

  // Single value or no delimiters: clean normally.
  return parseLoosePrice(text);
};
|
||||
|
||||
// Helper function to calculate Textract OCR confidence (0-100%)
|
||||
const calculateTextractConfidence = (textractLineItem) => {
|
||||
@@ -38,6 +126,11 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if critical normalized labels are present
|
||||
const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost);
|
||||
const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price);
|
||||
const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc);
|
||||
|
||||
// Calculate weighted average, giving more weight to important fields
|
||||
// If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher
|
||||
let totalWeight = 0;
|
||||
@@ -47,18 +140,42 @@ const calculateTextractConfidence = (textractLineItem) => {
|
||||
if (field.confidence && typeof field.confidence === 'number') {
|
||||
// Weight important fields higher
|
||||
let weight = 1;
|
||||
if (key === 'ITEM' || key === 'PRODUCT_CODE') {
|
||||
weight = 2; // Description and part number are most important
|
||||
} else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
|
||||
weight = 1.5; // Price and quantity moderately important
|
||||
if (field.normalizedLabel === standardizedFieldsnames.actual_cost || field.normalizedLabel === standardizedFieldsnames.actual_price) {
|
||||
weight = 4;
|
||||
}
|
||||
else if (field.normalizedLabel === standardizedFieldsnames.part_no || field.normalizedLabel === standardizedFieldsnames.line_desc) {
|
||||
weight = 3.5;
|
||||
}
|
||||
else if (field.normalizedLabel === standardizedFieldsnames.quantity) {
|
||||
weight = 3.5;
|
||||
}
|
||||
// else if (key === 'ITEM' || key === 'PRODUCT_CODE') {
|
||||
// weight = 3; // Description and part number are most important
|
||||
// } else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
|
||||
// weight = 2; // Price and quantity moderately important
|
||||
// }
|
||||
|
||||
weightedSum += field.confidence * weight;
|
||||
totalWeight += weight;
|
||||
}
|
||||
});
|
||||
|
||||
const avgConfidence = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
||||
let avgConfidence = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
||||
|
||||
// Apply penalty if critical normalized labels are missing
|
||||
let missingFieldsPenalty = 1.0;
|
||||
let missingCount = 0;
|
||||
if (!hasActualCost) missingCount++;
|
||||
if (!hasActualPrice) missingCount++;
|
||||
if (!hasLineDesc) missingCount++;
|
||||
|
||||
// Each missing field reduces confidence by 15%
|
||||
if (missingCount > 0) {
|
||||
missingFieldsPenalty = 1.0 - (missingCount * 0.15);
|
||||
}
|
||||
|
||||
avgConfidence = avgConfidence * missingFieldsPenalty;
|
||||
|
||||
return Math.round(avgConfidence * 100) / 100; // Round to 2 decimal places
|
||||
};
|
||||
|
||||
@@ -109,9 +226,9 @@ const calculateOverallConfidence = (ocrConfidence, matchConfidence) => {
|
||||
|
||||
// Overall confidence is affected by both how well Textract read the data
|
||||
// and how well we matched it to existing joblines
|
||||
// Use a weighted average: 40% OCR confidence, 60% match confidence
|
||||
// Match confidence is more important because even perfect OCR is useless without a good match
|
||||
const overall = (ocrConfidence * 0.4) + (matchConfidence * 0.6);
|
||||
// Use a weighted average: 60% OCR confidence, 40% match confidence
|
||||
// OCR confidence is more important because even perfect match is useless without good OCR
|
||||
const overall = (ocrConfidence * 0.6) + (matchConfidence * 0.4);
|
||||
|
||||
return Math.round(overall * 100) / 100;
|
||||
};
|
||||
@@ -147,61 +264,63 @@ const mergeResults = (resultsArray, weights = []) => {
|
||||
.slice(0, 5); // Return top 5 results
|
||||
};
|
||||
|
||||
async function generateBillFormData({ processedData, jobid, bodyshopid, partsorderid, req }) {
|
||||
async function generateBillFormData({ processedData, jobid: jobidFromProps, bodyshopid, partsorderid, req }) {
|
||||
const client = req.userGraphQLClient;
|
||||
|
||||
//TODO: Add in vendor data.
|
||||
let jobid = jobidFromProps;
|
||||
//If no jobid, fetch it, and funnel it back.
|
||||
if (!jobid || jobid === null || jobid === undefined || jobid === "" || jobid === "null" || jobid === "undefined") {
|
||||
const ro_number = processedData.summary?.PO_NUMBER?.value || Object.values(processedData.summary).find(value => value.normalizedLabel === 'ro_number')?.value;
|
||||
if (!ro_number) {
|
||||
throw new Error("Could not find RO number in the extracted data to associate with the bill. Select an RO and try again.");
|
||||
}
|
||||
|
||||
const { jobs } = await client.request(`
|
||||
query QUERY_BILL_OCR_JOB_BY_RO($ro_number: String!) {
|
||||
jobs(where: {ro_number: {_eq: $ro_number}}) {
|
||||
id
|
||||
}
|
||||
}`, { ro_number });
|
||||
|
||||
if (jobs.length === 0) {
|
||||
throw new Error("No job found for the detected RO/PO number.");
|
||||
} else {
|
||||
jobid = jobs[0].id;
|
||||
}
|
||||
}
|
||||
|
||||
const jobData = await client.request(`
|
||||
query QUERY_BILL_OCR_DATA($jobid: uuid!, $partsorderid: uuid!) {
|
||||
vendors{
|
||||
query QUERY_BILL_OCR_DATA($jobid: uuid!) {
|
||||
vendors {
|
||||
id
|
||||
name
|
||||
}
|
||||
jobs_by_pk(id: $jobid) {
|
||||
id
|
||||
bodyshop {
|
||||
id
|
||||
md_responsibility_centers
|
||||
cdk_dealerid
|
||||
pbs_serialnumber
|
||||
rr_dealerid
|
||||
}
|
||||
jobs_by_pk(id: $jobid) {
|
||||
id
|
||||
bodyshop{
|
||||
id
|
||||
md_responsibility_centers
|
||||
cdk_dealerid
|
||||
pbs_serialnumber
|
||||
rr_dealerid
|
||||
}
|
||||
joblines {
|
||||
id
|
||||
line_desc
|
||||
removed
|
||||
act_price
|
||||
db_price
|
||||
oem_partno
|
||||
alt_partno
|
||||
part_type
|
||||
}
|
||||
}
|
||||
parts_orders_by_pk(id: $partsorderid) {
|
||||
id
|
||||
parts_order_lines {
|
||||
id
|
||||
line_desc
|
||||
act_price
|
||||
cost
|
||||
jobline {
|
||||
id
|
||||
line_desc
|
||||
act_price
|
||||
oem_partno
|
||||
alt_partno
|
||||
part_type
|
||||
}
|
||||
}
|
||||
}
|
||||
joblines {
|
||||
id
|
||||
line_desc
|
||||
removed
|
||||
act_price
|
||||
db_price
|
||||
oem_partno
|
||||
alt_partno
|
||||
part_type
|
||||
}
|
||||
}
|
||||
|
||||
`, {
|
||||
jobid, partsorderid // this may fail if null?
|
||||
}
|
||||
`, {
|
||||
jobid, // TODO: Refactor back in parts orders
|
||||
});
|
||||
|
||||
//TODO: Need to find a vendor ID. Create a fuse for it, and fuzzy search for it using the textract vendor info.
|
||||
|
||||
//Create fuses of line descriptions for matching.
|
||||
const jobLineDescFuse = new Fuse(
|
||||
jobData.jobs_by_pk.joblines.map(jl => ({ ...jl, line_desc_normalized: normalizeText(jl.line_desc || ""), oem_partno_normalized: normalizePartNumber(jl.oem_partno || ""), alt_partno_normalized: normalizePartNumber(jl.alt_partno || "") })),
|
||||
@@ -226,7 +345,7 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
|
||||
},
|
||||
{
|
||||
name: 'oem_partno_normalized',
|
||||
weight: 5
|
||||
weight: 6
|
||||
},
|
||||
{
|
||||
name: 'alt_partno_normalized',
|
||||
@@ -238,7 +357,6 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
|
||||
}
|
||||
);
|
||||
const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
|
||||
console.log("*** ~ generateBillFormData ~ joblineMatches:", JSON.stringify(joblineMatches, null, 2));
|
||||
|
||||
const vendorFuse = new Fuse(
|
||||
jobData.vendors,
|
||||
@@ -250,13 +368,13 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
|
||||
}
|
||||
);
|
||||
|
||||
const vendorMatches = vendorFuse.search(processedData.summary?.NAME?.value || processedData.summary?.VENDOR_NAME?.value);
|
||||
console.log("*** ~ generateBillFormData ~ vendorMatches:", JSON.stringify(vendorMatches, null, 2));
|
||||
const vendorMatches = vendorFuse.search(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value);
|
||||
|
||||
let vendorid;
|
||||
if (vendorMatches.length > 0) {
|
||||
vendorid = vendorMatches[0].item.id;
|
||||
}
|
||||
const { jobs_by_pk: job, parts_orders_by_pk: partsOrder } = jobData;
|
||||
const { jobs_by_pk: job } = jobData;
|
||||
if (!job) {
|
||||
throw new Error('Job not found for bill form data generation.');
|
||||
}
|
||||
@@ -344,9 +462,9 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
|
||||
: null
|
||||
: responsibilityCenters.defaults &&
|
||||
(responsibilityCenters.defaults.costs[matchToUse?.item?.part_type] || null)
|
||||
: null, //Needs to get set by client side.
|
||||
"applicable_taxes": { //Not sure what to do with these?
|
||||
"federal": false,
|
||||
: null,
|
||||
"applicable_taxes": {
|
||||
"federal": InstanceManager({ imex: true, rome: false }),
|
||||
"state": false,
|
||||
"local": false
|
||||
},
|
||||
@@ -551,43 +669,43 @@ function joblineFuzzySearch({ fuseToSearch, processedData }) {
|
||||
|
||||
})
|
||||
|
||||
// Output search statistics table
|
||||
console.log('\n═══════════════════════════════════════════════════════════════════════');
|
||||
console.log(' FUSE.JS SEARCH STATISTICS');
|
||||
console.log('═══════════════════════════════════════════════════════════════════════\n');
|
||||
// // Output search statistics table
|
||||
// console.log('\n═══════════════════════════════════════════════════════════════════════');
|
||||
// console.log(' FUSE.JS SEARCH STATISTICS');
|
||||
// console.log('═══════════════════════════════════════════════════════════════════════\n');
|
||||
|
||||
searchStats.forEach(lineStat => {
|
||||
console.log(`📄 Line Item #${lineStat.lineNumber}:`);
|
||||
console.log('─'.repeat(75));
|
||||
// searchStats.forEach(lineStat => {
|
||||
// console.log(`📄 Line Item #${lineStat.lineNumber}:`);
|
||||
// console.log('─'.repeat(75));
|
||||
|
||||
if (lineStat.searches.length > 0) {
|
||||
const tableData = lineStat.searches.map(search => ({
|
||||
'Search Type': search.type,
|
||||
'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''),
|
||||
'Results': search.results
|
||||
}));
|
||||
console.table(tableData);
|
||||
} else {
|
||||
console.log(' No searches performed for this line item.\n');
|
||||
}
|
||||
});
|
||||
// if (lineStat.searches.length > 0) {
|
||||
// const tableData = lineStat.searches.map(search => ({
|
||||
// 'Search Type': search.type,
|
||||
// 'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''),
|
||||
// 'Results': search.results
|
||||
// }));
|
||||
// console.table(tableData);
|
||||
// } else {
|
||||
// console.log(' No searches performed for this line item.\n');
|
||||
// }
|
||||
// });
|
||||
|
||||
// Summary statistics
|
||||
const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0);
|
||||
const totalResults = searchStats.reduce((sum, stat) =>
|
||||
sum + stat.searches.reduce((s, search) => s + search.results, 0), 0);
|
||||
const avgResultsPerSearch = totalSearches > 0 ? (totalResults / totalSearches).toFixed(2) : 0;
|
||||
// // Summary statistics
|
||||
// const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0);
|
||||
// const totalResults = searchStats.reduce((sum, stat) =>
|
||||
// sum + stat.searches.reduce((s, search) => s + search.results, 0), 0);
|
||||
// const avgResultsPerSearch = totalSearches > 0 ? (totalResults / totalSearches).toFixed(2) : 0;
|
||||
|
||||
console.log('═══════════════════════════════════════════════════════════════════════');
|
||||
console.log(' SUMMARY');
|
||||
console.log('═══════════════════════════════════════════════════════════════════════');
|
||||
console.table({
|
||||
'Total Line Items': processedData.lineItems.length,
|
||||
'Total Searches Performed': totalSearches,
|
||||
'Total Results Found': totalResults,
|
||||
'Average Results per Search': avgResultsPerSearch
|
||||
});
|
||||
console.log('═══════════════════════════════════════════════════════════════════════\n');
|
||||
// console.log('═══════════════════════════════════════════════════════════════════════');
|
||||
// console.log(' SUMMARY');
|
||||
// console.log('═══════════════════════════════════════════════════════════════════════');
|
||||
// console.table({
|
||||
// 'Total Line Items': processedData.lineItems.length,
|
||||
// 'Total Searches Performed': totalSearches,
|
||||
// 'Total Results Found': totalResults,
|
||||
// 'Average Results per Search': avgResultsPerSearch
|
||||
// });
|
||||
// console.log('═══════════════════════════════════════════════════════════════════════\n');
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user