// File: bodyshop/server/ai/bill-ocr/bill-ocr-generator.js
// 790 lines, 30 KiB, JavaScript

const client = require("../../graphql-client/graphql-client").client;
const Fuse = require('fuse.js');
const { has } = require("lodash");
// Fractional tolerance (0.5 = 50%) applied around a matched jobline's act_price when
// sanity-checking prices and costs read from the OCR line item in generateBillFormData.
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
// Helper function to normalize fields
/**
 * Canonicalizes a part number for comparison: every character that is not an
 * ASCII letter or digit is dropped, then the remainder is upper-cased.
 * @param {string} str - Raw part number text.
 * @returns {string} Upper-cased, alphanumeric-only part number.
 */
const normalizePartNumber = (str) => {
  const alphanumericOnly = str.replace(/[^a-zA-Z0-9]/g, '');
  return alphanumericOnly.toUpperCase();
};
/**
 * Canonicalizes free text for comparison: removes everything except ASCII
 * letters, digits and whitespace, collapses whitespace runs to a single space,
 * trims both ends and upper-cases the result.
 * @param {string} str - Raw text.
 * @returns {string} Normalized, upper-cased text.
 */
const normalizeText = (str) => {
  const withoutSymbols = str.replace(/[^a-zA-Z0-9\s]/g, '');
  const singleSpaced = withoutSymbols.replace(/\s+/g, ' ');
  return singleSpaced.trim().toUpperCase();
};
/**
 * Strips everything except digits, dots and minus signs from a price string.
 * Non-string input (numbers, null, undefined, ...) is handed back untouched.
 * @param {*} str - Price text such as "$4,642.91", or any non-string value.
 * @returns {*} Cleaned numeric string, or the original value when it is not a string.
 */
const normalizePrice = (str) => {
  const isText = typeof str === 'string';
  return isText ? str.replace(/[^0-9.-]+/g, "") : str;
};
// Helper function to merge and deduplicate results with weighted scoring
/**
 * Combines several Fuse-style result lists into one ranked, de-duplicated list.
 *
 * Each hit's score is multiplied by the weight of the list it came from (a
 * missing or falsy weight counts as 1). Hits sharing the same `item.id` are
 * folded together: the lowest weighted score is kept and `count` records how
 * many times the item was seen.
 *
 * @param {Array<Array<{item: {id: *}, score: number}>>} resultsArray - Result lists to merge.
 * @param {number[]} [weights=[]] - Score multiplier per list, aligned by index.
 * @returns {Array<{item: object, score: number, count: number}>} At most five
 *   entries, most frequently seen first, ties broken by lowest score.
 */
const mergeResults = (resultsArray, weights = []) => {
  const bestById = new Map();

  resultsArray.forEach((hits, listIndex) => {
    const multiplier = weights[listIndex] || 1;

    hits.forEach((hit) => {
      const key = hit.item.id;
      const scaled = hit.score * multiplier;
      const seen = bestById.get(key);

      if (seen === undefined) {
        bestById.set(key, { item: hit.item, score: scaled, count: 1 });
        return;
      }

      // Fuse.js scores are "lower is better", so keep the smallest one seen.
      seen.score = Math.min(seen.score, scaled);
      seen.count += 1;
    });
  });

  const ranked = [...bestById.values()];
  // Items found by more searches come first; among equals the lower score wins.
  ranked.sort((left, right) => right.count - left.count || left.score - right.score);
  return ranked.slice(0, 5);
};
/**
 * Builds the pre-filled payload for the bill posting screen from processed OCR
 * invoice data.
 *
 * Steps: fetch the job's joblines (the same query also returns vendors and the
 * parts order, which are not consumed yet), fuzzy-match every OCR line item
 * against the joblines, then derive a price and a cost for each invoice line.
 *
 * @param {object} params
 * @param {object} params.processedData - Processed OCR output; `summary` and `lineItems` are read.
 * @param {string} params.jobid - Job id bound to `$jobid` in the query.
 * @param {string} params.bodyshopid - Accepted for interface compatibility; not read here.
 * @param {string} params.partsorderid - Parts order id bound to `$partsorderid` in the query.
 * @returns {Promise<object>} Form data with `jobid`, `vendorid`, `invoice_number`, `date`,
 *   `is_credit_memo`, `total` and one `billlines` entry per OCR line item.
 * @throws {Error} When no job exists for `jobid`.
 */
async function generateBillFormData({ processedData, jobid, bodyshopid, partsorderid }) {
  //TODO: Should this be using the client auth token to limit results? Most likely.
  //TODO: Add in vendor data.
  // NOTE(review): `$partsorderid` is declared `uuid!`, so a null id is presumably
  // rejected by the server — confirm whether bills without a parts order must work.
  const jobData = await client.request(`
    query QUERY_BILL_OCR_DATA($jobid: uuid!, $partsorderid: uuid!) {
      vendors{
        id
        name
      }
      jobs_by_pk(id: $jobid) {
        id
        joblines {
          id
          line_desc
          removed
          act_price
          db_price
          oem_partno
          alt_partno
        }
      }
      parts_orders_by_pk(id: $partsorderid) {
        id
        parts_order_lines {
          id
          line_desc
          act_price
          cost
          jobline {
            id
            line_desc
            act_price
            oem_partno
            alt_partno
          }
        }
      }
    }
  `, {
    jobid, partsorderid
  });

  // Validate before touching job.joblines so a missing job raises this error
  // rather than a TypeError.
  const { jobs_by_pk: job } = jobData;
  if (!job) {
    throw new Error('Job not found for bill form data generation.');
  }

  //TODO: Need to find a vendor ID. Create a fuse for it, and fuzzy search for it using the textract vendor info.
  //Create fuses of line descriptions for matching.
  const jobLineDescFuse = new Fuse(
    job.joblines,
    {
      keys: [
        { name: 'line_desc', weight: 4 },
        { name: 'oem_partno', weight: 5 },
        { name: 'alt_partno', weight: 3 },
        { name: 'act_price', weight: 1 },
      ],
      threshold: 0.4, //Adjust as needed for matching sensitivity,
      includeScore: true,
    }
  );
  const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
  console.log("*** ~ generateBillFormData ~ joblineMatches:", joblineMatches);

  // True when `amount` lies outside reference ± PRICE_PERCENT_MARGIN_TOLERANCE.
  const isOutsideTolerance = (amount, reference) =>
    amount < reference * (1 - PRICE_PERCENT_MARGIN_TOLERANCE) ||
    amount > reference * (1 + PRICE_PERCENT_MARGIN_TOLERANCE);

  // Converts one fuzzy-search entry into a bill line for the form.
  const toBillLine = ({ matches, textractLineItem }) => {
    //Matches should be prioritized, take the first one. Unmatched lines have none.
    const matchToUse = matches.length > 0 ? matches[0] : null;

    //TODO: Should be using the textract if there is an exact match on the normalized label.
    const fieldKeys = Object.keys(textractLineItem);
    const priceKey = fieldKeys.find(key => textractLineItem[key].normalizedLabel === 'actual_price');
    const costKey = fieldKeys.find(key => textractLineItem[key].normalizedLabel === 'actual_cost');
    let actualPrice = priceKey ? textractLineItem[priceKey].value : undefined;
    let actualCost = costKey ? textractLineItem[costKey].value : undefined;

    if (!priceKey || !costKey) {
      // No labelled price and/or cost: fall back to the "$"-prefixed values on the line.
      // Sorted descending, the largest is taken as the price and the next as the cost.
      const currencyValues = fieldKeys
        .map(key => textractLineItem[key].value)
        // typeof guard: a non-string value must not crash the whole bill.
        .filter(value => typeof value === 'string' && value.startsWith('$'))
        .map(value => parseFloat(value.replace(/[^0-9.-]/g, '')) || 0)
        .sort((a, b) => b - a);

      if (!actualPrice) actualPrice = currencyValues.length > 0 ? currencyValues[0] : 0;
      if (!actualCost) actualCost = currencyValues.length > 1 ? currencyValues[1] : 0;

      // Sanity-check guessed values against the matched jobline price, but only when
      // the jobline actually has one: comparing against 0 would always fail and
      // replace a perfectly good OCR price with 0.
      const joblinePrice = parseFloat(matchToUse?.item?.act_price) || 0;
      if (joblinePrice > 0) {
        if (!priceKey && actualPrice > 0 && isOutsideTolerance(actualPrice, joblinePrice)) {
          actualPrice = joblinePrice; //Set to the jobline as a fallback.
        }
        if (!costKey && actualCost > 0 && isOutsideTolerance(actualCost, joblinePrice)) {
          actualCost = null; //Blank it out if it's not likely.
        }
      }
    }

    return {
      // Optional chaining on matchToUse: lines without a jobline match must still be emitted.
      "line_desc": matchToUse?.item?.line_desc,
      "quantity": textractLineItem.QUANTITY?.value, // convert to integer?
      "actual_price": normalizePrice(actualPrice),
      "actual_cost": normalizePrice(actualCost),
      "cost_center": "SETBYCLIENT", //Needs to get set by client side.
      "applicable_taxes": { //Not sure what to do with these?
        "federal": false,
        "state": false,
        "local": false
      },
      "joblineid": matchToUse?.item?.id || "noline",
    };
  };

  //TODO: How do we handle freight lines and core charges?
  //Create the form data structure for the bill posting screen.
  return {
    "jobid": jobid,
    "vendorid": null,
    "invoice_number": processedData.summary?.INVOICE_RECEIPT_ID?.value,
    "date": processedData.summary?.INVOICE_RECEIPT_DATE?.value,
    "is_credit_memo": false,
    "total": normalizePrice(processedData.summary?.INVOICE_TOTAL?.value || processedData.summary?.TOTAL?.value),
    "billlines": joblineMatches.map(toBillLine),
  };
}
/**
 * Fuzzy-matches every OCR line item against the job's lines.
 *
 * For each line item the ITEM text, PRODUCT_CODE, PRICE and UNIT_PRICE values
 * are searched separately (each in a few normalized variants), and the hits are
 * combined into one weighted ranking per line.
 *
 * @param {object} params
 * @param {{search: function(string): Array}} params.fuseToSearch - Search index; only
 *   `.search(query)` is called and each hit is expected to carry `item` and `score`.
 * @param {object} params.processedData - Processed OCR output; `lineItems` is read and
 *   a missing list is treated as empty.
 * @returns {Array<{matches: object[], textractLineItem: object, hasMatch: boolean}>}
 *   One entry per line item, in input order. `matches` holds at most five candidates,
 *   best first, and is empty when nothing matched.
 */
function joblineFuzzySearch({ fuseToSearch, processedData }) {
  // Size of every ranked list; mergeResults returns at most this many hits, which
  // keeps the position bonus below strictly positive.
  const RESULT_LIMIT = 5;

  // Weight different field types differently
  const fieldWeights = {
    productCode: 5.0, // Most important - part numbers should match
    item: 3.0, // Second most important - description
    price: 1.0, // Less important - prices can vary
    unitPrice: 0.8 // Least important - similar to price
  };

  // Searches a multi-word description: whole string, each word of 3+ chars, and with spaces removed.
  const searchItemText = (itemValue) => {
    if (!itemValue) return [];
    const normalized = normalizeText(itemValue);
    const fullSearch = fuseToSearch.search(normalized);
    const words = normalized.split(' ').filter(w => w.length >= 3);
    const wordSearches = words.map(word => fuseToSearch.search(word));
    const noSpaceSearch = fuseToSearch.search(normalized.replace(/\s+/g, ''));
    return mergeResults(
      [fullSearch, ...wordSearches, noSpaceSearch],
      [1.0, ...words.map(() => 1.5), 1.2] // Full search best, individual words penalized slightly
    );
  };

  // Searches a part number: fully normalized, whitespace-stripped, and with dashes kept.
  const searchProductCode = (productCode) => {
    if (!productCode) return [];
    const normalizedSearch = fuseToSearch.search(normalizePartNumber(productCode));
    const minimalSearch = fuseToSearch.search(productCode.replace(/\s+/g, '').toUpperCase());
    const dashSearch = fuseToSearch.search(productCode.replace(/[^a-zA-Z0-9-]/g, '').toUpperCase());
    return mergeResults(
      [normalizedSearch, minimalSearch, dashSearch],
      [1.0, 1.1, 1.2] // Prefer fully normalized
    );
  };

  // Searches a price as written and, when it parses, formatted to two decimals.
  // Shared by PRICE and UNIT_PRICE.
  const searchPrice = (rawPrice) => {
    if (!rawPrice) return [];
    const price = normalizePrice(rawPrice);
    const exactSearch = fuseToSearch.search(price);
    const priceFloat = parseFloat(price);
    if (Number.isNaN(priceFloat)) {
      // Still route through mergeResults so the list is capped and shaped like the others.
      return mergeResults([exactSearch], [1.0]);
    }
    const formattedSearch = fuseToSearch.search(priceFloat.toFixed(2));
    return mergeResults([exactSearch, formattedSearch], [1.0, 1.1]);
  };

  return (processedData.lineItems ?? []).map((lineItem) => {
    const resultsByField = [
      { field: 'productCode', results: searchProductCode(lineItem.PRODUCT_CODE?.value) },
      { field: 'item', results: searchItemText(lineItem.ITEM?.value) },
      { field: 'price', results: searchPrice(lineItem.PRICE?.value) },
      { field: 'unitPrice', results: searchPrice(lineItem.UNIT_PRICE?.value) },
    ];

    //Merge them all together and sort by the highest scores.
    const combinedScoreMap = new Map();
    for (const { field, results } of resultsByField) {
      const weight = fieldWeights[field];
      results.forEach((result, index) => {
        // Position bonus (first result is better than fifth)
        const positionBonus = (RESULT_LIMIT - index) / RESULT_LIMIT;
        // Lower score is better in Fuse.js, so invert it and apply weights
        const contribution = (1 - result.score) * weight * positionBonus;
        const hitCount = result.count || 1;
        const existing = combinedScoreMap.get(result.item.id);
        if (existing) {
          existing.score += contribution;
          existing.fieldMatches.push(field);
          existing.matchCount += hitCount;
        } else {
          combinedScoreMap.set(result.item.id, {
            item: result.item,
            score: contribution,
            fieldMatches: [field],
            matchCount: hitCount
          });
        }
      });
    }

    const finalMatches = [...combinedScoreMap.values()]
      .map((entry) => {
        // Candidates without a positive act_price are halved in the ranking.
        const hasPriceData = parseFloat(entry.item.act_price) > 0;
        const priceDataPenalty = hasPriceData ? 1.0 : 0.5;
        return {
          ...entry,
          // Boost score for items that matched in multiple fields, penalize for missing price
          finalScore: entry.score * (1 + (entry.fieldMatches.length * 0.2)) * priceDataPenalty,
          hasPriceData
        };
      })
      .sort((a, b) => b.finalScore - a.finalScore)
      .slice(0, RESULT_LIMIT);

    // Always return the textract line item, even if no matches found.
    // This ensures all invoice lines are processed.
    return {
      matches: finalMatches,
      textractLineItem: lineItem,
      hasMatch: finalMatches.length > 0
    };
  });
}
// Public surface of this module: the bill form builder is the only export.
module.exports = { generateBillFormData };
// Sample OCR payload for a parts invoice. This constant is not referenced by any
// code visible in this file and is not included in module.exports.
// Shape: `result.summary` maps field names to { value, label, normalizedLabel, confidence };
// `result.lineItems` is an array of objects using the same per-field structure.
// NOTE(review): `result` looks like the `processedData` argument that
// generateBillFormData reads (`summary` and `lineItems`) — presumably a captured
// fixture kept for manual testing; confirm before relying on it or removing it.
const processedDataFromCoache = {
"result": {
"summary": {
"ADDRESS": {
"value": "Capilano Audi\n813 Automall Drive, North Vancouver, BC V7P\n3R8, CA",
"label": "",
"normalizedLabel": "",
"confidence": 99.5746078491211
},
"STREET": {
"value": "813 Automall Drive,",
"label": "",
"normalizedLabel": "",
"confidence": 99.95982360839844
},
"CITY": {
"value": "North Vancouver,",
"label": "",
"normalizedLabel": "",
"confidence": 99.6387939453125
},
"STATE": {
"value": "CA",
"label": "",
"normalizedLabel": "",
"confidence": 90.67389678955078
},
"ZIP_CODE": {
"value": "3R8,",
"label": "",
"normalizedLabel": "",
"confidence": 99.11088562011719
},
"COUNTRY": {
"value": "Canada,",
"label": "",
"normalizedLabel": "",
"confidence": 99.96498107910156
},
"ADDRESS_BLOCK": {
"value": "813 Automall Drive, North Vancouver, BC V7P\n3R8, CA",
"label": "",
"normalizedLabel": "",
"confidence": 97.51412200927734
},
"NAME": {
"value": "Tekion Corp",
"label": "",
"normalizedLabel": "",
"confidence": 85.7325439453125
},
"DISCOUNT": {
"value": "$0.00",
"label": "Discount",
"normalizedLabel": "UNKNOWN_discount",
"confidence": 99.7116470336914
},
"INVOICE_RECEIPT_DATE": {
"value": "Jan 13 2026",
"label": "",
"normalizedLabel": "",
"confidence": 84.63554382324219
},
"INVOICE_RECEIPT_ID": {
"value": "72821",
"label": "Parts Invoice",
"normalizedLabel": "UNKNOWN_parts_invoice",
"confidence": 98.97218322753906
},
"TAX_PAYER_ID": {
"value": "713564805RT000",
"label": "G.S.T.#",
"normalizedLabel": "UNKNOWN_gst",
"confidence": 93.07450866699219
},
"VENDOR_VAT_NUMBER": {
"value": "713564805RT000",
"label": "G.S.T.#",
"normalizedLabel": "UNKNOWN_gst",
"confidence": 93.07450866699219
},
"PO_NUMBER": {
"value": "58117",
"label": "Customer PO No :",
"normalizedLabel": "UNKNOWN_customer_po_no_",
"confidence": 98.43647003173828
},
"RECEIVER_ADDRESS": {
"value": "1172 W 3RD ST\nNORTH VANCOUVER, BC\nCanada, V7P 1E6",
"label": "Billing Address",
"normalizedLabel": "UNKNOWN_billing_address",
"confidence": 99.65380859375
},
"RECEIVER_NAME": {
"value": "6992 COACHE COLLISION LTD.\n-",
"label": "",
"normalizedLabel": "",
"confidence": 76.0853500366211
},
"RECEIVER_PHONE": {
"value": "(604) 987 2211",
"label": "",
"normalizedLabel": "",
"confidence": 70.94715118408203
},
"SUBTOTAL": {
"value": "$4,421.82",
"label": "Sub Total",
"normalizedLabel": "UNKNOWN_sub_total",
"confidence": 99.92008972167969
},
"TAX": {
"value": "$221.09",
"label": "GST",
"normalizedLabel": "UNKNOWN_gst",
"confidence": 99.90827178955078
},
"TOTAL": {
"value": "$4,642.91",
"label": "Total",
"normalizedLabel": "UNKNOWN_total",
"confidence": 99.9670639038086
},
"VENDOR_ADDRESS": {
"value": "Capilano Audi\n813 Automall Drive, North Vancouver, BC V7P\n3R8, CA",
"label": "",
"normalizedLabel": "",
"confidence": 99.5746078491211
},
"VENDOR_NAME": {
"value": "Tekion Corp",
"label": "",
"normalizedLabel": "",
"confidence": 85.7325439453125
},
"VENDOR_PHONE": {
"value": "(604) 985-0693,",
"label": "Tel:",
"normalizedLabel": "UNKNOWN_tel",
"confidence": 92.17896270751953
}
},
"lineItems": [
{
"PRODUCT_CODE": {
"value": "80A-807-107-G-GRU",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 60.69740676879883
},
"ITEM": {
"value": "80A-807-107-G-GRU COVER\n-",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 80.92524719238281
},
"PRICE": {
"value": "$914.16",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.99465942382812
},
"QUANTITY": {
"value": "1",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 98.02381134033203
},
"UNIT_PRICE": {
"value": "$914.16",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 90.97144317626953
},
"EXPENSE_ROW": {
"value": "price\n1 80A-807-107-G-GRU COVER 1 SP-ORD $1,172.00 $914.16 $914.16\n-\nchange",
"label": "",
"normalizedLabel": "",
"confidence": 95.68555450439453
}
},
{
"OTHER": {
"value": "2",
"label": "1",
"normalizedLabel": "UNKNOWN_1",
"confidence": 75.68085479736328
},
"PRODUCT_CODE": {
"value": "80A-807-647-C-9B9",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 63.68674087524414
},
"ITEM": {
"value": "80A-807-647-C-9B9 GRILLE\n-",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 82.46648406982422
},
"PRICE": {
"value": "$191.88",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.9972915649414
},
"QUANTITY": {
"value": "1",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.20547485351562
},
"UNIT_PRICE": {
"value": "$191.88",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 93.8568344116211
},
"OTHER_2": {
"value": "SP-ORD",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 90.64925384521484
},
"OTHER_3": {
"value": "$246.00",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 90.83607482910156
},
"EXPENSE_ROW": {
"value": "2 80A-807-647-C-9B9 GRILLE 1 SP-ORD $246.00 $191.88 $191.88\n-",
"label": "",
"normalizedLabel": "",
"confidence": 95.46671295166016
}
},
{
"OTHER": {
"value": "3",
"label": "1",
"normalizedLabel": "UNKNOWN_1",
"confidence": 73.59772491455078
},
"PRODUCT_CODE": {
"value": "80A-807-661-A-GRU",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 73.04312133789062
},
"ITEM": {
"value": "80A-807-661-A-GRU CONN PIECE\n-",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 84.59918212890625
},
"PRICE": {
"value": "$223.08",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.99654388427734
},
"QUANTITY": {
"value": "1",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.30966186523438
},
"UNIT_PRICE": {
"value": "$223.08",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 91.8134765625
},
"OTHER_2": {
"value": "SP-ORD",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 88.12962341308594
},
"OTHER_3": {
"value": "$286.00",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 88.31619262695312
},
"EXPENSE_ROW": {
"value": "3 80A-807-661-A-GRU CONN PIECE 1 SP-ORD $286.00 $223.08 $223.08\n-",
"label": "",
"normalizedLabel": "",
"confidence": 95.99571228027344
}
},
{
"OTHER": {
"value": "4",
"label": "1",
"normalizedLabel": "UNKNOWN_1",
"confidence": 74.5914306640625
},
"PRODUCT_CODE": {
"value": "80A-853-765--3Q7",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 79.21212005615234
},
"ITEM": {
"value": "80A-853-765--3Q7 MOLDING\n-",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 75.85474395751953
},
"PRICE": {
"value": "$994.50",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.99325561523438
},
"QUANTITY": {
"value": "1",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 98.7352294921875
},
"UNIT_PRICE": {
"value": "$994.50",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 96.28019714355469
},
"OTHER_2": {
"value": "SP-ORD",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 89.3599853515625
},
"OTHER_3": {
"value": "$1,275.00",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 89.5396499633789
},
"EXPENSE_ROW": {
"value": "4 80A-853-765--3Q7 MOLDING 1 SP-ORD $1,275.00 $994.50 $994.50\n-",
"label": "",
"normalizedLabel": "",
"confidence": 96.20103454589844
}
},
{
"OTHER": {
"value": "5",
"label": "1",
"normalizedLabel": "UNKNOWN_1",
"confidence": 75.91088104248047
},
"ITEM": {
"value": "GRILLE",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 84.18718719482422
},
"PRICE": {
"value": "$2,098.20",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.97396087646484
},
"QUANTITY": {
"value": "1",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 99.32916259765625
},
"UNIT_PRICE": {
"value": "$2,098.20",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 98.04034423828125
},
"PRODUCT_CODE": {
"value": "80A-853-651-L-RP5",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 86.12691497802734
},
"OTHER_2": {
"value": "SP-ORD",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 90.96830749511719
},
"OTHER_3": {
"value": "$2,690.00",
"label": "38",
"normalizedLabel": "UNKNOWN_38",
"confidence": 91.14651489257812
},
"EXPENSE_ROW": {
"value": "5 80A-853-651-L-RP5 GRILLE 1 SP-ORD $2,690.00 $2,098.20 $2,098.20\n-",
"label": "",
"normalizedLabel": "",
"confidence": 93.76665496826172
}
}
],
}
}