const Fuse = require('fuse.js');
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
const InstanceManager = require("../../utils/instanceMgr").default;

// Relative tolerance (0.5 = +/-50%) applied when comparing a guessed price/cost against the
// matched jobline price. Used to make sure prices and costs are likely.
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5;
// Relative tolerance (0.03 = +/-3%) applied when checking whether an amount divided by the
// quantity lines up with the matched jobline price, i.e. whether the amount was an extended total.
const PRICE_QUANTITY_MARGIN_TOLERANCE = 0.03;

// Helper functions to normalize fields

/**
 * Reduces a part number to its upper-cased alphanumeric characters.
 *
 * @param {*} str - Raw part number. `null`/`undefined` are treated as an empty string and
 *   other non-string values are stringified, so the helper never throws on odd input.
 * @returns {string} Upper-cased string containing only A-Z and 0-9.
 */
const normalizePartNumber = (str) => {
  return String(str ?? '').replace(/[^a-zA-Z0-9]/g, '').toUpperCase();
};

/**
 * Normalizes free text for matching: strips everything except letters, digits and whitespace,
 * collapses whitespace runs to a single space, trims, and upper-cases.
 *
 * @param {*} str - Raw text. `null`/`undefined` are treated as an empty string and other
 *   non-string values are stringified.
 * @returns {string} Normalized text.
 */
const normalizeText = (str) => {
  return String(str ?? '')
    .replace(/[^a-zA-Z0-9\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim()
    .toUpperCase();
};

/**
 * Cleans a price string down to digits, '.' and '-'.
 *
 * Comma handling:
 * - If the last comma comes after the last '.' (or there is no '.') and is followed by exactly
 *   1-2 digits, it is treated as a decimal comma: "292,37" => "292.37" and
 *   "1.234,56" => "1234.56" (the dots before it are grouping separators).
 * - Otherwise commas are treated as thousands separators (or noise) and dropped:
 *   "1,234" => "1234", "1,234.56" => "1234.56".
 *
 * @param {*} str - Raw price. Non-string values are returned unchanged.
 * @returns {*} The cleaned numeric string, or the input itself when it is not a string.
 */
const normalizePrice = (str) => {
  if (typeof str !== 'string') return str;

  let value = str.trim();
  const lastCommaIndex = value.lastIndexOf(',');

  if (lastCommaIndex !== -1) {
    const lastDotIndex = value.lastIndexOf('.');
    const decimalSuffix = value.slice(lastCommaIndex + 1).trim();
    // A comma that precedes a '.' ("1,234.56") can only be a grouping separator.
    const commaIsDecimalMark = lastCommaIndex > lastDotIndex && /^\d{1,2}$/.test(decimalSuffix);

    if (commaIsDecimalMark) {
      // Everything before the decimal comma may contain grouping dots/commas; drop them.
      const integerPart = value.slice(0, lastCommaIndex).replace(/[.,]/g, '');
      value = `${integerPart}.${decimalSuffix}`;
    } else {
      value = value.replace(/,/g, '');
    }
  }

  return value.replace(/[^0-9.-]+/g, "");
};

/**
 * Rounds a number to the nearest multiple of `increment`.
 *
 * @param {number} value - Value to round.
 * @param {number} increment - Positive step size (e.g. 0.005).
 * @returns {*} The rounded number. `value` is returned unchanged when either argument is not a
 *   finite number or `increment` is not positive.
 */
const roundToIncrement = (value, increment) => {
  const isUsableNumber = (candidate) => typeof candidate === 'number' && Number.isFinite(candidate);
  if (!isUsableNumber(value) || !isUsableNumber(increment) || increment <= 0) {
    return value;
  }

  const rounded = Math.round((value + Number.EPSILON) / increment) * increment;
  // Prevent float artifacts (e.g. 0.20500000000000002) by trimming to the increment's precision.
  const decimals = Math.max(0, Math.ceil(-Math.log10(increment)));
  return parseFloat(rounded.toFixed(decimals));
};

//More complex function. Not necessary at the moment, keeping for reference.
// const normalizePriceFinal = (str) => { // if (typeof str !== 'string') { // // If it's already a number, format to 2 decimals // const num = parseFloat(str); // return isNaN(num) ? 0 : num; // } // // First, try to extract valid decimal number patterns (e.g., "123.45") // const decimalPattern = /\d+\.\d{1,2}/g; // const decimalMatches = str.match(decimalPattern); // if (decimalMatches && decimalMatches.length > 0) { // // Found valid decimal number(s) // const numbers = decimalMatches.map(m => parseFloat(m)).filter(n => !isNaN(n) && n > 0); // if (numbers.length === 1) { // return numbers[0]; // } // if (numbers.length > 1) { // // Check if all numbers are the same (e.g., "47.57.47.57" -> [47.57, 47.57]) // const uniqueNumbers = [...new Set(numbers)]; // if (uniqueNumbers.length === 1) { // return uniqueNumbers[0]; // } // // Check if numbers are very close (within 1% tolerance) // const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length; // const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01); // if (allClose) { // return avg; // } // // Return the first number (most likely correct) // return numbers[0]; // } // } // // Fallback: Split on common delimiters and extract all potential numbers // const parts = str.split(/[\/|\\,;]/).map(part => part.trim()).filter(part => part.length > 0); // if (parts.length > 1) { // // Multiple values detected - extract and parse all valid numbers // const numbers = parts // .map(part => { // const cleaned = part.replace(/[^0-9.-]+/g, ""); // const parsed = parseFloat(cleaned); // return isNaN(parsed) ? null : parsed; // }) // .filter(num => num !== null && num > 0); // if (numbers.length === 0) { // // No valid numbers found, try fallback to basic cleaning // const cleaned = str.replace(/[^0-9.-]+/g, ""); // const parsed = parseFloat(cleaned); // return isNaN(parsed) ? 
0 : parsed; // } // if (numbers.length === 1) { // return numbers[0]; // } // // Multiple valid numbers // const uniqueNumbers = [...new Set(numbers)]; // if (uniqueNumbers.length === 1) { // return uniqueNumbers[0]; // } // // Check if numbers are very close (within 1% tolerance) // const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length; // const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01); // if (allClose) { // return avg; // } // // Return the first valid number // return numbers[0]; // } // // Single value or no delimiters, clean normally // const cleaned = str.replace(/[^0-9.-]+/g, ""); // const parsed = parseFloat(cleaned); // return isNaN(parsed) ? 0 : parsed; // }; // Helper function to calculate Textract OCR confidence (0-100%) const calculateTextractConfidence = (textractLineItem) => { if (!textractLineItem || Object.keys(textractLineItem).length === 0) { return 0; } const confidenceValues = []; // Collect confidence from all fields in the line item Object.values(textractLineItem).forEach(field => { if (field.confidence && typeof field.confidence === 'number') { confidenceValues.push(field.confidence); } }); if (confidenceValues.length === 0) { return 0; } // Check if critical normalized labels are present const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost); const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price); const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc); const hasQuantity = textractLineItem?.QUANTITY?.value; //We don't normalize quantity, we just use what textract gives us. 
// Calculate weighted average, giving more weight to important fields // If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher let totalWeight = 0; let weightedSum = 0; Object.entries(textractLineItem).forEach(([key, field]) => { if (field.confidence && typeof field.confidence === 'number') { // Weight important fields higher let weight = 1; if (field.normalizedLabel === standardizedFieldsnames.actual_cost || field.normalizedLabel === standardizedFieldsnames.actual_price) { weight = 4; } else if (field.normalizedLabel === standardizedFieldsnames.part_no || field.normalizedLabel === standardizedFieldsnames.line_desc) { weight = 3.5; } else if (field.normalizedLabel === standardizedFieldsnames.quantity) { weight = 3.5; } // We generally ignore the key from textract. Keeping for future reference. // else if (key === 'ITEM' || key === 'PRODUCT_CODE') { // weight = 3; // Description and part number are most important // } else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') { // weight = 2; // Price and quantity moderately important // } weightedSum += field.confidence * weight; totalWeight += weight; } }); let avgConfidence = totalWeight > 0 ? 
weightedSum / totalWeight : 0; // Apply penalty if critical normalized labels are missing let missingFieldsPenalty = 1.0; let missingCount = 0; if (!hasActualCost) missingCount++; if (!hasActualPrice) missingCount++; if (!hasLineDesc) missingCount++; if (!hasQuantity) missingCount++; // Each missing field reduces confidence by 20% if (missingCount > 0) { missingFieldsPenalty = 1.0 - (missingCount * 0.20); } avgConfidence = avgConfidence * missingFieldsPenalty; return Math.round(avgConfidence * 100) / 100; // Round to 2 decimal places }; const calculateMatchConfidence = (matches, bestMatch) => { if (!matches || matches.length === 0 || !bestMatch) { return 0; // No match = 0% confidence } // Base confidence from the match score // finalScore is already weighted and higher is better // Normalize it to a 0-100 scale const baseScore = Math.min(bestMatch.finalScore * 10, 100); // Scale factor of 10, cap at 100 // Bonus for multiple field matches (up to +15%) const fieldMatchBonus = Math.min(bestMatch.fieldMatches.length * 5, 15); // Bonus for having price data (+10%) const priceDataBonus = bestMatch.hasPriceData ? 
10 : 0; // Bonus for clear winner (gap between 1st and 2nd match) let confidenceMarginBonus = 0; if (matches.length > 1) { const scoreDiff = bestMatch.finalScore - matches[1].finalScore; // If the best match is significantly better than the second best, add bonus confidenceMarginBonus = Math.min(scoreDiff * 5, 10); // Up to +10% } else { // Only one match found, add small bonus confidenceMarginBonus = 5; } // Calculate total match confidence let matchConfidence = baseScore + fieldMatchBonus + priceDataBonus + confidenceMarginBonus; // Cap at 100% and round to 2 decimal places matchConfidence = Math.min(Math.round(matchConfidence * 100) / 100, 100); // Ensure minimum of 1% if there's any match at all return Math.max(matchConfidence, 1); }; const calculateOverallConfidence = (ocrConfidence, matchConfidence) => { // If there's no match, OCR confidence doesn't matter much if (matchConfidence === 0) { return 0; } // Overall confidence is affected by both how well Textract read the data // and how well we matched it to existing joblines // Use a weighted average: 60% OCR confidence, 40% match confidence // OCR confidence is more important because even perfect match is useless without good OCR const overall = (ocrConfidence * 0.6) + (matchConfidence * 0.4); return Math.round(overall * 100) / 100; }; // Helper function to merge and deduplicate results with weighted scoring const mergeResults = (resultsArray, weights = []) => { const scoreMap = new Map(); resultsArray.forEach((results, index) => { const weight = weights[index] || 1; results.forEach(result => { const id = result.item.id; const weightedScore = result.score * weight; if (!scoreMap.has(id)) { scoreMap.set(id, { item: result.item, score: weightedScore, count: 1 }); } else { const existing = scoreMap.get(id); // Lower score is better in Fuse.js, so take the minimum existing.score = Math.min(existing.score, weightedScore); existing.count++; } }); }); // Convert back to array and sort by score (lower is better) 
return Array.from(scoreMap.values()) .sort((a, b) => { // Prioritize items found in multiple searches if (a.count !== b.count) return b.count - a.count; return a.score - b.score; }) .slice(0, 5); // Return top 5 results }; async function generateBillFormData({ processedData, jobid: jobidFromProps, bodyshopid, partsorderid, req }) { const client = req.userGraphQLClient; let jobid = jobidFromProps; //If no jobid, fetch it, and funnel it back. if (!jobid || jobid === null || jobid === undefined || jobid === "" || jobid === "null" || jobid === "undefined") { const ro_number = processedData.summary?.PO_NUMBER?.value || Object.values(processedData.summary).find(value => value.normalizedLabel === 'ro_number')?.value; if (!ro_number) { throw new Error("Could not find RO number in the extracted data to associate with the bill. Select an RO and try again."); } const { jobs } = await client.request(` query QUERY_BILL_OCR_JOB_BY_RO($ro_number: String!) { jobs(where: {ro_number: {_eq: $ro_number}}) { id } }`, { ro_number }); if (jobs.length === 0) { throw new Error("No job found for the detected RO/PO number."); } jobid = jobs[0].id; } const jobData = await client.request(` query QUERY_BILL_OCR_DATA($jobid: uuid!) { vendors { id name } jobs_by_pk(id: $jobid) { id bodyshop { id md_responsibility_centers cdk_dealerid pbs_serialnumber rr_dealerid } joblines { id line_desc removed act_price db_price oem_partno alt_partno part_type } } } `, { jobid, // TODO: Parts order IDs are currently ignore. If receving a parts order, it could be used to more precisely match to joblines. }); //Create fuses of line descriptions for matching. 
const jobLineDescFuse = new Fuse( jobData.jobs_by_pk.joblines.map(jl => ({ ...jl, line_desc_normalized: normalizeText(jl.line_desc || ""), oem_partno_normalized: normalizePartNumber(jl.oem_partno || ""), alt_partno_normalized: normalizePartNumber(jl.alt_partno || "") })), { keys: [{ name: 'line_desc', weight: 6 }, { name: 'oem_partno', weight: 8 }, { name: 'alt_partno', weight: 5 }, { name: 'act_price', weight: 1 }, { name: 'line_desc_normalized', weight: 4 }, { name: 'oem_partno_normalized', weight: 6 }, { name: 'alt_partno_normalized', weight: 3 }], threshold: 0.4, //Adjust as needed for matching sensitivity, includeScore: true, } ); const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData }); const vendorFuse = new Fuse( jobData.vendors.map(v => ({ ...v, name_normalized: normalizeText(v.name) })), { keys: [{ name: "name", weight: 3 }, { name: 'name_normalized', weight: 2 }], threshold: 0.4, includeScore: true, }, ); const vendorMatches = vendorFuse.search(normalizeText(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value)); let vendorid; if (vendorMatches.length > 0) { vendorid = vendorMatches[0].item.id; } const { jobs_by_pk: job } = jobData; if (!job) { throw new Error('Job not found for bill form data generation.'); } //Is there a subtotal level discount? If there is, we need to figure out what the percentage is, and apply that to the actual cost as a reduction const subtotalDiscountValueRaw = processedData.summary?.DISCOUNT?.value || processedData.summary?.SUBTOTAL_DISCOUNT?.value || 0; let discountPercentageDecimal = 0; if (subtotalDiscountValueRaw) { const subtotal = parseFloat(normalizePrice(processedData.summary?.SUBTOTAL?.value || 0)) || 0; const subtotalDiscountValue = parseFloat(normalizePrice(subtotalDiscountValueRaw)) || 0; if (subtotal > 0 && subtotalDiscountValue) { // Store discount percentage as a decimal (e.g. 20.5% => 0.205), // but only allow half-percent increments (0.005 steps). 
discountPercentageDecimal = Math.abs(subtotalDiscountValue / subtotal); discountPercentageDecimal = roundToIncrement(discountPercentageDecimal, 0.005); } } //TODO: How do we handle freight lines and core charges? //Create the form data structure for the bill posting screen. const billFormData = { "jobid": jobid, "vendorid": vendorid, "invoice_number": processedData.summary?.INVOICE_RECEIPT_ID?.value, "date": processedData.summary?.INVOICE_RECEIPT_DATE?.value, "is_credit_memo": false, "total": normalizePrice(processedData.summary?.INVOICE_TOTAL?.value || processedData.summary?.TOTAL?.value), "billlines": joblineMatches.map(jlMatchLine => { const { matches, textractLineItem, } = jlMatchLine //Matches should be pre-sorted, take the first one. const matchToUse = matches.length > 0 ? matches[0] : null; // Calculate confidence scores const ocrConfidence = calculateTextractConfidence(textractLineItem); const matchConfidence = calculateMatchConfidence(matches, matchToUse); const overallConfidence = calculateOverallConfidence(ocrConfidence, matchConfidence); //TODO: Should be using the textract if there is an exact match on the normalized label. //if there isn't then we can do the below. let actualPrice, actualCost; //TODO: What is several match on the normalized name? We need to pick the most likely one. const hasNormalizedActualPrice = Object.keys(textractLineItem).find(key => textractLineItem[key].normalizedLabel === 'actual_price'); const hasNormalizedActualCost = Object.keys(textractLineItem).find(key => textractLineItem[key].normalizedLabel === 'actual_cost'); if (hasNormalizedActualPrice) { actualPrice = textractLineItem[hasNormalizedActualPrice].value; } if (hasNormalizedActualCost) { actualCost = textractLineItem[hasNormalizedActualCost].value; } if (!hasNormalizedActualPrice || !hasNormalizedActualCost) { //This is if there was no match found for normalized labels. 
//Check all prices, and generally the higher one will be the actual price and the lower one will be the cost. //Need to make sure that other random items are excluded. This should be within a reasonable range of the matched jobline at matchToUse.item.act_price //Iterate over all of the text values, and check out which of them are currencies. //They'll be in the format starting with a $ sign usually. const currencyTextractLineItems = [] // {key, value} Object.keys(textractLineItem).forEach(key => { const currencyValue = textractLineItem[key].value?.startsWith('$') ? textractLineItem[key].value : null; if (currencyValue) { //Clean it and parse it const cleanValue = parseFloat(currencyValue.replace(/[^0-9.-]/g, '')) || 0; currencyTextractLineItems.push({ key, value: cleanValue }) } }) //Sort them descending currencyTextractLineItems.sort((a, b) => b.value - a.value); //Most expensive should be the actual price, second most expensive should be the cost. if (!actualPrice) actualPrice = currencyTextractLineItems.length > 0 ? currencyTextractLineItems[0].value : 0; if (!actualCost) actualCost = currencyTextractLineItems.length > 1 ? currencyTextractLineItems[1].value : 0; if (matchToUse) { //Double check that they're within 50% of the matched jobline price if there is one. const joblinePrice = parseFloat(matchToUse.item.act_price) || 0; if (!hasNormalizedActualPrice && actualPrice > 0 && (actualPrice < joblinePrice * (1 - PRICE_PERCENT_MARGIN_TOLERANCE) || actualPrice > joblinePrice * (1 + PRICE_PERCENT_MARGIN_TOLERANCE))) { actualPrice = joblinePrice; //Set to the jobline as a fallback. } if (!hasNormalizedActualCost && actualCost > 0 && (actualCost < joblinePrice * (1 - PRICE_PERCENT_MARGIN_TOLERANCE) || actualCost > joblinePrice * (1 + PRICE_PERCENT_MARGIN_TOLERANCE))) { actualCost = null //Blank it out if it's not likely. } } } //If there's nothing, just fall back to seeing if there's a price object from textract. 
if (!actualPrice && textractLineItem.PRICE) { actualPrice = textractLineItem.PRICE.value; } if (!actualCost && textractLineItem.PRICE) { actualCost = textractLineItem.PRICE.value; } //If quantity greater than 1, check if the actual cost is a multiple of the actual price, if so, divide it out to get the unit price. const quantity = parseInt(textractLineItem?.QUANTITY?.value); if (quantity && quantity > 1) { if (actualPrice && quantity && Math.abs((actualPrice / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) { actualPrice = actualPrice / quantity; } if (actualCost && quantity && Math.abs((actualCost / quantity) - (parseFloat(matchToUse?.item?.act_price) || 0)) / ((parseFloat(matchToUse?.item?.act_price) || 1)) < PRICE_QUANTITY_MARGIN_TOLERANCE) { actualCost = actualCost / quantity; } } if (discountPercentageDecimal > 0) { actualCost = actualCost * (1 - discountPercentageDecimal); } const responsibilityCenters = job.bodyshop.md_responsibility_centers //TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price) const lineObject = { "line_desc": matchToUse?.item?.line_desc || textractLineItem.ITEM?.value || "NO DESCRIPTION", "quantity": textractLineItem.QUANTITY?.value, "actual_price": normalizePrice(actualPrice), "actual_cost": normalizePrice(actualCost), "cost_center": matchToUse?.item?.part_type ? bodyshopHasDmsKey(job.bodyshop) ? matchToUse?.item?.part_type !== "PAE" ? 
matchToUse?.item?.part_type : null : responsibilityCenters.defaults && (responsibilityCenters.defaults.costs[matchToUse?.item?.part_type] || null) : null, "applicable_taxes": { "federal": InstanceManager({ imex: true, rome: false }), "state": false, "local": false }, "joblineid": matchToUse?.item?.id || "noline", "confidence": `T${overallConfidence} - O${ocrConfidence} - J${matchConfidence}` } return lineObject }) } return billFormData } function joblineFuzzySearch({ fuseToSearch, processedData }) { const matches = [] const searchStats = []; // Track search statistics processedData.lineItems.forEach((lineItem, lineIndex) => { const lineStats = { lineNumber: lineIndex + 1, searches: [] }; // Refined ITEM search (multi-word description) const refinedItemResults = (() => { if (!lineItem.ITEM?.value) return []; const itemValue = lineItem.ITEM.value; const normalized = normalizeText(itemValue); // 1: Full string search const fullSearch = fuseToSearch.search(normalized); lineStats.searches.push({ type: 'ITEM - Full String', term: normalized, results: fullSearch.length }); // 2: Search individual significant words (3+ chars) const words = normalized.split(' ').filter(w => w.length >= 3); const wordSearches = words.map(word => { const results = fuseToSearch.search(word); lineStats.searches.push({ type: 'ITEM - Individual Word', term: word, results: results.length }); return results; }); // 3: Search without spaces entirely const noSpaceSearch = fuseToSearch.search(normalized.replace(/\s+/g, '')); lineStats.searches.push({ type: 'ITEM - No Spaces', term: normalized.replace(/\s+/g, ''), results: noSpaceSearch.length }); // Merge results with weights (full search weighted higher) return mergeResults( [fullSearch, ...wordSearches, noSpaceSearch], [1.0, ...words.map(() => 1.5), 1.2] // Full search best, individual words penalized slightly ); })(); // Refined PRODUCT_CODE search (part numbers) const refinedProductCodeResults = (() => { if (!lineItem.PRODUCT_CODE?.value) return 
[]; const productCode = lineItem.PRODUCT_CODE.value; const normalized = normalizePartNumber(productCode); // 1: Normalized search (no spaces/special chars) const normalizedSearch = fuseToSearch.search(normalized); lineStats.searches.push({ type: 'PRODUCT_CODE - Normalized', term: normalized, results: normalizedSearch.length }); // 2: Original with minimal cleaning const minimalClean = productCode.replace(/\s+/g, '').toUpperCase(); const minimalSearch = fuseToSearch.search(minimalClean); lineStats.searches.push({ type: 'PRODUCT_CODE - Minimal Clean', term: minimalClean, results: minimalSearch.length }); // 3: Search with dashes (common in part numbers) const withDashes = productCode.replace(/[^a-zA-Z0-9-]/g, '').toUpperCase(); const dashSearch = fuseToSearch.search(withDashes); lineStats.searches.push({ type: 'PRODUCT_CODE - With Dashes', term: withDashes, results: dashSearch.length }); // 4: Special chars to spaces (preserve word boundaries) const specialCharsToSpaces = productCode.replace(/[^a-zA-Z0-9\s]/g, ' ').replace(/\s+/g, ' ').trim().toUpperCase(); const specialCharsSearch = fuseToSearch.search(specialCharsToSpaces); lineStats.searches.push({ type: 'PRODUCT_CODE - Special Chars to Spaces', term: specialCharsToSpaces, results: specialCharsSearch.length }); return mergeResults( [normalizedSearch, minimalSearch, dashSearch, specialCharsSearch], [1.0, 1.1, 1.2, 1.15] // Prefer fully normalized, special chars to spaces slightly weighted ); })(); // Refined PRICE search const refinedPriceResults = (() => { if (!lineItem.PRICE?.value) return []; const price = normalizePrice(lineItem.PRICE.value); // 1: Exact price match const exactSearch = fuseToSearch.search(price); lineStats.searches.push({ type: 'PRICE - Exact', term: price, results: exactSearch.length }); // 2: Price with 2 decimal places const priceFloat = parseFloat(price); if (!isNaN(priceFloat)) { const formattedPrice = priceFloat.toFixed(2); const formattedSearch = fuseToSearch.search(formattedPrice); 
lineStats.searches.push({ type: 'PRICE - Formatted (2 decimals)', term: formattedPrice, results: formattedSearch.length }); return mergeResults([exactSearch, formattedSearch], [1.0, 1.1]); } return exactSearch; })(); // Refined UNIT_PRICE search const refinedUnitPriceResults = (() => { if (!lineItem.UNIT_PRICE?.value) return []; const unitPrice = normalizePrice(lineItem.UNIT_PRICE.value); // 1: Exact price match const exactSearch = fuseToSearch.search(unitPrice); lineStats.searches.push({ type: 'UNIT_PRICE - Exact', term: unitPrice, results: exactSearch.length }); // 2: Price with 2 decimal places const priceFloat = parseFloat(unitPrice); if (!isNaN(priceFloat)) { const formattedPrice = priceFloat.toFixed(2); const formattedSearch = fuseToSearch.search(formattedPrice); lineStats.searches.push({ type: 'UNIT_PRICE - Formatted (2 decimals)', term: formattedPrice, results: formattedSearch.length }); return mergeResults([exactSearch, formattedSearch], [1.0, 1.1]); } return exactSearch; })(); //Merge them all together and sort by the highest scores. 
const combinedScoreMap = new Map(); // Weight different field types differently const fieldWeights = { productCode: 5.0, // Most important - part numbers should match item: 3.0, // Second most important - description price: 1.0, // Less important - prices can vary unitPrice: 0.8 // Least important - similar to price }; [ { results: refinedProductCodeResults, weight: fieldWeights.productCode, field: 'productCode' }, { results: refinedItemResults, weight: fieldWeights.item, field: 'item' }, { results: refinedPriceResults, weight: fieldWeights.price, field: 'price' }, { results: refinedUnitPriceResults, weight: fieldWeights.unitPrice, field: 'unitPrice' } ].forEach(({ results, weight, field }) => { results.forEach((result, index) => { const id = result.item.id; // Position bonus (first result is better than fifth) const positionBonus = (5 - index) / 5; // Lower score is better in Fuse.js, so invert it and apply weights const normalizedScore = (1 - result.score) * weight * positionBonus; if (!combinedScoreMap.has(id)) { combinedScoreMap.set(id, { item: result.item, score: normalizedScore, fieldMatches: [field], matchCount: result.count || 1 }); } else { const existing = combinedScoreMap.get(id); existing.score += normalizedScore; existing.fieldMatches.push(field); existing.matchCount += (result.count || 1); } }); }); // Convert to array and sort by best combined score const finalMatches = Array.from(combinedScoreMap.values()) .map(entry => { // Apply penalty if item has no act_price or it's 0 const hasPriceData = entry.item.act_price && parseFloat(entry.item.act_price) > 0; const priceDataPenalty = hasPriceData ? 
1.0 : 0.5; // 50% penalty if no price return { ...entry, // Boost score for items that matched in multiple fields, penalize for missing price finalScore: entry.score * (1 + (entry.fieldMatches.length * 0.2)) * priceDataPenalty, hasPriceData }; }) .sort((a, b) => b.finalScore - a.finalScore) .slice(0, 5); // Always push the textract line item, even if no matches found // This ensures all invoice lines are processed matches.push({ matches: finalMatches, textractLineItem: lineItem, hasMatch: finalMatches.length > 0 }); searchStats.push(lineStats); }) // // Output search statistics table // console.log('\n═══════════════════════════════════════════════════════════════════════'); // console.log(' FUSE.JS SEARCH STATISTICS'); // console.log('═══════════════════════════════════════════════════════════════════════\n'); // searchStats.forEach(lineStat => { // console.log(`📄 Line Item #${lineStat.lineNumber}:`); // console.log('─'.repeat(75)); // if (lineStat.searches.length > 0) { // const tableData = lineStat.searches.map(search => ({ // 'Search Type': search.type, // 'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''), // 'Results': search.results // })); // console.table(tableData); // } else { // console.log(' No searches performed for this line item.\n'); // } // }); // // Summary statistics // const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0); // const totalResults = searchStats.reduce((sum, stat) => // sum + stat.searches.reduce((s, search) => s + search.results, 0), 0); // const avgResultsPerSearch = totalSearches > 0 ? 
(totalResults / totalSearches).toFixed(2) : 0; // console.log('═══════════════════════════════════════════════════════════════════════'); // console.log(' SUMMARY'); // console.log('═══════════════════════════════════════════════════════════════════════'); // console.table({ // 'Total Line Items': processedData.lineItems.length, // 'Total Searches Performed': totalSearches, // 'Total Results Found': totalResults, // 'Average Results per Search': avgResultsPerSearch // }); // console.log('═══════════════════════════════════════════════════════════════════════\n'); return matches } const bodyshopHasDmsKey = (bodyshop) => bodyshop.cdk_dealerid || bodyshop.pbs_serialnumber || bodyshop.rr_dealerid; module.exports = { generateBillFormData, normalizePrice }