// bodyshop/server/ai/bill-ocr/bill-ocr-generator.js



const Fuse = require('fuse.js');

const { standardizedFieldsnames } = require('./bill-ocr-normalize');
const InstanceManager = require("../../utils/instanceMgr").default;

// Relative window (0.5 = +/-50%) around the matched jobline's act_price. Prices and costs
// guessed from currency-looking OCR values that fall outside this window are treated as unlikely.
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5;
// Maximum relative difference (0.03 = 3%) between (amount / quantity) and the matched jobline's
// act_price for the amount to be treated as a line total and divided down to a unit price.
const PRICE_QUANTITY_MARGIN_TOLERANCE = 0.03;
/**
 * Canonical form of a part number used for matching: every character that is
 * not an ASCII letter or digit is removed and the remainder is upper-cased.
 *
 * @param {string} str - Raw part number text.
 * @returns {string} Upper-case alphanumeric part number.
 */
const normalizePartNumber = (str) => {
    const alphanumericOnly = str.replace(/[^a-zA-Z0-9]/g, '');
    return alphanumericOnly.toUpperCase();
};

/**
 * Canonical form of free text used for fuzzy matching: punctuation is removed,
 * runs of whitespace collapse to a single space, the result is trimmed and
 * upper-cased.
 *
 * OCR fields are optional, so `null`/`undefined` yield an empty string instead
 * of throwing; any other non-string input is stringified first.
 *
 * @param {*} str - Raw text (usually a string; may be missing).
 * @returns {string} Normalized text, or '' when no text was supplied.
 */
const normalizeText = (str) => {
    if (str === null || str === undefined) {
        return '';
    }

    return String(str)
        .replace(/[^a-zA-Z0-9\s]/g, '')
        .replace(/\s+/g, ' ')
        .trim()
        .toUpperCase();
};
/**
 * Strips an OCR price string down to digits, '.' and '-'.
 *
 * A decimal comma ("292,37") is recognized only when the text contains no '.'
 * at all and the last comma is followed by exactly one or two digits; that
 * comma then becomes the decimal point. In every other dot-free case commas
 * are dropped as thousands separators ("1,234" -> "1234"). Text that already
 * contains a '.' skips the comma handling and is only stripped.
 *
 * @param {*} str - Raw price text. Non-strings are returned untouched.
 * @returns {*} Cleaned numeric string, or the original value when not a string.
 */
const normalizePrice = (str) => {
    if (typeof str !== 'string') {
        return str;
    }

    const trimmed = str.trim();
    const lastComma = trimmed.lastIndexOf(',');
    let candidate = trimmed;

    if (lastComma !== -1 && !trimmed.includes('.')) {
        const tail = trimmed.slice(lastComma + 1).trim();
        if (/^\d{1,2}$/.test(tail)) {
            // Last comma is a decimal separator; any earlier commas are grouping noise.
            const head = trimmed.slice(0, lastComma).replace(/,/g, '');
            candidate = `${head}.${tail}`;
        } else {
            candidate = trimmed.replace(/,/g, '');
        }
    }

    return candidate.replace(/[^0-9.-]+/g, "");
};

/**
 * Rounds `value` to the nearest multiple of `increment`.
 *
 * The result is re-parsed from a fixed-point string to remove binary float
 * artifacts (e.g. 0.20500000000000002). The number of decimals kept is taken
 * from the increment's own decimal representation, so increments such as 0.25
 * or 0.125 keep all of their digits.
 *
 * @param {number} value - Number to round.
 * @param {number} increment - Positive step size (e.g. 0.005).
 * @returns {*} The rounded number; `value` unchanged when either argument is
 *   not a finite number or the increment is not positive.
 */
const roundToIncrement = (value, increment) => {
    if (!Number.isFinite(value) || !Number.isFinite(increment) || increment <= 0) {
        return value;
    }

    const rounded = Math.round((value + Number.EPSILON) / increment) * increment;

    // Count the increment's decimal places from its string form, including
    // exponent notation such as "1e-7" or "1.5e-7".
    const [mantissa, exponent] = String(increment).toLowerCase().split('e');
    const fractionDigits = (mantissa.split('.')[1] || '').length;
    // toFixed accepts at most 100 fraction digits.
    const decimals = Math.min(100, Math.max(0, fractionDigits - Number(exponent || 0)));

    return parseFloat(rounded.toFixed(decimals));
};

//More complex function. Not necessary at the moment, keeping for reference.
// const normalizePriceFinal = (str) => {
//     if (typeof str !== 'string') {
//         // If it's already a number, format to 2 decimals
//         const num = parseFloat(str);
//         return isNaN(num) ? 0 : num;
//     }

//     // First, try to extract valid decimal number patterns (e.g., "123.45")
//     const decimalPattern = /\d+\.\d{1,2}/g;
//     const decimalMatches = str.match(decimalPattern);

//     if (decimalMatches && decimalMatches.length > 0) {
//         // Found valid decimal number(s)
//         const numbers = decimalMatches.map(m => parseFloat(m)).filter(n => !isNaN(n) && n > 0);

//         if (numbers.length === 1) {
//             return numbers[0];
//         }

//         if (numbers.length > 1) {
//             // Check if all numbers are the same (e.g., "47.57.47.57" -> [47.57, 47.57])
//             const uniqueNumbers = [...new Set(numbers)];
//             if (uniqueNumbers.length === 1) {
//                 return uniqueNumbers[0];
//             }

//             // Check if numbers are very close (within 1% tolerance)
//             const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
//             const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);

//             if (allClose) {
//                 return avg;
//             }

//             // Return the first number (most likely correct)
//             return numbers[0];
//         }
//     }

//     // Fallback: Split on common delimiters and extract all potential numbers
//     const parts = str.split(/[\/|\\,;]/).map(part => part.trim()).filter(part => part.length > 0);

//     if (parts.length > 1) {
//         // Multiple values detected - extract and parse all valid numbers
//         const numbers = parts
//             .map(part => {
//                 const cleaned = part.replace(/[^0-9.-]+/g, "");
//                 const parsed = parseFloat(cleaned);
//                 return isNaN(parsed) ? null : parsed;
//             })
//             .filter(num => num !== null && num > 0);

//         if (numbers.length === 0) {
//             // No valid numbers found, try fallback to basic cleaning
//             const cleaned = str.replace(/[^0-9.-]+/g, "");
//             const parsed = parseFloat(cleaned);
//             return isNaN(parsed) ? 0 : parsed;
//         }

//         if (numbers.length === 1) {
//             return numbers[0];
//         }

//         // Multiple valid numbers
//         const uniqueNumbers = [...new Set(numbers)];

//         if (uniqueNumbers.length === 1) {
//             return uniqueNumbers[0];
//         }

//         // Check if numbers are very close (within 1% tolerance)
//         const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
//         const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);

//         if (allClose) {
//             return avg;
//         }

//         // Return the first valid number
//         return numbers[0];
//     }

//     // Single value or no delimiters, clean normally
//     const cleaned = str.replace(/[^0-9.-]+/g, "");
//     const parsed = parseFloat(cleaned);
//     return isNaN(parsed) ? 0 : parsed;
// };


/**
 * Scores how trustworthy a single OCR line item is.
 *
 * The score is a weighted average of the per-field `confidence` numbers
 * (fields with a missing, zero or non-numeric confidence are ignored), scaled
 * down by 20% for each of the four critical pieces that is absent: a field
 * labelled actual cost, a field labelled actual price, a field labelled line
 * description, and a truthy `QUANTITY.value`.
 *
 * @param {Object} textractLineItem - Map of field key -> { value, confidence, normalizedLabel }.
 * @returns {number} Confidence rounded to 2 decimals; 0 for an empty/missing
 *   item or when no field carries a usable confidence.
 */
const calculateTextractConfidence = (textractLineItem) => {
    if (!textractLineItem || Object.keys(textractLineItem).length === 0) {
        return 0;
    }

    const fields = Object.values(textractLineItem);
    const scoredFields = fields.filter(
        (field) => field.confidence && typeof field.confidence === 'number'
    );
    if (scoredFields.length === 0) {
        return 0;
    }

    // Price/cost weigh the most, followed by part number, description and quantity.
    const weightForLabel = (label) => {
        if (label === standardizedFieldsnames.actual_cost || label === standardizedFieldsnames.actual_price) {
            return 4;
        }
        if (label === standardizedFieldsnames.part_no || label === standardizedFieldsnames.line_desc) {
            return 3.5;
        }
        if (label === standardizedFieldsnames.quantity) {
            return 3.5;
        }
        return 1;
    };

    let weightedSum = 0;
    let totalWeight = 0;
    for (const field of scoredFields) {
        const weight = weightForLabel(field.normalizedLabel);
        weightedSum += field.confidence * weight;
        totalWeight += weight;
    }
    const weightedAverage = totalWeight > 0 ? weightedSum / totalWeight : 0;

    const hasLabel = (label) => fields.some((field) => field.normalizedLabel === label);
    const criticalPresence = [
        hasLabel(standardizedFieldsnames.actual_cost),
        hasLabel(standardizedFieldsnames.actual_price),
        hasLabel(standardizedFieldsnames.line_desc),
        // Quantity is not normalized; the raw QUANTITY key is used as-is.
        textractLineItem?.QUANTITY?.value,
    ];
    const missingCount = criticalPresence.filter((present) => !present).length;

    // Each missing critical field reduces confidence by 20%.
    const missingFieldsPenalty = missingCount > 0 ? 1.0 - (missingCount * 0.20) : 1.0;
    const penalized = weightedAverage * missingFieldsPenalty;

    return Math.round(penalized * 100) / 100;
};

/**
 * Scores how confident the jobline match for one invoice line is.
 *
 * Components, summed and capped at 100:
 *  - the best match's `finalScore` x 10 (capped at 100),
 *  - 5 per matched field (capped at 15),
 *  - 10 when the matched jobline carries price data,
 *  - a "clear winner" bonus: 5 x the score gap to the runner-up (capped at 10),
 *    or a flat 5 when there is only one candidate.
 *
 * @param {Array} matches - Candidate matches, best first.
 * @param {Object|null} bestMatch - The candidate chosen from `matches`.
 * @returns {number} 0 when nothing matched, otherwise a value between 1 and 100.
 */
const calculateMatchConfidence = (matches, bestMatch) => {
    const nothingMatched = !matches || matches.length === 0 || !bestMatch;
    if (nothingMatched) {
        return 0;
    }

    const { finalScore, fieldMatches, hasPriceData } = bestMatch;

    const scoreComponent = Math.min(finalScore * 10, 100);
    const fieldComponent = Math.min(fieldMatches.length * 5, 15);
    const priceComponent = hasPriceData ? 10 : 0;
    const marginComponent = matches.length > 1
        ? Math.min((finalScore - matches[1].finalScore) * 5, 10)
        : 5;

    const total = scoreComponent + fieldComponent + priceComponent + marginComponent;
    const capped = Math.min(Math.round(total * 100) / 100, 100);

    // Any match at all is worth at least 1%.
    return Math.max(capped, 1);
};

/**
 * Blends OCR confidence and match confidence into one figure.
 *
 * A line with no jobline match (match confidence exactly 0) scores 0 no matter
 * how well it was read. Otherwise the blend is 60% OCR / 40% match.
 *
 * @param {number} ocrConfidence - Confidence in the OCR reading.
 * @param {number} matchConfidence - Confidence in the jobline match.
 * @returns {number} Blended confidence rounded to 2 decimals.
 */
const calculateOverallConfidence = (ocrConfidence, matchConfidence) => {
    if (matchConfidence === 0) {
        return 0;
    }

    const ocrShare = ocrConfidence * 0.6;
    const matchShare = matchConfidence * 0.4;

    return Math.round((ocrShare + matchShare) * 100) / 100;
};

/**
 * Merges several Fuse.js result lists into one de-duplicated top-5 list.
 *
 * Each list's scores are multiplied by the weight at the same index (a missing
 * or zero weight counts as 1). Items are keyed by `item.id`; an item seen in
 * several lists keeps its lowest weighted score and a `count` of how many
 * times it appeared. Output is ordered by `count` descending, then `score`
 * ascending (lower Fuse scores are better).
 *
 * @param {Array<Array<{item: {id: *}, score: number}>>} resultsArray - Result lists to merge.
 * @param {number[]} [weights=[]] - Score multiplier for each list.
 * @returns {Array<{item: Object, score: number, count: number}>} At most five merged entries.
 */
const mergeResults = (resultsArray, weights = []) => {
    const byId = new Map();

    for (const [listIndex, list] of resultsArray.entries()) {
        const multiplier = weights[listIndex] || 1;

        for (const { item, score } of list) {
            const weighted = score * multiplier;
            const seen = byId.get(item.id);

            if (seen === undefined) {
                byId.set(item.id, { item, score: weighted, count: 1 });
            } else {
                seen.score = Math.min(seen.score, weighted);
                seen.count += 1;
            }
        }
    }

    const merged = [...byId.values()];
    // Items found by more searches come first; ties fall back to the better (lower) score.
    merged.sort((left, right) => (right.count - left.count) || (left.score - right.score));

    return merged.slice(0, 5);
};

/**
 * Builds the pre-filled bill form data from OCR-extracted invoice data.
 *
 * Steps:
 *  1. Resolve the job: use the supplied `jobid`, or look it up by the RO/PO
 *     number found in the OCR summary.
 *  2. Load the vendors plus the job's bodyshop settings and joblines.
 *  3. Fuzzy-match the invoice vendor name and every invoice line against that data.
 *  4. Derive price/cost per line, normalize totals to unit prices when a
 *     quantity is present, and apply any subtotal-level discount to the cost.
 *
 * @param {Object} params
 * @param {Object} params.processedData - OCR output with `summary` (field map) and `lineItems` (array).
 * @param {string} [params.jobid] - Job to post against; falsy or the strings "null"/"undefined" trigger the RO lookup.
 * @param {string} [params.bodyshopid] - Currently unused.
 * @param {string} [params.partsorderid] - Currently unused (see TODO at the job query).
 * @param {Object} params.req - Request carrying `userGraphQLClient`.
 * @returns {Promise<Object>} Form data: jobid, vendorid, invoice_number, date, is_credit_memo, total, billlines.
 * @throws {Error} When no RO number can be found, no job matches it, or the job cannot be loaded.
 */
async function generateBillFormData({ processedData, jobid: jobidFromProps, bodyshopid, partsorderid, req }) {
    const client = req.userGraphQLClient;
    const summary = processedData.summary ?? {};

    // Numeric view of an amount that may be a number or an OCR string such as "$1,234.56".
    // Yields NaN when nothing numeric can be read (including null/undefined).
    const toNumber = (value) => (typeof value === 'number' ? value : parseFloat(normalizePrice(value)));

    let jobid = jobidFromProps;
    // If no jobid was supplied, find it from the RO/PO number on the invoice.
    if (!jobid || jobid === "null" || jobid === "undefined") {
        const ro_number = summary.PO_NUMBER?.value
            || Object.values(summary).find(field => field?.normalizedLabel === 'ro_number')?.value;
        if (!ro_number) {
            throw new Error("Could not find RO number in the extracted data to associate with the bill. Select an RO and try again.");
        }

        const { jobs } = await client.request(`
            query QUERY_BILL_OCR_JOB_BY_RO($ro_number: String!) {
                    jobs(where: {ro_number: {_eq: $ro_number}}) {
                        id
                    }
                }`, { ro_number });

        if (!jobs || jobs.length === 0) {
            throw new Error("No job found for the detected RO/PO number.");
        }
        jobid = jobs[0].id;
    }

    const jobData = await client.request(`
   query QUERY_BILL_OCR_DATA($jobid: uuid!) {
  vendors {
    id
    name
  }
  jobs_by_pk(id: $jobid) {
    id
    bodyshop {
      id
      md_responsibility_centers
      cdk_dealerid
      pbs_serialnumber
      rr_dealerid
    }
    joblines {
      id
      line_desc
      removed
      act_price
      db_price
      oem_partno
      alt_partno
      part_type
    }
  }

}
   `, {
        jobid,  // TODO: Parts order IDs are currently ignored. If receiving a parts order, it could be used to more precisely match to joblines.
    });

    // Validate before touching job.joblines so a missing job gives the intended error
    // rather than a TypeError.
    const { jobs_by_pk: job } = jobData;
    if (!job) {
        throw new Error('Job not found for bill form data generation.');
    }

    // Fuse index over the joblines, searching both raw and normalized description / part numbers.
    const jobLineDescFuse = new Fuse(
        (job.joblines || []).map(jl => ({
            ...jl,
            line_desc_normalized: normalizeText(jl.line_desc || ""),
            oem_partno_normalized: normalizePartNumber(jl.oem_partno || ""),
            alt_partno_normalized: normalizePartNumber(jl.alt_partno || "")
        })),
        {
            keys: [
                { name: 'line_desc', weight: 6 },
                { name: 'oem_partno', weight: 8 },
                { name: 'alt_partno', weight: 5 },
                { name: 'act_price', weight: 1 },
                { name: 'line_desc_normalized', weight: 4 },
                { name: 'oem_partno_normalized', weight: 6 },
                { name: 'alt_partno_normalized', weight: 3 }
            ],
            threshold: 0.4, // Adjust as needed for matching sensitivity.
            includeScore: true,
        }
    );
    const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });

    const vendorFuse = new Fuse(
        (jobData.vendors || []).map(v => ({ ...v, name_normalized: normalizeText(v.name || "") })),
        {
            keys: [{ name: "name", weight: 3 }, { name: 'name_normalized', weight: 2 }],
            threshold: 0.4,
            includeScore: true,
        },
    );

    // The vendor name is optional in the OCR output; only search when one was extracted.
    const vendorNameRaw = summary.VENDOR_NAME?.value || summary.NAME?.value;
    const vendorMatches = typeof vendorNameRaw === 'string' && vendorNameRaw.trim()
        ? vendorFuse.search(normalizeText(vendorNameRaw))
        : [];

    let vendorid;
    if (vendorMatches.length > 0) {
        vendorid = vendorMatches[0].item.id;
    }

    // Is there a subtotal-level discount? If so, work out its percentage of the subtotal
    // and apply that to each line's actual cost as a reduction.
    const subtotalDiscountValueRaw = summary.DISCOUNT?.value || summary.SUBTOTAL_DISCOUNT?.value || 0;
    let discountPercentageDecimal = 0;
    if (subtotalDiscountValueRaw) {
        const subtotal = parseFloat(normalizePrice(summary.SUBTOTAL?.value || 0)) || 0;
        const subtotalDiscountValue = parseFloat(normalizePrice(subtotalDiscountValueRaw)) || 0;
        if (subtotal > 0 && subtotalDiscountValue) {
            // Store discount percentage as a decimal (e.g. 20.5% => 0.205),
            // but only allow half-percent increments (0.005 steps).
            discountPercentageDecimal = Math.abs(subtotalDiscountValue / subtotal);
            discountPercentageDecimal = roundToIncrement(discountPercentageDecimal, 0.005);
        }
    }

    const responsibilityCenters = job.bodyshop.md_responsibility_centers;

    // Cost center for a matched jobline's part type: DMS-integrated shops use the part type
    // itself (except "PAE"); other shops map it through their responsibility-center defaults.
    const resolveCostCenter = (partType) => {
        if (!partType) {
            return null;
        }
        if (bodyshopHasDmsKey(job.bodyshop)) {
            return partType !== "PAE" ? partType : null;
        }
        return responsibilityCenters?.defaults?.costs?.[partType] || null;
    };

    //TODO: How do we handle freight lines and core charges?
    // Create the form data structure for the bill posting screen.
    const billFormData = {
        "jobid": jobid,
        "vendorid": vendorid,
        "invoice_number": summary.INVOICE_RECEIPT_ID?.value,
        "date": summary.INVOICE_RECEIPT_DATE?.value,
        "is_credit_memo": false,
        "total": normalizePrice(summary.INVOICE_TOTAL?.value || summary.TOTAL?.value),
        "billlines": joblineMatches.map(({ matches, textractLineItem }) => {
            // Matches are pre-sorted best-first; take the first one.
            const matchToUse = matches.length > 0 ? matches[0] : null;
            const joblinePrice = parseFloat(matchToUse?.item?.act_price) || 0;

            const ocrConfidence = calculateTextractConfidence(textractLineItem);
            const matchConfidence = calculateMatchConfidence(matches, matchToUse);
            const overallConfidence = calculateOverallConfidence(ocrConfidence, matchConfidence);

            //TODO: What if several fields match on the normalized name? We need to pick the most likely one.
            const fieldKeys = Object.keys(textractLineItem);
            const normalizedPriceKey = fieldKeys.find(key => textractLineItem[key]?.normalizedLabel === 'actual_price');
            const normalizedCostKey = fieldKeys.find(key => textractLineItem[key]?.normalizedLabel === 'actual_cost');

            let actualPrice;
            let actualCost;
            if (normalizedPriceKey) {
                actualPrice = textractLineItem[normalizedPriceKey].value;
            }
            if (normalizedCostKey) {
                actualCost = textractLineItem[normalizedCostKey].value;
            }

            if (!normalizedPriceKey || !normalizedCostKey) {
                // At least one label is missing: guess from the currency-looking values on the line.
                // Generally the highest is the price and the next highest is the cost.
                const currencyAmounts = [];
                fieldKeys.forEach(key => {
                    const rawValue = textractLineItem[key]?.value;
                    // OCR values are not guaranteed to be strings; only "$..." text counts as currency.
                    if (typeof rawValue === 'string' && rawValue.startsWith('$')) {
                        currencyAmounts.push({ key, value: parseFloat(rawValue.replace(/[^0-9.-]/g, '')) || 0 });
                    }
                });
                currencyAmounts.sort((a, b) => b.value - a.value);

                if (!actualPrice) actualPrice = currencyAmounts.length > 0 ? currencyAmounts[0].value : 0;
                if (!actualCost) actualCost = currencyAmounts.length > 1 ? currencyAmounts[1].value : 0;

                if (matchToUse) {
                    // Guessed amounts must sit within the tolerance window around the jobline price.
                    const lowerBound = joblinePrice * (1 - PRICE_PERCENT_MARGIN_TOLERANCE);
                    const upperBound = joblinePrice * (1 + PRICE_PERCENT_MARGIN_TOLERANCE);
                    const isOutOfRange = (amount) => amount < lowerBound || amount > upperBound;

                    if (!normalizedPriceKey && actualPrice > 0 && isOutOfRange(actualPrice)) {
                        actualPrice = joblinePrice; // Fall back to the jobline price.
                    }
                    if (!normalizedCostKey && actualCost > 0 && isOutOfRange(actualCost)) {
                        actualCost = null; // Blank it out if it's not likely.
                    }
                }
            }

            // If there's still nothing, fall back to the generic PRICE field.
            if (!actualPrice && textractLineItem.PRICE) {
                actualPrice = textractLineItem.PRICE.value;
            }
            if (!actualCost && textractLineItem.PRICE) {
                actualCost = textractLineItem.PRICE.value;
            }

            // With a quantity above 1, an amount whose per-unit value lands within tolerance of the
            // jobline price is treated as a line total and divided down to a unit amount.
            // Amounts may still be raw OCR strings here, so compare on their numeric value.
            const quantity = parseInt(textractLineItem?.QUANTITY?.value, 10);
            if (quantity > 1) {
                const looksLikeLineTotal = (amount) =>
                    Number.isFinite(amount)
                    && Math.abs((amount / quantity) - joblinePrice) / (joblinePrice || 1) < PRICE_QUANTITY_MARGIN_TOLERANCE;

                const numericPrice = toNumber(actualPrice);
                if (actualPrice && looksLikeLineTotal(numericPrice)) {
                    actualPrice = numericPrice / quantity;
                }
                const numericCost = toNumber(actualCost);
                if (actualCost && looksLikeLineTotal(numericCost)) {
                    actualCost = numericCost / quantity;
                }
            }

            if (discountPercentageDecimal > 0) {
                // Only discount a cost that can be read as a number; a blanked-out or
                // unreadable cost is left as-is instead of turning into 0 or NaN.
                const numericCost = toNumber(actualCost);
                if (Number.isFinite(numericCost)) {
                    actualCost = numericCost * (1 - discountPercentageDecimal);
                }
            }

            //TODO: Do we need to verify the lines to see if it is a unit price or total price (i.e. quantity * price)
            return {
                "line_desc": matchToUse?.item?.line_desc || textractLineItem.ITEM?.value || "NO DESCRIPTION",
                "quantity": textractLineItem.QUANTITY?.value,
                "actual_price": normalizePrice(actualPrice),
                "actual_cost": normalizePrice(actualCost),
                "cost_center": resolveCostCenter(matchToUse?.item?.part_type),
                "applicable_taxes": {
                    "federal": InstanceManager({ imex: true, rome: false }),
                    "state": false,
                    "local": false
                },
                "joblineid": matchToUse?.item?.id || "noline",
                "confidence": `T${overallConfidence} - O${ocrConfidence} - J${matchConfidence}`
            };
        })
    };

    return billFormData;
}

/**
 * Fuzzy-matches every OCR invoice line against the jobline search index.
 *
 * For each line item the ITEM, PRODUCT_CODE, PRICE and UNIT_PRICE values are
 * each searched in several spellings. Every field yields at most five merged
 * candidates, which are then combined into one weighted score per jobline
 * (part number > description > price > unit price). Candidates hit by several
 * fields are boosted; candidates whose jobline has no positive `act_price`
 * are halved.
 *
 * @param {Object} params
 * @param {{search: Function}} params.fuseToSearch - Search index exposing `search(term)` that returns `{ item, score }` results.
 * @param {{lineItems: Object[]}} params.processedData - OCR output; one entry per invoice line.
 * @returns {Array<{matches: Object[], textractLineItem: Object, hasMatch: boolean}>}
 *   One entry per invoice line (even when nothing matched), with up to five
 *   candidates sorted best-first by `finalScore`.
 */
function joblineFuzzySearch({ fuseToSearch, processedData }) {
    const matches = [];
    const searchStats = []; // Per-line search statistics (consumed by the debug dump below).

    // Weight different field types differently.
    const fieldWeights = {
        productCode: 5.0,  // Most important - part numbers should match
        item: 3.0,         // Second most important - description
        price: 1.0,        // Less important - prices can vary
        unitPrice: 0.8     // Least important - similar to price
    };

    // Runs one search and records how many hits it produced.
    const trackedSearch = (lineStats, type, term) => {
        const results = fuseToSearch.search(term);
        lineStats.searches.push({ type, term, results: results.length });
        return results;
    };

    // ITEM: full description, each significant word, and the description without spaces.
    const searchItem = (lineStats, rawValue) => {
        if (!rawValue) return [];

        const normalized = normalizeText(rawValue);
        const fullSearch = trackedSearch(lineStats, 'ITEM - Full String', normalized);

        // Individual significant words (3+ chars).
        const words = normalized.split(' ').filter(w => w.length >= 3);
        const wordSearches = words.map(word => trackedSearch(lineStats, 'ITEM - Individual Word', word));

        const noSpaceSearch = trackedSearch(lineStats, 'ITEM - No Spaces', normalized.replace(/\s+/g, ''));

        // Full search best, individual words penalized slightly.
        return mergeResults(
            [fullSearch, ...wordSearches, noSpaceSearch],
            [1.0, ...words.map(() => 1.5), 1.2]
        );
    };

    // PRODUCT_CODE: four spellings of the part number, fully normalized preferred.
    const searchProductCode = (lineStats, rawValue) => {
        if (!rawValue) return [];

        const productCode = String(rawValue);
        const normalizedSearch = trackedSearch(lineStats, 'PRODUCT_CODE - Normalized', normalizePartNumber(productCode));
        const minimalSearch = trackedSearch(lineStats, 'PRODUCT_CODE - Minimal Clean', productCode.replace(/\s+/g, '').toUpperCase());
        const dashSearch = trackedSearch(lineStats, 'PRODUCT_CODE - With Dashes', productCode.replace(/[^a-zA-Z0-9-]/g, '').toUpperCase());
        const specialCharsSearch = trackedSearch(
            lineStats,
            'PRODUCT_CODE - Special Chars to Spaces',
            productCode.replace(/[^a-zA-Z0-9\s]/g, ' ').replace(/\s+/g, ' ').trim().toUpperCase()
        );

        return mergeResults(
            [normalizedSearch, minimalSearch, dashSearch, specialCharsSearch],
            [1.0, 1.1, 1.2, 1.15]
        );
    };

    // PRICE / UNIT_PRICE: the cleaned text as-is and, when numeric, formatted to 2 decimals.
    const searchPriceField = (lineStats, label, rawValue) => {
        if (!rawValue) return [];

        const price = String(normalizePrice(rawValue));
        const exactSearch = trackedSearch(lineStats, `${label} - Exact`, price);

        const priceFloat = parseFloat(price);
        if (Number.isNaN(priceFloat)) {
            // Still go through mergeResults so the list is de-duplicated and capped at five;
            // an uncapped list would give later hits a zero or negative position bonus below.
            return mergeResults([exactSearch], [1.0]);
        }

        const formattedPrice = priceFloat.toFixed(2);
        const formattedSearch = trackedSearch(lineStats, `${label} - Formatted (2 decimals)`, formattedPrice);
        return mergeResults([exactSearch, formattedSearch], [1.0, 1.1]);
    };

    processedData.lineItems.forEach((lineItem, lineIndex) => {
        const lineStats = {
            lineNumber: lineIndex + 1,
            searches: []
        };

        const refinedItemResults = searchItem(lineStats, lineItem.ITEM?.value);
        const refinedProductCodeResults = searchProductCode(lineStats, lineItem.PRODUCT_CODE?.value);
        const refinedPriceResults = searchPriceField(lineStats, 'PRICE', lineItem.PRICE?.value);
        const refinedUnitPriceResults = searchPriceField(lineStats, 'UNIT_PRICE', lineItem.UNIT_PRICE?.value);

        // Merge them all together; a higher combined score is better.
        const combinedScoreMap = new Map();

        [
            { results: refinedProductCodeResults, weight: fieldWeights.productCode, field: 'productCode' },
            { results: refinedItemResults, weight: fieldWeights.item, field: 'item' },
            { results: refinedPriceResults, weight: fieldWeights.price, field: 'price' },
            { results: refinedUnitPriceResults, weight: fieldWeights.unitPrice, field: 'unitPrice' }
        ].forEach(({ results, weight, field }) => {
            results.forEach((result, index) => {
                const id = result.item.id;

                // Position bonus (first result is better than fifth).
                const positionBonus = (5 - index) / 5;

                // Lower score is better in Fuse.js, so invert it and apply weights.
                const normalizedScore = (1 - result.score) * weight * positionBonus;

                const existing = combinedScoreMap.get(id);
                if (!existing) {
                    combinedScoreMap.set(id, {
                        item: result.item,
                        score: normalizedScore,
                        fieldMatches: [field],
                        matchCount: result.count || 1
                    });
                } else {
                    existing.score += normalizedScore;
                    existing.fieldMatches.push(field);
                    existing.matchCount += (result.count || 1);
                }
            });
        });

        // Convert to array and sort by best combined score.
        const finalMatches = Array.from(combinedScoreMap.values())
            .map(entry => {
                // Apply penalty if item has no act_price or it's 0.
                const hasPriceData = entry.item.act_price && parseFloat(entry.item.act_price) > 0;
                const priceDataPenalty = hasPriceData ? 1.0 : 0.5; // 50% penalty if no price

                return {
                    ...entry,
                    // Boost score for items that matched in multiple fields, penalize for missing price.
                    finalScore: entry.score * (1 + (entry.fieldMatches.length * 0.2)) * priceDataPenalty,
                    hasPriceData
                };
            })
            .sort((a, b) => b.finalScore - a.finalScore)
            .slice(0, 5);

        // Always push the textract line item, even if no matches found.
        // This ensures all invoice lines are processed.
        matches.push({
            matches: finalMatches,
            textractLineItem: lineItem,
            hasMatch: finalMatches.length > 0
        });

        searchStats.push(lineStats);
    });

    // // Output search statistics table
    // console.log('\n═══════════════════════════════════════════════════════════════════════');
    // console.log('                    FUSE.JS SEARCH STATISTICS');
    // console.log('═══════════════════════════════════════════════════════════════════════\n');

    // searchStats.forEach(lineStat => {
    //     console.log(`📄 Line Item #${lineStat.lineNumber}:`);
    //     console.log('─'.repeat(75));

    //     if (lineStat.searches.length > 0) {
    //         const tableData = lineStat.searches.map(search => ({
    //             'Search Type': search.type,
    //             'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''),
    //             'Results': search.results
    //         }));
    //         console.table(tableData);
    //     } else {
    //         console.log('  No searches performed for this line item.\n');
    //     }
    // });

    // // Summary statistics
    // const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0);
    // const totalResults = searchStats.reduce((sum, stat) =>
    //     sum + stat.searches.reduce((s, search) => s + search.results, 0), 0);
    // const avgResultsPerSearch = totalSearches > 0 ? (totalResults / totalSearches).toFixed(2) : 0;

    // console.log('═══════════════════════════════════════════════════════════════════════');
    // console.log('                         SUMMARY');
    // console.log('═══════════════════════════════════════════════════════════════════════');
    // console.table({
    //     'Total Line Items': processedData.lineItems.length,
    //     'Total Searches Performed': totalSearches,
    //     'Total Results Found': totalResults,
    //     'Average Results per Search': avgResultsPerSearch
    // });
    // console.log('═══════════════════════════════════════════════════════════════════════\n');

    return matches;
}

/**
 * Tells whether a bodyshop has any DMS identifier configured (CDK dealer id,
 * PBS serial number or RR dealer id).
 *
 * @param {Object} bodyshop - Bodyshop record.
 * @returns {*} The first truthy identifier, or the last (falsy) one when none is set.
 */
const bodyshopHasDmsKey = (bodyshop) => {
    const { cdk_dealerid, pbs_serialnumber, rr_dealerid } = bodyshop;
    return cdk_dealerid || pbs_serialnumber || rr_dealerid;
};


// Public API of this module: the bill form builder and the price-string cleaner.
module.exports = {
    generateBillFormData,
    normalizePrice
}