381 lines
15 KiB
JavaScript
381 lines
15 KiB
JavaScript
|
|
const client = require("../../graphql-client/graphql-client").client;
|
|
const Fuse = require('fuse.js');
|
|
const { has } = require("lodash");
|
|
|
|
// Fractional tolerance (0.5 = +/-50%) around the matched jobline's act_price.
// generateBillFormData uses it to decide whether a price or cost guessed from the
// invoice's currency values is likely to be correct.
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5;
|
|
|
|
// Helper function to normalize fields
|
|
/**
 * Reduces a part number to its bare alphanumeric characters, upper-cased, so that
 * differently formatted part numbers ("ab-12 3", "AB123") compare equal.
 *
 * @param {*} str - Raw part number. Non-string values are coerced with String().
 * @returns {string} The normalized part number; '' for null/undefined.
 */
const normalizePartNumber = (str) => {
    // Guard against missing values instead of throwing on `.replace`.
    if (str == null) return '';
    return String(str).replace(/[^a-zA-Z0-9]/g, '').toUpperCase();
};
|
|
|
|
/**
 * Normalizes free text for matching: strips everything except letters, digits and
 * whitespace, collapses whitespace runs to a single space, trims, and upper-cases.
 *
 * @param {*} str - Raw text. Non-string values are coerced with String().
 * @returns {string} The normalized text; '' for null/undefined.
 */
const normalizeText = (str) => {
    // Guard against missing values instead of throwing on `.replace`.
    if (str == null) return '';
    return String(str)
        .replace(/[^a-zA-Z0-9\s]/g, '')
        .replace(/\s+/g, ' ')
        .trim()
        .toUpperCase();
};
|
|
|
|
/**
 * Strips everything except digits, dots and minus signs from a price string.
 * Values that are not strings are handed back untouched.
 *
 * @param {*} str - Raw price, e.g. "$1,234.56".
 * @returns {*} The cleaned string (e.g. "1234.56"), or the original non-string value.
 */
const normalizePrice = (str) => (
    typeof str === 'string'
        ? str.replace(/[^0-9.-]+/g, "")
        : str
);
|
|
|
|
// Helper function to merge and deduplicate results with weighted scoring
|
|
/**
 * Merges several Fuse.js result lists into one de-duplicated, ranked list.
 *
 * Each result's score is multiplied by the weight of the list it came from. When the
 * same item (by `item.id`) appears in several lists, the lowest weighted score is kept
 * and the number of appearances is counted.
 *
 * @param {Array<Array<{item: {id: *}, score: number}>>} resultsArray - One result list
 *   per search. Null/undefined lists are treated as empty.
 * @param {number[]} [weights=[]] - Score multiplier per list (same index as
 *   resultsArray). Missing or falsy weights fall back to 1.
 * @param {number} [limit=5] - Maximum number of merged results to return.
 * @returns {Array<{item: Object, score: number, count: number}>} Items found in the most
 *   lists first, then by ascending score (lower is better in Fuse.js).
 */
const mergeResults = (resultsArray, weights = [], limit = 5) => {
    const merged = new Map();

    resultsArray.forEach((results, index) => {
        const weight = weights[index] || 1;

        for (const { item, score } of results ?? []) {
            const weightedScore = score * weight;
            const existing = merged.get(item.id);

            if (existing) {
                // Lower score is better in Fuse.js, so keep the minimum.
                existing.score = Math.min(existing.score, weightedScore);
                existing.count += 1;
            } else {
                merged.set(item.id, { item, score: weightedScore, count: 1 });
            }
        }
    });

    return [...merged.values()]
        // Items found by more searches win; ties are broken by the better (lower) score.
        .sort((a, b) => (b.count - a.count) || (a.score - b.score))
        .slice(0, limit);
};
|
|
|
|
/**
 * Builds the pre-filled form data for the bill posting screen from extracted invoice data.
 *
 * Loads the job's joblines (along with vendors and the parts order) through the GraphQL
 * client, fuzzy-matches every extracted invoice line against the joblines, and emits one
 * bill line per invoice line using the best match, if there is one.
 *
 * @param {Object} params
 * @param {Object} params.processedData - Extracted invoice data; `summary` and `lineItems` are read.
 * @param {string} params.jobid - Job to post the bill against.
 * @param {string} [params.bodyshopid] - Currently unused.
 * @param {string} params.partsorderid - Parts order queried alongside the job.
 * @returns {Promise<Object>} Form data with jobid, vendorid, invoice_number, date,
 *   is_credit_memo, total and billlines.
 * @throws {Error} When the job cannot be found.
 */
async function generateBillFormData({ processedData, jobid, bodyshopid, partsorderid }) {
    //TODO: Should this be using the client auth token to limit results? Most likely.
    //TODO: Add in vendor data.
    const jobData = await client.request(`
        query QUERY_BILL_OCR_DATA($jobid: uuid!, $partsorderid: uuid!) {
            vendors{
                id
                name
            }
            jobs_by_pk(id: $jobid) {
                id
                joblines {
                    id
                    line_desc
                    removed
                    act_price
                    db_price
                    oem_partno
                    alt_partno
                }
            }
            parts_orders_by_pk(id: $partsorderid) {
                id
                parts_order_lines {
                    id
                    line_desc
                    act_price
                    cost
                    jobline {
                        id
                        line_desc
                        act_price
                        oem_partno
                        alt_partno
                    }
                }
            }
        }

    `, {
        jobid, partsorderid // this may fail if null?
    });

    // Validate before touching job.joblines; otherwise a missing job surfaces as an
    // opaque TypeError instead of this error.
    const job = jobData?.jobs_by_pk;
    if (!job) {
        throw new Error('Job not found for bill form data generation.');
    }

    //TODO: Need to find a vendor ID. Create a fuse for it, and fuzzy search for it using the textract vendor info.

    //Create fuses of line descriptions for matching.
    const jobLineDescFuse = new Fuse(
        job.joblines,
        {
            keys: [
                { name: 'line_desc', weight: 4 },
                { name: 'oem_partno', weight: 5 },
                { name: 'alt_partno', weight: 3 },
                { name: 'act_price', weight: 1 },
            ],
            threshold: 0.4, //Adjust as needed for matching sensitivity,
            includeScore: true,
        }
    );
    const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
    console.log("*** ~ generateBillFormData ~ joblineMatches:", joblineMatches);

    //TODO: How do we handle freight lines and core charges?

    //Create the form data structure for the bill posting screen.
    const billFormData = {
        "jobid": jobid,
        "vendorid": null,
        "invoice_number": processedData.summary?.INVOICE_RECEIPT_ID?.value,
        "date": processedData.summary?.INVOICE_RECEIPT_DATE?.value,
        "is_credit_memo": false,
        "total": normalizePrice(processedData.summary?.INVOICE_TOTAL?.value || processedData.summary?.TOTAL?.value),
        "billlines": joblineMatches.map(buildBillLine),
    };

    return billFormData;
}

/**
 * Converts one fuzzy-search result ({ matches, textractLineItem }) into a bill line.
 * Invoice lines without any jobline match still produce a line, with joblineid "noline".
 */
function buildBillLine({ matches, textractLineItem }) {
    //Matches should be prioritized, take the first one.
    const matchToUse = matches.length > 0 ? matches[0] : null;
    const { actualPrice, actualCost } = resolveBillLinePricing(textractLineItem, matchToUse);

    return {
        // matchToUse is null for unmatched invoice lines; fall back to the invoice's own text.
        "line_desc": matchToUse?.item?.line_desc ?? textractLineItem.ITEM?.value,
        "quantity": textractLineItem.QUANTITY?.value, // convert to integer?
        "actual_price": normalizePrice(actualPrice),
        "actual_cost": normalizePrice(actualCost),
        "cost_center": "SETBYCLIENT", //Needs to get set by client side.
        "applicable_taxes": { //Not sure what to do with these?
            "federal": false,
            "state": false,
            "local": false
        },
        "joblineid": matchToUse?.item?.id || "noline",
    };
}

/**
 * Determines the actual price and cost for an invoice line.
 *
 * Fields whose normalizedLabel is 'actual_price' / 'actual_cost' are used as-is. Anything
 * still missing is guessed from the line's "$"-prefixed values (highest = price, second
 * highest = cost) and sanity-checked against the matched jobline's act_price.
 */
function resolveBillLinePricing(textractLineItem, matchToUse) {
    const fieldKeys = Object.keys(textractLineItem);
    const priceKey = fieldKeys.find((key) => textractLineItem[key]?.normalizedLabel === 'actual_price');
    const costKey = fieldKeys.find((key) => textractLineItem[key]?.normalizedLabel === 'actual_cost');

    //TODO: Should be using the textract if there is an exact match on the normalized label.
    let actualPrice = priceKey ? textractLineItem[priceKey].value : undefined;
    let actualCost = costKey ? textractLineItem[costKey].value : undefined;

    if (priceKey && costKey) {
        return { actualPrice, actualCost };
    }

    // Collect every currency-looking value, highest first. Only strings can carry the
    // "$" prefix, so non-string values are skipped rather than crashing on startsWith.
    const currencyAmounts = fieldKeys
        .map((key) => textractLineItem[key]?.value)
        .filter((value) => typeof value === 'string' && value.startsWith('$'))
        .map((value) => parseFloat(value.replace(/[^0-9.-]/g, '')) || 0)
        .sort((a, b) => b - a);

    //Most expensive should be the actual price, second most expensive should be the cost.
    if (!actualPrice) actualPrice = currencyAmounts.length > 0 ? currencyAmounts[0] : 0;
    if (!actualCost) actualCost = currencyAmounts.length > 1 ? currencyAmounts[1] : 0;

    const joblinePrice = matchToUse ? parseFloat(matchToUse.item.act_price) || 0 : 0;
    // Only validate against the jobline when it actually has a price: with a price of 0
    // every amount would be "out of range" and the invoice values would be discarded.
    if (joblinePrice !== 0) {
        const lowerBound = joblinePrice * (1 - PRICE_PERCENT_MARGIN_TOLERANCE);
        const upperBound = joblinePrice * (1 + PRICE_PERCENT_MARGIN_TOLERANCE);
        const isUnlikely = (amount) => amount > 0 && (amount < lowerBound || amount > upperBound);

        if (!priceKey && isUnlikely(actualPrice)) {
            actualPrice = joblinePrice; //Set to the jobline as a fallback.
        }
        if (!costKey && isUnlikely(actualCost)) {
            actualCost = null; //Blank it out if it's not likely.
        }
    }

    return { actualPrice, actualCost };
}
|
|
|
|
/**
 * Fuzzy-matches every extracted invoice line against the joblines indexed in a Fuse instance.
 *
 * For each line item the ITEM, PRODUCT_CODE, PRICE and UNIT_PRICE values are searched
 * separately, and the per-field results are combined into one weighted ranking.
 *
 * @param {Object} params
 * @param {Object} params.fuseToSearch - Fuse instance built with `includeScore: true`.
 * @param {Object} params.processedData - Extracted invoice data; `lineItems` is read.
 * @returns {Array<{matches: Object[], textractLineItem: Object, hasMatch: boolean}>} One
 *   entry per invoice line (even when nothing matched), with up to 5 ranked matches.
 */
function joblineFuzzySearch({ fuseToSearch, processedData }) {
    // Weight different field types differently
    const fieldWeights = {
        productCode: 5.0, // Most important - part numbers should match
        item: 3.0,        // Second most important - description
        price: 1.0,       // Less important - prices can vary
        unitPrice: 0.8    // Least important - similar to price
    };

    // A missing lineItems list yields no lines instead of a TypeError.
    const lineItems = processedData?.lineItems ?? [];

    return lineItems.map((lineItem) => {
        const finalMatches = rankJoblineCandidates([
            {
                results: searchJoblinesByPartNumber(fuseToSearch, lineItem.PRODUCT_CODE?.value),
                weight: fieldWeights.productCode,
                field: 'productCode'
            },
            {
                results: searchJoblinesByDescription(fuseToSearch, lineItem.ITEM?.value),
                weight: fieldWeights.item,
                field: 'item'
            },
            {
                results: searchJoblinesByPrice(fuseToSearch, lineItem.PRICE?.value),
                weight: fieldWeights.price,
                field: 'price'
            },
            {
                results: searchJoblinesByPrice(fuseToSearch, lineItem.UNIT_PRICE?.value),
                weight: fieldWeights.unitPrice,
                field: 'unitPrice'
            }
        ]);

        // Always emit the textract line item, even if no matches were found.
        // This ensures all invoice lines are processed.
        return {
            matches: finalMatches,
            textractLineItem: lineItem,
            hasMatch: finalMatches.length > 0
        };
    });
}

/** Searches by a multi-word description: full text, each significant word, and without spaces. */
function searchJoblinesByDescription(fuseToSearch, rawItem) {
    if (!rawItem) return [];

    const normalized = normalizeText(String(rawItem));

    // 1: Full string search
    const fullSearch = fuseToSearch.search(normalized);

    // 2: Search individual significant words (3+ chars)
    const words = normalized.split(' ').filter((word) => word.length >= 3);
    const wordSearches = words.map((word) => fuseToSearch.search(word));

    // 3: Search without spaces entirely
    const noSpaceSearch = fuseToSearch.search(normalized.replace(/\s+/g, ''));

    // Full search best, individual words penalized slightly
    return mergeResults(
        [fullSearch, ...wordSearches, noSpaceSearch],
        [1.0, ...words.map(() => 1.5), 1.2]
    );
}

/** Searches by part number in three spellings: alphanumeric only, whitespace removed, dashes kept. */
function searchJoblinesByPartNumber(fuseToSearch, rawProductCode) {
    if (!rawProductCode) return [];

    const productCode = String(rawProductCode);

    // 1: Normalized search (no spaces/special chars)
    const normalizedSearch = fuseToSearch.search(normalizePartNumber(productCode));

    // 2: Original with minimal cleaning
    const minimalSearch = fuseToSearch.search(productCode.replace(/\s+/g, '').toUpperCase());

    // 3: Search with dashes (common in part numbers)
    const dashSearch = fuseToSearch.search(productCode.replace(/[^a-zA-Z0-9-]/g, '').toUpperCase());

    // Prefer fully normalized
    return mergeResults(
        [normalizedSearch, minimalSearch, dashSearch],
        [1.0, 1.1, 1.2]
    );
}

/** Searches by a price value, both as written and formatted to two decimal places. */
function searchJoblinesByPrice(fuseToSearch, rawPrice) {
    if (!rawPrice) return [];

    // normalizePrice returns non-strings untouched; the search below is given a string.
    const price = String(normalizePrice(rawPrice));

    // 1: Exact price match
    const exactSearch = fuseToSearch.search(price);

    // 2: Price with 2 decimal places
    const priceFloat = parseFloat(price);
    if (Number.isNaN(priceFloat)) {
        // Still route through mergeResults so this path is capped and shaped like the
        // others; raw Fuse results are unbounded and would break the position bonus.
        return mergeResults([exactSearch], [1.0]);
    }

    const formattedSearch = fuseToSearch.search(priceFloat.toFixed(2));
    return mergeResults([exactSearch, formattedSearch], [1.0, 1.1]);
}

/**
 * Combines per-field result lists into the final top-5 ranking (higher finalScore is better).
 * Each entry of fieldResults is { results, weight, field }.
 */
function rankJoblineCandidates(fieldResults) {
    const combinedScoreMap = new Map();

    for (const { results, weight, field } of fieldResults) {
        results.forEach((result, index) => {
            // Position bonus (first result is better than fifth); never negative.
            const positionBonus = Math.max(0, (5 - index) / 5);

            // Lower score is better in Fuse.js, so invert it and apply weights
            const normalizedScore = (1 - result.score) * weight * positionBonus;
            const matchCount = result.count || 1;

            const existing = combinedScoreMap.get(result.item.id);
            if (existing) {
                existing.score += normalizedScore;
                existing.fieldMatches.push(field);
                existing.matchCount += matchCount;
            } else {
                combinedScoreMap.set(result.item.id, {
                    item: result.item,
                    score: normalizedScore,
                    fieldMatches: [field],
                    matchCount
                });
            }
        });
    }

    return Array.from(combinedScoreMap.values())
        .map((entry) => {
            // Apply penalty if item has no act_price or it's 0
            const hasPriceData = entry.item.act_price && parseFloat(entry.item.act_price) > 0;
            const priceDataPenalty = hasPriceData ? 1.0 : 0.5; // 50% penalty if no price

            return {
                ...entry,
                // Boost score for items that matched in multiple fields, penalize for missing price
                finalScore: entry.score * (1 + (entry.fieldMatches.length * 0.2)) * priceDataPenalty,
                hasPriceData
            };
        })
        .sort((a, b) => b.finalScore - a.finalScore)
        .slice(0, 5);
}
|
|
|
|
// Public API of this module: only the bill form generator is exported.
module.exports = { generateBillFormData };
|