IO-3515 resolve issues on search selects not updating, improve confidence scoring.

This commit is contained in:
Patrick Fic
2026-02-19 12:22:35 -08:00
parent 5d53d09af9
commit ae1408012f
11 changed files with 410 additions and 26552 deletions

View File

@@ -1,11 +1,13 @@
import { Button } from "antd";
import { Button, Tag, Modal, Typography } from "antd";
import axios from "axios";
import { useState } from "react";
import { FaWandMagicSparkles } from "react-icons/fa6";
import { connect } from "react-redux";
import { createStructuredSelector } from "reselect";
import { useNotification } from "../../contexts/Notifications/notificationContext";
import { selectBillEnterModal } from "../../redux/modals/modals.selectors";
import { selectBodyshop } from "../../redux/user/user.selectors";
import { FaWandMagicSparkles } from "react-icons/fa6";
import { useApolloClient } from "@apollo/client/react";
const mapStateToProps = createStructuredSelector({
billEnterModal: selectBillEnterModal,
@@ -24,11 +26,31 @@ function BillEnterAiScan({
setIsAiScan
}) {
const notification = useNotification();
const [showBetaModal, setShowBetaModal] = useState(false);
const BETA_ACCEPTANCE_KEY = "ai_scan_beta_acceptance";
const client = useApolloClient();
// Modal "Accept and Continue" handler: remember that the beta disclaimer was
// accepted, close the modal, then open the hidden file picker.
const handleBetaAcceptance = () => {
  try {
    localStorage.setItem(BETA_ACCEPTANCE_KEY, "true");
  } catch (error) {
    // Storage may be full or disabled. Persisting the acceptance is best-effort:
    // a failure must not trap the user in the modal, it only means the
    // disclaimer will be shown again next time.
    console.warn("Unable to persist AI scan beta acceptance", error);
  }
  setShowBetaModal(false);
  // Trigger the file input after acceptance
  fileInputRef.current?.click();
};
// "AI Scan" button handler: open the file picker straight away when the beta
// disclaimer has already been accepted, otherwise show the disclaimer modal.
const checkBetaAcceptance = () => {
  let hasAccepted = null;
  try {
    hasAccepted = localStorage.getItem(BETA_ACCEPTANCE_KEY);
  } catch (error) {
    // Reading storage can throw when it is blocked by the browser. Treat that
    // as "not accepted yet" so the button still does something useful.
    console.warn("Unable to read AI scan beta acceptance", error);
  }

  if (!hasAccepted) {
    // Show beta modal
    setShowBetaModal(true);
    return;
  }

  // User has already accepted, proceed with file selection
  fileInputRef.current?.click();
};
// Polling function for multipage PDF status
const pollJobStatus = async (jobId) => {
const pollJobStatus = async (textractJobId) => {
try {
const { data } = await axios.get(`/ai/bill-ocr/status/${jobId}`);
const { data } = await axios.get(`/ai/bill-ocr/status/${textractJobId}`);
if (data.status === "COMPLETED") {
// Stop polling
@@ -41,6 +63,7 @@ function BillEnterAiScan({
// Update form with the extracted data
if (data.data && data.data.billForm) {
form.setFieldsValue(data.data.billForm);
await form.validateFields(["billlines"], { recursive: true });
notification.success({
title: "AI Scan Complete",
message: "Invoice data has been extracted successfully"
@@ -82,69 +105,92 @@ function BillEnterAiScan({
type="file"
accept="image/*,application/pdf"
style={{ display: "none" }}
onChange={(e) => {
onChange={async (e) => {
const file = e.target.files?.[0];
if (file) {
setScanLoading(true);
setIsAiScan(true);
const formdata = new FormData();
formdata.append("billScan", file);
formdata.append("jobid", billEnterModal.context.job.id);
formdata.append("jobid", billEnterModal.context.job?.id);
formdata.append("bodyshopid", bodyshop.id);
formdata.append("partsorderid", billEnterModal.context.parts_order?.id);
//formdata.append("skipTextract", "true"); // For testing purposes
axios
.post("/ai/bill-ocr", formdata)
.then(({ data, status }) => {
if (status === 202) {
// Multipage PDF - start polling
notification.info({
title: "Processing Invoice",
message: "This is a multipage document. Processing may take a few moments..."
});
//Workaround needed to bypass react-compiler error about manipulating refs in child components. Refactor may be needed in the future to clean this up.
setPollingIntervalRef(
setInterval(() => {
pollJobStatus(data.jobId);
}, 3000)
);
try {
const { data, status } = await axios.post("/ai/bill-ocr", formdata);
// Initial poll
pollJobStatus(data.jobId);
} else if (status === 200) {
// Single page - immediate response
setScanLoading(false);
form.setFieldsValue(data.data.billForm);
notification.success({
title: "AI Scan Complete",
message: "Invoice data has been extracted successfully"
});
}
})
.catch((error) => {
setScanLoading(false);
notification.error({
title: "AI Scan Failed",
message: error.response?.data?.message || error.message || "Failed to process invoice"
if (status === 202) {
// Multipage PDF - start polling
notification.info({
title: "Processing Invoice",
message: "This is a multipage document. Processing may take a few moments..."
});
//Workaround needed to bypass react-compiler error about manipulating refs in child components. Refactor may be needed in the future to clean this up.
setPollingIntervalRef(
setInterval(() => {
pollJobStatus(data.textractJobId);
}, 3000)
);
// Initial poll
pollJobStatus(data.textractJobId);
} else if (status === 200) {
// Single page - immediate response
setScanLoading(false);
form.setFieldsValue(data.data.billForm);
await form.validateFields(["billlines"], { recursive: true });
notification.success({
title: "AI Scan Complete",
message: "Invoice data has been extracted successfully"
});
}
} catch (error) {
setScanLoading(false);
notification.error({
title: "AI Scan Failed",
message: error.response?.data?.message || error.message || "Failed to process invoice"
});
}
}
// Reset the input so the same file can be selected again
e.target.value = "";
}}
/>
<Button
onClick={() => {
fileInputRef.current?.click();
}}
icon={<FaWandMagicSparkles />}
loading={scanLoading}
disabled={scanLoading}
>
<Button onClick={checkBetaAcceptance} icon={<FaWandMagicSparkles />} loading={scanLoading} disabled={scanLoading}>
{scanLoading ? "Processing Invoice..." : "AI Scan"}
<Tag color="red">BETA</Tag>
</Button>
<Modal
title="AI Scan Beta Disclaimer"
open={showBetaModal}
onOk={handleBetaAcceptance}
onCancel={() => setShowBetaModal(false)}
okText="Accept and Continue"
cancelText="Cancel"
>
<Typography.Title level={2}>AI Usage Disclaimer</Typography.Title>
<Typography.Text>
This AI scanning feature is currently in <strong>beta</strong>. While it can accelerate data entry, you{" "}
<strong>must carefully review all extracted results</strong> for accuracy.
</Typography.Text>
<Typography.Text>The AI may make mistakes or miss information. Always verify:</Typography.Text>
<ul>
<li>All line items and quantities</li>
<li>Prices and totals</li>
<li>Part numbers and descriptions</li>
<li>Any other critical invoice details</li>
</ul>
<Typography.Text>
By continuing, you acknowledge that you will review and verify all AI-generated data before posting.
</Typography.Text>
</Modal>
</>
);
}

View File

@@ -8,12 +8,15 @@ import { MdOpenInNew } from "react-icons/md";
import { connect } from "react-redux";
import { Link } from "react-router-dom";
import { createStructuredSelector } from "reselect";
import { useNotification } from "../../contexts/Notifications/notificationContext.jsx";
import { CHECK_BILL_INVOICE_NUMBER } from "../../graphql/bills.queries";
import { selectBodyshop } from "../../redux/user/user.selectors";
import dayjs from "../../utils/day";
import { bodyshopHasDmsKey } from "../../utils/dmsUtils.js";
import InstanceRenderManager from "../../utils/instanceRenderMgr";
import AlertComponent from "../alert/alert.component";
import BillFormLinesExtended from "../bill-form-lines-extended/bill-form-lines-extended.component";
import DateTimePicker from "../form-date-time-picker/form-date-time-picker.component.jsx";
import FormFieldsChanged from "../form-fields-changed-alert/form-fields-changed-alert.component";
import CurrencyInput from "../form-items-formatted/currency-form-item.component";
import JobSearchSelect from "../job-search-select/job-search-select.component";
@@ -21,8 +24,6 @@ import LayoutFormRow from "../layout-form-row/layout-form-row.component";
import VendorSearchSelect from "../vendor-search-select/vendor-search-select.component";
import BillFormLines from "./bill-form.lines.component";
import { CalculateBillTotal } from "./bill-form.totals.utility";
import DateTimePicker from "../form-date-time-picker/form-date-time-picker.component.jsx";
import { bodyshopHasDmsKey } from "../../utils/dmsUtils.js";
const mapStateToProps = createStructuredSelector({
bodyshop: selectBodyshop
@@ -49,6 +50,8 @@ export function BillFormComponent({
const { t } = useTranslation();
const client = useApolloClient();
const [discount, setDiscount] = useState(0);
const notification = useNotification();
const jobIdFormWatch = Form.useWatch("jobid", form);
const {
treatments: { Extended_Bill_Posting, ClosingPeriod }
@@ -124,6 +127,23 @@ export function BillFormComponent({
bodyshop.inhousevendorid
]);
// When the watched "jobid" form value changes, load that job's lines and,
// if a vendor is also selected, the outstanding returns for the job/vendor pair.
useEffect(() => {
  // An explicit null from the watcher means there is no job to load for.
  if (jobIdFormWatch === null) return;

  // Read the live form values rather than the watched snapshot.
  const jobId = form.getFieldValue("jobid");
  if (jobId === null || jobId === undefined) return;

  loadLines({ variables: { id: jobId } });

  const vendorId = form.getFieldValue("vendorid");
  if (vendorId === null || vendorId === undefined) return;

  loadOutstandingReturns({
    variables: {
      jobId,
      vendorId
    }
  });
}, [jobIdFormWatch, form]);
return (
<div>
<FormFieldsChanged form={form} />
@@ -375,7 +395,15 @@ export function BillFormComponent({
]);
let totals;
if (!!values.total && !!values.billlines && values.billlines.length > 0) {
totals = CalculateBillTotal(values);
try {
totals = CalculateBillTotal(values);
} catch (error) {
notification.error({
title: "Error calculating totals",
message: error.message || "An error occurred while calculating bill totals.",
key: "bill_totals_calculation_error"
});
}
}
if (totals) {

View File

@@ -154,10 +154,10 @@ export function BillEnterModalLinesComponent({
label: t("billlines.fields.confidence")
}),
formInput: (record) => {
const confidenceValue = getFieldValue(["billlines", record.name, "confidence"]);
const rowValue = getFieldValue(["billlines", record.name]);
return (
<div style={{ display: "flex", alignItems: "center", justifyContent: "center" }}>
<ConfidenceDisplay value={confidenceValue} />
<ConfidenceDisplay rowValue={rowValue} />
</div>
);
}
@@ -276,7 +276,20 @@ export function BillEnterModalLinesComponent({
key: `${field.name}actual_price`,
name: [field.name, "actual_price"],
label: t("billlines.fields.actual_price"),
rules: [{ required: true }]
rules: [
{ required: true },
{
validator: (_, value) => {
if (Math.abs(parseFloat(value)) < 0.01) {
return Promise.reject();
} else {
return Promise.resolve();
}
},
warningOnly: true
}
],
hasFeedback: true
}),
formInput: (record, index) => (
<CurrencyInput

View File

@@ -1,4 +1,4 @@
import { Progress, Tag, Tooltip } from "antd";
import { Progress, Space, Tag, Tooltip } from "antd";
import { useTranslation } from "react-i18next";
const parseConfidence = (confidenceStr) => {
if (!confidenceStr || typeof confidenceStr !== "string") return null;
@@ -20,10 +20,11 @@ const getConfidenceColor = (value) => {
return "red";
};
const ConfidenceDisplay = ({ value }) => {
const ConfidenceDisplay = ({ rowValue: { confidence, actual_price, actual_cost } }) => {
const { t } = useTranslation();
const parsed = parseConfidence(value);
const parsed = parseConfidence(confidence);
const parsed_actual_price = parseFloat(actual_price);
const parsed_actual_cost = parseFloat(actual_cost);
if (!parsed) {
return <span style={{ color: "#999", fontSize: "0.85em" }}>N/A</span>;
}
@@ -71,9 +72,16 @@ const ConfidenceDisplay = ({ value }) => {
</div>
}
>
<Tag color={color} style={{ margin: 0, cursor: "help", userSelect: "none" }}>
{total.toFixed(0)}%
</Tag>
<Space size="small">
{!parsed_actual_cost || !parsed_actual_price || parsed_actual_cost === 0 || parsed_actual_price === 0 ? (
<Tag color="red" style={{ margin: 0, cursor: "help", userSelect: "none" }}>
{t("billlines.confidence.missing_data", { defaultValue: "Missing Data" })}
</Tag>
) : null}
<Tag color={color} style={{ margin: 0, cursor: "help", userSelect: "none" }}>
{total.toFixed(0)}%
</Tag>
</Space>
</Tooltip>
);
};

View File

@@ -15,17 +15,14 @@ const VendorSearchSelect = ({ value, onChange, options, onSelect, disabled, pref
if (value !== option) {
setOption(value);
}
}, [value]);
}, [value, option]);
useEffect(() => {
if (value !== option && onChange) {
if (value && !option) {
onChange(value);
} else {
onChange(option);
}
const handleChange = (newValue) => {
setOption(newValue);
if (onChange) {
onChange(newValue);
}
}, [value, option, onChange]);
};
const favorites =
preferredMake && options
@@ -69,7 +66,7 @@ const VendorSearchSelect = ({ value, onChange, options, onSelect, disabled, pref
);
}}
popupMatchSelectWidth={false}
onChange={setOption}
onChange={handleChange}
optionFilterProp="name"
onSelect={onSelect}
disabled={disabled || false}

View File

@@ -2,6 +2,8 @@
const Fuse = require('fuse.js');
const { has } = require("lodash");
const { standardizedFieldsnames } = require('./bill-ocr-normalize');
const InstanceManager = require("../../utils/instanceMgr").default;
const PRICE_PERCENT_MARGIN_TOLERANCE = 0.5; //Used to make sure prices and costs are likely.
@@ -13,11 +15,97 @@ const normalizePartNumber = (str) => {
/**
 * Canonicalise free text for matching: strip everything except letters,
 * digits and whitespace, collapse whitespace runs to a single space, trim
 * the ends and upper-case the result.
 * @param {string} str - Text to normalise.
 * @returns {string} The normalised, upper-cased text.
 */
const normalizeText = (str) => {
    const alphanumericOnly = str.replace(/[^a-zA-Z0-9\s]/g, '');
    const singleSpaced = alphanumericOnly.replace(/\s+/g, ' ');
    return singleSpaced.trim().toUpperCase();
};
/**
 * Strip every character that is not a digit, dot or minus sign from a price
 * string. Non-string input is returned untouched.
 * @param {*} str - Raw price value.
 * @returns {*} The cleaned string, or the original value when it is not a string.
 */
const normalizePrice = (str) => (
    typeof str === 'string' ? str.replace(/[^0-9.-]+/g, "") : str
);
/**
 * Choose a single price from several positive candidates read out of one cell.
 * Identical values collapse to that value; values that are all within 1% of
 * their mean return the mean; otherwise the first candidate wins.
 * @param {number[]} numbers - Non-empty list of positive numbers.
 * @returns {number} The selected price.
 */
const pickPriceCandidate = (numbers) => {
    const first = numbers[0];
    if (numbers.every(num => num === first)) {
        return first;
    }

    const avg = numbers.reduce((a, b) => a + b, 0) / numbers.length;
    const allClose = numbers.every(num => Math.abs(num - avg) / avg < 0.01);
    return allClose ? avg : first;
};

/**
 * Convert a raw price value into a number.
 *
 * Strings are searched first for decimal numbers such as "123.45", then split
 * on / | \ , ; delimiters, and finally stripped of every character that is not
 * a digit, dot or minus sign. When several values are found, one is selected
 * by pickPriceCandidate.
 *
 * NOTE(review): the decimal-number pass only keeps positive values and drops a
 * leading minus sign, while the final fallback preserves it - confirm whether
 * negative (credit) amounts are expected here.
 *
 * @param {*} str - Raw price; usually a string, may already be a number.
 * @returns {number} The parsed price, or 0 when nothing numeric can be extracted.
 */
const normalizePriceFinal = (str) => {
    if (typeof str !== 'string') {
        // Numeric input passes through as-is; anything unparseable becomes 0.
        const num = parseFloat(str);
        return Number.isNaN(num) ? 0 : num;
    }

    // Remove thousands separators (a comma between a digit and exactly three
    // digits) so "1,234.56" is read as 1234.56 instead of being cut down to
    // 234.56, and "1,234" is not split into the two values 1 and 234.
    const text = str.replace(/(\d),(?=\d{3}(?!\d))/g, "$1");

    // First, try to extract valid decimal number patterns (e.g., "123.45").
    // This also untangles doubled reads such as "47.57.47.57".
    const decimalMatches = text.match(/\d+\.\d{1,2}/g);
    if (decimalMatches) {
        const numbers = decimalMatches.map(m => parseFloat(m)).filter(n => n > 0);
        if (numbers.length > 0) {
            return pickPriceCandidate(numbers);
        }
    }

    // Fallback: split on common delimiters and parse each part on its own.
    const parts = text.split(/[\/|\\,;]/).map(part => part.trim()).filter(part => part.length > 0);
    if (parts.length > 1) {
        const numbers = parts
            .map(part => parseFloat(part.replace(/[^0-9.-]+/g, "")))
            .filter(n => n > 0);
        if (numbers.length > 0) {
            return pickPriceCandidate(numbers);
        }
    }

    // Single value, or nothing usable found above: clean the whole string.
    const parsed = parseFloat(text.replace(/[^0-9.-]+/g, ""));
    return Number.isNaN(parsed) ? 0 : parsed;
};
// Helper function to calculate Textract OCR confidence (0-100%)
const calculateTextractConfidence = (textractLineItem) => {
@@ -38,6 +126,11 @@ const calculateTextractConfidence = (textractLineItem) => {
return 0;
}
// Check if critical normalized labels are present
const hasActualCost = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_cost);
const hasActualPrice = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.actual_price);
const hasLineDesc = Object.values(textractLineItem).some(field => field.normalizedLabel === standardizedFieldsnames.line_desc);
// Calculate weighted average, giving more weight to important fields
// If we can identify key fields (ITEM, PRODUCT_CODE, PRICE), weight them higher
let totalWeight = 0;
@@ -47,18 +140,42 @@ const calculateTextractConfidence = (textractLineItem) => {
if (field.confidence && typeof field.confidence === 'number') {
// Weight important fields higher
let weight = 1;
if (key === 'ITEM' || key === 'PRODUCT_CODE') {
weight = 2; // Description and part number are most important
} else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
weight = 1.5; // Price and quantity moderately important
if (field.normalizedLabel === standardizedFieldsnames.actual_cost || field.normalizedLabel === standardizedFieldsnames.actual_price) {
weight = 4;
}
else if (field.normalizedLabel === standardizedFieldsnames.part_no || field.normalizedLabel === standardizedFieldsnames.line_desc) {
weight = 3.5;
}
else if (field.normalizedLabel === standardizedFieldsnames.quantity) {
weight = 3.5;
}
// else if (key === 'ITEM' || key === 'PRODUCT_CODE') {
// weight = 3; // Description and part number are most important
// } else if (key === 'PRICE' || key === 'UNIT_PRICE' || key === 'QUANTITY') {
// weight = 2; // Price and quantity moderately important
// }
weightedSum += field.confidence * weight;
totalWeight += weight;
}
});
const avgConfidence = totalWeight > 0 ? weightedSum / totalWeight : 0;
let avgConfidence = totalWeight > 0 ? weightedSum / totalWeight : 0;
// Apply penalty if critical normalized labels are missing
let missingFieldsPenalty = 1.0;
let missingCount = 0;
if (!hasActualCost) missingCount++;
if (!hasActualPrice) missingCount++;
if (!hasLineDesc) missingCount++;
// Each missing field reduces confidence by 15%
if (missingCount > 0) {
missingFieldsPenalty = 1.0 - (missingCount * 0.15);
}
avgConfidence = avgConfidence * missingFieldsPenalty;
return Math.round(avgConfidence * 100) / 100; // Round to 2 decimal places
};
@@ -109,9 +226,9 @@ const calculateOverallConfidence = (ocrConfidence, matchConfidence) => {
// Overall confidence is affected by both how well Textract read the data
// and how well we matched it to existing joblines
// Use a weighted average: 40% OCR confidence, 60% match confidence
// Match confidence is more important because even perfect OCR is useless without a good match
const overall = (ocrConfidence * 0.4) + (matchConfidence * 0.6);
// Use a weighted average: 60% OCR confidence, 40% match confidence
// OCR confidence is more important because even perfect match is useless without good OCR
const overall = (ocrConfidence * 0.6) + (matchConfidence * 0.4);
return Math.round(overall * 100) / 100;
};
@@ -147,61 +264,63 @@ const mergeResults = (resultsArray, weights = []) => {
.slice(0, 5); // Return top 5 results
};
async function generateBillFormData({ processedData, jobid, bodyshopid, partsorderid, req }) {
async function generateBillFormData({ processedData, jobid: jobidFromProps, bodyshopid, partsorderid, req }) {
const client = req.userGraphQLClient;
//TODO: Add in vendor data.
let jobid = jobidFromProps;
//If no jobid, fetch it, and funnel it back.
if (!jobid || jobid === null || jobid === undefined || jobid === "" || jobid === "null" || jobid === "undefined") {
const ro_number = processedData.summary?.PO_NUMBER?.value || Object.values(processedData.summary).find(value => value.normalizedLabel === 'ro_number')?.value;
if (!ro_number) {
throw new Error("Could not find RO number in the extracted data to associate with the bill. Select an RO and try again.");
}
const { jobs } = await client.request(`
query QUERY_BILL_OCR_JOB_BY_RO($ro_number: String!) {
jobs(where: {ro_number: {_eq: $ro_number}}) {
id
}
}`, { ro_number });
if (jobs.length === 0) {
throw new Error("No job found for the detected RO/PO number.");
} else {
jobid = jobs[0].id;
}
}
const jobData = await client.request(`
query QUERY_BILL_OCR_DATA($jobid: uuid!, $partsorderid: uuid!) {
vendors{
query QUERY_BILL_OCR_DATA($jobid: uuid!) {
vendors {
id
name
}
jobs_by_pk(id: $jobid) {
id
bodyshop {
id
md_responsibility_centers
cdk_dealerid
pbs_serialnumber
rr_dealerid
}
jobs_by_pk(id: $jobid) {
id
bodyshop{
id
md_responsibility_centers
cdk_dealerid
pbs_serialnumber
rr_dealerid
}
joblines {
id
line_desc
removed
act_price
db_price
oem_partno
alt_partno
part_type
}
}
parts_orders_by_pk(id: $partsorderid) {
id
parts_order_lines {
id
line_desc
act_price
cost
jobline {
id
line_desc
act_price
oem_partno
alt_partno
part_type
}
}
}
joblines {
id
line_desc
removed
act_price
db_price
oem_partno
alt_partno
part_type
}
}
`, {
jobid, partsorderid // this may fail if null?
}
`, {
jobid, // TODO: Refactor back in parts orders
});
//TODO: Need to find a vendor ID. Create a fuse for it, and fuzzy search for it using the textract vendor info.
//Create fuses of line descriptions for matching.
const jobLineDescFuse = new Fuse(
jobData.jobs_by_pk.joblines.map(jl => ({ ...jl, line_desc_normalized: normalizeText(jl.line_desc || ""), oem_partno_normalized: normalizePartNumber(jl.oem_partno || ""), alt_partno_normalized: normalizePartNumber(jl.alt_partno || "") })),
@@ -226,7 +345,7 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
},
{
name: 'oem_partno_normalized',
weight: 5
weight: 6
},
{
name: 'alt_partno_normalized',
@@ -238,7 +357,6 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
}
);
const joblineMatches = joblineFuzzySearch({ fuseToSearch: jobLineDescFuse, processedData });
console.log("*** ~ generateBillFormData ~ joblineMatches:", JSON.stringify(joblineMatches, null, 2));
const vendorFuse = new Fuse(
jobData.vendors,
@@ -250,13 +368,13 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
}
);
const vendorMatches = vendorFuse.search(processedData.summary?.NAME?.value || processedData.summary?.VENDOR_NAME?.value);
console.log("*** ~ generateBillFormData ~ vendorMatches:", JSON.stringify(vendorMatches, null, 2));
const vendorMatches = vendorFuse.search(processedData.summary?.VENDOR_NAME?.value || processedData.summary?.NAME?.value);
let vendorid;
if (vendorMatches.length > 0) {
vendorid = vendorMatches[0].item.id;
}
const { jobs_by_pk: job, parts_orders_by_pk: partsOrder } = jobData;
const { jobs_by_pk: job } = jobData;
if (!job) {
throw new Error('Job not found for bill form data generation.');
}
@@ -344,9 +462,9 @@ async function generateBillFormData({ processedData, jobid, bodyshopid, partsord
: null
: responsibilityCenters.defaults &&
(responsibilityCenters.defaults.costs[matchToUse?.item?.part_type] || null)
: null, //Needs to get set by client side.
"applicable_taxes": { //Not sure what to do with these?
"federal": false,
: null,
"applicable_taxes": {
"federal": InstanceManager({ imex: true, rome: false }),
"state": false,
"local": false
},
@@ -551,43 +669,43 @@ function joblineFuzzySearch({ fuseToSearch, processedData }) {
})
// Output search statistics table
console.log('\n═══════════════════════════════════════════════════════════════════════');
console.log(' FUSE.JS SEARCH STATISTICS');
console.log('═══════════════════════════════════════════════════════════════════════\n');
// // Output search statistics table
// console.log('\n═══════════════════════════════════════════════════════════════════════');
// console.log(' FUSE.JS SEARCH STATISTICS');
// console.log('═══════════════════════════════════════════════════════════════════════\n');
searchStats.forEach(lineStat => {
console.log(`📄 Line Item #${lineStat.lineNumber}:`);
console.log('─'.repeat(75));
// searchStats.forEach(lineStat => {
// console.log(`📄 Line Item #${lineStat.lineNumber}:`);
// console.log('─'.repeat(75));
if (lineStat.searches.length > 0) {
const tableData = lineStat.searches.map(search => ({
'Search Type': search.type,
'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''),
'Results': search.results
}));
console.table(tableData);
} else {
console.log(' No searches performed for this line item.\n');
}
});
// if (lineStat.searches.length > 0) {
// const tableData = lineStat.searches.map(search => ({
// 'Search Type': search.type,
// 'Search Term': search.term.substring(0, 40) + (search.term.length > 40 ? '...' : ''),
// 'Results': search.results
// }));
// console.table(tableData);
// } else {
// console.log(' No searches performed for this line item.\n');
// }
// });
// Summary statistics
const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0);
const totalResults = searchStats.reduce((sum, stat) =>
sum + stat.searches.reduce((s, search) => s + search.results, 0), 0);
const avgResultsPerSearch = totalSearches > 0 ? (totalResults / totalSearches).toFixed(2) : 0;
// // Summary statistics
// const totalSearches = searchStats.reduce((sum, stat) => sum + stat.searches.length, 0);
// const totalResults = searchStats.reduce((sum, stat) =>
// sum + stat.searches.reduce((s, search) => s + search.results, 0), 0);
// const avgResultsPerSearch = totalSearches > 0 ? (totalResults / totalSearches).toFixed(2) : 0;
console.log('═══════════════════════════════════════════════════════════════════════');
console.log(' SUMMARY');
console.log('═══════════════════════════════════════════════════════════════════════');
console.table({
'Total Line Items': processedData.lineItems.length,
'Total Searches Performed': totalSearches,
'Total Results Found': totalResults,
'Average Results per Search': avgResultsPerSearch
});
console.log('═══════════════════════════════════════════════════════════════════════\n');
// console.log('═══════════════════════════════════════════════════════════════════════');
// console.log(' SUMMARY');
// console.log('═══════════════════════════════════════════════════════════════════════');
// console.table({
// 'Total Line Items': processedData.lineItems.length,
// 'Total Searches Performed': totalSearches,
// 'Total Results Found': totalResults,
// 'Average Results per Search': avgResultsPerSearch
// });
// console.log('═══════════════════════════════════════════════════════════════════════\n');
return matches
}

View File

@@ -1,7 +1,6 @@
const PDFDocument = require('pdf-lib').PDFDocument;
const TEXTRACT_REDIS_PREFIX = "textract:"
const TEXTRACT_JOB_TTL = 3600;
const TEXTRACT_REDIS_PREFIX = `textract:${process.env?.NODE_ENV === "production" ? "PROD" : "TEST"}`
const TEXTRACT_JOB_TTL = 10 * 60;
/**

View File

@@ -6,6 +6,14 @@ function normalizeFieldName(fieldType) {
return fieldType;
}
// Canonical field names that raw invoice column/summary labels are mapped to.
// Frozen because the table is shared across modules and must not be mutated.
const standardizedFieldsnames = Object.freeze({
    actual_cost: "actual_cost",
    actual_price: "actual_price",
    line_desc: "line_desc",
    quantity: "quantity",
    part_no: "part_no",
    ro_number: "ro_number",
});
function normalizeLabelName(labelText) {
if (!labelText) return '';
@@ -15,13 +23,7 @@ function normalizeLabelName(labelText) {
// Remove special characters and replace spaces with underscores
normalized = normalized.replace(/[^a-z0-9\s]/g, '').replace(/\s+/g, '_');
const standardizedFieldsnames = {
actual_cost: "actual_cost",
actual_price: "actual_price",
line_desc: "line_desc",
quantity: "quantity",
part_no: "part_no"
}
// Common label normalizations
const labelMap = {
@@ -30,6 +32,9 @@ function normalizeLabelName(labelText) {
'sale_qty': standardizedFieldsnames.quantity,
'invoiced_qty': standardizedFieldsnames.quantity,
'qty_shipped': standardizedFieldsnames.quantity,
'quantity': standardizedFieldsnames.quantity,
'filled': standardizedFieldsnames.quantity,
'count': standardizedFieldsnames.quantity,
'quant': standardizedFieldsnames.quantity,
'desc': standardizedFieldsnames.line_desc,
'description': standardizedFieldsnames.line_desc,
@@ -48,7 +53,10 @@ function normalizeLabelName(labelText) {
'net': standardizedFieldsnames.actual_cost,
'selling_price': standardizedFieldsnames.actual_cost,
'net_price': standardizedFieldsnames.actual_cost,
'net_cost': standardizedFieldsnames.actual_cost
'net_cost': standardizedFieldsnames.actual_cost,
'po_no': standardizedFieldsnames.ro_number,
'customer_po_no': standardizedFieldsnames.ro_number,
'customer_po_no_': standardizedFieldsnames.ro_number
};
@@ -102,10 +110,6 @@ function processScanData(invoiceData) {
return processedItem;
})
// .filter(item => {
// // Filter out items with no description or with quantity <= 0
// return item.description && (!item.quantity || item.quantity > 0);
// });
return processed;
}
@@ -162,7 +166,7 @@ function extractInvoiceData(textractResponse) {
let normalizedField = normalizeFieldName(fieldType);
// Ensure uniqueness by appending a counter if the field already exists
if (item.hasOwnProperty(normalizedField)) {
if (Object.prototype.hasOwnProperty.call(item, normalizedField)) {
fieldNameCounts[normalizedField] = (fieldNameCounts[normalizedField] || 1) + 1;
normalizedField = `${normalizedField}_${fieldNameCounts[normalizedField]}`;
}
@@ -191,5 +195,6 @@ function extractInvoiceData(textractResponse) {
module.exports = {
extractInvoiceData,
processScanData
processScanData,
standardizedFieldsnames
}

View File

@@ -6,4 +6,5 @@ Required Infrastructure setup
TODO:
* Create a rome bucket for uploads, or move to the regular spot.
* How to implement this across environments.
* How to prevent polling for a job that may have errored.
* How to prevent polling for a job that may have errored.
* Handling of HEIC files on upload.

View File

@@ -62,27 +62,12 @@ async function handleBillOcr(request, response) {
// The uploaded file is available in request.file
const uploadedFile = request.file;
const { jobid, bodyshopid, partsorderid, skipTextract } = request.body;
if (skipTextract === 'true') {
console.log('Skipping Textract processing as per request');
response.status(200).send({
success: true,
status: 'COMPLETED',
data: await generateBillFormData({ processedData: null, jobid, bodyshopid, partsorderid, req: request }), //This is broken if the processedData is not overwritten in the function for testing.
message: 'Invoice processing completed'
});
return;
}
const { jobid, bodyshopid, partsorderid } = request.body;
try {
const fileType = getFileType(uploadedFile);
console.log(`Processing file type: ${fileType}`);
// Images are always processed synchronously (single page)
if (fileType === 'image') {
console.log('Image => 1 page, processing synchronously');
const processedData = await processSinglePageDocument(uploadedFile.buffer);
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
response.status(200).send({
@@ -94,11 +79,9 @@ async function handleBillOcr(request, response) {
} else if (fileType === 'pdf') {
// Check the number of pages in the PDF
const pageCount = await getPdfPageCount(uploadedFile.buffer);
console.log(`PDF has ${pageCount} page(s)`);
if (pageCount === 1) {
// Process synchronously for single-page documents
console.log('PDF => 1 page, processing synchronously');
const processedData = await processSinglePageDocument(uploadedFile.buffer);
const billForm = await generateBillFormData({ processedData: processedData, jobid, bodyshopid, partsorderid, req: request });
//const billResult = await generateBillFormData({ result, });
@@ -110,12 +93,11 @@ async function handleBillOcr(request, response) {
});
} else {
// Start the Textract job (non-blocking) for multi-page documents
console.log('PDF => 2+ pages, processing asynchronously');
const jobInfo = await startTextractJob(uploadedFile.buffer, { jobid, bodyshopid, partsorderid });
response.status(202).send({
success: true,
jobId: jobInfo.jobId,
textractJobId: jobInfo.jobId,
message: 'Invoice processing started',
statusUrl: `/ai/bill-ocr/status/${jobInfo.jobId}`
});
@@ -136,17 +118,14 @@ async function handleBillOcr(request, response) {
}
async function handleBillOcrStatus(request, response) {
const { jobId: textractJobId } = request.params;
const { textractJobId } = request.params;
if (!textractJobId) {
console.log('No textractJobId found in params');
response.status(400).send({ error: 'Job ID is required' });
return;
}
console.log('Looking for job:', textractJobId);
const jobStatus = await getTextractJob({ redisPubClient, textractJobId });
console.log('Job status:', jobStatus);
if (!jobStatus) {
response.status(404).send({ error: 'Job not found' });
@@ -156,18 +135,17 @@ async function handleBillOcrStatus(request, response) {
if (jobStatus.status === 'COMPLETED') {
// Generate billForm on-demand if not already generated
let billForm = jobStatus.data?.billForm;
if (!billForm && jobStatus.context) {
try {
console.log('Generating bill form data on-demand...');
billForm = await generateBillFormData({
processedData: jobStatus.data,
billForm = await generateBillFormData({
processedData: jobStatus.data,
jobid: jobStatus.context.jobid,
bodyshopid: jobStatus.context.bodyshopid,
partsorderid: jobStatus.context.partsorderid,
req: request // Now we have request context!
});
// Cache the billForm back to Redis for future requests
await setTextractJob({
redisPubClient,
@@ -181,7 +159,6 @@ async function handleBillOcrStatus(request, response) {
}
});
} catch (error) {
console.error('Error generating bill form data:', error);
response.status(500).send({
status: 'COMPLETED',
error: 'Data processed but failed to generate bill form',
@@ -191,7 +168,7 @@ async function handleBillOcrStatus(request, response) {
return;
}
}
response.status(200).send({
status: 'COMPLETED',
data: {
@@ -211,9 +188,6 @@ async function handleBillOcrStatus(request, response) {
}
}
/**
* Process a single-page document synchronously using AnalyzeExpenseCommand
* @param {Buffer} pdfBuffer
@@ -238,6 +212,7 @@ async function processSinglePageDocument(pdfBuffer) {
async function startTextractJob(pdfBuffer, context = {}) {
// Upload PDF to S3 temporarily for Textract async processing
const { bodyshopid, jobid } = context;
const s3Bucket = process.env.AWS_AI_BUCKET;
const snsTopicArn = process.env.AWS_TEXTRACT_SNS_TOPIC_ARN;
const snsRoleArn = process.env.AWS_TEXTRACT_SNS_ROLE_ARN;
@@ -253,7 +228,7 @@ async function startTextractJob(pdfBuffer, context = {}) {
}
const uploadId = uuidv4();
const s3Key = `textract-temp/${uploadId}.pdf`; //TODO Update Keys structure to something better.
const s3Key = `textract-temp/${bodyshopid}/${jobid}/${uploadId}.pdf`; //TODO Update Keys structure to something better.
// Upload to S3
const uploadCommand = new PutObjectCommand({
@@ -319,7 +294,6 @@ async function processSQSMessages() {
}
try {
console.log('Polling SQS queue:', queueUrl);
const receiveCommand = new ReceiveMessageCommand({
QueueUrl: queueUrl,
MaxNumberOfMessages: 10,
@@ -328,13 +302,12 @@ async function processSQSMessages() {
});
const result = await sqsClient.send(receiveCommand);
console.log('SQS poll result:', result.Messages ? `${result.Messages.length} messages` : 'no messages');
if (result.Messages && result.Messages.length > 0) {
console.log('Processing', result.Messages.length, 'messages from SQS');
for (const message of result.Messages) {
try {
console.log("Processing message:", message);
//TODO: Add environment level filtering here.
await handleTextractNotification(message);
// Delete message after successful processing

File diff suppressed because it is too large Load Diff