Write progress to a file to avoid memory leaks.

This commit is contained in:
Patrick Fic
2025-11-10 13:16:09 -08:00
parent f6a9486284
commit 05c325f696

View File

@@ -7,6 +7,7 @@ import { logger } from "../server.js";
import { FolderPaths } from "./serverInit.js";
import axios from "axios";
import { UUID } from "crypto";
import fsPromises from "fs/promises";
const execAsync = promisify(exec);
@@ -183,8 +184,11 @@ export function createS3SyncFromEnv(): S3Sync | null {
/**
* Standalone function to analyze Jobs directory without S3 configuration
* Uses streaming to a temporary file to avoid memory issues with large datasets
*/
export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
const tempFilePath = path.join(FolderPaths.Jobs, "..", "jobs_analysis_temp.jsonl");
try {
logger.info("Starting Jobs directory analysis...");
//Read from the config.json file in the root directory to get the bodyshopid
@@ -206,10 +210,14 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
};
}
// Clear/create temporary file for streaming job stats
await fsPromises.writeFile(tempFilePath, "", "utf-8");
logger.info(`Created temporary file for streaming analysis: ${tempFilePath}`);
const jobFolders = await readdir(jobsPath);
const jobStats: JobFolderStats[] = [];
let total_documents = 0;
let total_size_bytes = 0;
let total_jobs = 0;
const aggregated_file_type_stats: { [extension: string]: number } = {};
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
@@ -223,25 +231,47 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
// Only process directories
if (stat.isDirectory()) {
const folderStats = await analyzeJobFolder(jobsPath, jobFolder, bodyshopid);
jobStats.push(folderStats);
// Stream to file immediately (one JSON object per line)
await fsPromises.appendFile(tempFilePath, JSON.stringify(folderStats) + "\n", "utf-8");
total_jobs++;
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
}
// Log progress every 100 jobs
if (total_jobs % 100 === 0) {
logger.info(`Progress: Analyzed ${total_jobs} jobs...`);
}
}
}
logger.info(`Reading back job stats from temporary file...`);
// Read back the job stats from file
const fileContent = await fsPromises.readFile(tempFilePath, "utf-8");
const jobStats: JobFolderStats[] = fileContent
.trim()
.split("\n")
.filter((line) => line.length > 0)
.map((line) => JSON.parse(line));
// Sort by jobid
jobStats.sort((a, b) => a.jobid?.localeCompare(b.jobid!) || 0);
const analysis: JobsDirectoryAnalysis = {
bodyshopid, //read from the config.json file in the root directory
total_jobs: jobStats.length,
total_jobs,
total_documents,
total_size_bytes,
total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
file_type_stats: aggregated_file_type_stats,
media_analytics_details: { data: jobStats.sort((a, b) => a.jobid?.localeCompare(b.jobid!) || 0) }
media_analytics_details: { data: jobStats }
};
logger.info(
@@ -249,12 +279,27 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
);
//Add an upload to the IO database to categorize all of this.
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
const apiURL = process.env.IS_TEST
? "https://api.test.imex.online/analytics/documents"
: "https://api.imex.online/analytics/documents";
const result = await axios.post(apiURL, { data: analysis });
// Clean up temporary file
await fsPromises.unlink(tempFilePath);
logger.info("Cleaned up temporary analysis file");
return analysis;
} catch (error) {
logger.error("Failed to analyze Jobs directory:", JSON.stringify(error, null, 4));
// Attempt to clean up temp file on error
try {
if (await fs.pathExists(tempFilePath)) {
await fs.remove(tempFilePath);
logger.info("Cleaned up temporary file after error");
}
} catch (cleanupError) {
logger.warn("Failed to clean up temporary file:", cleanupError);
}
throw error;
}
}