Move queue to redis to prevent multiple firings.

This commit is contained in:
Patrick Fic
2025-11-10 11:43:05 -08:00
parent f575f6ab9a
commit f6a9486284
3 changed files with 213 additions and 259 deletions

View File

@@ -60,7 +60,7 @@ export class S3Sync {
// Check if Jobs directory exists
if (!(await fs.pathExists(jobsPath))) {
logger.warn(`Jobs directory does not exist: ${jobsPath}`);
logger.error(`Jobs directory does not exist: ${jobsPath}`);
return;
}
@@ -145,7 +145,7 @@ export class S3Sync {
* Get sync statistics using AWS CLI
*/
async getSyncStats(): Promise<{ bucketName: string; region: string; keyPrefix: string; available: boolean }> {
const available = await this.testConnection();
const available = false; //await this.testConnection();
return {
bucketName: this.config.bucketName,
region: this.config.region,
@@ -153,157 +153,6 @@ export class S3Sync {
available
};
}
/**
* Analyze all job folders in the Jobs directory
* Returns detailed statistics for each job folder
*/
/**
 * Analyze all job folders in the Jobs directory.
 * Produces aggregate totals (job count, document count, total bytes,
 * per-extension file counts) plus per-job details sorted by jobid.
 *
 * @returns JobsDirectoryAnalysis for the Jobs directory; an empty but
 *          well-formed analysis is returned when the directory is missing.
 * @throws Rethrows any error raised while walking the directory tree,
 *         after logging it.
 */
async analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
  try {
    logger.info("Starting Jobs directory analysis...");
    const jobsPath = FolderPaths.Jobs;
    // Bail out with an empty analysis when the Jobs directory has not
    // been created yet.
    if (!(await fs.pathExists(jobsPath))) {
      logger.warn(`Jobs directory does not exist: ${jobsPath}`);
      return {
        bodyshopid, // NOTE(review): resolved from outer scope — confirm its source
        total_jobs: 0,
        total_documents: 0,
        total_size_bytes: 0,
        total_size_mb: 0,
        file_type_stats: {},
        media_analytics_details: { data: [] }
      };
    }
    const jobFolders = await readdir(jobsPath);
    const jobStats: JobFolderStats[] = [];
    let total_documents = 0;
    let total_size_bytes = 0;
    const aggregated_file_type_stats: { [extension: string]: number } = {};
    // Job folders are either UUID-named or the special "temporary" folder;
    // anything else is skipped with a warning.
    const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
    for (const jobFolder of jobFolders) {
      if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
        logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
        continue;
      }
      const jobFolderPath = path.join(jobsPath, jobFolder);
      const stat = await fsStat(jobFolderPath);
      // Only process directories (a stray file with a UUID name is ignored).
      if (stat.isDirectory()) {
        const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder);
        jobStats.push(folderStats);
        total_documents += folderStats.document_count;
        total_size_bytes += folderStats.total_size_bytes;
        // Aggregate per-extension counts across all jobs.
        for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
          aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
        }
      }
    }
    // FIX: the previous comparator (`a.jobid?.localeCompare(b.jobid!) || 0`)
    // was not symmetric when jobid is null (the "temporary" folder):
    // compare(null, x) returned 0 while compare(x, null) compared against the
    // coerced string "null", violating the sort-comparator contract and
    // allowing inconsistent ordering. Treat null as an empty string so the
    // ordering is consistent (null jobids sort first).
    jobStats.sort((a, b) => (a.jobid ?? "").localeCompare(b.jobid ?? ""));
    const analysis: JobsDirectoryAnalysis = {
      bodyshopid,
      total_jobs: jobStats.length,
      total_documents,
      total_size_bytes,
      // Rounded to two decimal places.
      total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
      file_type_stats: aggregated_file_type_stats,
      media_analytics_details: { data: jobStats }
    };
    logger.info(
      `Jobs directory analysis complete: ${analysis.total_jobs} jobs, ${analysis.total_documents} documents, ${analysis.total_size_mb} MB`
    );
    return analysis;
  } catch (error) {
    logger.error("Failed to analyze Jobs directory:", error);
    throw error;
  }
}
/**
* Analyze a single job folder
*/
private async analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFolderStats> {
const jobFolderPath = path.join(jobsPath, jobid);
// const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
const { document_count, total_size_bytes, file_type_stats } = await this.getDirectoryStats(jobFolderPath);
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
let validJobid: UUID | string | null = null;
if (jobid === "temporary") {
validJobid = null;
} else if (uuidRegex.test(jobid)) {
validJobid = jobid as UUID;
} else {
logger.warn(`Invalid jobid encountered in analyzeJobFolder: ${jobid}`);
validJobid = null;
}
return {
jobid: validJobid,
bodyshopid,
//relativePath,
document_count,
total_size_bytes,
total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
file_type_stats
};
}
/**
* Recursively get document count and total size for a directory
*/
/**
 * Recursively collect document count, total byte size, and per-extension
 * file counts for a directory tree.
 *
 * Directories named "thumbs" or "ConvertedOriginals" (case-insensitive)
 * are excluded. Read errors are logged and the partial totals gathered
 * so far are returned rather than thrown.
 *
 * @param dirPath Directory to analyze.
 */
private async getDirectoryStats(
  dirPath: string
): Promise<{ document_count: number; total_size_bytes: number; file_type_stats: { [extension: string]: number } }> {
  let document_count = 0;
  let total_size_bytes = 0;
  const file_type_stats: { [extension: string]: number } = {};
  try {
    const entries = await readdir(dirPath);
    for (const entry of entries) {
      const entryPath = path.join(dirPath, entry);
      const entryStat = await fsStat(entryPath);
      if (!entryStat.isDirectory()) {
        // Every regular file counts as one document.
        document_count += 1;
        total_size_bytes += entryStat.size;
        // Track extension frequency; extension-less files are bucketed
        // under "no-extension".
        const extension = path.extname(entry).toLowerCase() || "no-extension";
        file_type_stats[extension] = (file_type_stats[extension] || 0) + 1;
        continue;
      }
      // Skip generated-asset folders (case-insensitive match).
      const lowered = entry.toLowerCase();
      if (lowered === "thumbs" || lowered === "convertedoriginals") {
        continue;
      }
      // Recurse into the subdirectory and fold its totals into ours.
      const nested = await this.getDirectoryStats(entryPath);
      document_count += nested.document_count;
      total_size_bytes += nested.total_size_bytes;
      for (const [extension, count] of Object.entries(nested.file_type_stats)) {
        file_type_stats[extension] = (file_type_stats[extension] || 0) + count;
      }
    }
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
  }
  return { document_count, total_size_bytes, file_type_stats };
}
}
/**
@@ -363,26 +212,26 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
let total_size_bytes = 0;
const aggregated_file_type_stats: { [extension: string]: number } = {};
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
for (const jobFolder of jobFolders) {
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
continue;
}
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await analyzeJobFolder(jobsPath, jobFolder);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
}
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
for (const jobFolder of jobFolders) {
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
logger.info(`Skipping invalid jobid directory: ${jobFolder}`);
continue;
}
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await analyzeJobFolder(jobsPath, jobFolder, bodyshopid);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
}
}
}
const analysis: JobsDirectoryAnalysis = {
bodyshopid, //read from the config.json file in the root directory
@@ -400,7 +249,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
);
//Add an upload to the IO database to categorize all of this.
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
const result = await axios.post(apiURL, { data: analysis });
return analysis;
@@ -411,11 +260,10 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
}
/**
* Analyze a single job folder (standalone helper function)
* Analyze a single job folder (helper function)
*/
async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFolderStats> {
async function analyzeJobFolder(jobsPath: string, jobid: string, bodyshopid: UUID): Promise<JobFolderStats> {
const jobFolderPath = path.join(jobsPath, jobid);
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
const { document_count, total_size_bytes, file_type_stats } = await getDirectoryStats(jobFolderPath);
@@ -432,7 +280,6 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFol
return {
bodyshopid,
jobid: validJobid,
// relativePath,
document_count,
total_size_bytes,
total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
@@ -441,7 +288,7 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFol
}
/**
* Recursively get document count and total size for a directory (standalone helper function)
* Recursively get document count and total size for a directory (helper function)
*/
async function getDirectoryStats(
dirPath: string
@@ -463,7 +310,7 @@ async function getDirectoryStats(
if (itemLower === "thumbs" || itemLower === "convertedoriginals") {
continue;
}
// Recursively analyze subdirectories
const subStats = await getDirectoryStats(itemPath);
document_count += subStats.document_count;