Move queue to Redis to prevent multiple firings.
This commit is contained in:
203
util/s3Sync.ts
203
util/s3Sync.ts
@@ -60,7 +60,7 @@ export class S3Sync {
|
||||
|
||||
// Check if Jobs directory exists
|
||||
if (!(await fs.pathExists(jobsPath))) {
|
||||
logger.warn(`Jobs directory does not exist: ${jobsPath}`);
|
||||
logger.error(`Jobs directory does not exist: ${jobsPath}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -145,7 +145,7 @@ export class S3Sync {
|
||||
* Get sync statistics using AWS CLI
|
||||
*/
|
||||
async getSyncStats(): Promise<{ bucketName: string; region: string; keyPrefix: string; available: boolean }> {
|
||||
const available = await this.testConnection();
|
||||
const available = false; //await this.testConnection();
|
||||
return {
|
||||
bucketName: this.config.bucketName,
|
||||
region: this.config.region,
|
||||
@@ -153,157 +153,6 @@ export class S3Sync {
|
||||
available
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Analyze every job folder directly under the Jobs directory.
 *
 * Only entries named `temporary` or shaped like a UUID are considered, and of
 * those only directories are analyzed (via `analyzeJobFolder`). Per-folder
 * document counts, byte sizes and file-extension counts are summed into the
 * returned analysis.
 *
 * @returns The aggregated analysis. `media_analytics_details.data` holds the
 *   per-folder stats ordered by jobid, with entries that have no jobid last.
 *   If the Jobs directory does not exist, an analysis with zeroed totals and
 *   an empty data list is returned.
 * @throws Rethrows, after logging, any error raised inside the method body
 *   (for example while listing or stat-ing the Jobs directory entries).
 */
async analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
  try {
    logger.info("Starting Jobs directory analysis...");

    const jobsPath = FolderPaths.Jobs;

    // Check if Jobs directory exists
    if (!(await fs.pathExists(jobsPath))) {
      logger.warn(`Jobs directory does not exist: ${jobsPath}`);
      return {
        // NOTE(review): `bodyshopid` is not declared inside this method; presumably it
        // comes from an enclosing scope — confirm.
        bodyshopid,
        total_jobs: 0,
        total_documents: 0,
        total_size_bytes: 0,
        total_size_mb: 0,
        file_type_stats: {},
        media_analytics_details: { data: [] }
      };
    }

    const jobFolders = await readdir(jobsPath);
    const jobStats: JobFolderStats[] = [];
    let total_documents = 0;
    let total_size_bytes = 0;
    const aggregated_file_type_stats: { [extension: string]: number } = {};

    const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
    for (const jobFolder of jobFolders) {
      // Anything that is neither the "temporary" folder nor UUID-named is ignored.
      if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
        logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
        continue;
      }
      const jobFolderPath = path.join(jobsPath, jobFolder);
      const stat = await fsStat(jobFolderPath);
      // Only process directories
      if (stat.isDirectory()) {
        const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder);
        jobStats.push(folderStats);
        total_documents += folderStats.document_count;
        total_size_bytes += folderStats.total_size_bytes;
        // Aggregate file type stats
        for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
          aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
        }
      }
    }

    // Order by jobid, placing folders without a jobid last. The previous comparator,
    // `a.jobid?.localeCompare(b.jobid!) || 0`, returned 0 whenever `a.jobid` was missing
    // and compared against the coerced text "null" whenever `b.jobid` was missing, so it
    // did not define a consistent ordering for sort().
    const byJobid = (a: JobFolderStats, b: JobFolderStats): number => {
      if (a.jobid == null && b.jobid == null) return 0;
      if (a.jobid == null) return 1;
      if (b.jobid == null) return -1;
      return a.jobid.localeCompare(b.jobid);
    };

    const analysis: JobsDirectoryAnalysis = {
      bodyshopid,
      total_jobs: jobStats.length,
      total_documents,
      total_size_bytes,
      // Size in MiB rounded to two decimals.
      total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
      file_type_stats: aggregated_file_type_stats,
      media_analytics_details: { data: jobStats.sort(byJobid) }
    };

    logger.info(
      `Jobs directory analysis complete: ${analysis.total_jobs} jobs, ${analysis.total_documents} documents, ${analysis.total_size_mb} MB`
    );
    return analysis;
  } catch (error) {
    logger.error("Failed to analyze Jobs directory:", error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Collect statistics for one job folder.
 *
 * @param jobsPath Directory that contains the job folder.
 * @param jobid Name of the job folder; used as the jobid when it is UUID-shaped.
 * @returns Document count, size (bytes and MiB rounded to two decimals) and
 *   per-extension counts for the folder. `jobid` is null for the `temporary`
 *   folder and for any name that is not UUID-shaped (the latter is also logged).
 */
private async analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFolderStats> {
  // const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
  const folderStats = await this.getDirectoryStats(path.join(jobsPath, jobid));

  const uuidPattern = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
  const looksLikeUuid = uuidPattern.test(jobid);

  // "temporary" is an expected non-UUID name, so it is nulled without a warning.
  if (!looksLikeUuid && jobid !== "temporary") {
    logger.warn(`Invalid jobid encountered in analyzeJobFolder: ${jobid}`);
  }
  const validJobid: UUID | string | null = looksLikeUuid ? (jobid as UUID) : null;

  const bytesPerMb = 1024 * 1024;
  return {
    jobid: validJobid,
    // NOTE(review): `bodyshopid` is not declared inside this method; presumably it
    // comes from an enclosing scope — confirm.
    bodyshopid,
    //relativePath,
    document_count: folderStats.document_count,
    total_size_bytes: folderStats.total_size_bytes,
    total_size_mb: Math.round((folderStats.total_size_bytes / bytesPerMb) * 100) / 100,
    file_type_stats: folderStats.file_type_stats
  };
}
|
||||
|
||||
/**
 * Walk a directory tree and tally its files.
 *
 * Sub-directories named `thumbs` or `convertedoriginals` (compared
 * case-insensitively) are not descended into. Every other entry that is not a
 * directory counts as one document and contributes its size and its
 * lower-cased extension ("no-extension" when it has none).
 *
 * @param dirPath Directory to analyze.
 * @returns Document count, total size in bytes and a count per file extension.
 *   Errors are logged rather than thrown; whatever was tallied before the
 *   error is still returned.
 */
private async getDirectoryStats(
  dirPath: string
): Promise<{ document_count: number; total_size_bytes: number; file_type_stats: { [extension: string]: number } }> {
  const file_type_stats: { [extension: string]: number } = {};
  let document_count = 0;
  let total_size_bytes = 0;

  // Adds `amount` to the running count for one extension.
  const addToExtension = (extension: string, amount: number): void => {
    file_type_stats[extension] = (file_type_stats[extension] ?? 0) + amount;
  };

  const skippedFolders = ["thumbs", "convertedoriginals"];

  try {
    for (const entryName of await readdir(dirPath)) {
      const entryPath = path.join(dirPath, entryName);
      const entryStat = await fsStat(entryPath);

      if (!entryStat.isDirectory()) {
        // Count files as documents and track their extension
        document_count += 1;
        total_size_bytes += entryStat.size;
        addToExtension(path.extname(entryName).toLowerCase() || "no-extension", 1);
        continue;
      }

      // Skip thumbs and ConvertedOriginals folders (case-insensitive)
      if (skippedFolders.includes(entryName.toLowerCase())) {
        continue;
      }

      // Recurse into the sub-directory and fold its totals into ours
      const nested = await this.getDirectoryStats(entryPath);
      document_count += nested.document_count;
      total_size_bytes += nested.total_size_bytes;
      Object.entries(nested.file_type_stats).forEach(([extension, amount]) => {
        addToExtension(extension, amount);
      });
    }
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
  }

  return { document_count, total_size_bytes, file_type_stats };
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -363,26 +212,26 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
let total_size_bytes = 0;
|
||||
const aggregated_file_type_stats: { [extension: string]: number } = {};
|
||||
|
||||
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
|
||||
for (const jobFolder of jobFolders) {
|
||||
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
|
||||
logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
|
||||
continue;
|
||||
}
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fsStat(jobFolderPath);
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
const folderStats = await analyzeJobFolder(jobsPath, jobFolder);
|
||||
jobStats.push(folderStats);
|
||||
total_documents += folderStats.document_count;
|
||||
total_size_bytes += folderStats.total_size_bytes;
|
||||
// Aggregate file type stats
|
||||
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
|
||||
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
|
||||
}
|
||||
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
|
||||
for (const jobFolder of jobFolders) {
|
||||
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
|
||||
logger.info(`Skipping invalid jobid directory: ${jobFolder}`);
|
||||
continue;
|
||||
}
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fsStat(jobFolderPath);
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
const folderStats = await analyzeJobFolder(jobsPath, jobFolder, bodyshopid);
|
||||
jobStats.push(folderStats);
|
||||
total_documents += folderStats.document_count;
|
||||
total_size_bytes += folderStats.total_size_bytes;
|
||||
// Aggregate file type stats
|
||||
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
|
||||
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const analysis: JobsDirectoryAnalysis = {
|
||||
bodyshopid, //read from the config.json file in the root directory
|
||||
@@ -400,7 +249,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
);
|
||||
|
||||
//Add an upload to the IO database to categorize all of this.
|
||||
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
|
||||
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
|
||||
const result = await axios.post(apiURL, { data: analysis });
|
||||
|
||||
return analysis;
|
||||
@@ -411,11 +260,10 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze a single job folder (standalone helper function)
|
||||
* Analyze a single job folder (helper function)
|
||||
*/
|
||||
async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFolderStats> {
|
||||
async function analyzeJobFolder(jobsPath: string, jobid: string, bodyshopid: UUID): Promise<JobFolderStats> {
|
||||
const jobFolderPath = path.join(jobsPath, jobid);
|
||||
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
|
||||
|
||||
const { document_count, total_size_bytes, file_type_stats } = await getDirectoryStats(jobFolderPath);
|
||||
|
||||
@@ -432,7 +280,6 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFol
|
||||
return {
|
||||
bodyshopid,
|
||||
jobid: validJobid,
|
||||
// relativePath,
|
||||
document_count,
|
||||
total_size_bytes,
|
||||
total_size_mb: Math.round((total_size_bytes / (1024 * 1024)) * 100) / 100,
|
||||
@@ -441,7 +288,7 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFol
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively get document count and total size for a directory (standalone helper function)
|
||||
* Recursively get document count and total size for a directory (helper function)
|
||||
*/
|
||||
async function getDirectoryStats(
|
||||
dirPath: string
|
||||
@@ -463,7 +310,7 @@ async function getDirectoryStats(
|
||||
if (itemLower === "thumbs" || itemLower === "convertedoriginals") {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Recursively analyze subdirectories
|
||||
const subStats = await getDirectoryStats(itemPath);
|
||||
document_count += subStats.document_count;
|
||||
|
||||
Reference in New Issue
Block a user