diff --git a/util/s3Sync.ts b/util/s3Sync.ts index efd4586..c2a0dfa 100644 --- a/util/s3Sync.ts +++ b/util/s3Sync.ts @@ -184,23 +184,26 @@ export class S3Sync { let total_size_bytes = 0; const aggregated_file_type_stats: { [extension: string]: number } = {}; - for (const jobFolder of jobFolders) { - const jobFolderPath = path.join(jobsPath, jobFolder); - const stat = await fsStat(jobFolderPath); - - // Only process directories - if (stat.isDirectory()) { - const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder); - jobStats.push(folderStats); - total_documents += folderStats.document_count; - total_size_bytes += folderStats.total_size_bytes; - - // Aggregate file type stats - for (const [ext, count] of Object.entries(folderStats.file_type_stats)) { - aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count; + const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/; + for (const jobFolder of jobFolders) { + if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) { + logger.warn(`Skipping invalid jobid directory: ${jobFolder}`); + continue; + } + const jobFolderPath = path.join(jobsPath, jobFolder); + const stat = await fsStat(jobFolderPath); + // Only process directories + if (stat.isDirectory()) { + const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder); + jobStats.push(folderStats); + total_documents += folderStats.document_count; + total_size_bytes += folderStats.total_size_bytes; + // Aggregate file type stats + for (const [ext, count] of Object.entries(folderStats.file_type_stats)) { + aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count; + } } } - } const analysis: JobsDirectoryAnalysis = { bodyshopid, @@ -231,8 +234,18 @@ export class S3Sync { const { document_count, total_size_bytes, file_type_stats } = await this.getDirectoryStats(jobFolderPath); + const uuidRegex = 
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/; + let validJobid: UUID | string | null = null; + if (jobid === "temporary") { + validJobid = null; + } else if (uuidRegex.test(jobid)) { + validJobid = jobid as UUID; + } else { + logger.warn(`Invalid jobid encountered in analyzeJobFolder: ${jobid}`); + validJobid = null; + } return { - jobid: jobid === "temporary" ? null : (jobid as UUID), + jobid: validJobid, bodyshopid, //relativePath, document_count, @@ -344,23 +357,26 @@ export async function analyzeJobsDirectory(): Promise { let total_size_bytes = 0; const aggregated_file_type_stats: { [extension: string]: number } = {}; - for (const jobFolder of jobFolders) { - const jobFolderPath = path.join(jobsPath, jobFolder); - const stat = await fsStat(jobFolderPath); - - // Only process directories - if (stat.isDirectory()) { - const folderStats = await analyzeJobFolder(jobsPath, jobFolder); - jobStats.push(folderStats); - total_documents += folderStats.document_count; - total_size_bytes += folderStats.total_size_bytes; - - // Aggregate file type stats - for (const [ext, count] of Object.entries(folderStats.file_type_stats)) { - aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count; + const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/; + for (const jobFolder of jobFolders) { + if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) { + logger.warn(`Skipping invalid jobid directory: ${jobFolder}`); + continue; + } + const jobFolderPath = path.join(jobsPath, jobFolder); + const stat = await fsStat(jobFolderPath); + // Only process directories + if (stat.isDirectory()) { + const folderStats = await analyzeJobFolder(jobsPath, jobFolder); + jobStats.push(folderStats); + total_documents += folderStats.document_count; + total_size_bytes += folderStats.total_size_bytes; + // Aggregate file type stats + for (const [ext, count] of 
Object.entries(folderStats.file_type_stats)) { + aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count; + } } } - } const analysis: JobsDirectoryAnalysis = { bodyshopid, //read from the config.json file in the root directory @@ -378,7 +394,7 @@ export async function analyzeJobsDirectory(): Promise { ); //Add an upload to the IO database to categorize all of this. - const apiURL = "http://host.docker.internal:4000/analytics/documents"; + const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: read this URL from configuration/environment instead of hardcoding it, and switch to the production endpoint before release. const result = await axios.post(apiURL, { data: analysis }); return analysis; @@ -397,9 +413,19 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise