Add UUID validation for job folder names: folders that are neither "temporary" nor a valid UUID are skipped with a warning.

This commit is contained in:
Patrick Fic
2025-11-07 12:34:57 -08:00
parent 17b58238b4
commit 927cde5996

View File

@@ -184,23 +184,26 @@ export class S3Sync {
let total_size_bytes = 0;
const aggregated_file_type_stats: { [extension: string]: number } = {};
for (const jobFolder of jobFolders) {
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
for (const jobFolder of jobFolders) {
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
continue;
}
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
}
}
}
}
const analysis: JobsDirectoryAnalysis = {
bodyshopid,
@@ -231,8 +234,18 @@ export class S3Sync {
const { document_count, total_size_bytes, file_type_stats } = await this.getDirectoryStats(jobFolderPath);
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
let validJobid: UUID | string | null = null;
if (jobid === "temporary") {
validJobid = null;
} else if (uuidRegex.test(jobid)) {
validJobid = jobid as UUID;
} else {
logger.warn(`Invalid jobid encountered in analyzeJobFolder: ${jobid}`);
validJobid = null;
}
return {
jobid: jobid === "temporary" ? null : (jobid as UUID),
jobid: validJobid,
bodyshopid,
//relativePath,
document_count,
@@ -344,23 +357,26 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
let total_size_bytes = 0;
const aggregated_file_type_stats: { [extension: string]: number } = {};
for (const jobFolder of jobFolders) {
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await analyzeJobFolder(jobsPath, jobFolder);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
for (const jobFolder of jobFolders) {
if (jobFolder !== "temporary" && !uuidRegex.test(jobFolder)) {
logger.warn(`Skipping invalid jobid directory: ${jobFolder}`);
continue;
}
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
const folderStats = await analyzeJobFolder(jobsPath, jobFolder);
jobStats.push(folderStats);
total_documents += folderStats.document_count;
total_size_bytes += folderStats.total_size_bytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.file_type_stats)) {
aggregated_file_type_stats[ext] = (aggregated_file_type_stats[ext] || 0) + count;
}
}
}
}
const analysis: JobsDirectoryAnalysis = {
bodyshopid, //read from the config.json file in the root directory
@@ -378,7 +394,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
);
//Add an upload to the IO database to categorize all of this.
const apiURL = "http://host.docker.internal:4000/analytics/documents";
const apiURL = "https://api.test.imex.online/analytics/documents"; //TODO: don't hardcode and point to prod.
const result = await axios.post(apiURL, { data: analysis });
return analysis;
@@ -397,9 +413,19 @@ async function analyzeJobFolder(jobsPath: string, jobid: string): Promise<JobFol
const { document_count, total_size_bytes, file_type_stats } = await getDirectoryStats(jobFolderPath);
const uuidRegex = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
let validJobid: UUID | string | null = null;
if (jobid === "temporary") {
validJobid = null;
} else if (uuidRegex.test(jobid)) {
validJobid = jobid as UUID;
} else {
logger.warn(`Invalid jobid encountered in analyzeJobFolder: ${jobid}`);
validJobid = null;
}
return {
bodyshopid,
jobid,
jobid: validJobid,
// relativePath,
document_count,
total_size_bytes,