Add analysis trigger

This commit is contained in:
Patrick Fic
2025-11-05 14:10:34 -08:00
parent f4b34a956a
commit 994a35025b
3 changed files with 89 additions and 26 deletions

View File

@@ -187,7 +187,7 @@ export class S3Sync {
jobStats.push(folderStats);
totalDocuments += folderStats.documentCount;
totalSizeBytes += folderStats.totalSizeBytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
@@ -236,7 +236,9 @@ export class S3Sync {
/**
* Recursively get document count and total size for a directory
*/
private async getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
private async getDirectoryStats(
dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
let documentCount = 0;
let totalSizeBytes = 0;
const fileTypeStats: { [extension: string]: number } = {};
@@ -253,7 +255,7 @@ export class S3Sync {
const subStats = await this.getDirectoryStats(itemPath);
documentCount += subStats.documentCount;
totalSizeBytes += subStats.totalSizeBytes;
// Merge file type stats
for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
fileTypeStats[ext] = (fileTypeStats[ext] || 0) + count;
@@ -262,9 +264,9 @@ export class S3Sync {
// Count files as documents
documentCount++;
totalSizeBytes += stat.size;
// Track file extension
const ext = path.extname(item).toLowerCase() || 'no-extension';
const ext = path.extname(item).toLowerCase() || "no-extension";
fileTypeStats[ext] = (fileTypeStats[ext] || 0) + 1;
}
}
@@ -340,7 +342,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
jobStats.push(folderStats);
totalDocuments += folderStats.documentCount;
totalSizeBytes += folderStats.totalSizeBytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
@@ -360,6 +362,9 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
logger.info(
`Jobs directory analysis complete: ${analysis.totalJobs} jobs, ${analysis.totalDocuments} documents, ${analysis.totalSizeMB} MB`
);
// TODO: Add an upload of this analysis to the IO database so all of it can be categorized.
return analysis;
} catch (error) {
logger.error("Failed to analyze Jobs directory:", error);
@@ -389,7 +394,9 @@ async function analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFol
/**
* Recursively get document count and total size for a directory (standalone helper function)
*/
async function getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
async function getDirectoryStats(
dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
let documentCount = 0;
let totalSizeBytes = 0;
const fileTypeStats: { [extension: string]: number } = {};
@@ -406,7 +413,7 @@ async function getDirectoryStats(dirPath: string): Promise<{ documentCount: numb
const subStats = await getDirectoryStats(itemPath);
documentCount += subStats.documentCount;
totalSizeBytes += subStats.totalSizeBytes;
// Merge file type stats
for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
fileTypeStats[ext] = (fileTypeStats[ext] || 0) + count;
@@ -415,9 +422,9 @@ async function getDirectoryStats(dirPath: string): Promise<{ documentCount: numb
// Count files as documents
documentCount++;
totalSizeBytes += stat.size;
// Track file extension
const ext = path.extname(item).toLowerCase() || 'no-extension';
const ext = path.extname(item).toLowerCase() || "no-extension";
fileTypeStats[ext] = (fileTypeStats[ext] || 0) + 1;
}
}