Basic directory analysis.

This commit is contained in:
Patrick Fic
2025-11-04 15:00:44 -08:00
parent 6a1f02c3cb
commit 40b2e0fdf7
9 changed files with 148 additions and 18 deletions

View File

@@ -1,6 +1,7 @@
import { exec } from "child_process";
import { promisify } from "util";
import * as fs from "fs-extra";
import { readdir, stat as fsStat } from "fs/promises";
import * as path from "path";
import { logger } from "../server.js";
import { FolderPaths } from "./serverInit.js";
@@ -21,6 +22,7 @@ export interface JobFolderStats {
documentCount: number;
totalSizeBytes: number;
totalSizeMB: number;
fileTypeStats: { [extension: string]: number };
}
export interface JobsDirectoryAnalysis {
@@ -28,6 +30,7 @@ export interface JobsDirectoryAnalysis {
totalDocuments: number;
totalSizeBytes: number;
totalSizeMB: number;
fileTypeStats: { [extension: string]: number };
jobs: JobFolderStats[];
}
@@ -163,18 +166,20 @@ export class S3Sync {
totalDocuments: 0,
totalSizeBytes: 0,
totalSizeMB: 0,
fileTypeStats: {},
jobs: []
};
}
const jobFolders = await fs.readdir(jobsPath);
const jobFolders = await readdir(jobsPath);
const jobStats: JobFolderStats[] = [];
let totalDocuments = 0;
let totalSizeBytes = 0;
const aggregatedFileTypeStats: { [extension: string]: number } = {};
for (const jobFolder of jobFolders) {
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fs.stat(jobFolderPath);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
@@ -182,6 +187,11 @@ export class S3Sync {
jobStats.push(folderStats);
totalDocuments += folderStats.documentCount;
totalSizeBytes += folderStats.totalSizeBytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
}
}
}
@@ -190,6 +200,7 @@ export class S3Sync {
totalDocuments,
totalSizeBytes,
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
fileTypeStats: aggregatedFileTypeStats,
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
};
@@ -210,47 +221,58 @@ export class S3Sync {
const jobFolderPath = path.join(jobsPath, jobId);
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
const { documentCount, totalSizeBytes } = await this.getDirectoryStats(jobFolderPath);
const { documentCount, totalSizeBytes, fileTypeStats } = await this.getDirectoryStats(jobFolderPath);
return {
jobId,
relativePath,
documentCount,
totalSizeBytes,
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
fileTypeStats
};
}
/**
 * Recursively collect file statistics for a directory tree.
 *
 * Every entry that is not a directory is counted as one document. Its size is
 * added to the byte total and its lower-cased extension (as returned by
 * `path.extname`, including the leading dot) is tallied in `fileTypeStats`.
 * Entries for which `path.extname` returns an empty string are tallied under
 * the key `'no-extension'`.
 *
 * Failures are logged and skipped rather than thrown: if the directory itself
 * cannot be listed the result is all zeros, and if a single entry cannot be
 * inspected only that entry is left out of the totals.
 *
 * @param dirPath Directory to analyze.
 * @returns Document count, cumulative size in bytes, and per-extension counts
 *          for `dirPath` and everything beneath it.
 */
private async getDirectoryStats(
  dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
  let documentCount = 0;
  let totalSizeBytes = 0;
  const fileTypeStats: { [extension: string]: number } = {};

  let items: string[];
  try {
    items = await readdir(dirPath);
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes, fileTypeStats };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Guard each entry on its own: one unreadable entry (for example a
    // dangling symlink) must not discard the rest of this directory.
    try {
      const stat = await fsStat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories and fold their totals in.
        const subStats = await this.getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;

        // Merge file type stats
        for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
          fileTypeStats[ext] = (fileTypeStats[ext] ?? 0) + count;
        }
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;

        // Track file extension
        const ext = path.extname(item).toLowerCase() || 'no-extension';
        fileTypeStats[ext] = (fileTypeStats[ext] ?? 0) + 1;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes, fileTypeStats };
}
}
@@ -297,18 +319,20 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
totalDocuments: 0,
totalSizeBytes: 0,
totalSizeMB: 0,
fileTypeStats: {},
jobs: []
};
}
const jobFolders = await fs.readdir(jobsPath);
const jobFolders = await readdir(jobsPath);
const jobStats: JobFolderStats[] = [];
let totalDocuments = 0;
let totalSizeBytes = 0;
const aggregatedFileTypeStats: { [extension: string]: number } = {};
for (const jobFolder of jobFolders) {
const jobFolderPath = path.join(jobsPath, jobFolder);
const stat = await fs.stat(jobFolderPath);
const stat = await fsStat(jobFolderPath);
// Only process directories
if (stat.isDirectory()) {
@@ -316,6 +340,11 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
jobStats.push(folderStats);
totalDocuments += folderStats.documentCount;
totalSizeBytes += folderStats.totalSizeBytes;
// Aggregate file type stats
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
}
}
}
@@ -324,6 +353,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
totalDocuments,
totalSizeBytes,
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
fileTypeStats: aggregatedFileTypeStats,
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
};
@@ -344,45 +374,56 @@ async function analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFol
const jobFolderPath = path.join(jobsPath, jobId);
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
const { documentCount, totalSizeBytes } = await getDirectoryStats(jobFolderPath);
const { documentCount, totalSizeBytes, fileTypeStats } = await getDirectoryStats(jobFolderPath);
return {
jobId,
relativePath,
documentCount,
totalSizeBytes,
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
fileTypeStats
};
}
/**
 * Recursively collect file statistics for a directory tree (standalone helper function).
 *
 * Every entry that is not a directory is counted as one document. Its size is
 * added to the byte total and its lower-cased extension (as returned by
 * `path.extname`, including the leading dot) is tallied in `fileTypeStats`.
 * Entries for which `path.extname` returns an empty string are tallied under
 * the key `'no-extension'`.
 *
 * Failures are logged and skipped rather than thrown: if the directory itself
 * cannot be listed the result is all zeros, and if a single entry cannot be
 * inspected only that entry is left out of the totals.
 *
 * @param dirPath Directory to analyze.
 * @returns Document count, cumulative size in bytes, and per-extension counts
 *          for `dirPath` and everything beneath it.
 */
async function getDirectoryStats(
  dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
  let documentCount = 0;
  let totalSizeBytes = 0;
  const fileTypeStats: { [extension: string]: number } = {};

  let items: string[];
  try {
    items = await readdir(dirPath);
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes, fileTypeStats };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Guard each entry on its own: one unreadable entry (for example a
    // dangling symlink) must not discard the rest of this directory.
    try {
      const stat = await fsStat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories and fold their totals in.
        const subStats = await getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;

        // Merge file type stats
        for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
          fileTypeStats[ext] = (fileTypeStats[ext] ?? 0) + count;
        }
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;

        // Track file extension
        const ext = path.extname(item).toLowerCase() || 'no-extension';
        fileTypeStats[ext] = (fileTypeStats[ext] ?? 0) + 1;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes, fileTypeStats };
}