Basic directory analysis.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import { exec } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import * as fs from "fs-extra";
|
||||
import { readdir, stat as fsStat } from "fs/promises";
|
||||
import * as path from "path";
|
||||
import { logger } from "../server.js";
|
||||
import { FolderPaths } from "./serverInit.js";
|
||||
@@ -21,6 +22,7 @@ export interface JobFolderStats {
|
||||
documentCount: number;
|
||||
totalSizeBytes: number;
|
||||
totalSizeMB: number;
|
||||
fileTypeStats: { [extension: string]: number };
|
||||
}
|
||||
|
||||
export interface JobsDirectoryAnalysis {
|
||||
@@ -28,6 +30,7 @@ export interface JobsDirectoryAnalysis {
|
||||
totalDocuments: number;
|
||||
totalSizeBytes: number;
|
||||
totalSizeMB: number;
|
||||
fileTypeStats: { [extension: string]: number };
|
||||
jobs: JobFolderStats[];
|
||||
}
|
||||
|
||||
@@ -163,18 +166,20 @@ export class S3Sync {
|
||||
totalDocuments: 0,
|
||||
totalSizeBytes: 0,
|
||||
totalSizeMB: 0,
|
||||
fileTypeStats: {},
|
||||
jobs: []
|
||||
};
|
||||
}
|
||||
|
||||
const jobFolders = await fs.readdir(jobsPath);
|
||||
const jobFolders = await readdir(jobsPath);
|
||||
const jobStats: JobFolderStats[] = [];
|
||||
let totalDocuments = 0;
|
||||
let totalSizeBytes = 0;
|
||||
const aggregatedFileTypeStats: { [extension: string]: number } = {};
|
||||
|
||||
for (const jobFolder of jobFolders) {
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fs.stat(jobFolderPath);
|
||||
const stat = await fsStat(jobFolderPath);
|
||||
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
@@ -182,6 +187,11 @@ export class S3Sync {
|
||||
jobStats.push(folderStats);
|
||||
totalDocuments += folderStats.documentCount;
|
||||
totalSizeBytes += folderStats.totalSizeBytes;
|
||||
|
||||
// Aggregate file type stats
|
||||
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
|
||||
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,6 +200,7 @@ export class S3Sync {
|
||||
totalDocuments,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
fileTypeStats: aggregatedFileTypeStats,
|
||||
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
|
||||
};
|
||||
|
||||
@@ -210,47 +221,58 @@ export class S3Sync {
|
||||
const jobFolderPath = path.join(jobsPath, jobId);
|
||||
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
|
||||
|
||||
const { documentCount, totalSizeBytes } = await this.getDirectoryStats(jobFolderPath);
|
||||
const { documentCount, totalSizeBytes, fileTypeStats } = await this.getDirectoryStats(jobFolderPath);
|
||||
|
||||
return {
|
||||
jobId,
|
||||
relativePath,
|
||||
documentCount,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
fileTypeStats
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Recursively get document count, total size and per-extension file counts
 * for a directory.
 *
 * Every entry that is not a directory is counted as one document. Extensions
 * are lower-cased, and files without an extension are grouped under the
 * `no-extension` key.
 *
 * Failures are logged and never thrown. A failure on a single entry (for
 * example a broken symbolic link, or a file removed while the scan is running)
 * only skips that entry; the remaining entries of the directory are still
 * counted.
 *
 * @param dirPath - Directory to analyze.
 * @returns Totals accumulated over `dirPath` and all of its subdirectories.
 */
private async getDirectoryStats(
  dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
  let documentCount = 0;
  let totalSizeBytes = 0;
  const fileTypeStats: { [extension: string]: number } = {};

  let items: string[];
  try {
    items = await readdir(dirPath);
  } catch (error) {
    // The directory itself cannot be listed: report it and return empty totals.
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes, fileTypeStats };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Handle failures per entry so one unreadable item does not abort the
    // rest of this directory and silently under-count it.
    try {
      const stat = await fsStat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories
        const subStats = await this.getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;

        // Merge file type stats
        for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
          fileTypeStats[ext] = (fileTypeStats[ext] || 0) + count;
        }
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;

        // Track file extension; `||` is intentional because extname returns ''
        // when there is no extension.
        const ext = path.extname(item).toLowerCase() || 'no-extension';
        fileTypeStats[ext] = (fileTypeStats[ext] || 0) + 1;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes, fileTypeStats };
}
|
||||
}
|
||||
|
||||
@@ -297,18 +319,20 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
totalDocuments: 0,
|
||||
totalSizeBytes: 0,
|
||||
totalSizeMB: 0,
|
||||
fileTypeStats: {},
|
||||
jobs: []
|
||||
};
|
||||
}
|
||||
|
||||
const jobFolders = await fs.readdir(jobsPath);
|
||||
const jobFolders = await readdir(jobsPath);
|
||||
const jobStats: JobFolderStats[] = [];
|
||||
let totalDocuments = 0;
|
||||
let totalSizeBytes = 0;
|
||||
const aggregatedFileTypeStats: { [extension: string]: number } = {};
|
||||
|
||||
for (const jobFolder of jobFolders) {
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fs.stat(jobFolderPath);
|
||||
const stat = await fsStat(jobFolderPath);
|
||||
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
@@ -316,6 +340,11 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
jobStats.push(folderStats);
|
||||
totalDocuments += folderStats.documentCount;
|
||||
totalSizeBytes += folderStats.totalSizeBytes;
|
||||
|
||||
// Aggregate file type stats
|
||||
for (const [ext, count] of Object.entries(folderStats.fileTypeStats)) {
|
||||
aggregatedFileTypeStats[ext] = (aggregatedFileTypeStats[ext] || 0) + count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,6 +353,7 @@ export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
totalDocuments,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
fileTypeStats: aggregatedFileTypeStats,
|
||||
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
|
||||
};
|
||||
|
||||
@@ -344,45 +374,56 @@ async function analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFol
|
||||
const jobFolderPath = path.join(jobsPath, jobId);
|
||||
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
|
||||
|
||||
const { documentCount, totalSizeBytes } = await getDirectoryStats(jobFolderPath);
|
||||
const { documentCount, totalSizeBytes, fileTypeStats } = await getDirectoryStats(jobFolderPath);
|
||||
|
||||
return {
|
||||
jobId,
|
||||
relativePath,
|
||||
documentCount,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
fileTypeStats
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Recursively get document count, total size and per-extension file counts
 * for a directory (standalone helper function).
 *
 * Every entry that is not a directory is counted as one document. Extensions
 * are lower-cased, and files without an extension are grouped under the
 * `no-extension` key.
 *
 * Failures are logged and never thrown. A failure on a single entry (for
 * example a broken symbolic link, or a file removed while the scan is running)
 * only skips that entry; the remaining entries of the directory are still
 * counted.
 *
 * @param dirPath - Directory to analyze.
 * @returns Totals accumulated over `dirPath` and all of its subdirectories.
 */
async function getDirectoryStats(
  dirPath: string
): Promise<{ documentCount: number; totalSizeBytes: number; fileTypeStats: { [extension: string]: number } }> {
  let documentCount = 0;
  let totalSizeBytes = 0;
  const fileTypeStats: { [extension: string]: number } = {};

  let items: string[];
  try {
    items = await readdir(dirPath);
  } catch (error) {
    // The directory itself cannot be listed: report it and return empty totals.
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes, fileTypeStats };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Handle failures per entry so one unreadable item does not abort the
    // rest of this directory and silently under-count it.
    try {
      const stat = await fsStat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories
        const subStats = await getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;

        // Merge file type stats
        for (const [ext, count] of Object.entries(subStats.fileTypeStats)) {
          fileTypeStats[ext] = (fileTypeStats[ext] || 0) + count;
        }
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;

        // Track file extension; `||` is intentional because extname returns ''
        // when there is no extension.
        const ext = path.extname(item).toLowerCase() || 'no-extension';
        fileTypeStats[ext] = (fileTypeStats[ext] || 0) + 1;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes, fileTypeStats };
}
|
||||
|
||||
Reference in New Issue
Block a user