WIP s3 sync and directory stats.
This commit is contained in:
@@ -1,2 +1,15 @@
|
|||||||
node_modules
|
node_modules
|
||||||
npm-debug.log
|
npm-debug.log
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
README.md
|
||||||
|
S3_SYNC_README.md
|
||||||
|
docker-create.md
|
||||||
|
.env*
|
||||||
|
!.env.production
|
||||||
|
*.log
|
||||||
|
dist
|
||||||
|
.DS_Store
|
||||||
|
.vscode
|
||||||
|
coverage
|
||||||
|
.nyc_output
|
||||||
19
Dockerfile
19
Dockerfile
@@ -2,8 +2,12 @@
|
|||||||
# Build stage for libraries
|
# Build stage for libraries
|
||||||
FROM node:22-alpine AS builder
|
FROM node:22-alpine AS builder
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies including AWS CLI v2
|
||||||
RUN apk add --no-cache bash wget
|
RUN apk add --no-cache bash wget curl unzip gcompat
|
||||||
|
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||||
|
unzip awscliv2.zip && \
|
||||||
|
./aws/install && \
|
||||||
|
rm -rf awscliv2.zip aws/
|
||||||
|
|
||||||
# Node.js application build stage
|
# Node.js application build stage
|
||||||
WORKDIR /usr/src/app
|
WORKDIR /usr/src/app
|
||||||
@@ -21,8 +25,15 @@ FROM node:22-alpine
|
|||||||
RUN echo "https://dl-cdn.alpinelinux.org/alpine/v$(grep -oE '[0-9]+\.[0-9]+' /etc/alpine-release)/community" >> /etc/apk/repositories
|
RUN echo "https://dl-cdn.alpinelinux.org/alpine/v$(grep -oE '[0-9]+\.[0-9]+' /etc/alpine-release)/community" >> /etc/apk/repositories
|
||||||
RUN apk update
|
RUN apk update
|
||||||
|
|
||||||
# Install runtime dependencies only
|
# Install runtime dependencies including AWS CLI v2
|
||||||
RUN apk add --no-cache bash redis ghostscript graphicsmagick imagemagick libjpeg-turbo libpng libwebp tiff libheif libde265 x265 ffmpeg
|
RUN apk add --no-cache bash redis ghostscript graphicsmagick imagemagick libjpeg-turbo libpng libwebp tiff libheif libde265 x265 ffmpeg curl unzip gcompat tzdata
|
||||||
|
|
||||||
|
# Install AWS CLI v2
|
||||||
|
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||||
|
unzip awscliv2.zip && \
|
||||||
|
./aws/install && \
|
||||||
|
rm -rf awscliv2.zip aws/ && \
|
||||||
|
aws --version
|
||||||
|
|
||||||
RUN npm install -g pm2
|
RUN npm install -g pm2
|
||||||
|
|
||||||
|
|||||||
197
S3_SYNC_README.md
Normal file
197
S3_SYNC_README.md
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
# S3 Daily Sync Configuration
|
||||||
|
|
||||||
|
This application now includes automatic daily synchronization of the Jobs directory to an S3 bucket using the AWS CLI.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### AWS CLI Installation
|
||||||
|
The sync functionality requires the AWS CLI to be installed on your system:
|
||||||
|
|
||||||
|
**macOS:**
|
||||||
|
```bash
|
||||||
|
curl "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o "AWSCLIV2.pkg"
|
||||||
|
sudo installer -pkg AWSCLIV2.pkg -target /
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linux:**
|
||||||
|
```bash
|
||||||
|
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
|
||||||
|
unzip awscliv2.zip
|
||||||
|
sudo ./aws/install
|
||||||
|
```
|
||||||
|
|
||||||
|
**Docker (if running in container):**
|
||||||
|
```dockerfile
|
||||||
|
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||||
|
unzip awscliv2.zip && \
|
||||||
|
./aws/install && \
|
||||||
|
rm -rf awscliv2.zip aws/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Required Environment Variables
|
||||||
|
|
||||||
|
Add the following environment variables to your `.env` file:
|
||||||
|
|
||||||
|
```env
|
||||||
|
# S3 Configuration (Required for daily sync)
|
||||||
|
S3_BUCKET_NAME=your-s3-bucket-name
|
||||||
|
S3_REGION=us-east-1
|
||||||
|
S3_ACCESS_KEY_ID=your-access-key-id
|
||||||
|
S3_SECRET_ACCESS_KEY=your-secret-access-key
|
||||||
|
|
||||||
|
# Optional: S3 key prefix (defaults to "jobs/" if not specified)
|
||||||
|
S3_KEY_PREFIX=jobs/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
### Automatic Daily Sync
|
||||||
|
- Runs every day at midnight PST/PDT using `aws s3 sync`
|
||||||
|
- Uses the `--delete` flag to remove files from S3 that no longer exist locally
|
||||||
|
- Efficient incremental sync (only uploads changed files)
|
||||||
|
- Comprehensive logging of sync operations
|
||||||
|
|
||||||
|
### Jobs Directory Analysis
|
||||||
|
- Real-time analysis of all job folders in the Jobs directory
|
||||||
|
- Recursive document counting and size calculation
|
||||||
|
- Detailed per-job statistics including document counts and sizes
|
||||||
|
- Useful for monitoring storage usage and job completion status
|
||||||
|
- No S3 configuration required for analysis functionality
|
||||||
|
|
||||||
|
### API Endpoints
|
||||||
|
|
||||||
|
#### Check Sync Status
|
||||||
|
```
|
||||||
|
GET /sync/status
|
||||||
|
```
|
||||||
|
Returns the current status of the S3 sync scheduler including:
|
||||||
|
- Configuration status
|
||||||
|
- Scheduler running status
|
||||||
|
- Next scheduled run time
|
||||||
|
- S3 connection availability
|
||||||
|
|
||||||
|
#### Manual Sync Trigger
|
||||||
|
```
|
||||||
|
POST /sync/trigger
|
||||||
|
```
|
||||||
|
Manually triggers an S3 sync operation (useful for testing).
|
||||||
|
|
||||||
|
#### Jobs Directory Analysis
|
||||||
|
```
|
||||||
|
GET /jobs/analysis
|
||||||
|
```
|
||||||
|
Analyzes the Jobs directory and returns detailed statistics:
|
||||||
|
- Total number of job folders
|
||||||
|
- Total documents across all jobs
|
||||||
|
- Total size in bytes and MB
|
||||||
|
- Per-job statistics including:
|
||||||
|
- Job ID (folder name)
|
||||||
|
- Relative path
|
||||||
|
- Document count in that job
|
||||||
|
- Total size for that job
|
||||||
|
|
||||||
|
**Example Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"totalJobs": 150,
|
||||||
|
"totalDocuments": 1250,
|
||||||
|
"totalSizeBytes": 2147483648,
|
||||||
|
"totalSizeMB": 2048.0,
|
||||||
|
"jobs": [
|
||||||
|
{
|
||||||
|
"jobId": "JOB-001",
|
||||||
|
"relativePath": "Jobs/JOB-001",
|
||||||
|
"documentCount": 8,
|
||||||
|
"totalSizeBytes": 15728640,
|
||||||
|
"totalSizeMB": 15.0
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Setup Instructions
|
||||||
|
|
||||||
|
1. **Install AWS CLI**: Follow the installation instructions above for your platform
|
||||||
|
|
||||||
|
2. **Configure S3 Bucket**:
|
||||||
|
- Create an S3 bucket in your AWS account
|
||||||
|
- Create an IAM user with S3 permissions for the bucket
|
||||||
|
- Generate access keys for the IAM user
|
||||||
|
|
||||||
|
3. **Set Environment Variables**:
|
||||||
|
- Add the S3 configuration to your environment file
|
||||||
|
- Restart the server
|
||||||
|
|
||||||
|
4. **Test the Setup**:
|
||||||
|
- Check the sync status: `GET /sync/status`
|
||||||
|
- Trigger a manual sync: `POST /sync/trigger`
|
||||||
|
- Monitor the logs for sync operations
|
||||||
|
|
||||||
|
## IAM Permissions
|
||||||
|
|
||||||
|
Your IAM user needs the following permissions for the S3 bucket:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"Version": "2012-10-17",
|
||||||
|
"Statement": [
|
||||||
|
{
|
||||||
|
"Effect": "Allow",
|
||||||
|
"Action": [
|
||||||
|
"s3:GetObject",
|
||||||
|
"s3:PutObject",
|
||||||
|
"s3:DeleteObject",
|
||||||
|
"s3:ListBucket"
|
||||||
|
],
|
||||||
|
"Resource": [
|
||||||
|
"arn:aws:s3:::your-bucket-name",
|
||||||
|
"arn:aws:s3:::your-bucket-name/*"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advantages of Using AWS CLI
|
||||||
|
|
||||||
|
### Why AWS CLI vs SDK?
|
||||||
|
- **Simplicity**: Single command (`aws s3 sync`) handles everything
|
||||||
|
- **Efficiency**: AWS CLI is optimized for bulk operations
|
||||||
|
- **Robustness**: Built-in retry logic and error handling
|
||||||
|
- **Features**: Automatic multipart uploads, checksums, and progress tracking
|
||||||
|
- **Maintenance**: No need to manage complex SDK code for file operations
|
||||||
|
|
||||||
|
### AWS CLI Sync Command
|
||||||
|
The sync uses: `aws s3 sync /local/jobs/path s3://bucket/jobs/ --delete`
|
||||||
|
|
||||||
|
This command:
|
||||||
|
- Only uploads files that are new or modified (based on size and timestamp)
|
||||||
|
- Automatically handles large files with multipart uploads
|
||||||
|
- Deletes files from S3 that no longer exist locally (with `--delete` flag)
|
||||||
|
- Provides detailed output of what was transferred
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
- **AWS CLI not found**: Install AWS CLI using the instructions above
|
||||||
|
- **Permission denied**: Check IAM permissions and access keys
|
||||||
|
- **Sync fails**: Check the application logs for detailed error messages
|
||||||
|
- **Connection issues**: Verify S3 bucket name and region
|
||||||
|
- **Test connection**: Use the status endpoint to verify S3 connectivity
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
1. **Scheduler**: Uses `node-cron` to schedule daily execution at midnight PST
|
||||||
|
2. **AWS CLI Check**: Verifies AWS CLI is installed and available
|
||||||
|
3. **Credential Setup**: Sets AWS credentials as environment variables
|
||||||
|
4. **Sync Execution**: Runs `aws s3 sync` with appropriate parameters
|
||||||
|
5. **Logging**: Captures and logs all command output and errors
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
The implementation now only requires:
|
||||||
|
- `node-cron` for scheduling
|
||||||
|
- `fs-extra` for file system operations
|
||||||
|
- Node.js built-in `child_process` for executing AWS CLI commands
|
||||||
|
|
||||||
|
No AWS SDK dependencies are needed!
|
||||||
@@ -12,4 +12,10 @@ services:
|
|||||||
- IMS_TOKEN=
|
- IMS_TOKEN=
|
||||||
- CONVERT_QUALITY=0.5
|
- CONVERT_QUALITY=0.5
|
||||||
- KEEP_CONVERTED_ORIGINALS=TRUE
|
- KEEP_CONVERTED_ORIGINALS=TRUE
|
||||||
|
# S3 Sync Configuration (uncomment and configure for daily sync)
|
||||||
|
# - S3_BUCKET_NAME=your-s3-bucket-name
|
||||||
|
# - S3_REGION=us-east-1
|
||||||
|
# - S3_ACCESS_KEY_ID=your-access-key-id
|
||||||
|
# - S3_SECRET_ACCESS_KEY=your-secret-access-key
|
||||||
|
# - S3_KEY_PREFIX=jobs/
|
||||||
image: imexonline/media-server:beta
|
image: imexonline/media-server:beta
|
||||||
@@ -1,2 +1,80 @@
|
|||||||
|
# Docker Deployment Guide
|
||||||
|
|
||||||
|
## Building the Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build latest version
|
||||||
docker build . -t imexonline/media-server:latest -t imexonline/media-server:1.0
|
docker build . -t imexonline/media-server:latest -t imexonline/media-server:1.0
|
||||||
|
|
||||||
|
# Build beta version
|
||||||
docker build . -t imexonline/media-server:beta
|
docker build . -t imexonline/media-server:beta
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Image Features
|
||||||
|
|
||||||
|
The Docker image includes:
|
||||||
|
- Node.js 22 Alpine base
|
||||||
|
- AWS CLI v2 for S3 sync functionality
|
||||||
|
- GraphicsMagick, ImageMagick, and FFmpeg for media processing
|
||||||
|
- Redis for background job processing
|
||||||
|
- PM2 for process management
|
||||||
|
- PST timezone configuration for scheduled tasks
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
### Required
|
||||||
|
- `MEDIA_PATH` - Path to media storage directory
|
||||||
|
- `IMS_TOKEN` - Authentication token
|
||||||
|
|
||||||
|
### Optional
|
||||||
|
- `DUPLICATE_BILL_TO_VENDOR` - Whether to duplicate bills to vendor directory
|
||||||
|
- `CONVERT_QUALITY` - Image conversion quality (0.0-1.0)
|
||||||
|
- `KEEP_CONVERTED_ORIGINALS` - Whether to keep original files after conversion
|
||||||
|
|
||||||
|
### S3 Sync (Optional)
|
||||||
|
- `S3_BUCKET_NAME` - S3 bucket name for daily sync
|
||||||
|
- `S3_REGION` - AWS region (default: us-east-1)
|
||||||
|
- `S3_ACCESS_KEY_ID` - AWS access key
|
||||||
|
- `S3_SECRET_ACCESS_KEY` - AWS secret key
|
||||||
|
- `S3_KEY_PREFIX` - S3 key prefix (default: jobs/)
|
||||||
|
|
||||||
|
## Running with Docker Compose
|
||||||
|
|
||||||
|
1. Update `docker-compose.yml` with your configuration
|
||||||
|
2. Uncomment and configure S3 environment variables if needed
|
||||||
|
3. Run: `docker-compose up -d`
|
||||||
|
|
||||||
|
## Manual Docker Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d \
|
||||||
|
--name bodyshop-media-server \
|
||||||
|
-p 8000:8000 \
|
||||||
|
-v "/path/to/media:/media" \
|
||||||
|
-e MEDIA_PATH=/media \
|
||||||
|
-e IMS_TOKEN=your-token \
|
||||||
|
-e S3_BUCKET_NAME=your-bucket \
|
||||||
|
-e S3_ACCESS_KEY_ID=your-key \
|
||||||
|
-e S3_SECRET_ACCESS_KEY=your-secret \
|
||||||
|
imexonline/media-server:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
## S3 Sync in Docker
|
||||||
|
|
||||||
|
The S3 sync functionality works automatically in Docker:
|
||||||
|
- AWS CLI v2 is pre-installed
|
||||||
|
- Timezone is set to PST for midnight scheduling
|
||||||
|
- Daily sync runs at midnight PST automatically
|
||||||
|
- Check sync status via API: `GET /sync/status`
|
||||||
|
|
||||||
|
## Health Checks
|
||||||
|
|
||||||
|
- Main health check: `GET /health`
|
||||||
|
- Application status: `GET /` (requires token)
|
||||||
|
- S3 sync status: `GET /sync/status` (requires token)
|
||||||
|
|
||||||
|
## Logs
|
||||||
|
|
||||||
|
View container logs: `docker logs bodyshop-media-server`
|
||||||
|
|
||||||
|
The application uses structured logging with daily rotation.
|
||||||
|
|||||||
29
package-lock.json
generated
29
package-lock.json
generated
@@ -26,6 +26,7 @@
|
|||||||
"morgan": "^1.10.1",
|
"morgan": "^1.10.1",
|
||||||
"multer": "^2.0.2",
|
"multer": "^2.0.2",
|
||||||
"nocache": "^4.0.0",
|
"nocache": "^4.0.0",
|
||||||
|
"node-cron": "^4.2.1",
|
||||||
"response-time": "^2.3.4",
|
"response-time": "^2.3.4",
|
||||||
"simple-thumbnail": "^1.6.5",
|
"simple-thumbnail": "^1.6.5",
|
||||||
"winston": "^3.17.0",
|
"winston": "^3.17.0",
|
||||||
@@ -40,6 +41,7 @@
|
|||||||
"@types/morgan": "^1.9.10",
|
"@types/morgan": "^1.9.10",
|
||||||
"@types/multer": "^2.0.0",
|
"@types/multer": "^2.0.0",
|
||||||
"@types/node": "^24.1.0",
|
"@types/node": "^24.1.0",
|
||||||
|
"@types/node-cron": "^3.0.11",
|
||||||
"@types/response-time": "^2.3.9",
|
"@types/response-time": "^2.3.9",
|
||||||
"nodemon": "^3.1.10",
|
"nodemon": "^3.1.10",
|
||||||
"prettier": "^3.6.2",
|
"prettier": "^3.6.2",
|
||||||
@@ -745,6 +747,13 @@
|
|||||||
"undici-types": "~7.8.0"
|
"undici-types": "~7.8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node-cron": {
|
||||||
|
"version": "3.0.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-cron/-/node-cron-3.0.11.tgz",
|
||||||
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@types/qs": {
|
"node_modules/@types/qs": {
|
||||||
"version": "6.9.18",
|
"version": "6.9.18",
|
||||||
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
||||||
@@ -2563,6 +2572,15 @@
|
|||||||
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==",
|
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/node-cron": {
|
||||||
|
"version": "4.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-cron/-/node-cron-4.2.1.tgz",
|
||||||
|
"integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
|
||||||
|
"license": "ISC",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/node-gyp-build-optional-packages": {
|
"node_modules/node-gyp-build-optional-packages": {
|
||||||
"version": "5.2.2",
|
"version": "5.2.2",
|
||||||
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
||||||
@@ -4091,6 +4109,12 @@
|
|||||||
"undici-types": "~7.8.0"
|
"undici-types": "~7.8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"@types/node-cron": {
|
||||||
|
"version": "3.0.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-cron/-/node-cron-3.0.11.tgz",
|
||||||
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
"@types/qs": {
|
"@types/qs": {
|
||||||
"version": "6.9.18",
|
"version": "6.9.18",
|
||||||
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
||||||
@@ -5353,6 +5377,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/node-abort-controller/-/node-abort-controller-3.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/node-abort-controller/-/node-abort-controller-3.1.1.tgz",
|
||||||
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="
|
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="
|
||||||
},
|
},
|
||||||
|
"node-cron": {
|
||||||
|
"version": "4.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-cron/-/node-cron-4.2.1.tgz",
|
||||||
|
"integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg=="
|
||||||
|
},
|
||||||
"node-gyp-build-optional-packages": {
|
"node-gyp-build-optional-packages": {
|
||||||
"version": "5.2.2",
|
"version": "5.2.2",
|
||||||
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
"morgan": "^1.10.1",
|
"morgan": "^1.10.1",
|
||||||
"multer": "^2.0.2",
|
"multer": "^2.0.2",
|
||||||
"nocache": "^4.0.0",
|
"nocache": "^4.0.0",
|
||||||
|
"node-cron": "^4.2.1",
|
||||||
"response-time": "^2.3.4",
|
"response-time": "^2.3.4",
|
||||||
"simple-thumbnail": "^1.6.5",
|
"simple-thumbnail": "^1.6.5",
|
||||||
"winston": "^3.17.0",
|
"winston": "^3.17.0",
|
||||||
@@ -44,6 +45,7 @@
|
|||||||
"@types/morgan": "^1.9.10",
|
"@types/morgan": "^1.9.10",
|
||||||
"@types/multer": "^2.0.0",
|
"@types/multer": "^2.0.0",
|
||||||
"@types/node": "^24.1.0",
|
"@types/node": "^24.1.0",
|
||||||
|
"@types/node-cron": "^3.0.11",
|
||||||
"@types/response-time": "^2.3.9",
|
"@types/response-time": "^2.3.9",
|
||||||
"nodemon": "^3.1.10",
|
"nodemon": "^3.1.10",
|
||||||
"prettier": "^3.6.2",
|
"prettier": "^3.6.2",
|
||||||
|
|||||||
43
server.ts
43
server.ts
@@ -21,6 +21,8 @@ import { JobsMoveMedia } from "./jobs/jobsMoveMedia.js";
|
|||||||
import { JobMediaUploadMulter, jobsUploadMedia } from "./jobs/jobsUploadMedia.js";
|
import { JobMediaUploadMulter, jobsUploadMedia } from "./jobs/jobsUploadMedia.js";
|
||||||
import InitServer, { FolderPaths } from "./util/serverInit.js";
|
import InitServer, { FolderPaths } from "./util/serverInit.js";
|
||||||
import ValidateImsToken from "./util/validateToken.js";
|
import ValidateImsToken from "./util/validateToken.js";
|
||||||
|
import { dailyS3Scheduler } from "./util/dailyS3Scheduler.js";
|
||||||
|
import { analyzeJobsDirectory } from "./util/s3Sync.js";
|
||||||
|
|
||||||
dotenv.config({
|
dotenv.config({
|
||||||
path: resolve(process.cwd(), `.env.${process.env.NODE_ENV || "development"}`)
|
path: resolve(process.cwd(), `.env.${process.env.NODE_ENV || "development"}`)
|
||||||
@@ -132,11 +134,52 @@ app.get("/health", (req, res) => {
|
|||||||
res.status(200).send("OK");
|
res.status(200).send("OK");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Token-protected endpoint that reports the S3 sync scheduler state.
// Answers 200 with the scheduler status object, or 500 with `{ error }` if the lookup throws.
app.get("/sync/status", ValidateImsToken, async (_req, res) => {
  try {
    res.json(await dailyS3Scheduler.getStatus());
  } catch (err) {
    logger.error("Failed to get sync status:", err);
    let reason = "Unknown error";
    if (err instanceof Error) {
      reason = err.message;
    }
    res.status(500).json({ error: reason });
  }
});
|
||||||
|
|
||||||
|
// Manual S3 sync trigger endpoint (for testing), protected by the IMS token middleware.
// Answers 200 once the sync call resolves, or 500 with the error message if it rejects.
app.post("/sync/trigger", ValidateImsToken, async (req, res) => {
  try {
    await dailyS3Scheduler.triggerManualSync();
    res.json({ success: true, message: "Manual sync triggered successfully" });
  } catch (error) {
    logger.error("Manual sync failed:", error);
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    res.status(500).json({ success: false, message: "Manual sync failed", error: errorMessage });
  }
});
|
||||||
|
|
||||||
|
// Token-protected endpoint that returns statistics for the Jobs directory.
// Answers 200 with the analysis result, or 500 with a failure payload if the analysis throws.
app.get("/jobs/analysis", ValidateImsToken, async (_req, res) => {
  try {
    res.json(await analyzeJobsDirectory());
  } catch (err) {
    logger.error("Failed to analyze jobs directory:", err);
    let reason = "Unknown error";
    if (err instanceof Error) {
      reason = err.message;
    }
    res.status(500).json({ success: false, message: "Jobs analysis failed", error: reason });
  }
});
|
||||||
|
|
||||||
// Static files
|
// Static files
|
||||||
InitServer();
|
InitServer();
|
||||||
app.use(FolderPaths.StaticPath, express.static(FolderPaths.Root, { etag: false, maxAge: 30 * 1000 }));
|
app.use(FolderPaths.StaticPath, express.static(FolderPaths.Root, { etag: false, maxAge: 30 * 1000 }));
|
||||||
app.use("/assets", express.static("/assets", { etag: false, maxAge: 30 * 1000 }));
|
app.use("/assets", express.static("/assets", { etag: false, maxAge: 30 * 1000 }));
|
||||||
|
|
||||||
|
// Kick off the daily S3 sync scheduler. The promise is intentionally not awaited;
// a rejected start is logged here instead of surfacing as an unhandled rejection.
void dailyS3Scheduler.start().catch((err) => {
  logger.error("Failed to start sync scheduler:", err);
});
|
||||||
|
|
||||||
app.listen(port, () => {
|
app.listen(port, () => {
|
||||||
logger.info(`ImEX Media Server is running at http://localhost:${port}`);
|
logger.info(`ImEX Media Server is running at http://localhost:${port}`);
|
||||||
});
|
});
|
||||||
|
|||||||
154
util/dailyS3Scheduler.ts
Normal file
154
util/dailyS3Scheduler.ts
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
import * as cron from "node-cron";
|
||||||
|
import { logger } from "../server.js";
|
||||||
|
import { S3Sync, createS3SyncFromEnv } from "./s3Sync.js";
|
||||||
|
|
||||||
|
/**
 * Runs the Jobs-directory S3 sync once a day at midnight America/Los_Angeles time and
 * exposes manual-trigger and status helpers for the HTTP layer.
 */
export class DailyS3Scheduler {
  /** IANA zone used for both the cron schedule and the "next run" display (PST/PDT). */
  private static readonly TIME_ZONE = "America/Los_Angeles";

  /** Minute 0, hour 0, every day (evaluated in TIME_ZONE). */
  private static readonly CRON_EXPRESSION = "0 0 * * *";

  private static readonly SECONDS_PER_DAY = 86400;

  private s3Sync: S3Sync | null = null;
  /** True once createS3SyncFromEnv() has been consulted, so it runs at most once. */
  private s3SyncResolved = false;
  private cronJob: cron.ScheduledTask | null = null;

  /**
   * Start the daily S3 sync scheduler.
   *
   * Does nothing (beyond logging) when the scheduler is already running, when S3 sync is not
   * configured, or when the S3 connection test fails.
   */
  async start(): Promise<void> {
    if (this.cronJob) {
      // A second cron.schedule() would make every sync run twice.
      logger.warn("Daily S3 sync scheduler is already running. Ignoring duplicate start.");
      return;
    }

    const s3Sync = this.getS3Sync();
    if (!s3Sync) {
      logger.warn("S3 sync not configured. Skipping scheduler setup.");
      return;
    }

    // Test S3 connection before starting scheduler
    const connectionTest = await s3Sync.testConnection();
    if (!connectionTest) {
      logger.error("S3 connection test failed. S3 sync scheduler will not be started.");
      return;
    }

    // The timezone option makes the expression fire at local midnight in both PST and PDT.
    this.cronJob = cron.schedule(
      DailyS3Scheduler.CRON_EXPRESSION,
      async () => {
        await this.performDailySync();
      },
      {
        timezone: DailyS3Scheduler.TIME_ZONE
      }
    );

    logger.info(`Daily S3 sync scheduler started. Will run at midnight PST/PDT.`);
    logger.info(`Next sync scheduled for: ${this.getNextRunTime()}`);
  }

  /**
   * Stop the scheduler. Safe to call when it was never started.
   */
  stop(): void {
    if (this.cronJob) {
      this.cronJob.stop();
      this.cronJob = null;
      logger.info("Daily S3 sync scheduler stopped.");
    }
  }

  /**
   * Manually trigger a sync (useful for testing).
   *
   * @throws Error when S3 sync is not configured, or whatever the sync itself throws, so the
   *         caller can report the failure instead of a false success.
   */
  async triggerManualSync(): Promise<void> {
    const s3Sync = this.getS3Sync();
    if (!s3Sync) {
      logger.error("S3 sync not configured");
      throw new Error("S3 sync not configured");
    }

    logger.info("Triggering manual S3 sync...");
    await this.runSync(s3Sync);
  }

  /**
   * Get scheduler status.
   *
   * @returns Whether S3 sync is configured, whether a cron task is active, the formatted next
   *          run time, and (when obtainable) the sync target details from S3Sync.getSyncStats().
   */
  async getStatus(): Promise<{
    isConfigured: boolean;
    isRunning: boolean;
    nextRun: string;
    syncStats?: { bucketName: string; region: string; keyPrefix: string; available: boolean };
  }> {
    const s3Sync = this.getS3Sync();

    let syncStats: { bucketName: string; region: string; keyPrefix: string; available: boolean } | undefined;
    if (s3Sync) {
      try {
        syncStats = await s3Sync.getSyncStats();
      } catch (error) {
        // Status stays available without the stats; the failure is only logged.
        logger.error("Failed to get sync stats:", error);
      }
    }

    return {
      isConfigured: s3Sync !== null,
      isRunning: this.cronJob !== null,
      nextRun: this.getNextRunTime(),
      syncStats
    };
  }

  /**
   * Resolve the S3Sync instance on first use rather than in the constructor.
   *
   * The singleton below is created while server.ts is still evaluating its imports, i.e. before
   * its dotenv.config() call has run, so resolving the configuration at construction time would
   * not see values that only exist in the .env file.
   */
  private getS3Sync(): S3Sync | null {
    if (!this.s3SyncResolved) {
      this.s3Sync = createS3SyncFromEnv();
      this.s3SyncResolved = true;
    }
    return this.s3Sync;
  }

  /**
   * Scheduled entry point: run the sync and log (never rethrow) failures so an error inside the
   * cron callback does not become an unhandled rejection.
   */
  private async performDailySync(): Promise<void> {
    const s3Sync = this.getS3Sync();
    if (!s3Sync) {
      logger.error("S3 sync not available for daily sync");
      return;
    }

    try {
      await this.runSync(s3Sync);
    } catch (error) {
      logger.error("Daily S3 sync failed:", error);
    }
  }

  /**
   * Run one sync with start/duration logging. Errors propagate to the caller.
   */
  private async runSync(s3Sync: S3Sync): Promise<void> {
    const startTime = new Date();
    logger.info(`Starting daily S3 sync at ${startTime.toISOString()}`);

    await s3Sync.syncJobsToS3();

    const duration = Date.now() - startTime.getTime();
    logger.info(`Daily S3 sync completed successfully in ${duration}ms`);
  }

  /**
   * Get the next scheduled run time as a human-readable Los Angeles time,
   * or "Not scheduled" when no cron task is active.
   */
  private getNextRunTime(): string {
    if (!this.cronJob) {
      return "Not scheduled";
    }

    const nextRun = DailyS3Scheduler.nextMidnight(new Date(), DailyS3Scheduler.TIME_ZONE);

    return nextRun.toLocaleString("en-US", {
      timeZone: DailyS3Scheduler.TIME_ZONE,
      weekday: "long",
      year: "numeric",
      month: "long",
      day: "numeric",
      hour: "2-digit",
      minute: "2-digit",
      timeZoneName: "short"
    });
  }

  /**
   * Seconds elapsed since local midnight for `instant` as seen in `timeZone`.
   */
  private static secondsSinceMidnight(instant: Date, timeZone: string): number {
    const parts = new Intl.DateTimeFormat("en-US", {
      timeZone,
      hour12: false,
      hour: "2-digit",
      minute: "2-digit",
      second: "2-digit"
    }).formatToParts(instant);

    const read = (type: string): number => {
      const part = parts.find((candidate) => candidate.type === type);
      return part ? Number(part.value) : 0;
    };

    // Some runtimes render midnight as hour "24" when hour12 is false; fold it back to 0.
    return (read("hour") % 24) * 3600 + read("minute") * 60 + read("second");
  }

  /**
   * The real instant of the next 00:00:00 in `timeZone` at or after `now`.
   *
   * Works on true instants (not on a Date whose local fields were faked to look like another
   * zone), so the result is independent of the host's own time zone.
   */
  private static nextMidnight(now: Date, timeZone: string): Date {
    const day = DailyS3Scheduler.SECONDS_PER_DAY;
    const nowSeconds = Math.floor(now.getTime() / 1000);
    const elapsed = DailyS3Scheduler.secondsSinceMidnight(now, timeZone);

    // Exactly at midnight the next run is "now"; otherwise jump forward by the rest of the day.
    let candidate = elapsed === 0 ? nowSeconds : nowSeconds + (day - elapsed);

    // On DST-change days the local day is 23 or 25 hours long, so the jump can land an hour
    // before or after midnight. Snap to the nearest local midnight.
    const drift = DailyS3Scheduler.secondsSinceMidnight(new Date(candidate * 1000), timeZone);
    if (drift !== 0) {
      candidate += drift > day / 2 ? day - drift : -drift;
    }

    return new Date(candidate * 1000);
  }
}

// Export a singleton instance
export const dailyS3Scheduler = new DailyS3Scheduler();
|
||||||
388
util/s3Sync.ts
Normal file
388
util/s3Sync.ts
Normal file
@@ -0,0 +1,388 @@
|
|||||||
|
import { exec } from "child_process";
|
||||||
|
import { promisify } from "util";
|
||||||
|
import * as fs from "fs-extra";
|
||||||
|
import * as path from "path";
|
||||||
|
import { logger } from "../server.js";
|
||||||
|
import { FolderPaths } from "./serverInit.js";
|
||||||
|
|
||||||
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
|
/**
 * Settings needed to run AWS CLI commands against the sync bucket.
 * Exported so callers of the exported S3Sync constructor can name its parameter type.
 */
export interface S3SyncConfig {
  /** Target bucket name (used as `s3://<bucketName>/...`). */
  bucketName: string;
  /** Passed to the AWS CLI as AWS_DEFAULT_REGION. */
  region: string;
  /** Passed to the AWS CLI as AWS_ACCESS_KEY_ID. */
  accessKeyId: string;
  /** Passed to the AWS CLI as AWS_SECRET_ACCESS_KEY. */
  secretAccessKey: string;
  /** Key prefix inside the bucket; "jobs/" is used when this is missing or empty. */
  keyPrefix?: string;
}
|
||||||
|
|
||||||
|
/**
 * Statistics for a single job folder, as returned per entry by the Jobs directory analysis.
 */
export interface JobFolderStats {
  /** Job ID; per the README this is the folder name. */
  jobId: string;
  /** Relative path of the job folder (README example: "Jobs/JOB-001"). */
  relativePath: string;
  /** Number of documents counted for this job. */
  documentCount: number;
  /** Total size of the job's documents in bytes. */
  totalSizeBytes: number;
  /** The same size in MB (the README example uses 1 MB = 1024 * 1024 bytes). */
  totalSizeMB: number;
}
|
||||||
|
|
||||||
|
/**
 * Aggregate result of analyzing the Jobs directory: overall totals plus one entry per job folder.
 */
export interface JobsDirectoryAnalysis {
  /** Total number of job folders. */
  totalJobs: number;
  /** Total documents across all jobs. */
  totalDocuments: number;
  /** Combined size of all jobs in bytes. */
  totalSizeBytes: number;
  /** Combined size of all jobs in MB. */
  totalSizeMB: number;
  /** Per-job statistics. */
  jobs: JobFolderStats[];
}
|
||||||
|
|
||||||
|
export class S3Sync {
|
||||||
|
private config: S3SyncConfig;
|
||||||
|
|
||||||
|
/**
 * @param config Bucket, region, credentials and optional key prefix used by the AWS CLI calls
 *               this class issues.
 */
constructor(config: S3SyncConfig) {
  this.config = config;
}
|
||||||
|
|
||||||
|
/**
 * Sync the local Jobs directory to the configured S3 bucket using `aws s3 sync`.
 *
 * Returns early (after a warning) when the Jobs directory does not exist. Any other failure
 * is logged and rethrown to the caller.
 *
 * NOTE(review): S3_SYNC_README.md describes this sync as running with `--delete`, but the
 * command built here does not pass that flag, so objects removed locally remain in the bucket.
 * Confirm which behavior is intended before changing either side.
 *
 * @throws Error when the AWS CLI is unavailable or the sync command fails.
 */
async syncJobsToS3(): Promise<void> {
  try {
    logger.info("Starting S3 sync for Jobs directory using AWS CLI...");

    const jobsPath = FolderPaths.Jobs;
    const keyPrefix = this.config.keyPrefix || "jobs/";
    const s3Path = `s3://${this.config.bucketName}/${keyPrefix}`;

    // Check if Jobs directory exists
    if (!(await fs.pathExists(jobsPath))) {
      logger.warn(`Jobs directory does not exist: ${jobsPath}`);
      return;
    }

    // Check if AWS CLI is available
    await this.checkAwsCli();

    // Credentials go through the child's environment so they never appear on the command line.
    const env = {
      ...process.env,
      AWS_ACCESS_KEY_ID: this.config.accessKeyId,
      AWS_SECRET_ACCESS_KEY: this.config.secretAccessKey,
      AWS_DEFAULT_REGION: this.config.region
    };

    // --no-progress suppresses the transfer-progress lines. They would otherwise be captured
    // into the exec buffer below and into the logs, and on a large sync could overflow
    // maxBuffer, which makes exec terminate the CLI and reject.
    const syncCommand = `aws s3 sync "${jobsPath}" "${s3Path}" --no-progress`;

    logger.info(`Executing AWS S3 sync command`);

    const { stdout, stderr } = await execAsync(syncCommand, {
      env,
      maxBuffer: 1024 * 1024 * 10 // 10MB buffer for large sync outputs
    });

    if (stdout) {
      logger.info("S3 sync output:", stdout);
    }

    if (stderr) {
      logger.warn("S3 sync warnings:", stderr);
    }

    logger.info("S3 sync completed successfully using AWS CLI");
  } catch (error) {
    logger.error("S3 sync failed:", error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Verify that the `aws` executable can be run by invoking `aws --version`.
 *
 * @throws Error carrying installation guidance when the command does not run successfully.
 */
private async checkAwsCli(): Promise<void> {
  const installHint =
    "AWS CLI not found. Please install AWS CLI: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html";

  try {
    await execAsync("aws --version");
  } catch {
    // The underlying failure is replaced by a single actionable message.
    throw new Error(installHint);
  }
}
|
||||||
|
|
||||||
|
/**
 * Test that the configured bucket is reachable with the configured credentials.
 *
 * Uses `aws s3api head-bucket` as the probe. The high-level `aws s3 ls` command does not
 * accept `--max-items`, so a probe built on it fails regardless of connectivity.
 *
 * @returns true when the probe succeeds; false (after logging the error) when the AWS CLI is
 *          missing or the probe command fails.
 */
async testConnection(): Promise<boolean> {
  try {
    // Check if AWS CLI is available first
    await this.checkAwsCli();

    // Credentials go through the child's environment so they never appear on the command line.
    const env = {
      ...process.env,
      AWS_ACCESS_KEY_ID: this.config.accessKeyId,
      AWS_SECRET_ACCESS_KEY: this.config.secretAccessKey,
      AWS_DEFAULT_REGION: this.config.region
    };

    // head-bucket exits non-zero when the bucket is missing or access is denied,
    // which makes execAsync reject and lands in the catch below.
    const testCommand = `aws s3api head-bucket --bucket "${this.config.bucketName}"`;
    await execAsync(testCommand, { env });

    logger.info(`S3 connection test successful for bucket: ${this.config.bucketName}`);
    return true;
  } catch (error) {
    logger.error("S3 connection test failed:", error);
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Summarize the S3 sync configuration together with a live availability check.
 *
 * @returns The configured bucket name and region, the key prefix (falling
 *          back to "jobs/" when none is configured), and whether
 *          `testConnection()` succeeded.
 */
async getSyncStats(): Promise<{ bucketName: string; region: string; keyPrefix: string; available: boolean }> {
  const available = await this.testConnection();
  const { bucketName, region, keyPrefix } = this.config;

  return { bucketName, region, keyPrefix: keyPrefix || "jobs/", available };
}
|
||||||
|
|
||||||
|
/**
 * Build per-job and aggregate statistics for the Jobs directory.
 *
 * Every directory directly under `FolderPaths.Jobs` is treated as one job and
 * analyzed with `analyzeJobFolder`; plain files at that level are ignored.
 *
 * @returns Totals (job count, document count, size in bytes and in MB rounded
 *          to two decimals) plus the per-job entries sorted by `jobId`. All
 *          zeros and an empty list are returned when the directory is missing.
 * @throws Rethrows any error raised during the analysis after logging it.
 */
async analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
  try {
    logger.info("Starting Jobs directory analysis...");

    const root = FolderPaths.Jobs;

    // A missing Jobs directory is not an error: report an empty analysis.
    const rootExists = await fs.pathExists(root);
    if (!rootExists) {
      logger.warn(`Jobs directory does not exist: ${root}`);
      return { totalJobs: 0, totalDocuments: 0, totalSizeBytes: 0, totalSizeMB: 0, jobs: [] };
    }

    const collected: JobFolderStats[] = [];
    for (const entryName of await fs.readdir(root)) {
      const entryInfo = await fs.stat(path.join(root, entryName));

      // Only directories represent jobs.
      if (!entryInfo.isDirectory()) {
        continue;
      }

      collected.push(await this.analyzeJobFolder(root, entryName));
    }

    const documentTotal = collected.reduce((sum, job) => sum + job.documentCount, 0);
    const byteTotal = collected.reduce((sum, job) => sum + job.totalSizeBytes, 0);

    collected.sort((left, right) => left.jobId.localeCompare(right.jobId));

    const analysis: JobsDirectoryAnalysis = {
      totalJobs: collected.length,
      totalDocuments: documentTotal,
      totalSizeBytes: byteTotal,
      totalSizeMB: Math.round((byteTotal / (1024 * 1024)) * 100) / 100,
      jobs: collected
    };

    logger.info(
      `Jobs directory analysis complete: ${analysis.totalJobs} jobs, ${analysis.totalDocuments} documents, ${analysis.totalSizeMB} MB`
    );
    return analysis;
  } catch (error) {
    logger.error("Failed to analyze Jobs directory:", error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Produce the statistics record for one job folder.
 *
 * @param jobsPath Directory that contains the job folders.
 * @param jobId    Name of the job folder inside `jobsPath`.
 * @returns The job id, the folder path relative to `FolderPaths.Root`, the
 *          recursive file count and size in bytes, and the size in MB rounded
 *          to two decimals.
 */
private async analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFolderStats> {
  const folder = path.join(jobsPath, jobId);
  const relativePath = path.relative(FolderPaths.Root, folder);
  const totals = await this.getDirectoryStats(folder);
  const bytesPerMegabyte = 1024 * 1024;

  return {
    jobId,
    relativePath,
    documentCount: totals.documentCount,
    totalSizeBytes: totals.totalSizeBytes,
    totalSizeMB: Math.round((totals.totalSizeBytes / bytesPerMegabyte) * 100) / 100
  };
}
|
||||||
|
|
||||||
|
/**
 * Recursively count the files under a directory and sum their sizes.
 *
 * Every non-directory entry counts as one document. Errors are logged and
 * never thrown: if the directory itself cannot be listed the result is zero,
 * and if a single entry cannot be inspected (for example a dangling symlink
 * or a file removed mid-scan) only that entry is skipped.
 *
 * @param dirPath Directory to scan.
 * @returns The number of files found and their combined size in bytes.
 */
private async getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number }> {
  let documentCount = 0;
  let totalSizeBytes = 0;

  let items: string[];
  try {
    items = await fs.readdir(dirPath);
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Isolate failures per entry so one unreadable item does not discard the
    // remaining entries of this directory.
    try {
      const stat = await fs.stat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories
        const subStats = await this.getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes };
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Create an S3Sync instance from environment variables.
 *
 * Required: `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`.
 * Optional: `S3_REGION` (defaults to "ca-central-1") and `S3_KEY_PREFIX`
 * (defaults to "prefix").
 *
 * @returns A configured S3Sync, or `null` (after logging a warning) when any
 *          required variable is missing or empty.
 */
export function createS3SyncFromEnv(): S3Sync | null {
  // Required settings must come from the environment. Placeholder fallbacks
  // here would make the completeness check below unreachable and would hand
  // bogus credentials to the AWS CLI.
  const bucketName = process.env.S3_BUCKET_NAME;
  const accessKeyId = process.env.S3_ACCESS_KEY_ID;
  const secretAccessKey = process.env.S3_SECRET_ACCESS_KEY;

  const region = process.env.S3_REGION || "ca-central-1";
  // NOTE(review): "prefix" looks like a placeholder and differs from the
  // "jobs/" fallback used in getSyncStats; kept as-is to avoid changing where
  // existing deployments upload. Confirm the intended default.
  const keyPrefix = process.env.S3_KEY_PREFIX || "prefix";

  if (!bucketName || !accessKeyId || !secretAccessKey) {
    logger.warn(
      "S3 configuration incomplete. Required env vars: S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY"
    );
    return null;
  }

  return new S3Sync({
    bucketName,
    region,
    accessKeyId,
    secretAccessKey,
    keyPrefix
  });
}
|
||||||
|
|
||||||
|
/**
 * Standalone Jobs directory analysis that needs no S3 configuration.
 *
 * Each directory directly under `FolderPaths.Jobs` is analyzed as one job via
 * the module-level `analyzeJobFolder`; non-directory entries are skipped.
 *
 * @returns Aggregate totals and the per-job statistics sorted by `jobId`, or
 *          an all-zero analysis with no jobs when the directory is missing.
 * @throws Rethrows any error raised during the analysis after logging it.
 */
export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
  try {
    logger.info("Starting Jobs directory analysis...");

    const jobsRoot = FolderPaths.Jobs;

    // Nothing to analyze when the Jobs directory is absent.
    if (!(await fs.pathExists(jobsRoot))) {
      logger.warn(`Jobs directory does not exist: ${jobsRoot}`);
      const emptyAnalysis: JobsDirectoryAnalysis = {
        totalJobs: 0,
        totalDocuments: 0,
        totalSizeBytes: 0,
        totalSizeMB: 0,
        jobs: []
      };
      return emptyAnalysis;
    }

    const entries = await fs.readdir(jobsRoot);
    const perJob: JobFolderStats[] = [];
    let documentTally = 0;
    let byteTally = 0;

    for (const entry of entries) {
      const entryInfo = await fs.stat(path.join(jobsRoot, entry));

      // Files at the top level are not jobs.
      if (!entryInfo.isDirectory()) {
        continue;
      }

      const summary = await analyzeJobFolder(jobsRoot, entry);
      perJob.push(summary);
      documentTally += summary.documentCount;
      byteTally += summary.totalSizeBytes;
    }

    perJob.sort((first, second) => first.jobId.localeCompare(second.jobId));

    const analysis: JobsDirectoryAnalysis = {
      totalJobs: perJob.length,
      totalDocuments: documentTally,
      totalSizeBytes: byteTally,
      totalSizeMB: Math.round((byteTally / (1024 * 1024)) * 100) / 100,
      jobs: perJob
    };

    logger.info(
      `Jobs directory analysis complete: ${analysis.totalJobs} jobs, ${analysis.totalDocuments} documents, ${analysis.totalSizeMB} MB`
    );
    return analysis;
  } catch (error) {
    logger.error("Failed to analyze Jobs directory:", error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Produce the statistics record for one job folder (standalone helper).
 *
 * @param jobsPath Directory that contains the job folders.
 * @param jobId    Name of the job folder inside `jobsPath`.
 * @returns The job id, the folder path relative to `FolderPaths.Root`, the
 *          recursive file count and size in bytes, and the size in MB rounded
 *          to two decimals.
 */
async function analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFolderStats> {
  const folder = path.join(jobsPath, jobId);
  const relativePath = path.relative(FolderPaths.Root, folder);
  const totals = await getDirectoryStats(folder);
  const sizeInMegabytes = Math.round((totals.totalSizeBytes / (1024 * 1024)) * 100) / 100;

  return {
    jobId,
    relativePath,
    documentCount: totals.documentCount,
    totalSizeBytes: totals.totalSizeBytes,
    totalSizeMB: sizeInMegabytes
  };
}
|
||||||
|
|
||||||
|
/**
 * Recursively count the files under a directory and sum their sizes
 * (standalone helper).
 *
 * Every non-directory entry counts as one document. Errors are logged and
 * never thrown: if the directory itself cannot be listed the result is zero,
 * and if a single entry cannot be inspected (for example a dangling symlink
 * or a file removed mid-scan) only that entry is skipped.
 *
 * @param dirPath Directory to scan.
 * @returns The number of files found and their combined size in bytes.
 */
async function getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number }> {
  let documentCount = 0;
  let totalSizeBytes = 0;

  let items: string[];
  try {
    items = await fs.readdir(dirPath);
  } catch (error) {
    logger.error(`Error analyzing directory ${dirPath}:`, error);
    return { documentCount, totalSizeBytes };
  }

  for (const item of items) {
    const itemPath = path.join(dirPath, item);

    // Isolate failures per entry so one unreadable item does not discard the
    // remaining entries of this directory.
    try {
      const stat = await fs.stat(itemPath);

      if (stat.isDirectory()) {
        // Recursively analyze subdirectories
        const subStats = await getDirectoryStats(itemPath);
        documentCount += subStats.documentCount;
        totalSizeBytes += subStats.totalSizeBytes;
      } else {
        // Count files as documents
        documentCount++;
        totalSizeBytes += stat.size;
      }
    } catch (error) {
      logger.error(`Error analyzing ${itemPath}:`, error);
    }
  }

  return { documentCount, totalSizeBytes };
}
|
||||||
Reference in New Issue
Block a user