WIP s3 sync and directory stats.
This commit is contained in:
@@ -1,2 +1,15 @@
|
||||
node_modules
|
||||
npm-debug.log
|
||||
npm-debug.log
|
||||
.git
|
||||
.gitignore
|
||||
README.md
|
||||
S3_SYNC_README.md
|
||||
docker-create.md
|
||||
.env*
|
||||
!.env.production
|
||||
*.log
|
||||
dist
|
||||
.DS_Store
|
||||
.vscode
|
||||
coverage
|
||||
.nyc_output
|
||||
19
Dockerfile
19
Dockerfile
@@ -2,8 +2,12 @@
|
||||
# Build stage for libraries
|
||||
FROM node:22-alpine AS builder
|
||||
|
||||
# Install build dependencies
|
||||
RUN apk add --no-cache bash wget
|
||||
# Install build dependencies including AWS CLI v2
|
||||
RUN apk add --no-cache bash wget curl unzip gcompat
|
||||
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||
unzip awscliv2.zip && \
|
||||
./aws/install && \
|
||||
rm -rf awscliv2.zip aws/
|
||||
|
||||
# Node.js application build stage
|
||||
WORKDIR /usr/src/app
|
||||
@@ -21,8 +25,15 @@ FROM node:22-alpine
|
||||
RUN echo "https://dl-cdn.alpinelinux.org/alpine/v$(grep -oE '[0-9]+\.[0-9]+' /etc/alpine-release)/community" >> /etc/apk/repositories
|
||||
RUN apk update
|
||||
|
||||
# Install runtime dependencies only
|
||||
RUN apk add --no-cache bash redis ghostscript graphicsmagick imagemagick libjpeg-turbo libpng libwebp tiff libheif libde265 x265 ffmpeg
|
||||
# Install runtime dependencies including AWS CLI v2
|
||||
RUN apk add --no-cache bash redis ghostscript graphicsmagick imagemagick libjpeg-turbo libpng libwebp tiff libheif libde265 x265 ffmpeg curl unzip gcompat tzdata
|
||||
|
||||
# Install AWS CLI v2
|
||||
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||
unzip awscliv2.zip && \
|
||||
./aws/install && \
|
||||
rm -rf awscliv2.zip aws/ && \
|
||||
aws --version
|
||||
|
||||
RUN npm install -g pm2
|
||||
|
||||
|
||||
197
S3_SYNC_README.md
Normal file
197
S3_SYNC_README.md
Normal file
@@ -0,0 +1,197 @@
|
||||
# S3 Daily Sync Configuration
|
||||
|
||||
This application now includes automatic daily synchronization of the Jobs directory to an S3 bucket using the AWS CLI.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### AWS CLI Installation
|
||||
The sync functionality requires the AWS CLI to be installed on your system:
|
||||
|
||||
**macOS:**
|
||||
```bash
|
||||
curl "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o "AWSCLIV2.pkg"
|
||||
sudo installer -pkg AWSCLIV2.pkg -target /
|
||||
```
|
||||
|
||||
**Linux:**
|
||||
```bash
|
||||
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
|
||||
unzip awscliv2.zip
|
||||
sudo ./aws/install
|
||||
```
|
||||
|
||||
**Docker (if running in container):**
|
||||
```dockerfile
|
||||
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||
unzip awscliv2.zip && \
|
||||
./aws/install && \
|
||||
rm -rf awscliv2.zip aws/
|
||||
```
|
||||
|
||||
## Required Environment Variables
|
||||
|
||||
Add the following environment variables to your `.env` file:
|
||||
|
||||
```env
|
||||
# S3 Configuration (Required for daily sync)
|
||||
S3_BUCKET_NAME=your-s3-bucket-name
|
||||
S3_REGION=us-east-1
|
||||
S3_ACCESS_KEY_ID=your-access-key-id
|
||||
S3_SECRET_ACCESS_KEY=your-secret-access-key
|
||||
|
||||
# Optional: S3 key prefix (defaults to "jobs/" if not specified)
|
||||
S3_KEY_PREFIX=jobs/
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
### Automatic Daily Sync
|
||||
- Runs every day at midnight PST/PDT using `aws s3 sync`
|
||||
- Supports the AWS CLI `--delete` flag to remove files from S3 that no longer exist locally (not currently enabled in the sync command — verify before relying on remote deletion)
|
||||
- Efficient incremental sync (only uploads changed files)
|
||||
- Comprehensive logging of sync operations
|
||||
|
||||
### Jobs Directory Analysis
|
||||
- Real-time analysis of all job folders in the Jobs directory
|
||||
- Recursive document counting and size calculation
|
||||
- Detailed per-job statistics including document counts and sizes
|
||||
- Useful for monitoring storage usage and job completion status
|
||||
- No S3 configuration required for analysis functionality
|
||||
|
||||
### API Endpoints
|
||||
|
||||
#### Check Sync Status
|
||||
```
|
||||
GET /sync/status
|
||||
```
|
||||
Returns the current status of the S3 sync scheduler including:
|
||||
- Configuration status
|
||||
- Scheduler running status
|
||||
- Next scheduled run time
|
||||
- S3 connection availability
|
||||
|
||||
#### Manual Sync Trigger
|
||||
```
|
||||
POST /sync/trigger
|
||||
```
|
||||
Manually triggers an S3 sync operation (useful for testing).
|
||||
|
||||
#### Jobs Directory Analysis
|
||||
```
|
||||
GET /jobs/analysis
|
||||
```
|
||||
Analyzes the Jobs directory and returns detailed statistics:
|
||||
- Total number of job folders
|
||||
- Total documents across all jobs
|
||||
- Total size in bytes and MB
|
||||
- Per-job statistics including:
|
||||
- Job ID (folder name)
|
||||
- Relative path
|
||||
- Document count in that job
|
||||
- Total size for that job
|
||||
|
||||
**Example Response:**
|
||||
```json
|
||||
{
|
||||
"totalJobs": 150,
|
||||
"totalDocuments": 1250,
|
||||
"totalSizeBytes": 2147483648,
|
||||
"totalSizeMB": 2048.0,
|
||||
"jobs": [
|
||||
{
|
||||
"jobId": "JOB-001",
|
||||
"relativePath": "Jobs/JOB-001",
|
||||
"documentCount": 8,
|
||||
"totalSizeBytes": 15728640,
|
||||
"totalSizeMB": 15.0
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
1. **Install AWS CLI**: Follow the installation instructions above for your platform
|
||||
|
||||
2. **Configure S3 Bucket**:
|
||||
- Create an S3 bucket in your AWS account
|
||||
- Create an IAM user with S3 permissions for the bucket
|
||||
- Generate access keys for the IAM user
|
||||
|
||||
3. **Set Environment Variables**:
|
||||
- Add the S3 configuration to your environment file
|
||||
- Restart the server
|
||||
|
||||
4. **Test the Setup**:
|
||||
   - Check the sync status: `GET /sync/status`
|
||||
   - Trigger a manual sync: `POST /sync/trigger`
|
||||
- Monitor the logs for sync operations
|
||||
|
||||
## IAM Permissions
|
||||
|
||||
Your IAM user needs the following permissions for the S3 bucket:
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"s3:GetObject",
|
||||
"s3:PutObject",
|
||||
"s3:DeleteObject",
|
||||
"s3:ListBucket"
|
||||
],
|
||||
"Resource": [
|
||||
"arn:aws:s3:::your-bucket-name",
|
||||
"arn:aws:s3:::your-bucket-name/*"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Advantages of Using AWS CLI
|
||||
|
||||
### Why AWS CLI vs SDK?
|
||||
- **Simplicity**: Single command (`aws s3 sync`) handles everything
|
||||
- **Efficiency**: AWS CLI is optimized for bulk operations
|
||||
- **Robustness**: Built-in retry logic and error handling
|
||||
- **Features**: Automatic multipart uploads, checksums, and progress tracking
|
||||
- **Maintenance**: No need to manage complex SDK code for file operations
|
||||
|
||||
### AWS CLI Sync Command
|
||||
The sync uses: `aws s3 sync /local/jobs/path s3://bucket/jobs/` (add `--delete` to also remove files from S3 that were deleted locally)
|
||||
|
||||
This command:
|
||||
- Only uploads files that are new or modified (based on size and timestamp)
|
||||
- Automatically handles large files with multipart uploads
|
||||
- Deletes files from S3 that no longer exist locally (only when the `--delete` flag is enabled)
|
||||
- Provides detailed output of what was transferred
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **AWS CLI not found**: Install AWS CLI using the instructions above
|
||||
- **Permission denied**: Check IAM permissions and access keys
|
||||
- **Sync fails**: Check the application logs for detailed error messages
|
||||
- **Connection issues**: Verify S3 bucket name and region
|
||||
- **Test connection**: Use the status endpoint to verify S3 connectivity
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Scheduler**: Uses `node-cron` to schedule daily execution at midnight PST
|
||||
2. **AWS CLI Check**: Verifies AWS CLI is installed and available
|
||||
3. **Credential Setup**: Sets AWS credentials as environment variables
|
||||
4. **Sync Execution**: Runs `aws s3 sync` with appropriate parameters
|
||||
5. **Logging**: Captures and logs all command output and errors
|
||||
|
||||
## Dependencies
|
||||
|
||||
The implementation now only requires:
|
||||
- `node-cron` for scheduling
|
||||
- `fs-extra` for file system operations
|
||||
- Node.js built-in `child_process` for executing AWS CLI commands
|
||||
|
||||
No AWS SDK dependencies are needed!
|
||||
@@ -12,4 +12,10 @@ services:
|
||||
- IMS_TOKEN=
|
||||
- CONVERT_QUALITY=0.5
|
||||
- KEEP_CONVERTED_ORIGINALS=TRUE
|
||||
# S3 Sync Configuration (uncomment and configure for daily sync)
|
||||
# - S3_BUCKET_NAME=your-s3-bucket-name
|
||||
# - S3_REGION=us-east-1
|
||||
# - S3_ACCESS_KEY_ID=your-access-key-id
|
||||
# - S3_SECRET_ACCESS_KEY=your-secret-access-key
|
||||
# - S3_KEY_PREFIX=jobs/
|
||||
image: imexonline/media-server:beta
|
||||
@@ -1,2 +1,80 @@
|
||||
# Docker Deployment Guide
|
||||
|
||||
## Building the Image
|
||||
|
||||
```bash
|
||||
# Build latest version
|
||||
docker build . -t imexonline/media-server:latest -t imexonline/media-server:1.0
|
||||
|
||||
# Build beta version
|
||||
docker build . -t imexonline/media-server:beta
|
||||
```
|
||||
|
||||
## Docker Image Features
|
||||
|
||||
The Docker image includes:
|
||||
- Node.js 22 Alpine base
|
||||
- AWS CLI v2 for S3 sync functionality
|
||||
- GraphicsMagick, ImageMagick, and FFmpeg for media processing
|
||||
- Redis for background job processing
|
||||
- PM2 for process management
|
||||
- PST timezone configuration for scheduled tasks
|
||||
|
||||
## Environment Variables
|
||||
|
||||
### Required
|
||||
- `MEDIA_PATH` - Path to media storage directory
|
||||
- `IMS_TOKEN` - Authentication token
|
||||
|
||||
### Optional
|
||||
- `DUPLICATE_BILL_TO_VENDOR` - Whether to duplicate bills to vendor directory
|
||||
- `CONVERT_QUALITY` - Image conversion quality (0.0-1.0)
|
||||
- `KEEP_CONVERTED_ORIGINALS` - Whether to keep original files after conversion
|
||||
|
||||
### S3 Sync (Optional)
|
||||
- `S3_BUCKET_NAME` - S3 bucket name for daily sync
|
||||
- `S3_REGION` - AWS region (default: us-east-1)
|
||||
- `S3_ACCESS_KEY_ID` - AWS access key
|
||||
- `S3_SECRET_ACCESS_KEY` - AWS secret key
|
||||
- `S3_KEY_PREFIX` - S3 key prefix (default: jobs/)
|
||||
|
||||
## Running with Docker Compose
|
||||
|
||||
1. Update `docker-compose.yml` with your configuration
|
||||
2. Uncomment and configure S3 environment variables if needed
|
||||
3. Run: `docker-compose up -d`
|
||||
|
||||
## Manual Docker Run
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name bodyshop-media-server \
|
||||
-p 8000:8000 \
|
||||
-v "/path/to/media:/media" \
|
||||
-e MEDIA_PATH=/media \
|
||||
-e IMS_TOKEN=your-token \
|
||||
-e S3_BUCKET_NAME=your-bucket \
|
||||
-e S3_ACCESS_KEY_ID=your-key \
|
||||
-e S3_SECRET_ACCESS_KEY=your-secret \
|
||||
imexonline/media-server:latest
|
||||
```
|
||||
|
||||
## S3 Sync in Docker
|
||||
|
||||
The S3 sync functionality works automatically in Docker:
|
||||
- AWS CLI v2 is pre-installed
|
||||
- Timezone is set to PST for midnight scheduling
|
||||
- Daily sync runs at midnight PST automatically
|
||||
- Check sync status via API: `GET /sync/status`
|
||||
|
||||
## Health Checks
|
||||
|
||||
- Main health check: `GET /health`
|
||||
- Application status: `GET /` (requires token)
|
||||
- S3 sync status: `GET /sync/status` (requires token)
|
||||
|
||||
## Logs
|
||||
|
||||
View container logs: `docker logs bodyshop-media-server`
|
||||
|
||||
The application uses structured logging with daily rotation.
|
||||
|
||||
29
package-lock.json
generated
29
package-lock.json
generated
@@ -26,6 +26,7 @@
|
||||
"morgan": "^1.10.1",
|
||||
"multer": "^2.0.2",
|
||||
"nocache": "^4.0.0",
|
||||
"node-cron": "^4.2.1",
|
||||
"response-time": "^2.3.4",
|
||||
"simple-thumbnail": "^1.6.5",
|
||||
"winston": "^3.17.0",
|
||||
@@ -40,6 +41,7 @@
|
||||
"@types/morgan": "^1.9.10",
|
||||
"@types/multer": "^2.0.0",
|
||||
"@types/node": "^24.1.0",
|
||||
"@types/node-cron": "^3.0.11",
|
||||
"@types/response-time": "^2.3.9",
|
||||
"nodemon": "^3.1.10",
|
||||
"prettier": "^3.6.2",
|
||||
@@ -745,6 +747,13 @@
|
||||
"undici-types": "~7.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node-cron": {
|
||||
"version": "3.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-cron/-/node-cron-3.0.11.tgz",
|
||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/qs": {
|
||||
"version": "6.9.18",
|
||||
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
||||
@@ -2563,6 +2572,15 @@
|
||||
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/node-cron": {
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/node-cron/-/node-cron-4.2.1.tgz",
|
||||
"integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-gyp-build-optional-packages": {
|
||||
"version": "5.2.2",
|
||||
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
||||
@@ -4091,6 +4109,12 @@
|
||||
"undici-types": "~7.8.0"
|
||||
}
|
||||
},
|
||||
"@types/node-cron": {
|
||||
"version": "3.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-cron/-/node-cron-3.0.11.tgz",
|
||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/qs": {
|
||||
"version": "6.9.18",
|
||||
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.18.tgz",
|
||||
@@ -5353,6 +5377,11 @@
|
||||
"resolved": "https://registry.npmjs.org/node-abort-controller/-/node-abort-controller-3.1.1.tgz",
|
||||
"integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="
|
||||
},
|
||||
"node-cron": {
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/node-cron/-/node-cron-4.2.1.tgz",
|
||||
"integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg=="
|
||||
},
|
||||
"node-gyp-build-optional-packages": {
|
||||
"version": "5.2.2",
|
||||
"resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz",
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
"morgan": "^1.10.1",
|
||||
"multer": "^2.0.2",
|
||||
"nocache": "^4.0.0",
|
||||
"node-cron": "^4.2.1",
|
||||
"response-time": "^2.3.4",
|
||||
"simple-thumbnail": "^1.6.5",
|
||||
"winston": "^3.17.0",
|
||||
@@ -44,6 +45,7 @@
|
||||
"@types/morgan": "^1.9.10",
|
||||
"@types/multer": "^2.0.0",
|
||||
"@types/node": "^24.1.0",
|
||||
"@types/node-cron": "^3.0.11",
|
||||
"@types/response-time": "^2.3.9",
|
||||
"nodemon": "^3.1.10",
|
||||
"prettier": "^3.6.2",
|
||||
|
||||
43
server.ts
43
server.ts
@@ -21,6 +21,8 @@ import { JobsMoveMedia } from "./jobs/jobsMoveMedia.js";
|
||||
import { JobMediaUploadMulter, jobsUploadMedia } from "./jobs/jobsUploadMedia.js";
|
||||
import InitServer, { FolderPaths } from "./util/serverInit.js";
|
||||
import ValidateImsToken from "./util/validateToken.js";
|
||||
import { dailyS3Scheduler } from "./util/dailyS3Scheduler.js";
|
||||
import { analyzeJobsDirectory } from "./util/s3Sync.js";
|
||||
|
||||
dotenv.config({
|
||||
path: resolve(process.cwd(), `.env.${process.env.NODE_ENV || "development"}`)
|
||||
@@ -132,11 +134,52 @@ app.get("/health", (req, res) => {
|
||||
res.status(200).send("OK");
|
||||
});
|
||||
|
||||
// S3 sync status endpoint
|
||||
app.get("/sync/status", ValidateImsToken, async (req, res) => {
|
||||
try {
|
||||
const status = await dailyS3Scheduler.getStatus();
|
||||
res.json(status);
|
||||
} catch (error) {
|
||||
logger.error("Failed to get sync status:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
res.status(500).json({ error: errorMessage });
|
||||
}
|
||||
});
|
||||
|
||||
// Manual S3 sync trigger endpoint (for testing)
|
||||
app.post("/sync/trigger", ValidateImsToken, async (req, res) => {
|
||||
try {
|
||||
await dailyS3Scheduler.triggerManualSync();
|
||||
res.json({ success: true, message: "Manual sync triggered successfully" });
|
||||
} catch (error) {
|
||||
logger.error("Manua--l sync failed:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
res.status(500).json({ success: false, message: "Manual sync failed", error: errorMessage });
|
||||
}
|
||||
});
|
||||
|
||||
// Jobs directory analysis endpoint
|
||||
app.get("/jobs/analysis", ValidateImsToken, async (req, res) => {
|
||||
try {
|
||||
const analysis = await analyzeJobsDirectory();
|
||||
res.json(analysis);
|
||||
} catch (error) {
|
||||
logger.error("Failed to analyze jobs directory:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
res.status(500).json({ success: false, message: "Jobs analysis failed", error: errorMessage });
|
||||
}
|
||||
});
|
||||
|
||||
// Static files
|
||||
InitServer();
|
||||
app.use(FolderPaths.StaticPath, express.static(FolderPaths.Root, { etag: false, maxAge: 30 * 1000 }));
|
||||
app.use("/assets", express.static("/assets", { etag: false, maxAge: 30 * 1000 }));
|
||||
|
||||
// Start the daily S3 sync scheduler
|
||||
dailyS3Scheduler.start().catch((error) => {
|
||||
logger.error("Failed to start sync scheduler:", error);
|
||||
});
|
||||
|
||||
app.listen(port, () => {
|
||||
logger.info(`ImEX Media Server is running at http://localhost:${port}`);
|
||||
});
|
||||
|
||||
154
util/dailyS3Scheduler.ts
Normal file
154
util/dailyS3Scheduler.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
import * as cron from "node-cron";
|
||||
import { logger } from "../server.js";
|
||||
import { S3Sync, createS3SyncFromEnv } from "./s3Sync.js";
|
||||
|
||||
export class DailyS3Scheduler {
|
||||
private s3Sync: S3Sync | null = null;
|
||||
private cronJob: cron.ScheduledTask | null = null;
|
||||
|
||||
constructor() {
|
||||
this.s3Sync = createS3SyncFromEnv();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the daily S3 sync scheduler
|
||||
* Runs at midnight PST (00:00 PST = 08:00 UTC during standard time, 07:00 UTC during daylight time)
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (!this.s3Sync) {
|
||||
logger.warn("S3 sync not configured. Skipping scheduler setup.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Test S3 connection before starting scheduler
|
||||
const connectionTest = await this.s3Sync.testConnection();
|
||||
if (!connectionTest) {
|
||||
logger.error("S3 connection test failed. S3 sync scheduler will not be started.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Cron expression for midnight PST
|
||||
// Note: This uses PST timezone. During PDT (daylight time), it will still run at midnight local time
|
||||
const cronExpression = "0 0 * * *"; // Every day at midnight
|
||||
const timezone = "America/Los_Angeles"; // PST/PDT timezone
|
||||
|
||||
this.cronJob = cron.schedule(
|
||||
cronExpression,
|
||||
async () => {
|
||||
await this.performDailySync();
|
||||
},
|
||||
{
|
||||
timezone: timezone,
|
||||
}
|
||||
);
|
||||
|
||||
logger.info(`Daily S3 sync scheduler started. Will run at midnight PST/PDT.`);
|
||||
logger.info(`Next sync scheduled for: ${this.getNextRunTime()}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the scheduler
|
||||
*/
|
||||
stop(): void {
|
||||
if (this.cronJob) {
|
||||
this.cronJob.stop();
|
||||
this.cronJob = null;
|
||||
logger.info("Daily S3 sync scheduler stopped.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the daily sync operation
|
||||
*/
|
||||
private async performDailySync(): Promise<void> {
|
||||
if (!this.s3Sync) {
|
||||
logger.error("S3 sync not available for daily sync");
|
||||
return;
|
||||
}
|
||||
|
||||
const startTime = new Date();
|
||||
logger.info(`Starting daily S3 sync at ${startTime.toISOString()}`);
|
||||
|
||||
try {
|
||||
await this.s3Sync.syncJobsToS3();
|
||||
const endTime = new Date();
|
||||
const duration = endTime.getTime() - startTime.getTime();
|
||||
logger.info(`Daily S3 sync completed successfully in ${duration}ms`);
|
||||
} catch (error) {
|
||||
logger.error("Daily S3 sync failed:", error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Manually trigger a sync (useful for testing)
|
||||
*/
|
||||
async triggerManualSync(): Promise<void> {
|
||||
if (!this.s3Sync) {
|
||||
logger.error("S3 sync not configured");
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("Triggering manual S3 sync...");
|
||||
await this.performDailySync();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next scheduled run time
|
||||
*/
|
||||
private getNextRunTime(): string {
|
||||
if (!this.cronJob) {
|
||||
return "Not scheduled";
|
||||
}
|
||||
|
||||
// Create a date object for midnight PST today
|
||||
const now = new Date();
|
||||
const pstNow = new Date(now.toLocaleString("en-US", { timeZone: "America/Los_Angeles" }));
|
||||
|
||||
// If it's past midnight today, next run is tomorrow at midnight
|
||||
const nextRun = new Date(pstNow);
|
||||
if (pstNow.getHours() > 0 || pstNow.getMinutes() > 0 || pstNow.getSeconds() > 0) {
|
||||
nextRun.setDate(nextRun.getDate() + 1);
|
||||
}
|
||||
nextRun.setHours(0, 0, 0, 0);
|
||||
|
||||
return nextRun.toLocaleString("en-US", {
|
||||
timeZone: "America/Los_Angeles",
|
||||
weekday: "long",
|
||||
year: "numeric",
|
||||
month: "long",
|
||||
day: "numeric",
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
timeZoneName: "short"
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get scheduler status
|
||||
*/
|
||||
async getStatus(): Promise<{
|
||||
isConfigured: boolean;
|
||||
isRunning: boolean;
|
||||
nextRun: string;
|
||||
syncStats?: { bucketName: string; region: string; keyPrefix: string; available: boolean };
|
||||
}> {
|
||||
let syncStats;
|
||||
if (this.s3Sync) {
|
||||
try {
|
||||
syncStats = await this.s3Sync.getSyncStats();
|
||||
} catch (error) {
|
||||
logger.error("Failed to get sync stats:", error);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
isConfigured: this.s3Sync !== null,
|
||||
isRunning: this.cronJob !== null,
|
||||
nextRun: this.getNextRunTime(),
|
||||
syncStats,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Export a singleton instance
|
||||
export const dailyS3Scheduler = new DailyS3Scheduler();
|
||||
388
util/s3Sync.ts
Normal file
388
util/s3Sync.ts
Normal file
@@ -0,0 +1,388 @@
|
||||
import { exec } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import * as fs from "fs-extra";
|
||||
import * as path from "path";
|
||||
import { logger } from "../server.js";
|
||||
import { FolderPaths } from "./serverInit.js";
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
interface S3SyncConfig {
|
||||
bucketName: string;
|
||||
region: string;
|
||||
accessKeyId: string;
|
||||
secretAccessKey: string;
|
||||
keyPrefix?: string;
|
||||
}
|
||||
|
||||
export interface JobFolderStats {
|
||||
jobId: string;
|
||||
relativePath: string;
|
||||
documentCount: number;
|
||||
totalSizeBytes: number;
|
||||
totalSizeMB: number;
|
||||
}
|
||||
|
||||
export interface JobsDirectoryAnalysis {
|
||||
totalJobs: number;
|
||||
totalDocuments: number;
|
||||
totalSizeBytes: number;
|
||||
totalSizeMB: number;
|
||||
jobs: JobFolderStats[];
|
||||
}
|
||||
|
||||
export class S3Sync {
|
||||
private config: S3SyncConfig;
|
||||
|
||||
constructor(config: S3SyncConfig) {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sync the Jobs directory to S3 bucket using AWS CLI
|
||||
*/
|
||||
async syncJobsToS3(): Promise<void> {
|
||||
try {
|
||||
logger.info("Starting S3 sync for Jobs directory using AWS CLI...");
|
||||
|
||||
const jobsPath = FolderPaths.Jobs;
|
||||
const keyPrefix = this.config.keyPrefix || "jobs/";
|
||||
const s3Path = `s3://${this.config.bucketName}/${keyPrefix}`;
|
||||
|
||||
// Check if Jobs directory exists
|
||||
if (!(await fs.pathExists(jobsPath))) {
|
||||
logger.warn(`Jobs directory does not exist: ${jobsPath}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if AWS CLI is available
|
||||
await this.checkAwsCli();
|
||||
|
||||
// Set AWS credentials as environment variables for the command
|
||||
const env = {
|
||||
...process.env,
|
||||
AWS_ACCESS_KEY_ID: this.config.accessKeyId,
|
||||
AWS_SECRET_ACCESS_KEY: this.config.secretAccessKey,
|
||||
AWS_DEFAULT_REGION: this.config.region
|
||||
};
|
||||
|
||||
// Run AWS S3 sync command
|
||||
const syncCommand = `aws s3 sync "${jobsPath}" "${s3Path}"`;
|
||||
|
||||
logger.info(`Executing AWS S3 sync command`);
|
||||
|
||||
const { stdout, stderr } = await execAsync(syncCommand, {
|
||||
env,
|
||||
maxBuffer: 1024 * 1024 * 10 // 10MB buffer for large sync outputs
|
||||
});
|
||||
|
||||
if (stdout) {
|
||||
logger.info("S3 sync output:", stdout);
|
||||
}
|
||||
|
||||
if (stderr) {
|
||||
logger.warn("S3 sync warnings:", stderr);
|
||||
}
|
||||
|
||||
logger.info("S3 sync completed successfully using AWS CLI");
|
||||
} catch (error) {
|
||||
logger.error("S3 sync failed:", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if AWS CLI is available
|
||||
*/
|
||||
private async checkAwsCli(): Promise<void> {
|
||||
try {
|
||||
await execAsync("aws --version");
|
||||
} catch (error) {
|
||||
throw new Error(
|
||||
"AWS CLI not found. Please install AWS CLI: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test S3 connection using AWS CLI
|
||||
*/
|
||||
async testConnection(): Promise<boolean> {
|
||||
try {
|
||||
// Check if AWS CLI is available first
|
||||
await this.checkAwsCli();
|
||||
|
||||
// Set AWS credentials as environment variables
|
||||
const env = {
|
||||
...process.env,
|
||||
AWS_ACCESS_KEY_ID: this.config.accessKeyId,
|
||||
AWS_SECRET_ACCESS_KEY: this.config.secretAccessKey,
|
||||
AWS_DEFAULT_REGION: this.config.region
|
||||
};
|
||||
|
||||
// Test connection by listing bucket
|
||||
const testCommand = `aws s3 ls s3://${this.config.bucketName} --max-items 1`;
|
||||
await execAsync(testCommand, { env });
|
||||
|
||||
logger.info(`S3 connection test successful for bucket: ${this.config.bucketName}`);
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error("S3 connection test failed:", error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get sync statistics using AWS CLI
|
||||
*/
|
||||
async getSyncStats(): Promise<{ bucketName: string; region: string; keyPrefix: string; available: boolean }> {
|
||||
const available = await this.testConnection();
|
||||
return {
|
||||
bucketName: this.config.bucketName,
|
||||
region: this.config.region,
|
||||
keyPrefix: this.config.keyPrefix || "jobs/",
|
||||
available
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze all job folders in the Jobs directory
|
||||
* Returns detailed statistics for each job folder
|
||||
*/
|
||||
async analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
try {
|
||||
logger.info("Starting Jobs directory analysis...");
|
||||
|
||||
const jobsPath = FolderPaths.Jobs;
|
||||
|
||||
// Check if Jobs directory exists
|
||||
if (!(await fs.pathExists(jobsPath))) {
|
||||
logger.warn(`Jobs directory does not exist: ${jobsPath}`);
|
||||
return {
|
||||
totalJobs: 0,
|
||||
totalDocuments: 0,
|
||||
totalSizeBytes: 0,
|
||||
totalSizeMB: 0,
|
||||
jobs: []
|
||||
};
|
||||
}
|
||||
|
||||
const jobFolders = await fs.readdir(jobsPath);
|
||||
const jobStats: JobFolderStats[] = [];
|
||||
let totalDocuments = 0;
|
||||
let totalSizeBytes = 0;
|
||||
|
||||
for (const jobFolder of jobFolders) {
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fs.stat(jobFolderPath);
|
||||
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
const folderStats = await this.analyzeJobFolder(jobsPath, jobFolder);
|
||||
jobStats.push(folderStats);
|
||||
totalDocuments += folderStats.documentCount;
|
||||
totalSizeBytes += folderStats.totalSizeBytes;
|
||||
}
|
||||
}
|
||||
|
||||
const analysis: JobsDirectoryAnalysis = {
|
||||
totalJobs: jobStats.length,
|
||||
totalDocuments,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
|
||||
};
|
||||
|
||||
logger.info(
|
||||
`Jobs directory analysis complete: ${analysis.totalJobs} jobs, ${analysis.totalDocuments} documents, ${analysis.totalSizeMB} MB`
|
||||
);
|
||||
return analysis;
|
||||
} catch (error) {
|
||||
logger.error("Failed to analyze Jobs directory:", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze a single job folder
|
||||
*/
|
||||
private async analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFolderStats> {
|
||||
const jobFolderPath = path.join(jobsPath, jobId);
|
||||
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
|
||||
|
||||
const { documentCount, totalSizeBytes } = await this.getDirectoryStats(jobFolderPath);
|
||||
|
||||
return {
|
||||
jobId,
|
||||
relativePath,
|
||||
documentCount,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively get document count and total size for a directory
|
||||
*/
|
||||
private async getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number }> {
|
||||
let documentCount = 0;
|
||||
let totalSizeBytes = 0;
|
||||
|
||||
try {
|
||||
const items = await fs.readdir(dirPath);
|
||||
|
||||
for (const item of items) {
|
||||
const itemPath = path.join(dirPath, item);
|
||||
const stat = await fs.stat(itemPath);
|
||||
|
||||
if (stat.isDirectory()) {
|
||||
// Recursively analyze subdirectories
|
||||
const subStats = await this.getDirectoryStats(itemPath);
|
||||
documentCount += subStats.documentCount;
|
||||
totalSizeBytes += subStats.totalSizeBytes;
|
||||
} else {
|
||||
// Count files as documents
|
||||
documentCount++;
|
||||
totalSizeBytes += stat.size;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Error analyzing directory ${dirPath}:`, error);
|
||||
}
|
||||
|
||||
return { documentCount, totalSizeBytes };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create S3Sync instance from environment variables
|
||||
*/
|
||||
export function createS3SyncFromEnv(): S3Sync | null {
|
||||
const bucketName = process.env.S3_BUCKET_NAME || "test";
|
||||
const region = process.env.S3_REGION || "ca-central-1";
|
||||
const accessKeyId = process.env.S3_ACCESS_KEY_ID || "key";
|
||||
const secretAccessKey = process.env.S3_SECRET_ACCESS_KEY || "secret";
|
||||
const keyPrefix = process.env.S3_KEY_PREFIX || "prefix";
|
||||
|
||||
if (!bucketName || !accessKeyId || !secretAccessKey) {
|
||||
logger.warn(
|
||||
"S3 configuration incomplete. Required env vars: S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY"
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
return new S3Sync({
|
||||
bucketName,
|
||||
region,
|
||||
accessKeyId,
|
||||
secretAccessKey,
|
||||
keyPrefix
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Standalone function to analyze Jobs directory without S3 configuration
|
||||
*/
|
||||
export async function analyzeJobsDirectory(): Promise<JobsDirectoryAnalysis> {
|
||||
try {
|
||||
logger.info("Starting Jobs directory analysis...");
|
||||
|
||||
const jobsPath = FolderPaths.Jobs;
|
||||
|
||||
// Check if Jobs directory exists
|
||||
if (!(await fs.pathExists(jobsPath))) {
|
||||
logger.warn(`Jobs directory does not exist: ${jobsPath}`);
|
||||
return {
|
||||
totalJobs: 0,
|
||||
totalDocuments: 0,
|
||||
totalSizeBytes: 0,
|
||||
totalSizeMB: 0,
|
||||
jobs: []
|
||||
};
|
||||
}
|
||||
|
||||
const jobFolders = await fs.readdir(jobsPath);
|
||||
const jobStats: JobFolderStats[] = [];
|
||||
let totalDocuments = 0;
|
||||
let totalSizeBytes = 0;
|
||||
|
||||
for (const jobFolder of jobFolders) {
|
||||
const jobFolderPath = path.join(jobsPath, jobFolder);
|
||||
const stat = await fs.stat(jobFolderPath);
|
||||
|
||||
// Only process directories
|
||||
if (stat.isDirectory()) {
|
||||
const folderStats = await analyzeJobFolder(jobsPath, jobFolder);
|
||||
jobStats.push(folderStats);
|
||||
totalDocuments += folderStats.documentCount;
|
||||
totalSizeBytes += folderStats.totalSizeBytes;
|
||||
}
|
||||
}
|
||||
|
||||
const analysis: JobsDirectoryAnalysis = {
|
||||
totalJobs: jobStats.length,
|
||||
totalDocuments,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100,
|
||||
jobs: jobStats.sort((a, b) => a.jobId.localeCompare(b.jobId))
|
||||
};
|
||||
|
||||
logger.info(
|
||||
`Jobs directory analysis complete: ${analysis.totalJobs} jobs, ${analysis.totalDocuments} documents, ${analysis.totalSizeMB} MB`
|
||||
);
|
||||
return analysis;
|
||||
} catch (error) {
|
||||
logger.error("Failed to analyze Jobs directory:", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze a single job folder (standalone helper function)
|
||||
*/
|
||||
async function analyzeJobFolder(jobsPath: string, jobId: string): Promise<JobFolderStats> {
|
||||
const jobFolderPath = path.join(jobsPath, jobId);
|
||||
const relativePath = path.relative(FolderPaths.Root, jobFolderPath);
|
||||
|
||||
const { documentCount, totalSizeBytes } = await getDirectoryStats(jobFolderPath);
|
||||
|
||||
return {
|
||||
jobId,
|
||||
relativePath,
|
||||
documentCount,
|
||||
totalSizeBytes,
|
||||
totalSizeMB: Math.round((totalSizeBytes / (1024 * 1024)) * 100) / 100
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively get document count and total size for a directory (standalone helper function)
|
||||
*/
|
||||
async function getDirectoryStats(dirPath: string): Promise<{ documentCount: number; totalSizeBytes: number }> {
|
||||
let documentCount = 0;
|
||||
let totalSizeBytes = 0;
|
||||
|
||||
try {
|
||||
const items = await fs.readdir(dirPath);
|
||||
|
||||
for (const item of items) {
|
||||
const itemPath = path.join(dirPath, item);
|
||||
const stat = await fs.stat(itemPath);
|
||||
|
||||
if (stat.isDirectory()) {
|
||||
// Recursively analyze subdirectories
|
||||
const subStats = await getDirectoryStats(itemPath);
|
||||
documentCount += subStats.documentCount;
|
||||
totalSizeBytes += subStats.totalSizeBytes;
|
||||
} else {
|
||||
// Count files as documents
|
||||
documentCount++;
|
||||
totalSizeBytes += stat.size;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Error analyzing directory ${dirPath}:`, error);
|
||||
}
|
||||
|
||||
return { documentCount, totalSizeBytes };
|
||||
}
|
||||
Reference in New Issue
Block a user