feat(gtm-architect): Finalize migration and implement web scraping

- Refactors the gtm-architect Dockerfile to a flat /app layout and a leaner build (Node 20 and Python 3.11 base images, apt list cleanup, `npm install --omit=dev`).
- Implements robust web scraping via BeautifulSoup in helpers.py for URL analysis in phase1.
- Makes the shared library imports (gspread, pandas, etc.) in helpers.py optional, so microservices that do not install them no longer fail with ModuleNotFoundError.
- Implements the main execution logic in the orchestrator to handle command-line arguments.
- Updates documentation to reflect the new architecture, scraping feature, and dependency handling.
This commit is contained in:
2026-01-03 08:43:53 +00:00
parent 2663d85ae7
commit 302a211239
7 changed files with 282 additions and 64 deletions

View File

@@ -1,5 +1,5 @@
# Stage 1: Build the React frontend
FROM node:18-slim AS builder
FROM node:20-slim AS builder
WORKDIR /app
# Copy package.json from the subdirectory (relative to project root)
@@ -15,20 +15,22 @@ COPY gtm-architect/ ./
RUN npm run build
# Stage 2: Setup the production environment
FROM python:3.9-slim
FROM python:3.11-slim
WORKDIR /app
# Install Node.js
RUN apt-get update && apt-get install -y curl && \
curl -sL https://deb.nodesource.com/setup_18.x | bash - && \
apt-get install -y nodejs
RUN apt-get update && \
apt-get install -y --no-install-recommends curl ca-certificates && \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y --no-install-recommends nodejs && \
rm -rf /var/lib/apt/lists/*
# Copy built frontend from builder stage
COPY --from=builder /app/dist ./gtm-architect/dist
COPY --from=builder /app/dist ./dist
# Copy backend files and application code (paths relative to project root)
COPY gtm-architect/server.cjs ./gtm-architect/
COPY gtm-architect/package.json ./gtm-architect/
COPY gtm-architect/server.cjs .
COPY gtm-architect/package.json .
COPY gtm_architect_orchestrator.py .
COPY helpers.py .
COPY config.py .
@@ -38,10 +40,10 @@ COPY gtm_db_manager.py .
# Install Python and Node.js dependencies
RUN pip install --no-cache-dir -r requirements.txt
RUN cd gtm-architect && npm install --production
RUN npm install --omit=dev
# Expose the port the server will run on
EXPOSE 3005
# Command to run the server
CMD ["node", "gtm-architect/server.cjs"]
CMD ["node", "server.cjs"]

View File

@@ -4,6 +4,8 @@ const cors = require('cors');
const path = require('path');
const fs = require('fs');
const VERSION = "1.0.0"; // Added for debugging and tracking
const app = express();
const port = 3005;
@@ -33,7 +35,7 @@ app.post('/api/run', (req, res) => {
const payloadString = JSON.stringify(payload);
const payloadBase64 = Buffer.from(payloadString).toString('base64');
const pythonScriptPath = path.join(__dirname, '../gtm_architect_orchestrator.py');
const pythonScriptPath = path.join(__dirname, 'gtm_architect_orchestrator.py');
const pythonProcess = spawn('python3', [
pythonScriptPath,
'--mode', mode,
@@ -84,6 +86,9 @@ if (fs.existsSync(staticPath)) {
}
app.listen(port, () => {
console.log(`Server listening on port ${port}`);
});
// Start the HTTP server and keep the returned handle so its timeouts can be
// adjusted below; the startup log includes VERSION for tracking.
const server = app.listen(port, () => {
console.log(`Server listening on port ${port} (Version: ${VERSION})`);
});
// NOTE(review): the long timeout is presumably meant to keep slow /api/run
// requests (which spawn the Python orchestrator) from being cut off — confirm.
server.setTimeout(600000); // 10 minutes
// The keep-alive and headers timeouts are staggered in 10-second steps above
// the socket timeout (600 s < 610 s < 620 s).
server.keepAliveTimeout = 610000;
server.headersTimeout = 620000;