# Настройка мониторинга и логирования SFERA

## 🎯 Обзор

Комплексная система мониторинга и логирования для платформы SFERA, включающая метрики производительности, логирование ошибок, алертинг и визуализацию данных для обеспечения надежности и производительности в production окружении.

## 📊 Архитектура мониторинга

### Компоненты системы

```mermaid
graph TB
    A[SFERA App] --> B[Winston Logger]
    A --> C[Prometheus Metrics]
    A --> D[OpenTelemetry]

    B --> E[Log Files]
    B --> F[ELK Stack]

    C --> G[Grafana Dashboard]
    D --> H[Jaeger Tracing]

    I[Alertmanager] --> J[Slack/Email]
    G --> I
```

## 🚨 Логирование

### 1. Структурированное логирование с Winston

#### Установка зависимостей

```bash
npm install winston winston-daily-rotate-file
# Пакет winston поставляется с собственными типами TypeScript — отдельный @types/winston не нужен
```

#### Конфигурация логгера

Создание `src/lib/logger.ts`:

```typescript
import winston from 'winston'
import DailyRotateFile from 'winston-daily-rotate-file'

// Определение уровней логирования
const levels = {
  error: 0,
  warn: 1,
  info: 2,
  http: 3,
  verbose: 4,
  debug: 5,
  silly: 6,
}

// Цвета для консольного вывода
const colors = {
  error: 'red',
  warn: 'yellow',
  info: 'green',
  http: 'magenta',
  verbose: 'white',
  debug: 'cyan',
  silly: 'grey',
}

winston.addColors(colors)

// Формат для production
const productionFormat = winston.format.combine(
  // SSS — миллисекунды (токена `ms` в формате времени нет)
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss.SSS' }),
  winston.format.errors({ stack: true }),
  winston.format.json(),
)

// Формат для разработки
const developmentFormat = winston.format.combine(
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss.SSS' }),
  winston.format.colorize({ all: true }),
  winston.format.printf(
    (info) => `${info.timestamp} ${info.level}: ${info.message}${info.stack ?
'\n' + info.stack : ''}`,
  ),
)

// Транспорты для production
const productionTransports: winston.transport[] = [
  // Консольный вывод
  new winston.transports.Console({
    level: 'info',
    format: productionFormat,
  }),

  // Ротация логов по дням - общие логи
  new DailyRotateFile({
    filename: 'logs/application-%DATE%.log',
    datePattern: 'YYYY-MM-DD',
    zippedArchive: true,
    maxSize: '20m',
    maxFiles: '14d',
    level: 'info',
    format: productionFormat,
  }),

  // Отдельный файл для ошибок
  new DailyRotateFile({
    filename: 'logs/error-%DATE%.log',
    datePattern: 'YYYY-MM-DD',
    zippedArchive: true,
    maxSize: '20m',
    maxFiles: '30d',
    level: 'error',
    format: productionFormat,
  }),

  // HTTP запросы
  new DailyRotateFile({
    filename: 'logs/http-%DATE%.log',
    datePattern: 'YYYY-MM-DD',
    zippedArchive: true,
    maxSize: '20m',
    maxFiles: '7d',
    level: 'http',
    format: productionFormat,
  }),
]

// Транспорты для разработки
const developmentTransports: winston.transport[] = [
  new winston.transports.Console({
    level: 'debug',
    format: developmentFormat,
  }),
]

// Создание логгера
export const logger = winston.createLogger({
  // В production нужен уровень 'http': при уровне 'info' вызовы logger.http()
  // отбрасываются самим логгером и файл logs/http-*.log остаётся пустым.
  // Консоль и общий файл по-прежнему ограничены собственным level: 'info'.
  level: process.env.NODE_ENV === 'production' ? 'http' : 'debug',
  levels,
  format: process.env.NODE_ENV === 'production' ? productionFormat : developmentFormat,
  transports: process.env.NODE_ENV === 'production' ?
productionTransports : developmentTransports, exitOnError: false, }) // Middleware для Express/Next.js export const loggerMiddleware = (req: any, res: any, next: any) => { const start = Date.now() res.on('finish', () => { const duration = Date.now() - start logger.http('HTTP Request', { method: req.method, url: req.url, status: res.statusCode, duration: `${duration}ms`, userAgent: req.get('User-Agent'), ip: req.ip, }) }) next() } // Утилиты для логирования export const logError = (error: Error, context?: object) => { logger.error('Application Error', { message: error.message, stack: error.stack, ...context, }) } export const logInfo = (message: string, meta?: object) => { logger.info(message, meta) } export const logWarn = (message: string, meta?: object) => { logger.warn(message, meta) } export const logDebug = (message: string, meta?: object) => { logger.debug(message, meta) } ``` ### 2. Интеграция с Next.js API #### API Routes логирование ```typescript // src/app/api/graphql/route.ts import { logger } from '@/lib/logger' export async function POST(request: Request) { const startTime = Date.now() try { logger.info('GraphQL Request Started') // Основная логика GraphQL const result = await handleGraphQLRequest(request) logger.info('GraphQL Request Completed', { duration: Date.now() - startTime, success: true, }) return result } catch (error) { logger.error('GraphQL Request Failed', { duration: Date.now() - startTime, error: error.message, stack: error.stack, }) throw error } } ``` #### GraphQL Resolvers логирование ```typescript // src/graphql/resolvers.ts import { logger } from '@/lib/logger' export const resolvers = { Query: { getUser: async (parent: any, args: any, context: any) => { const { userId } = args logger.info('Getting user', { userId, requestId: context.requestId }) try { const user = await prisma.user.findUnique({ where: { id: userId }, }) logger.info('User retrieved successfully', { userId }) return user } catch (error) { logger.error('Failed to get 
user', {
          userId,
          error: error.message,
        })
        throw error
      }
    },
  },
}
```

## 📈 Метрики и мониторинг

### 1. Prometheus метрики

#### Установка зависимостей

```bash
npm install prom-client
# Пакет prom-client уже содержит типы TypeScript — отдельный пакет @types/prom-client не требуется
```

#### Настройка метрик

Создание `src/lib/metrics.ts`:

```typescript
import promClient from 'prom-client'

// Создание реестра метрик
export const register = new promClient.Registry()

// Добавление стандартных метрик
promClient.collectDefaultMetrics({
  register,
  prefix: 'sfera_',
})

// HTTP запросы
export const httpRequestsTotal = new promClient.Counter({
  name: 'sfera_http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: ['method', 'route', 'status'],
  registers: [register],
})

export const httpRequestDuration = new promClient.Histogram({
  name: 'sfera_http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'status'],
  buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
  registers: [register],
})

// GraphQL метрики
export const graphqlOperationsTotal = new promClient.Counter({
  name: 'sfera_graphql_operations_total',
  help: 'Total number of GraphQL operations',
  labelNames: ['operation_name', 'operation_type', 'success'],
  registers: [register],
})

export const graphqlOperationDuration = new promClient.Histogram({
  name: 'sfera_graphql_operation_duration_seconds',
  help: 'Duration of GraphQL operations in seconds',
  labelNames: ['operation_name', 'operation_type'],
  buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5],
  registers: [register],
})

// База данных
export const databaseConnectionsActive = new promClient.Gauge({
  name: 'sfera_database_connections_active',
  help: 'Number of active database connections',
  registers: [register],
})

export const databaseQueryDuration = new promClient.Histogram({
  name: 'sfera_database_query_duration_seconds',
  help: 'Duration of database queries in seconds',
  labelNames: ['query_type'],
  buckets: [0.001, 0.005, 0.01,
0.025, 0.05, 0.1, 0.25, 0.5, 1], registers: [register], }) // Бизнес метрики export const usersOnline = new promClient.Gauge({ name: 'sfera_users_online', help: 'Number of users currently online', registers: [register], }) export const ordersTotal = new promClient.Counter({ name: 'sfera_orders_total', help: 'Total number of orders created', labelNames: ['organization_type', 'status'], registers: [register], }) export const messagesTotal = new promClient.Counter({ name: 'sfera_messages_total', help: 'Total number of messages sent', labelNames: ['message_type'], registers: [register], }) // Redis/кэш метрики export const cacheHitsTotal = new promClient.Counter({ name: 'sfera_cache_hits_total', help: 'Total number of cache hits', labelNames: ['cache_key_pattern'], registers: [register], }) export const cacheMissesTotal = new promClient.Counter({ name: 'sfera_cache_misses_total', help: 'Total number of cache misses', labelNames: ['cache_key_pattern'], registers: [register], }) // Middleware для сбора HTTP метрик export const metricsMiddleware = (req: any, res: any, next: any) => { const start = Date.now() res.on('finish', () => { const duration = (Date.now() - start) / 1000 const route = req.route?.path || req.path httpRequestsTotal.labels(req.method, route, res.statusCode.toString()).inc() httpRequestDuration.labels(req.method, route, res.statusCode.toString()).observe(duration) }) next() } ``` #### API endpoint для метрик ```typescript // src/app/api/metrics/route.ts import { NextResponse } from 'next/server' import { register } from '@/lib/metrics' export async function GET() { try { const metrics = await register.metrics() return new NextResponse(metrics, { headers: { 'Content-Type': register.contentType, }, }) } catch (error) { return NextResponse.json({ error: 'Failed to generate metrics' }, { status: 500 }) } } ``` ### 2. 
OpenTelemetry трассировка

#### Установка зависимостей

```bash
npm install @opentelemetry/api @opentelemetry/sdk-node
npm install @opentelemetry/instrumentation-http
npm install @opentelemetry/instrumentation-graphql
npm install @opentelemetry/exporter-jaeger
# Эти пакеты импортируются в tracing.ts напрямую, поэтому должны быть явными зависимостями
npm install @opentelemetry/resources @opentelemetry/semantic-conventions
```

#### Конфигурация трассировки

Создание `src/lib/tracing.ts`:

```typescript
import { NodeSDK } from '@opentelemetry/sdk-node'
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'
import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'
import { JaegerExporter } from '@opentelemetry/exporter-jaeger'
import { Resource } from '@opentelemetry/resources'
import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions'

// Настройка экспортера для Jaeger
const jaegerExporter = new JaegerExporter({
  endpoint: process.env.JAEGER_ENDPOINT || 'http://localhost:14268/api/traces',
})

// Настройка SDK
const sdk = new NodeSDK({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'sfera-app',
    [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
  }),
  traceExporter: jaegerExporter,
  instrumentations: [
    new HttpInstrumentation({
      applyCustomAttributesOnSpan: (span, request, response) => {
        span.setAttributes({
          'http.request.body.size': request.headers['content-length'] || 0,
          'http.response.body.size': response.getHeader('content-length') || 0,
        })
      },
    }),
    new GraphQLInstrumentation({
      mergeItems: true,
      allowValues: true,
    }),
  ],
})

// Инициализация трассировки
if (process.env.NODE_ENV === 'production') {
  sdk.start()
  console.log('Tracing started successfully')
}

export { sdk }
```

## 📱 Dashboard и визуализация

### 1.
Grafana Dashboard конфигурация

#### Docker Compose для мониторинга стека

Создание `docker-compose.monitoring.yml`:

```yaml
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:latest
    container_name: sfera-prometheus
    ports:
      - '9090:9090'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      # Правила алертинга: без этого монтирования rule_files 'rules/*.yml' ничего не найдёт
      - ./monitoring/rules:/etc/prometheus/rules
      - prometheus_data:/prometheus
    # На Linux имя host.docker.internal не резолвится без явного сопоставления
    extra_hosts:
      - 'host.docker.internal:host-gateway'
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    container_name: sfera-grafana
    ports:
      - '3001:3000'
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning
      - ./monitoring/grafana/dashboards:/etc/grafana/dashboards
    environment:
      # В production задайте GRAFANA_ADMIN_PASSWORD; admin123 — значение по умолчанию только для локальной среды
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
    restart: unless-stopped

  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: sfera-jaeger
    ports:
      - '16686:16686'
      - '14268:14268'
    environment:
      - COLLECTOR_OTLP_ENABLED=true
    restart: unless-stopped

  alertmanager:
    image: prom/alertmanager:latest
    container_name: sfera-alertmanager
    ports:
      - '9093:9093'
    volumes:
      - ./monitoring/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    restart: unless-stopped

  # Источник метрик node_* для job 'node-exporter' и алертов по диску и памяти
  node-exporter:
    image: prom/node-exporter:latest
    container_name: sfera-node-exporter
    ports:
      - '9100:9100'
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data:
```

#### Prometheus конфигурация

Создание `monitoring/prometheus.yml`:

```yaml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - 'rules/*.yml'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

scrape_configs:
  - job_name: 'sfera-app'
    static_configs:
      - targets: ['host.docker.internal:3000']
    metrics_path: '/api/metrics'
    scrape_interval: 30s

  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
```

#### Grafana Dashboard JSON

Создание
`monitoring/grafana/dashboards/sfera-dashboard.json`: ```json { "dashboard": { "id": null, "title": "SFERA Application Dashboard", "tags": ["sfera"], "timezone": "browser", "panels": [ { "id": 1, "title": "HTTP Request Rate", "type": "graph", "targets": [ { "expr": "rate(sfera_http_requests_total[5m])", "legendFormat": "{{method}} {{route}}" } ], "yAxes": [ { "label": "Requests/sec", "min": 0 } ] }, { "id": 2, "title": "Response Time", "type": "graph", "targets": [ { "expr": "histogram_quantile(0.95, rate(sfera_http_request_duration_seconds_bucket[5m]))", "legendFormat": "95th percentile" }, { "expr": "histogram_quantile(0.50, rate(sfera_http_request_duration_seconds_bucket[5m]))", "legendFormat": "50th percentile" } ] }, { "id": 3, "title": "GraphQL Operations", "type": "graph", "targets": [ { "expr": "rate(sfera_graphql_operations_total[5m])", "legendFormat": "{{operation_name}} ({{operation_type}})" } ] }, { "id": 4, "title": "Database Connections", "type": "singlestat", "targets": [ { "expr": "sfera_database_connections_active", "legendFormat": "Active Connections" } ] }, { "id": 5, "title": "Error Rate", "type": "graph", "targets": [ { "expr": "rate(sfera_http_requests_total{status=~\"5..\"}[5m])", "legendFormat": "5xx Errors" } ] }, { "id": 6, "title": "Orders Created", "type": "graph", "targets": [ { "expr": "rate(sfera_orders_total[5m])", "legendFormat": "{{organization_type}}" } ] } ], "time": { "from": "now-1h", "to": "now" }, "refresh": "30s" } } ``` ### 2. 
Alerting правила #### Prometheus правила алертинга Создание `monitoring/rules/alerts.yml`: ```yaml groups: - name: sfera.alerts rules: # Высокий уровень ошибок - alert: HighErrorRate expr: rate(sfera_http_requests_total{status=~"5.."}[5m]) > 0.1 for: 2m labels: severity: critical annotations: summary: 'High error rate detected' description: 'Error rate is {{ $value }} requests/sec' # Медленные ответы - alert: HighResponseTime expr: histogram_quantile(0.95, rate(sfera_http_request_duration_seconds_bucket[5m])) > 1 for: 5m labels: severity: warning annotations: summary: 'High response time detected' description: '95th percentile response time is {{ $value }}s' # Падение приложения - alert: ApplicationDown expr: up{job="sfera-app"} == 0 for: 1m labels: severity: critical annotations: summary: 'Application is down' description: 'SFERA application is not responding' # Много активных подключений к БД - alert: HighDatabaseConnections expr: sfera_database_connections_active > 50 for: 5m labels: severity: warning annotations: summary: 'High number of database connections' description: '{{ $value }} active database connections' # Мало дискового пространства - alert: DiskSpaceLow expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10 for: 5m labels: severity: critical annotations: summary: 'Disk space is low' description: 'Only {{ $value }}% disk space remaining' # Высокое использование памяти - alert: HighMemoryUsage expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 90 for: 5m labels: severity: warning annotations: summary: 'High memory usage' description: 'Memory usage is {{ $value }}%' ``` #### Alertmanager конфигурация Создание `monitoring/alertmanager.yml`: ```yaml global: smtp_smarthost: 'localhost:587' smtp_from: 'alerts@sfera.com' route: group_by: ['alertname'] group_wait: 10s group_interval: 10s repeat_interval: 1h receiver: 'web.hook' receivers: - name: 'web.hook' slack_configs: - api_url: 'YOUR_SLACK_WEBHOOK_URL' 
channel: '#alerts' title: 'SFERA Alert' text: '{{ range .Alerts }}{{ .Annotations.summary }}: {{ .Annotations.description }}{{ end }}' email_configs: - to: 'admin@sfera.com' subject: 'SFERA Alert: {{ .GroupLabels.alertname }}' body: | {{ range .Alerts }} Alert: {{ .Annotations.summary }} Description: {{ .Annotations.description }} {{ end }} inhibit_rules: - source_match: severity: 'critical' target_match: severity: 'warning' equal: ['alertname', 'dev', 'instance'] ``` ## 🔧 Практические примеры использования ### 1. Логирование в компонентах ```typescript // src/components/orders/order-processing.tsx import { logger } from '@/lib/logger' import { ordersTotal } from '@/lib/metrics' export function OrderProcessor({ orderId }: { orderId: string }) { const processOrder = async () => { logger.info('Starting order processing', { orderId }) try { const result = await processOrderLogic(orderId) // Инкремент метрики ordersTotal.labels('SELLER', 'completed').inc() logger.info('Order processed successfully', { orderId, processingTime: result.processingTime }) return result } catch (error) { logger.error('Order processing failed', { orderId, error: error.message, stack: error.stack }) ordersTotal.labels('SELLER', 'failed').inc() throw error } } return ( ) } ``` ### 2. 
Мониторинг GraphQL запросов ```typescript // src/lib/graphql-monitoring.ts import { graphqlOperationsTotal, graphqlOperationDuration } from '@/lib/metrics' import { logger } from '@/lib/logger' export const graphqlMiddleware = { requestDidStart() { return { didResolveOperation(requestContext: any) { const { operationName, operation } = requestContext.request logger.info('GraphQL operation started', { operationName, operationType: operation.operation, }) }, willSendResponse(requestContext: any) { const { operationName, operation } = requestContext.request const { errors } = requestContext.response const success = !errors || errors.length === 0 graphqlOperationsTotal.labels(operationName || 'unknown', operation.operation, success.toString()).inc() if (errors) { logger.error('GraphQL operation failed', { operationName, errors: errors.map((e) => e.message), }) } }, } }, } ``` ### 3. Мониторинг бизнес-метрик ```typescript // src/hooks/useRealtime.ts import { usersOnline, messagesTotal } from '@/lib/metrics' import { logger } from '@/lib/logger' export const useRealtime = ({ onEvent }: { onEvent: (event: any) => void }) => { useEffect(() => { const socket = io() socket.on('connect', () => { usersOnline.inc() logger.info('User connected to realtime', { userId: socket.id }) }) socket.on('disconnect', () => { usersOnline.dec() logger.info('User disconnected from realtime', { userId: socket.id }) }) socket.on('message:new', (message) => { messagesTotal.labels(message.type).inc() logger.info('New message received', { messageId: message.id, conversationId: message.conversationId, }) onEvent({ type: 'message:new', data: message }) }) return () => socket.disconnect() }, [onEvent]) } ``` ## 🚀 Запуск мониторинга ### 1. 
Локальная среда

```bash
# Запуск стека мониторинга
docker-compose -f docker-compose.monitoring.yml up -d

# Доступ к сервисам
# Prometheus: http://localhost:9090
# Grafana: http://localhost:3001 (admin/admin123)
# Jaeger: http://localhost:16686
# Alertmanager: http://localhost:9093
```

### 2. Production среда

```bash
# Создание необходимых директорий
mkdir -p monitoring/{grafana/provisioning,grafana/dashboards,rules}
mkdir -p logs

# Установка прав доступа
chmod -R 755 monitoring/
# Логи не должны быть доступны на запись всем пользователям (777):
# каталог должен принадлежать пользователю, от имени которого запущено приложение
chmod -R 750 logs/

# Запуск с production конфигурацией
docker-compose -f docker-compose.yml -f docker-compose.monitoring.yml up -d
```

## 🎯 Заключение

Система мониторинга и логирования SFERA обеспечивает:

1. **Полную видимость**: Метрики, логи, трассировка
2. **Проактивный мониторинг**: Алерты и уведомления
3. **Производительность**: Мониторинг производительности в реальном времени
4. **Отладка**: Детализированное логирование и трассировка
5. **Бизнес-аналитика**: Метрики по заказам, пользователям, сообщениям

Эта система гарантирует надежность и высокую производительность платформы SFERA в production окружении.