Skip to content

Production Setup Guide

Production Setup Guide

This guide covers enterprise-grade deployment and configuration of Klira AI SDK for production environments, based on proven patterns from production deployments.

Quick Production Checklist

- Security: API keys via environment variables, secure endpoints
- Performance: Redis caching, optimized batch sizes, connection pooling
- Monitoring: Distributed tracing, performance metrics, error tracking
- Reliability: Retry logic, fallback mechanisms, health checks
- Scalability: Multi-instance deployment, load balancing

Core Production Configuration

1. Environment-Based Configuration

production_config.py
import os
from klira import Klira
from klira.config import KliraConfig
def create_production_config() -> KliraConfig:
    """Create a production-ready KliraConfig from environment variables.

    Returns:
        KliraConfig: Configuration with tracing enabled, content tracing
        disabled (privacy), policy enforcement on, and debug/verbose
        logging off.

    Raises:
        ValueError: If the required ``KLIRA_API_KEY`` environment variable
            is unset or empty.
    """
    api_key = os.getenv("KLIRA_API_KEY")
    if not api_key:
        # Fail fast at startup rather than failing later on the first API call.
        raise ValueError("KLIRA_API_KEY environment variable is required")

    return KliraConfig(
        # Core settings
        app_name=os.getenv("KLIRA_APP_NAME", "ProductionApp"),
        api_key=api_key,  # Required
        # OpenTelemetry settings (uses https://api.getklira.com by default)
        tracing_enabled=True,
        trace_content=False,  # Disable for privacy in production
        metrics_enabled=True,
        # Performance settings
        lazy_loading=True,
        framework_detection_cache_size=5000,  # Larger cache for production
        # Security settings
        policy_enforcement=True,
        policies_path=os.getenv("KLIRA_POLICIES_PATH", "/app/policies"),
        # Debugging (disabled in production)
        debug_mode=False,
        verbose=False,
        logging_enabled=False,  # Use structured logging instead
    )
# Initialize the Klira SDK once at process startup with the production config.
config = create_production_config()
klira = Klira.init(**config.to_dict())
> **Note**: To use a custom OpenTelemetry endpoint instead of Klira AI's default, add `opentelemetry_endpoint=os.getenv("KLIRA_OPENTELEMETRY_ENDPOINT", "https://your-otlp-endpoint.com")` to the KliraConfig.

2. Environment Variables

Set these environment variables in your production environment:

Terminal window
# --- Core configuration (API key is required) ---
export KLIRA_API_KEY="klira_prod_your_api_key_here"
export KLIRA_APP_NAME="YourApp-Production"

# --- OpenTelemetry (uses https://api.getklira.com by default) ---
export KLIRA_TRACING_ENABLED="true"
export KLIRA_TRACE_CONTENT="false" # Keep prompt/response content out of traces for privacy
export KLIRA_METRICS_ENABLED="true"

# --- Performance ---
export KLIRA_LAZY_LOADING="true"
export KLIRA_FRAMEWORK_CACHE_SIZE="5000"

# --- Policies ---
export KLIRA_POLICY_ENFORCEMENT="true"
export KLIRA_POLICIES_PATH="/app/policies"

# --- Security: disable debug/verbose output in production ---
export KLIRA_DEBUG="false"
export KLIRA_VERBOSE="false"
export KLIRA_LOGGING_ENABLED="false"

Note: To use a custom OTLP endpoint instead of Klira AI’s default, add:

Terminal window
# Optional: send telemetry to your own OTLP collector instead of the Klira default
export KLIRA_OPENTELEMETRY_ENDPOINT="https://your-otlp-endpoint.com"

High-Performance Caching Setup

Redis Configuration for Production

from klira.cache.redis_adapter import RedisAdapter, RedisConfig
from klira.cache.cache_hierarchy import CacheHierarchy, CacheHierarchyConfig
# Production Redis configuration
redis_config = RedisConfig(
    url=os.getenv("REDIS_URL", "redis://redis-cluster:6379"),
    max_connections=50,  # Higher for production load
    socket_timeout=2.0,  # Faster timeout
    socket_connect_timeout=3.0,
    retry_on_timeout=True,
    retry_attempts=3,
    enable_cluster=True,  # For high availability
    # NOTE(review): with enable_cluster=True these nodes presumably take
    # precedence over `url` — confirm against the RedisAdapter docs.
    cluster_nodes=[
        "redis-node1:6379",
        "redis-node2:6379",
        "redis-node3:6379"
    ],
    key_prefix="prod:klira:",  # Namespace keys per environment
    default_ttl=1800  # 30 minutes
)
# Multi-layer cache hierarchy
cache_config = CacheHierarchyConfig(
    # L1: Local in-process memory cache
    l1_enabled=True,
    l1_max_size=20000,  # Larger for production
    l1_ttl_seconds=300,
    # L2: Distributed Redis cache shared across instances
    l2_enabled=True,
    l2_redis_url=redis_config.url,
    l2_ttl_seconds=1800,
    l2_fallback_on_error=True,  # Critical for reliability: keep serving if Redis fails
    # Performance optimizations
    async_write_behind=True,
    promotion_threshold=2,  # Faster promotion
    # Monitoring
    metrics_enabled=True,
    health_check_interval=30  # More frequent checks
)
# Initialize cache hierarchy
cache_hierarchy = CacheHierarchy(cache_config)

Cache Performance Monitoring

def monitor_cache_performance():
    """Monitor cache performance in production.

    Pulls a stats snapshot from the module-level ``cache_hierarchy`` and,
    via the module-level ``logger``, warns when the overall hit rate or
    average latency crosses its alert threshold, then logs a summary line.
    """
    stats = cache_hierarchy.get_stats()

    # Key metrics to track (removed unused per-layer l1/l2 reads)
    overall_hit_rate = stats["overall_hit_rate"]
    avg_latency = stats["avg_latency_ms"]

    # Alert thresholds
    if overall_hit_rate < 80:
        logger.warning(f"Cache hit rate low: {overall_hit_rate}%")
    if avg_latency > 50:
        logger.warning(f"Cache latency high: {avg_latency}ms")

    # Log metrics
    logger.info(f"Cache performance: {overall_hit_rate}% hit rate, {avg_latency}ms avg latency")

Streaming Configuration for Production

from klira.streaming.types import StreamConfig
# Production streaming configuration
stream_config = StreamConfig(
    # Performance settings
    max_chunk_size=2048,  # Larger chunks for efficiency
    buffer_size=20,  # Larger buffer
    timeout_seconds=60.0,  # Longer timeout for production
    # Guardrail settings
    enable_guardrails=True,
    realtime_validation=True,
    validation_interval=2,  # Validate every 2 chunks for performance
    # Caching settings
    enable_caching=True,
    cache_partial_responses=True,
    cache_ttl_seconds=600,  # 10 minutes
    # Monitoring
    enable_metrics=True,
    metrics_interval=5.0,  # Every 5 seconds
    # Framework-specific optimizations
    framework_specific={
        "openai": {
            "stream_options": {"include_usage": True}
        },
        "anthropic": {
            "max_tokens": 4096
        }
    }
)

Analytics and Monitoring Setup

Production Analytics Configuration

from klira.analytics.processors import (
KliraHubProcessor,
FileAnalyticsProcessor,
PerformanceMonitoringProcessor,
MetricsAggregationProcessor
)
# Configure analytics processors for production
def setup_production_analytics():
    """Build the production analytics pipeline.

    Returns the processors in priority order: the Klira AI Hub sink
    (primary), a local file backup (failsafe), a performance monitor
    wired with an alert callback, and a windowed metrics aggregator.
    """
    # 1. Klira AI Hub processor (primary sink)
    hub_processor = KliraHubProcessor(
        api_key=os.getenv("KLIRA_API_KEY"),
        api_endpoint="https://api.getklira.com",
        batch_size=100,  # Larger batches for efficiency
        flush_interval_seconds=15,  # More frequent flushes
        max_retries=5,
        timeout_seconds=30,
        include_blocked_messages=True,
        include_policy_violations=True,
        include_performance_metrics=True,
    )

    # 2. Local file backup (failsafe if the Hub is unreachable)
    backup_processor = FileAnalyticsProcessor(
        output_dir="/var/log/klira",
        buffer_size=2000,
        auto_flush_interval=30,
        create_daily_files=True,  # Better for log rotation
    )

    # 3. Performance monitoring with alerts
    monitor = PerformanceMonitoringProcessor(
        latency_threshold_ms=3000,  # 3 second threshold
        error_rate_threshold=0.05,  # 5% error rate
        memory_threshold_mb=2000,  # 2GB threshold
        monitoring_window_seconds=300,
    )

    def alert_callback(alert_data):
        # Send to your monitoring system (PagerDuty, Slack, etc.)
        logger.critical(f"KLIRA ALERT: {alert_data}")
        # send_to_monitoring_system(alert_data)

    monitor.add_alert_callback(alert_callback)

    # 4. Metrics aggregation
    aggregator = MetricsAggregationProcessor(
        aggregation_window_seconds=300,  # 5 minute windows
        keep_raw_data=False,  # Save memory in production
        export_interval_seconds=60,
    )

    return [hub_processor, backup_processor, monitor, aggregator]

Security Best Practices

1. API Key Management

# ❌ Never do this in production — keys in source end up in version control
klira = Klira.init(
    app_name="MyApp",
    api_key="klira_hardcoded_key_bad"  # Never hardcode!
)

# ✅ Always use environment variables
klira = Klira.init(
    app_name="MyApp",
    api_key=os.getenv("KLIRA_API_KEY")  # Secure
)

# Even better: use a dedicated secrets manager
import boto3

def get_api_key():
    """Get API key from AWS Secrets Manager."""
    client = boto3.client('secretsmanager')
    response = client.get_secret_value(SecretId='klira/api-key')
    return response['SecretString']

klira = Klira.init(
    app_name="MyApp",
    api_key=get_api_key()
)

2. Content Privacy

# Production configuration for content privacy
config = KliraConfig(
    trace_content=False,  # Never trace prompt/response content in production
    debug_mode=False,  # Disable debug logging
    verbose=False,  # Disable verbose logging
    logging_enabled=False  # Use structured logging instead
)

3. Network Security

# Secure OTLP endpoint configuration
config = KliraConfig(
    opentelemetry_endpoint="https://secure-otlp.company.com",
    # Add custom headers for authentication (passed to Klira.init below)
)
# Initialize with secure headers sent on every OTLP export
klira = Klira.init(
    **config.to_dict(),
    headers={
        "Authorization": f"Bearer {os.getenv('OTLP_TOKEN')}",
        "X-Company-ID": os.getenv("COMPANY_ID")
    }
)

Multi-Instance Deployment

Load Balancer Configuration

# nginx.conf for Klira-enabled applications
upstream klira_app {
    server app1:8000;
    server app2:8000;
    server app3:8000;
    # Reuse upstream connections (app-level health is handled by the k8s probes)
    keepalive 32;
}

server {
    listen 80;
    server_name your-app.com;

    location / {
        proxy_pass http://klira_app;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # Important for streaming: forward chunks as they arrive, never cache
        proxy_buffering off;
        proxy_cache off;
        # Timeouts for long-running LLM requests
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;
    }
}

Container Deployment

# Dockerfile for production Klira AI app
FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

# Production configuration (the API key is injected at runtime, never baked in)
ENV KLIRA_TRACING_ENABLED=true
ENV KLIRA_TRACE_CONTENT=false
ENV KLIRA_DEBUG=false
ENV KLIRA_VERBOSE=false

# Health check: verify the SDK imports cleanly inside the container
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "from klira import Klira; print('healthy')"

CMD ["python", "app.py"]

Kubernetes Deployment

k8s-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: klira-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: klira-app
  template:
    metadata:
      labels:
        app: klira-app
    spec:
      containers:
        - name: app
          image: your-company/klira-app:latest
          env:
            # API key comes from a Kubernetes Secret, never from the manifest
            - name: KLIRA_API_KEY
              valueFrom:
                secretKeyRef:
                  name: klira-secrets
                  key: api-key
            - name: REDIS_URL
              value: "redis://redis-service:6379"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Restart the container if /health stops responding
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          # Only route traffic once /ready reports dependencies are up
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: klira-app-service
spec:
  selector:
    app: klira-app
  ports:
    - port: 80
      targetPort: 8000
  type: LoadBalancer

Performance Optimization

1. Framework Detection Caching

# Optimize framework detection for production
config = KliraConfig(
framework_detection_cache_size=10000, # Large cache
lazy_loading=True # Load adapters on demand
)

2. Batch Processing Configuration

import os

# Optimize batch processing
klira = Klira.init(
    app_name="ProductionApp",
    api_key=os.getenv("KLIRA_API_KEY"),
    disable_batch=False,  # Enable batching for efficiency
    headers={
        "User-Agent": "YourApp/1.0 (Klira-SDK)"
    }
)

3. Connection Pooling

# Redis connection pooling for high throughput
redis_config = RedisConfig(
max_connections=100, # High connection pool
socket_timeout=1.0, # Fast timeouts
retry_attempts=2 # Quick retries
)

Monitoring and Alerting

Health Check Endpoints

from flask import Flask, jsonify
from klira.cache.cache_hierarchy import CacheHierarchy
app = Flask(__name__)

@app.route('/health')
def health_check():
    """Liveness check: the process is up and minimally configured."""
    try:
        # Test Klira AI SDK health — an API key must be present.
        # NOTE(review): get_config() is presumably the app's config accessor — confirm.
        config = get_config()
        if not config.api_key:
            return jsonify({"status": "unhealthy", "reason": "No API key"}), 500
        return jsonify({"status": "healthy"}), 200
    except Exception as e:
        return jsonify({"status": "unhealthy", "error": str(e)}), 500
@app.route('/ready')
def readiness_check():
    """Readiness check including dependencies (cache hierarchy / Redis)."""
    try:
        # Check cache health first; 503 keeps the pod out of rotation
        cache_health = cache_hierarchy.health_check()
        if not cache_health["healthy"]:
            return jsonify({
                "status": "not_ready",
                "cache": cache_health
            }), 503
        # Check Redis connectivity and report stats alongside readiness
        redis_stats = cache_hierarchy.get_stats()
        return jsonify({
            "status": "ready",
            "cache": cache_health,
            "stats": redis_stats
        }), 200
    except Exception as e:
        return jsonify({"status": "not_ready", "error": str(e)}), 503

Production Metrics

def log_production_metrics():
    """Emit the key production health metrics to the application log."""
    # Cache performance
    hierarchy_stats = cache_hierarchy.get_stats()
    logger.info(f"Cache hit rate: {hierarchy_stats['overall_hit_rate']}%")

    # Latency / error-rate picture from the performance monitor
    monitor_stats = perf_monitor.get_performance_stats()
    logger.info(f"Avg latency: {monitor_stats['avg_latency_ms']}ms")
    logger.info(f"Error rate: {monitor_stats['error_rate']}%")

    # Analytics delivery health from the Hub processor
    hub_stats = klira_hub.get_stats()
    logger.info(f"Events sent: {hub_stats['events_sent']}")
    logger.info(f"Send errors: {hub_stats['send_errors']}")

Troubleshooting Production Issues

Common Production Issues

  1. High Latency
# Check cache performance — low hit rate is the usual latency culprit
cache_stats = cache_hierarchy.get_stats()
if cache_stats['overall_hit_rate'] < 70:
    logger.warning("Low cache hit rate - consider cache tuning")
  1. Memory Usage
# Monitor cache memory usage (assumes cache_stats from get_stats())
if cache_stats['l1_size'] > 15000:
    logger.warning("L1 cache approaching limit")
  1. Redis Connection Issues
# Check Redis health — L2 outage means L1-only (degraded) operation
redis_health = cache_hierarchy.health_check()
if not redis_health['redis_available']:
    logger.error("Redis unavailable - running in degraded mode")

Performance Tuning Checklist

  • Cache hit rate > 80%
  • Average latency < 100ms
  • Error rate < 1%
  • Memory usage stable
  • Redis connections healthy
  • Analytics data flowing

Next Steps

  1. Performance Tuning - Detailed optimization strategies
  2. Security Best Practices - Comprehensive security guide
  3. Environment Variables - Complete configuration reference
  4. Scaling Guide - Multi-region deployment patterns

Production Deployment Checklist

- Environment variables configured
- Redis cluster deployed
- Monitoring and alerting set up
- Health checks implemented
- Load balancer configured
- Container images built
- Kubernetes manifests deployed
- Performance baselines established

Ready for enterprise-scale LLM governance