Complete Guide to Prevent Flask 500 Errors in Production
Preventing your Python Flask app from returning 500 internal server errors in production requires careful planning, proper configuration, and robust error handling. This comprehensive guide walks you through setting up a production-ready Flask application that minimizes the risk of 500 errors.
Understanding Production vs Development #
The main difference between development and production environments is error visibility. Development shows detailed stack traces, while production hides them for security. This makes prevention crucial since debugging is more challenging in production.
Setting Up Robust Flask Configuration #
Environment-Based Configuration #
Create a configuration system that adapts to different environments:
import os
from datetime import timedelta
class Config:
    """Base settings shared by every environment-specific configuration."""

    # Session signing key; a fixed development key is used when the
    # SECRET_KEY environment variable is unset or empty.
    SECRET_KEY = os.getenv('SECRET_KEY') or 'dev-secret-key'
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    PERMANENT_SESSION_LIFETIME = timedelta(days=1)

    @staticmethod
    def init_app(app):
        """Hook for environment-specific setup; the base class does nothing."""
        return None
class DevelopmentConfig(Config):
    """Development settings: debug mode on, with a local SQLite fallback."""

    DEBUG = True
    # DEV_DATABASE_URL wins when set to a non-empty value; otherwise a
    # SQLite file in the working directory is used.
    SQLALCHEMY_DATABASE_URI = os.getenv('DEV_DATABASE_URL') or 'sqlite:///dev.db'
class ProductionConfig(Config):
    """Production settings: debug off, database URL read from DATABASE_URL."""

    DEBUG = False
    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL')

    @classmethod
    def init_app(cls, app):
        """Run the base setup, then forward WARNING-and-above logs to syslog.

        Args:
            app: The Flask application whose logger receives the handler.
        """
        Config.init_app(app)

        # Imported lazily so the logging machinery is only loaded when the
        # production configuration is actually initialised.
        import logging
        from logging.handlers import SysLogHandler

        # Send warnings and more severe records to the system log.
        handler = SysLogHandler()
        handler.setLevel(logging.WARNING)
        app.logger.addHandler(handler)
# Registry mapping an environment name to its configuration class;
# 'default' is an alias for the development configuration.
config = dict(
    development=DevelopmentConfig,
    production=ProductionConfig,
    default=DevelopmentConfig,
)
Application Factory Pattern #
Use the application factory pattern to ensure clean initialization:
🐍 Try it yourself
Implementing Comprehensive Error Handling #
Global Exception Handler #
from flask import Flask, jsonify, request
import logging
import traceback
from datetime import datetime
def setup_error_handlers(app):
    """Register JSON error handlers for 404, 500 and unexpected exceptions.

    Args:
        app: The Flask application the handlers are attached to.
    """
    # Werkzeug ships with Flask; imported here so the block is self-contained.
    from werkzeug.exceptions import HTTPException

    def _rollback_session():
        """Best-effort rollback so a failed transaction does not leak into later work."""
        try:
            from your_app import db
        except ImportError:
            # No database layer is importable; there is nothing to roll back.
            return
        try:
            db.session.rollback()
        except Exception:
            # Cleanup must never replace the error response with a new failure,
            # but the problem is still worth recording.
            app.logger.exception("Database rollback failed while handling an error")

    @app.errorhandler(404)
    def not_found(error):
        app.logger.warning(f"404 error: {request.url}")
        return jsonify({'error': 'Resource not found'}), 404

    @app.errorhandler(500)
    def internal_error(error):
        # Log detailed error information
        app.logger.error(f"500 Error at {request.url}: {str(error)}")
        app.logger.error(f"Traceback: {traceback.format_exc()}")
        _rollback_session()
        return jsonify({
            'error': 'Internal server error',
            'timestamp': datetime.utcnow().isoformat(),
            'request_id': request.headers.get('X-Request-ID', 'unknown')
        }), 500

    @app.errorhandler(Exception)
    def handle_unexpected_error(error):
        # A handler registered for Exception also receives HTTP errors that
        # have no more specific handler (405, 400, ...). Pass those through
        # unchanged instead of turning them into 500 responses.
        if isinstance(error, HTTPException):
            return error

        app.logger.critical(f"Unexpected error: {str(error)}")
        app.logger.critical(f"Traceback: {traceback.format_exc()}")
        # Unhandled exceptions are routed here rather than to the 500 handler,
        # so the session has to be reset on this path as well.
        _rollback_session()
        return jsonify({'error': 'Internal server error'}), 500
Request Context Error Handling #
import os
import time
import uuid

from flask import abort, g, request
@app.before_request
def before_request():
    """Tag the request with an ID and start time, then check the database URL.

    Aborts with HTTP 500 when DATABASE_URL is not set while the REQUIRE_DB
    config flag (True when absent) is enabled.
    """
    g.request_id = str(uuid.uuid4())
    g.start_time = time.time()

    # Nothing further to validate once a database URL is configured.
    if os.getenv('DATABASE_URL'):
        return

    if app.config.get('REQUIRE_DB', True):
        app.logger.error("DATABASE_URL not configured")
        abort(500)
@app.after_request
def after_request(response):
    """Log the request duration and echo the request ID on the response.

    Args:
        response: The outgoing Flask response.

    Returns:
        The same response with an ``X-Request-ID`` header added.
    """
    # An earlier before_request hook may have short-circuited the request
    # before these values were stored on ``g``; fall back instead of raising,
    # which would turn an otherwise valid response into a 500.
    request_id = g.get('request_id', 'unknown')
    started = g.get('start_time')

    if started is not None:
        duration = time.time() - started
        app.logger.info(f"Request {request_id} completed in {duration:.3f}s")

    response.headers['X-Request-ID'] = request_id
    return response
Database Connection Management #
Connection Pool Configuration #
from sqlalchemy import create_engine
from sqlalchemy.pool import QueuePool
import os
def setup_database(app):
    """Configure SQLAlchemy pooling for the app and verify the database is reachable.

    Args:
        app: Flask application whose config and logger are used.

    Raises:
        ValueError: If the DATABASE_URL environment variable is not set.
        Exception: Whatever the connectivity probe raised, re-raised after logging.
    """
    # Fail fast, before touching SQLAlchemy, when no database is configured.
    database_url = os.environ.get('DATABASE_URL')
    if not database_url:
        raise ValueError("DATABASE_URL environment variable is required")

    # SQLAlchemy 2.x no longer executes plain strings; SQL text must be
    # wrapped in text().
    from sqlalchemy import text

    # Single source of truth so the probe engine and the application's
    # engine options cannot drift apart.
    pool_options = {
        'pool_size': 10,
        'max_overflow': 20,
        'pool_pre_ping': True,  # Validate connections before use
        'pool_recycle': 3600,   # Recycle connections every hour
    }

    engine = create_engine(database_url, poolclass=QueuePool, **pool_options)
    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = dict(pool_options)

    # Test connection
    try:
        with engine.connect() as conn:
            conn.execute(text('SELECT 1'))
        app.logger.info("Database connection test successful")
    except Exception as e:
        app.logger.error(f"Database connection failed: {str(e)}")
        raise
    finally:
        # The engine only exists for this probe and is not returned, so
        # release its pooled connections instead of leaking them.
        engine.dispose()
Database Error Recovery #
from sqlalchemy.exc import DisconnectionError, OperationalError
from functools import wraps
def db_retry(max_retries=3):
    """Build a decorator that retries a database operation on connection errors.

    The wrapped callable is retried when it raises ``DisconnectionError`` or
    ``OperationalError``. The module-level ``db`` session is rolled back after
    every failure, and the last error is re-raised once all attempts are used.
    Logging goes through the module-level ``app``.

    Args:
        max_retries: Total number of attempts; must be at least 1.

    Returns:
        A decorator preserving the wrapped function's metadata.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Previously such a value
            skipped the call entirely and silently returned ``None``.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except (DisconnectionError, OperationalError) as e:
                    # Reset the session on every failure, including the last,
                    # so neither the retry nor the caller inherits a broken
                    # transaction.
                    db.session.rollback()
                    if attempt == max_retries:
                        app.logger.error(f"Database operation failed after {max_retries} attempts: {str(e)}")
                        raise
                    app.logger.warning(f"Database operation failed, retrying (attempt {attempt}): {str(e)}")
        return wrapper
    return decorator
@app.route('/users/<int:user_id>')
@db_retry(max_retries=3)
def get_user(user_id):
    """Return the user with the given ID as JSON (404 via ``get_or_404`` if absent)."""
    payload = User.query.get_or_404(user_id).to_dict()
    return jsonify(payload)
Production Logging Setup #
Structured Logging Configuration #
import logging
import json
from logging.handlers import RotatingFileHandler
from datetime import datetime
class JSONFormatter(logging.Formatter):
    """Render log records as single-line JSON objects."""

    def format(self, record):
        """Serialize ``record`` to a JSON string.

        Args:
            record: The ``logging.LogRecord`` to render.

        Returns:
            A JSON document with timestamp, level, message, module, function
            and line, plus ``request_id`` and ``exception`` when present.
        """
        from datetime import timezone

        # Use the moment the record was created rather than the moment it is
        # formatted, so every handler reports the same time for one event.
        # The offset is dropped to keep the existing naive-UTC ISO format.
        created = datetime.fromtimestamp(record.created, tz=timezone.utc)

        log_data = {
            'timestamp': created.replace(tzinfo=None).isoformat(),
            'level': record.levelname,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno,
        }
        if hasattr(record, 'request_id'):
            log_data['request_id'] = record.request_id
        if record.exc_info:
            log_data['exception'] = self.formatException(record.exc_info)

        # default=str is deliberate: a formatter must not raise because an
        # extra field (e.g. a UUID request id) is not JSON-serialisable.
        return json.dumps(log_data, default=str)
def setup_logging(app):
    """Attach JSON file and stream log handlers unless the app is in debug mode.

    Args:
        app: Flask application whose logger is configured.
    """
    if app.debug:
        return

    # The rotating file handler needs its target directory to exist.
    os.makedirs('logs', exist_ok=True)

    # Rotating JSON log file for INFO and above.
    rotating = RotatingFileHandler(
        'logs/app.log', maxBytes=10240000, backupCount=10
    )
    rotating.setFormatter(JSONFormatter())
    rotating.setLevel(logging.INFO)

    # Stream handler limited to ERROR and above.
    stream = logging.StreamHandler()
    stream.setLevel(logging.ERROR)
    stream.setFormatter(JSONFormatter())

    for handler in (rotating, stream):
        app.logger.addHandler(handler)
    app.logger.setLevel(logging.INFO)
Environment Validation #
Startup Health Checks #
🐍 Try it yourself
WSGI Server Configuration #
Gunicorn Production Setup #
Create a gunicorn.conf.py file:
import multiprocessing
import os
# --- Server socket ---
# Listen on all interfaces; the port comes from PORT (5000 when unset).
bind = "0.0.0.0:{}".format(os.environ.get('PORT', 5000))
backlog = 2048

# --- Worker processes ---
# Two workers per CPU core plus one.
workers = 2 * multiprocessing.cpu_count() + 1
worker_class = 'sync'
worker_connections = 1000
timeout = 30
keepalive = 2

# --- Worker recycling ---
# Restart a worker after this many requests, with jitter applied.
max_requests = 1000
max_requests_jitter = 50

# --- Logging ---
# '-' is used for both the access log and the error log targets.
accesslog = '-'
errorlog = '-'
loglevel = 'info'
access_log_format = (
    '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s '
    '"%(f)s" "%(a)s" %(D)s'
)

# --- Process naming ---
proc_name = 'flask_app'

# --- Server mechanics ---
preload_app = True
daemon = False
pidfile = '/tmp/gunicorn.pid'
# Process owner and group come from the environment, defaulting to www-data.
user = os.environ.get('USER', 'www-data')
group = os.environ.get('GROUP', 'www-data')
tmp_upload_dir = None

# --- SSL ---
# Both values are None when the corresponding variable is not set.
keyfile = os.environ.get('SSL_KEYFILE')
certfile = os.environ.get('SSL_CERTFILE')
Monitoring and Alerting #
Health Check Endpoint #
from flask import jsonify
import psutil
import time
@app.route('/health')
def health_check():
    """Report database connectivity and basic system metrics as JSON.

    Returns:
        A ``(response, status)`` pair: 200 when the database probe succeeds,
        503 otherwise.
    """
    # SQLAlchemy 2.x no longer executes plain strings; SQL text must be
    # wrapped in text().
    from sqlalchemy import text

    try:
        # Check database connection
        db.session.execute(text('SELECT 1'))
        db_status = 'healthy'
    except Exception as e:
        # Keep driver error text out of the response: it can reveal hosts or
        # credentials. The full detail goes to the log instead.
        db_status = 'unhealthy'
        app.logger.error(f"Health check database error: {str(e)}")

    # System metrics. interval=None returns immediately (usage since the
    # previous call) instead of blocking the worker for a full second on
    # every probe; the very first reading after start-up may be 0.0.
    cpu_percent = psutil.cpu_percent(interval=None)
    memory = psutil.virtual_memory()

    healthy = db_status == 'healthy'
    health_data = {
        'status': 'healthy' if healthy else 'unhealthy',
        'timestamp': time.time(),
        'database': db_status,
        'system': {
            'cpu_percent': cpu_percent,
            'memory_percent': memory.percent,
            'available_memory': memory.available
        }
    }
    status_code = 200 if healthy else 503
    return jsonify(health_data), status_code
Deployment Checklist #
Before deploying your Flask app to prevent 500 internal server errors:
- Environment variables are properly configured
- Database connections are tested and pooled
- Error handlers are implemented for all exception types
- Logging is configured with appropriate levels
- Health check endpoints are available
- WSGI server is properly configured
- Static files are served correctly
- SSL certificates are valid and configured
- Resource limits are set appropriately
- Monitoring and alerting are in place
Common Mistakes to Avoid #
Not Testing Production Configuration
# ❌ Only testing in development
if __name__ == '__main__':
app.run(debug=True)
# ✅ Test production configuration locally
if __name__ == '__main__':
    import os
    env = os.getenv('FLASK_ENV', 'development')
    debug = env == 'development'
    # The interactive debugger lets anyone who can reach it run arbitrary
    # code, so only listen on all interfaces when debug mode is off.
    host = '127.0.0.1' if debug else '0.0.0.0'
    app.run(debug=debug, host=host, port=5000)
Insufficient Error Logging
# ❌ Generic error handling
@app.errorhandler(500)
def error_500(e):
return "Internal Error", 500
# ✅ Detailed error logging
@app.errorhandler(500)
def error_500(e):
    """Log the error with the current traceback, then return a generic JSON 500."""
    details = f"500 Error: {str(e)}\n{traceback.format_exc()}"
    app.logger.error(details)
    body = {'error': 'Internal server error'}
    return jsonify(body), 500
Summary #
To prevent your Python Flask app from returning 500 internal server errors in production:
- Implement environment-specific configuration with proper validation
- Set up comprehensive error handling and structured logging
- Configure database connection pooling with retry mechanisms
- Use production-ready WSGI servers with appropriate settings
- Implement health checks and monitoring endpoints
- Test production configurations in staging environments
- Follow deployment checklists to ensure all components are properly configured
By following these practices, you'll significantly reduce the likelihood of 500 errors and be better equipped to diagnose issues when they do occur.