async def serve_http(
    app: FastAPI,
    sock: socket.socket | None,
    enable_ssl_refresh: bool = False,
    **uvicorn_kwargs: Any,
):
    """
    Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
    options. Supports http header limits via h11_max_incomplete_event_size and
    h11_max_header_count.

    Args:
        app: Application to serve. ``app.state.engine_client`` is read here
            and ``app.state.server`` is set to the created Uvicorn server.
        sock: Optional socket handed to ``server.serve``; when falsy the
            server is started with ``sockets=None``.
        enable_ssl_refresh: When true, an ``SSLCertRefresher`` is created from
            the loaded Uvicorn SSL settings.
        **uvicorn_kwargs: Forwarded to ``uvicorn.Config``. The two h11 limit
            keys are popped first and applied as config attributes; missing
            values fall back to the module-level defaults.

    Returns:
        An un-awaited shutdown awaitable: a no-op coroutine when the server
        task finished on its own, or the result of ``server.shutdown()`` when
        the server task was cancelled.
    """
    logger.info("Available routes are:")

    # Routes that declare HTTP methods.
    for route in app.routes:
        methods = getattr(route, "methods", None)
        path = getattr(route, "path", None)
        if methods is None or path is None:
            continue
        logger.info("Route: %s, Methods: %s", path, ", ".join(methods))

    # Routes that expose an endpoint but no methods.
    for route in app.routes:
        endpoint = getattr(route, "endpoint", None)
        methods = getattr(route, "methods", None)
        path = getattr(route, "path", None)
        if endpoint is None or path is None or methods is not None:
            continue
        # Not every callable has __name__ (e.g. functools.partial or callable
        # instances); fall back to repr so route logging cannot abort startup.
        endpoint_name = getattr(endpoint, "__name__", repr(endpoint))
        logger.info("Route: %s, Endpoint: %s", path, endpoint_name)

    # Extract header limit options if present, so they are not forwarded to
    # uvicorn.Config as keyword arguments.
    h11_max_incomplete_event_size = uvicorn_kwargs.pop(
        "h11_max_incomplete_event_size", None
    )
    h11_max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)

    # Set safe defaults if not provided
    if h11_max_incomplete_event_size is None:
        h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
    if h11_max_header_count is None:
        h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT

    config = uvicorn.Config(app, **uvicorn_kwargs)
    # Set header limits
    config.h11_max_incomplete_event_size = h11_max_incomplete_event_size
    config.h11_max_header_count = h11_max_header_count
    config.load()

    server = uvicorn.Server(config)
    app.state.server = server

    loop = asyncio.get_running_loop()
    watchdog_task = loop.create_task(watchdog_loop(server, app.state.engine_client))
    server_task = loop.create_task(server.serve(sockets=[sock] if sock else None))

    # config.ssl and the certificate paths are read after config.load().
    ssl_cert_refresher = (
        None
        if not enable_ssl_refresh
        else SSLCertRefresher(
            ssl_context=config.ssl,
            key_path=config.ssl_keyfile,
            cert_path=config.ssl_certfile,
            ca_path=config.ssl_ca_certs,
        )
    )

    shutdown_event = asyncio.Event()

    def signal_handler() -> None:
        # Only flag the request; the actual teardown runs in handle_shutdown.
        shutdown_event.set()

    async def dummy_shutdown() -> None:
        pass

    loop.add_signal_handler(signal.SIGINT, signal_handler)
    loop.add_signal_handler(signal.SIGTERM, signal_handler)

    async def handle_shutdown() -> None:
        await shutdown_event.wait()
        engine_client = app.state.engine_client
        try:
            timeout = engine_client.vllm_config.shutdown_timeout
            # engine_client.shutdown is called synchronously, so run it in the
            # default executor to keep the event loop responsive.
            await loop.run_in_executor(
                None, partial(engine_client.shutdown, timeout=timeout)
            )
        except Exception:
            # A failed engine shutdown must not leave the HTTP server running
            # with no remaining way to stop it via signal.
            logger.exception("Engine shutdown failed; stopping HTTP server anyway.")
        finally:
            server.should_exit = True
            server_task.cancel()
            watchdog_task.cancel()
            if ssl_cert_refresher:
                ssl_cert_refresher.stop()

    # Keep a reference so the task is not garbage-collected while pending.
    shutdown_task = loop.create_task(handle_shutdown())

    try:
        await server_task
        return dummy_shutdown()
    except asyncio.CancelledError:
        # "port" is absent when the caller did not pass one (for example when
        # only a socket is supplied); skip the lookup rather than raising
        # KeyError in the middle of shutdown.
        port = uvicorn_kwargs.get("port")
        process = find_process_using_port(port) if port is not None else None
        if process is not None:
            logger.warning(
                "port %s is used by process %s launched with command:\n%s",
                port,
                process,
                " ".join(process.cmdline()),
            )
        logger.info("Shutting down FastAPI HTTP server.")
        return server.shutdown()
    finally:
        shutdown_task.cancel()
        watchdog_task.cancel()