diff --git a/mkdocs/docs/guides/server-deployment.md b/mkdocs/docs/guides/server-deployment.md index aff2b06e3..f368b62d5 100644 --- a/mkdocs/docs/guides/server-deployment.md +++ b/mkdocs/docs/guides/server-deployment.md @@ -5,7 +5,10 @@ description: Deploying the dstack server The `dstack` server can run on your laptop or any environment with access to the cloud and on-prem clusters you plan to use. -The minimum hardware requirements for running the server are 1 CPU and 1GB of RAM. +??? info "Hardware requirements" + The minimum hardware requirements for running the server are 1 CPU and 1GB of RAM. The recommended RAM is + "8MB × number of active instances". For example, a server with 1000 active instances should have 8GB of RAM. + You can set the `DSTACK_SERVER_SSH_POOL_DISABLED` env var to minimize RAM usage at the expense of slower processing. === "pip" > The server can be set up via `pip` on Linux, macOS, and Windows (via WSL 2). It requires Git and OpenSSH. @@ -43,7 +46,7 @@ The minimum hardware requirements for running the server are 1 CPU and 1GB of RA === "Docker" - > To deploy the server most reliably, it's recommended to use `dstackai/dstack` Docker image. + > For production deployments, it's recommended to use the `dstackai/dstack` Docker image.
diff --git a/mkdocs/docs/reference/env.md b/mkdocs/docs/reference/env.md index 925e4ae9d..86a7dd051 100644 --- a/mkdocs/docs/reference/env.md +++ b/mkdocs/docs/reference/env.md @@ -146,6 +146,8 @@ For more details on the options below, refer to the [server deployment](../guide - `DSTACK_SERVER_SSHPROXY_ENFORCED`{ #DSTACK_SERVER_SSHPROXY_ENFORCED } – When set to any value, restricts all SSH connections to go through the SSH proxy. - `DSTACK_SERVER_JOB_NETWORK_MODE`{ #DSTACK_SERVER_JOB_NETWORK_MODE } – Controls the network mode assigned to jobs. Accepts an integer value: `1` forces bridge networking for single-node jobs while distributed tasks still use host networking; `2` uses host networking whenever the job occupies a full instance (default); `3` forces bridge networking for all jobs including distributed tasks. - `DSTACK_SERVER_SSH_CONNECT_TIMEOUT`{ #DSTACK_SERVER_SSH_CONNECT_TIMEOUT } – The SSH `ConnectTimeout` for server-instance connections, in seconds. Defaults to `3`. Increase if there are high-latency links between the server and instances. +- `DSTACK_SERVER_SSH_POOL_DISABLED`{ #DSTACK_SERVER_SSH_POOL_DISABLED } – When set to any value, disables the reuse of server SSH connections to instances. This significantly decreases server RAM usage, but +slows down processing and may cause CPU spikes due to frequent SSH-connection establishment. ??? 
info "Internal environment variables" The following environment variables are intended for development purposes: diff --git a/src/dstack/_internal/server/app.py b/src/dstack/_internal/server/app.py index 8b9e04477..0f02806aa 100644 --- a/src/dstack/_internal/server/app.py +++ b/src/dstack/_internal/server/app.py @@ -171,6 +171,8 @@ async def lifespan(app: FastAPI): init_default_storage() if settings.SERVER_SSH_POOL_ENABLED: await run_async(instance_connection_pool.startup_cleanup) + else: + logger.info("Server SSH pool is disabled") scheduler = None pipeline_manager = None if settings.SERVER_BACKGROUND_PROCESSING_ENABLED: diff --git a/src/dstack/_internal/server/settings.py b/src/dstack/_internal/server/settings.py index 2845687e2..27a97a6db 100644 --- a/src/dstack/_internal/server/settings.py +++ b/src/dstack/_internal/server/settings.py @@ -149,9 +149,8 @@ os.getenv("DSTACK_SERVER_LOG_QUOTA_PER_JOB_HOUR", 50 * 1024 * 1024) # 50 MB ) -# TODO: Replace DSTACK_SERVER_SSH_POOL_ENABLED with DSTACK_SERVER_SSH_POOL_DISABLED -# as pool becomes opt-out and document the env var. -SERVER_SSH_POOL_ENABLED = os.getenv("DSTACK_SERVER_SSH_POOL_ENABLED") is not None +SERVER_SSH_POOL_DISABLED = os.getenv("DSTACK_SERVER_SSH_POOL_DISABLED") is not None +SERVER_SSH_POOL_ENABLED = not SERVER_SSH_POOL_DISABLED SERVER_SSH_CONNECT_TIMEOUT = int(os.getenv("DSTACK_SERVER_SSH_CONNECT_TIMEOUT", 3)) # Development settings