diff --git a/README.md b/README.md index 6ef7e44..fb4f94e 100644 --- a/README.md +++ b/README.md @@ -1,414 +1,35 @@ # nix-nodeiwest -NixOS flake for NodeiWest VPS provisioning and ongoing deployment. +Employee and workstation flake for NodeiWest. -This repo currently provisions NixOS hosts with: +Server deployment moved to the sibling repo `../nix-deployment`. -- the `nodeiwest` employee helper CLI for safe provisioning -- shared base config in `modules/nixos/common.nix` -- Tailscale bootstrap via OpenBao AppRole in `modules/nixos/tailscale-init.nix` -- Home Manager profile in `modules/home.nix` -- disk partitioning via `disko` -- deployment via `colmena` +This repo now owns: -## Current Model +- shared Home Manager modules +- employee shell packages and environment variables +- workstation-side access to the `nodeiwest` helper by consuming it from `../nix-deployment` -- Employees should use `nodeiwest` as the supported provisioning interface -- New machines are installed with `nixos-anywhere` -- Ongoing changes are deployed with `colmena` -- Hosts authenticate to OpenBao as clients -- Tailscale auth keys are fetched from OpenBao namespace `it`, KV mount `kv`, path `tailscale`, field `CLIENT_SECRET` -- Public SSH must work independently of Tailscale for first access and recovery +This repo no longer owns: -## Repo Layout +- NixOS server host definitions +- Colmena deployment state +- Tailscale server bootstrap +- k3s bootstrap +- OpenBao server or Kubernetes infra manifests -```text -flake.nix -hosts/ - vps[X]/ - configuration.nix - disko.nix - hardware-configuration.nix -modules/ - home.nix - helpers/ - home.nix - nixos/ - common.nix - tailscale-init.nix -pkgs/ - helpers/ - cli.py - templates/ -``` +## Helper Consumption -## Recommended Workflow - -The supported employee path is the `nodeiwest` CLI. - -It is exported from the root flake as `.#nodeiwest-helper` and installed by the shared Home Manager profile. You can also run it ad hoc with: +The helper package is re-exported from the deployment repo: ```bash nix run .#nodeiwest-helper -- --help ``` -Recommended sequence for a new VPS: - -### 1. Probe The Live Host - -```bash -nodeiwest host probe --ip -``` - -This validates SSH reachability and derives the boot mode, root device, primary disk candidate, and swap facts from the live machine. - -### 2. Scaffold The Host Files - -Dry-run first: - -```bash -nodeiwest host init --name --ip -``` - -Write after reviewing the plan: - -```bash -nodeiwest host init --name --ip --apply -``` - -This command: - -- probes the host unless you override disk or boot mode -- creates or updates `hosts//configuration.nix` -- creates or updates `hosts//disko.nix` -- creates `hosts//hardware-configuration.nix` as a placeholder if needed -- prints the exact `flake.nix` snippets still required for `nixosConfigurations` and `colmena` - -### 3. Create The OpenBao Bootstrap Material - -Dry-run first: - -```bash -nodeiwest openbao init-host --name -``` - -Apply after reviewing the policy and AppRole plan: - -```bash -nodeiwest openbao init-host --name --apply -``` - -This verifies your existing `bao` login, creates the host policy and AppRole, and writes: - -- `bootstrap/var/lib/nodeiwest/openbao-approle-role-id` -- `bootstrap/var/lib/nodeiwest/openbao-approle-secret-id` - -### 4. Plan Or Run The Install - -```bash -nodeiwest install plan --name -nodeiwest install run --name --apply -``` - -`install plan` validates the generated host files and bootstrap files, then prints the exact `nixos-anywhere` command. `install run` re-validates, asks for confirmation, and executes that command. - -### 5. Verify First Boot And Colmena Readiness - -```bash -nodeiwest verify host --name --ip -nodeiwest colmena plan --name -``` - -`verify host` summarizes the first-boot OpenBao and Tailscale services over SSH. `colmena plan` confirms the deploy target or prints the exact missing host stanza. - -## Manual Flow (Fallback / Advanced) - -This is the underlying sequence that `nodeiwest` automates. Keep it as the fallback path for unsupported host layouts or when you intentionally want to run the raw commands yourself. - -### 1. Prepare The Host Entry - -Create a new directory under `hosts//` with: - -- `configuration.nix` -- `disko.nix` -- `hardware-configuration.nix` - -`configuration.nix` should import both `disko.nix` and `hardware-configuration.nix`. - -Example: +If you import `modules/helpers/home.nix` directly, pass the deployment flake as a special arg: ```nix -{ lib, ... }: -{ - imports = [ - ./disko.nix - ./hardware-configuration.nix - ]; - - networking.hostName = "vps1"; - networking.useDHCP = lib.mkDefault true; - - time.timeZone = "UTC"; - - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.grub = { - enable = true; - efiSupport = true; - device = "nodev"; - }; - - nodeiwest.ssh.userCAPublicKeys = [ - "ssh-ed25519 AAAA... openbao-user-ca" - ]; - - nodeiwest.tailscale.openbao.enable = true; - - system.stateVersion = "25.05"; -} +extraSpecialArgs = { + deployment = inputs.deployment; +}; ``` - -### 2. Add The Host To `flake.nix` - -Add the host to: - -- `nixosConfigurations` -- `colmena` - -For `colmena`, set: - -- `deployment.targetHost` -- `deployment.targetUser = "root"` -- tags as needed - -## Discover Disk And Boot Facts - -Before writing `disko.nix`, inspect the current VPS over SSH: - -```bash -ssh root@ 'lsblk -o NAME,SIZE,TYPE,MODEL,FSTYPE,PTTYPE,MOUNTPOINTS' -ssh root@ 'test -d /sys/firmware/efi && echo UEFI || echo BIOS' -ssh root@ 'findmnt -no SOURCE /' -ssh root@ 'cat /proc/swaps' -``` - -Use that output to decide: - -- disk device name: `/dev/sda`, `/dev/vda`, `/dev/nvme0n1`, etc. -- boot mode: UEFI or BIOS -- partition layout you want `disko` to create - -`hosts/vps1/disko.nix` currently assumes: - -- GPT -- `/dev/sda` -- UEFI -- ext4 root -- swap partition - -Do not install blindly if those assumptions are wrong. - -## Generate `hardware-configuration.nix` - -`hardware-configuration.nix` is generated during install with `nixos-anywhere`. - -The repo path is passed directly to the install command: - -```bash ---generate-hardware-config nixos-generate-config ./hosts//hardware-configuration.nix -``` - -That generated file should remain tracked in Git after install. - -## OpenBao Setup For Tailscale - -Each host gets its own AppRole. - -The host uses: - -- OpenBao address: `https://secrets.api.nodeiwest.se` -- namespace: `it` -- KV mount: `kv` -- auth mount: `auth/approle` -- secret path: `tailscale` -- field: `CLIENT_SECRET` - -The host stores: - -- `/var/lib/nodeiwest/openbao-approle-role-id` -- `/var/lib/nodeiwest/openbao-approle-secret-id` - -The rendered Tailscale auth key lives at: - -- `/run/nodeiwest/tailscale-auth-key` - -### Create A Policy - -Create a minimal read-only policy for the Tailscale secret. - -If the secret is accessible as: - -```bash -BAO_NAMESPACE=it bao kv get -mount=kv tailscale -``` - -then create the matching read policy for that mount. - -Example shape for the KV v2 mount `kv`: - -```hcl -path "kv/data/tailscale" { - capabilities = ["read"] -} -``` - -Write it from your machine: - -```bash -export BAO_ADDR=https://secrets.api.nodeiwest.se -export BAO_NAMESPACE=it - -bao policy write tailscale-vps1 ./tailscale-vps1-policy.hcl -``` - -Adjust the path to match your actual OpenBao KV mount. - -### Create The AppRole - -Create one AppRole per host. - -Example for `vps1`: - -```bash -bao write auth/approle/role/tailscale-vps1 \ - token_policies=tailscale-vps1 \ - token_ttl=1h \ - token_max_ttl=24h \ - token_num_uses=0 \ - secret_id_num_uses=0 -``` - -### Generate Bootstrap Credentials - -Create a temporary bootstrap directory on your machine: - -```bash -mkdir -p bootstrap/var/lib/nodeiwest -``` - -Write the AppRole credentials into it: - -```bash -bao read -field=role_id auth/approle/role/tailscale-vps1/role-id \ - > bootstrap/var/lib/nodeiwest/openbao-approle-role-id - -bao write -f -field=secret_id auth/approle/role/tailscale-vps1/secret-id \ - > bootstrap/var/lib/nodeiwest/openbao-approle-secret-id - -chmod 0400 bootstrap/var/lib/nodeiwest/openbao-approle-role-id -chmod 0400 bootstrap/var/lib/nodeiwest/openbao-approle-secret-id -``` - -These files are install-time bootstrap material. They are not stored in Git. - -## Install With `nixos-anywhere` - -Install from your machine: - -```bash -nix run github:nix-community/nixos-anywhere -- \ - --extra-files ./bootstrap \ - --copy-host-keys \ - --generate-hardware-config nixos-generate-config ./hosts/vps1/hardware-configuration.nix \ - --flake .#vps1 \ - root@100.101.167.118 -``` - -What this does: - -- wipes the target disk according to `hosts/vps1/disko.nix` -- installs NixOS with `.#vps1` -- copies the AppRole bootstrap files into `/var/lib/nodeiwest` -- generates `hosts/vps1/hardware-configuration.nix` - -Important: - -- this destroys the existing OS on the target -- take provider snapshots and application backups first -- the target SSH host keys may change after install - -## First Boot Behavior - -On first boot: - -1. `vault-agent-tailscale.service` starts using `pkgs.openbao` -2. it authenticates to OpenBao with AppRole -3. it renders `CLIENT_SECRET` from namespace `it`, KV mount `kv`, path `tailscale` to `/run/nodeiwest/tailscale-auth-key` -4. `nodeiwest-tailscale-authkey-ready.service` waits until that file exists -5. `tailscaled-autoconnect.service` uses that file and runs `tailscale up --ssh` - -Public SSH remains the recovery path if OpenBao or Tailscale bootstrap fails. - -## Verify After Install - -SSH to the host over the public IP first. - -Check: - -```bash -systemctl status vault-agent-tailscale -systemctl status nodeiwest-tailscale-authkey-ready -systemctl status tailscaled-autoconnect - -ls -l /var/lib/nodeiwest -ls -l /run/nodeiwest/tailscale-auth-key - -tailscale status -``` - -If Tailscale bootstrap fails, inspect logs: - -```bash -journalctl -u vault-agent-tailscale -b -journalctl -u nodeiwest-tailscale-authkey-ready -b -journalctl -u tailscaled-autoconnect -b -``` - -Typical causes: - -- wrong AppRole credentials -- wrong OpenBao policy -- wrong secret path -- wrong KV mount path -- `CLIENT_SECRET` field missing in the secret - -## Deploy Changes After Install - -Once the host is installed and reachable, use Colmena: - -```bash -nix run .#colmena -- apply --on vps1 -``` - -## Rotating The AppRole SecretID - -To rotate the machine credential: - -1. generate a new `secret_id` from your machine -2. replace `/var/lib/nodeiwest/openbao-approle-secret-id` on the host -3. restart the agent - -Example: - -```bash -bao write -f -field=secret_id auth/approle/role/tailscale-vps1/secret-id > new-secret-id -scp new-secret-id root@100.101.167.118:/var/lib/nodeiwest/openbao-approle-secret-id -ssh root@100.101.167.118 'chmod 0400 /var/lib/nodeiwest/openbao-approle-secret-id && systemctl restart vault-agent-tailscale tailscaled-autoconnect' -rm -f new-secret-id -``` - -## Recovery Notes - -- Tailscale is additive. It should not be your only access path. -- Public SSH on port `22` must remain available for first access and recovery. -- OpenBao SSH CA auth is separate from Tailscale bootstrap. -- If a machine fails to join the tailnet, recover via public SSH or provider console. diff --git a/flake.lock b/flake.lock index b25545f..258b093 100644 --- a/flake.lock +++ b/flake.lock @@ -22,9 +22,29 @@ "type": "github" } }, + "deployment": { + "inputs": { + "colmena": "colmena", + "disko": "disko", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 0, + "narHash": "sha256-BW+YgPQb2t5davyiQ6gb4sIbBdIL72jCaLGiehkGT9U=", + "type": "git", + "url": "file:../nix-deployment" + }, + "original": { + "type": "git", + "url": "file:../nix-deployment" + } + }, "disko": { "inputs": { "nixpkgs": [ + "deployment", "nixpkgs" ] }, @@ -96,6 +116,7 @@ "nix-github-actions": { "inputs": { "nixpkgs": [ + "deployment", "colmena", "nixpkgs" ] @@ -148,8 +169,7 @@ }, "root": { "inputs": { - "colmena": "colmena", - "disko": "disko", + "deployment": "deployment", "home-manager": "home-manager", "nixpkgs": "nixpkgs_2" } diff --git a/flake.nix b/flake.nix index 068e9db..f0430a4 100644 --- a/flake.nix +++ b/flake.nix @@ -1,26 +1,23 @@ { - description = "NodeiWest company flake"; + description = "NodeiWest employee and workstation flake"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; - colmena.url = "github:zhaofengli/colmena"; - disko = { - url = "github:nix-community/disko"; - inputs.nixpkgs.follows = "nixpkgs"; - }; home-manager = { url = "github:nix-community/home-manager"; inputs.nixpkgs.follows = "nixpkgs"; }; + deployment = { + url = "git+file:../nix-deployment"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; outputs = inputs@{ self, nixpkgs, - colmena, - disko, - home-manager, + deployment, ... }: let @@ -31,111 +28,22 @@ "x86_64-linux" ]; forAllSystems = lib.genAttrs supportedSystems; - - mkPkgs = - system: - import nixpkgs { - inherit system; - }; - - mkHost = - name: - nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - specialArgs = { - inherit inputs self; - }; - modules = [ - disko.nixosModules.disko - home-manager.nixosModules.home-manager - self.nixosModules.common - ./hosts/${name}/configuration.nix - ]; - }; in { homeManagerModules.default = ./modules/home.nix; homeManagerModules.helpers = ./modules/helpers/home.nix; - nixosModules.common = ./modules/nixos/common.nix; - packages = forAllSystems ( - system: - let - pkgs = mkPkgs system; - nodeiwestHelper = pkgs.callPackage ./pkgs/helpers { }; - in - { - colmena = colmena.packages.${system}.colmena; - nodeiwest-helper = nodeiwestHelper; - default = colmena.packages.${system}.colmena; - } - ); + packages = forAllSystems (system: { + nodeiwest-helper = deployment.packages.${system}.nodeiwest-helper; + default = self.packages.${system}.nodeiwest-helper; + }); apps = forAllSystems (system: { - colmena = { - type = "app"; - program = "${colmena.packages.${system}.colmena}/bin/colmena"; - }; nodeiwest-helper = { type = "app"; program = "${self.packages.${system}.nodeiwest-helper}/bin/nodeiwest"; }; - default = self.apps.${system}.colmena; + default = self.apps.${system}.nodeiwest-helper; }); - - nixosConfigurations = { - vps1 = mkHost "vps1"; - lab = mkHost "lab"; - }; - - colmena = { - meta = { - nixpkgs = mkPkgs "x86_64-linux"; - specialArgs = { - inherit inputs self; - }; - }; - - defaults = - { name, ... }: - { - networking.hostName = name; - imports = [ - disko.nixosModules.disko - home-manager.nixosModules.home-manager - self.nixosModules.common - ]; - }; - - vps1 = { - deployment = { - targetHost = "100.101.167.118"; - targetUser = "root"; - tags = [ - "company" - "edge" - ]; - }; - - imports = [ ./hosts/vps1/configuration.nix ]; - }; - - lab = { - deployment = { - targetHost = "100.101.167.118"; - targetUser = "root"; - tags = [ - "company" - "manager" - ]; - - }; - - imports = [ ./hosts/lab/configuration.nix ]; - - }; - }; - - colmenaHive = colmena.lib.makeHive self.outputs.colmena; }; } diff --git a/hosts/lab/configuration.nix b/hosts/lab/configuration.nix deleted file mode 100644 index f0de599..0000000 --- a/hosts/lab/configuration.nix +++ /dev/null @@ -1,30 +0,0 @@ -{ lib, ... }: -{ - # Generated by nodeiwest host init. - imports = [ - ./disko.nix - ./hardware-configuration.nix - ]; - - networking.hostName = "lab"; - networking.useDHCP = lib.mkDefault true; - - time.timeZone = "UTC"; - - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.grub = { - enable = true; - efiSupport = true; - device = "nodev"; - }; - - nodeiwest.ssh.userCAPublicKeys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE6c2oMkM7lLg9qWHVgbrFaFBDrrFyynFlPviiydQdFi openbao-user-ca" - ]; - - nodeiwest.tailscale.openbao = { - enable = true; - }; - - system.stateVersion = "25.05"; -} diff --git a/hosts/lab/disko.nix b/hosts/lab/disko.nix deleted file mode 100644 index d703537..0000000 --- a/hosts/lab/disko.nix +++ /dev/null @@ -1,47 +0,0 @@ -{ - lib, - ... -}: -{ - # Generated by nodeiwest host init. - # Replace the disk only if the provider exposes a different primary device. - disko.devices = { - disk.main = { - type = "disk"; - device = lib.mkDefault "/dev/sda"; - content = { - type = "gpt"; - partitions = { - ESP = { - priority = 1; - name = "ESP"; - start = "1MiB"; - end = "512MiB"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - swap = { - size = "4G"; - content = { - type = "swap"; - resumeDevice = true; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; -} diff --git a/hosts/lab/hardware-configuration.nix b/hosts/lab/hardware-configuration.nix deleted file mode 100644 index 3f6bc7b..0000000 --- a/hosts/lab/hardware-configuration.nix +++ /dev/null @@ -1,5 +0,0 @@ -{ ... }: -{ - # Placeholder generated by nodeiwest host init. - # nixos-anywhere will replace this with the generated hardware config. -} diff --git a/hosts/vps1/configuration.nix b/hosts/vps1/configuration.nix deleted file mode 100644 index d9536c9..0000000 --- a/hosts/vps1/configuration.nix +++ /dev/null @@ -1,28 +0,0 @@ -{ lib, ... }: -{ - imports = [ - ./disko.nix - ./hardware-configuration.nix - ]; - - networking.hostName = "vps1"; - networking.useDHCP = lib.mkDefault true; - - time.timeZone = "UTC"; - - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.grub = { - enable = true; - efiSupport = true; - device = "nodev"; - }; - - nodeiwest.ssh.userCAPublicKeys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE6c2oMkM7lLg9qWHVgbrFaFBDrrFyynFlPviiydQdFi openbao-user-ca" - ]; - nodeiwest.tailscale.openbao = { - enable = true; - }; - - system.stateVersion = "25.05"; -} diff --git a/hosts/vps1/disko.nix b/hosts/vps1/disko.nix deleted file mode 100644 index eee0690..0000000 --- a/hosts/vps1/disko.nix +++ /dev/null @@ -1,46 +0,0 @@ -{ - lib, - ... -}: -{ - # Replace /dev/sda if the VPS exposes a different disk, e.g. /dev/vda or /dev/nvme0n1. - disko.devices = { - disk.main = { - type = "disk"; - device = lib.mkDefault "/dev/sda"; - content = { - type = "gpt"; - partitions = { - ESP = { - priority = 1; - name = "ESP"; - start = "1MiB"; - end = "512MiB"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - swap = { - size = "4G"; - content = { - type = "swap"; - resumeDevice = true; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; -} diff --git a/hosts/vps1/hardware-configuration.nix b/hosts/vps1/hardware-configuration.nix deleted file mode 100644 index 746bccf..0000000 --- a/hosts/vps1/hardware-configuration.nix +++ /dev/null @@ -1,10 +0,0 @@ -{ lib, ... }: -{ - # Replace this file with the generated hardware config from the target host. - fileSystems."/" = lib.mkDefault { - device = "/dev/disk/by-label/nixos"; - fsType = "ext4"; - }; - - swapDevices = [ ]; -} diff --git a/modules/helpers/home.nix b/modules/helpers/home.nix index d946109..2e926b8 100644 --- a/modules/helpers/home.nix +++ b/modules/helpers/home.nix @@ -1,10 +1,7 @@ -{ pkgs, ... }: -let - nodeiwestHelper = pkgs.callPackage ../../pkgs/helpers { }; -in +{ pkgs, deployment, ... }: { home.packages = [ pkgs.python3 - nodeiwestHelper + deployment.packages.${pkgs.system}.nodeiwest-helper ]; } diff --git a/modules/home.nix b/modules/home.nix index 54a4078..d388605 100644 --- a/modules/home.nix +++ b/modules/home.nix @@ -14,5 +14,6 @@ openbao colmena # etc. + sops ]; } diff --git a/modules/nixos/common.nix b/modules/nixos/common.nix deleted file mode 100644 index f44af2f..0000000 --- a/modules/nixos/common.nix +++ /dev/null @@ -1,101 +0,0 @@ -{ - config, - lib, - self, - ... -}: -let - cfg = config.nodeiwest; - trustedUserCAKeysPath = "/etc/ssh/trusted-user-ca-keys.pem"; -in -{ - imports = [ ./tailscale-init.nix ]; - - options.nodeiwest = { - openbao.address = lib.mkOption { - type = lib.types.str; - default = "https://secrets.api.nodeiwest.se"; - description = "Remote OpenBao address that hosts should use as clients."; - example = "https://secrets.api.nodeiwest.se"; - }; - - homeManagerUsers = lib.mkOption { - type = lib.types.listOf lib.types.str; - default = [ - "root" - "deploy" - ]; - description = "Users that should receive the shared Home Manager company profile."; - example = [ - "root" - "deploy" - ]; - }; - - ssh.userCAPublicKeys = lib.mkOption { - type = lib.types.listOf lib.types.singleLineStr; - default = [ ]; - description = "OpenBao SSH user CA public keys trusted by sshd for user certificate authentication."; - example = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBExampleOpenBaoUserCA openbao-user-ca" - ]; - }; - }; - - config = { - networking.firewall.allowedTCPPorts = [ - 22 - 80 - 443 - ]; - - services.openssh = { - enable = true; - settings = { - PasswordAuthentication = false; - KbdInteractiveAuthentication = false; - PubkeyAuthentication = true; - PermitRootLogin = "prohibit-password"; - } - // lib.optionalAttrs (cfg.ssh.userCAPublicKeys != [ ]) { - TrustedUserCAKeys = trustedUserCAKeysPath; - }; - }; - - users.groups.deploy = { }; - users.users.deploy = { - isNormalUser = true; - group = "deploy"; - createHome = true; - extraGroups = [ "wheel" ]; - }; - - services.traefik = { - enable = true; - staticConfigOptions = { - api.dashboard = true; - entryPoints.web.address = ":80"; - entryPoints.websecure.address = ":443"; - ping = { }; - }; - dynamicConfigOptions = lib.mkMerge [ ]; - }; - - home-manager = { - useGlobalPkgs = true; - useUserPackages = true; - users = lib.genAttrs cfg.homeManagerUsers (_: { - imports = [ self.homeManagerModules.default ]; - home.stateVersion = config.system.stateVersion; - }); - }; - - environment.etc = lib.mkIf (cfg.ssh.userCAPublicKeys != [ ]) { - "ssh/trusted-user-ca-keys.pem".text = lib.concatStringsSep "\n" cfg.ssh.userCAPublicKeys + "\n"; - }; - - environment.variables = { - BAO_ADDR = cfg.openbao.address; - }; - }; -} diff --git a/modules/nixos/tailscale-init.nix b/modules/nixos/tailscale-init.nix deleted file mode 100644 index 374c5af..0000000 --- a/modules/nixos/tailscale-init.nix +++ /dev/null @@ -1,155 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -let - cfg = config.nodeiwest; - tailscaleOpenbaoCfg = cfg.tailscale.openbao; -in -{ - options.nodeiwest.tailscale.openbao = { - enable = lib.mkEnableOption "fetching the Tailscale auth key from OpenBao"; - - namespace = lib.mkOption { - type = lib.types.str; - default = "it"; - description = "OpenBao namespace used when fetching the Tailscale auth key."; - }; - - authPath = lib.mkOption { - type = lib.types.str; - default = "auth/approle"; - description = "OpenBao auth mount path used by the AppRole login."; - }; - - secretPath = lib.mkOption { - type = lib.types.str; - default = "tailscale"; - description = "OpenBao secret path containing the Tailscale auth key."; - }; - - field = lib.mkOption { - type = lib.types.str; - default = "CLIENT_SECRET"; - description = "Field in the OpenBao secret that contains the Tailscale auth key."; - }; - - renderedAuthKeyFile = lib.mkOption { - type = lib.types.str; - default = "/run/nodeiwest/tailscale-auth-key"; - description = "Runtime file rendered by OpenBao Agent and consumed by Tailscale autoconnect."; - }; - - approle = { - roleIdFile = lib.mkOption { - type = lib.types.str; - default = "/var/lib/nodeiwest/openbao-approle-role-id"; - description = "Root-only file containing the OpenBao AppRole role_id."; - }; - - secretIdFile = lib.mkOption { - type = lib.types.str; - default = "/var/lib/nodeiwest/openbao-approle-secret-id"; - description = "Root-only file containing the OpenBao AppRole secret_id."; - }; - }; - }; - - config = { - systemd.tmpfiles.rules = [ - "d /var/lib/nodeiwest 0700 root root - -" - "d /run/nodeiwest 0700 root root - -" - ]; - - services.tailscale = { - enable = true; - openFirewall = true; - extraUpFlags = lib.optionals tailscaleOpenbaoCfg.enable [ "--ssh" ]; - authKeyFile = if tailscaleOpenbaoCfg.enable then tailscaleOpenbaoCfg.renderedAuthKeyFile else null; - }; - - services.vault-agent.instances.tailscale = lib.mkIf tailscaleOpenbaoCfg.enable { - package = pkgs.openbao; - settings = { - vault.address = cfg.openbao.address; - auto_auth = { - method = [ - { - type = "approle"; - mount_path = tailscaleOpenbaoCfg.authPath; - namespace = tailscaleOpenbaoCfg.namespace; - config = { - role_id_file_path = tailscaleOpenbaoCfg.approle.roleIdFile; - secret_id_file_path = tailscaleOpenbaoCfg.approle.secretIdFile; - remove_secret_id_file_after_reading = false; - }; - } - ]; - }; - template = [ - { - contents = ''{{- with secret "${tailscaleOpenbaoCfg.secretPath}" -}}{{- if .Data.data -}}{{ index .Data.data "${tailscaleOpenbaoCfg.field}" }}{{- else -}}{{ index .Data "${tailscaleOpenbaoCfg.field}" }}{{- end -}}{{- end -}}''; - destination = tailscaleOpenbaoCfg.renderedAuthKeyFile; - perms = "0400"; - } - ]; - }; - }; - - systemd.services.vault-agent-tailscale = lib.mkIf tailscaleOpenbaoCfg.enable { - wants = [ "network-online.target" ]; - after = [ "network-online.target" ]; - serviceConfig.Environment = [ "BAO_NAMESPACE=${tailscaleOpenbaoCfg.namespace}" ]; - }; - - systemd.services.nodeiwest-tailscale-authkey-ready = lib.mkIf tailscaleOpenbaoCfg.enable { - description = "Wait for the Tailscale auth key rendered by OpenBao Agent"; - after = [ "vault-agent-tailscale.service" ]; - requires = [ "vault-agent-tailscale.service" ]; - before = [ "tailscaled-autoconnect.service" ]; - requiredBy = [ "tailscaled-autoconnect.service" ]; - path = [ pkgs.coreutils ]; - serviceConfig = { - Type = "oneshot"; - }; - script = '' - set -euo pipefail - - for _ in $(seq 1 60); do - if [ -s ${lib.escapeShellArg tailscaleOpenbaoCfg.renderedAuthKeyFile} ]; then - exit 0 - fi - sleep 1 - done - - echo "Timed out waiting for rendered Tailscale auth key at ${tailscaleOpenbaoCfg.renderedAuthKeyFile}" >&2 - exit 1 - ''; - }; - - systemd.services.tailscaled-autoconnect = lib.mkIf tailscaleOpenbaoCfg.enable { - after = [ - "vault-agent-tailscale.service" - "nodeiwest-tailscale-authkey-ready.service" - ]; - requires = [ - "vault-agent-tailscale.service" - "nodeiwest-tailscale-authkey-ready.service" - ]; - serviceConfig.ExecStartPre = [ - "${lib.getExe' pkgs.coreutils "test"} -s ${tailscaleOpenbaoCfg.renderedAuthKeyFile}" - ]; - }; - - assertions = [ - { - assertion = - (!tailscaleOpenbaoCfg.enable) - || (tailscaleOpenbaoCfg.approle.roleIdFile != "" && tailscaleOpenbaoCfg.approle.secretIdFile != ""); - message = "AppRole roleIdFile and secretIdFile must be set when OpenBao-backed Tailscale enrollment is enabled."; - } - ]; - }; -} diff --git a/pkgs/helpers/__pycache__/cli.cpython-313.pyc b/pkgs/helpers/__pycache__/cli.cpython-313.pyc deleted file mode 100644 index 65ea1cb..0000000 Binary files a/pkgs/helpers/__pycache__/cli.cpython-313.pyc and /dev/null differ diff --git a/pkgs/helpers/cli.py b/pkgs/helpers/cli.py deleted file mode 100644 index 4d2c467..0000000 --- a/pkgs/helpers/cli.py +++ /dev/null @@ -1,1498 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import dataclasses -import datetime as dt -import difflib -import json -import os -import re -import shlex -import shutil -import select -import subprocess -import sys -import tempfile -import time -from pathlib import Path -from typing import Any - - -SUPPORTED_CONFIG_MARKER = "Generated by nodeiwest host init." -SUPPORTED_DISKO_MARKER = "Generated by nodeiwest host init." -DEFAULT_STATE_VERSION = "25.05" -BOOT_MODE_CHOICES = ("uefi", "bios") -ACTIVITY_FRAMES = (0, 1, 2, 3, 2, 1) - - -class NodeiwestError(RuntimeError): - pass - - -@dataclasses.dataclass -class ProbeFacts: - ip: str - user: str - boot_mode: str - primary_disk: str - root_partition: str - root_source: str - disk_family: str - swap_devices: list[str] - disk_rows: list[dict[str, str]] - raw_outputs: dict[str, str] - - def to_json(self) -> dict[str, Any]: - return dataclasses.asdict(self) - - -@dataclasses.dataclass -class ExistingConfiguration: - host_name: str - timezone: str - boot_mode: str - tailscale_openbao: bool - user_ca_public_keys: list[str] - state_version: str - managed: bool - - -@dataclasses.dataclass -class ExistingDisko: - disk_device: str - boot_mode: str - swap_size: str - managed: bool - - -@dataclasses.dataclass -class RepoDefaults: - state_version: str - user_ca_public_keys: list[str] - - -def main() -> int: - parser = build_parser() - args = parser.parse_args() - - if not hasattr(args, "func"): - parser.print_help() - return 1 - - try: - return int(args.func(args) or 0) - except KeyboardInterrupt: - print("Interrupted.", file=sys.stderr) - return 130 - except NodeiwestError as exc: - print(str(exc), file=sys.stderr) - return 1 - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="nodeiwest", - description="Safe VPS provisioning helpers for the NodeiWest flake.", - ) - subparsers = parser.add_subparsers(dest="command") - - host_parser = subparsers.add_parser("host", help="Probe and initialize host files.") - host_subparsers = host_parser.add_subparsers(dest="host_command") - - probe_parser = host_subparsers.add_parser("probe", help="Probe a live host over SSH.") - probe_parser.add_argument("--ip", required=True, help="Target host IP or hostname.") - probe_parser.add_argument("--user", default="root", help="SSH user. Default: root.") - probe_parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") - probe_parser.set_defaults(func=cmd_host_probe) - - init_parser = host_subparsers.add_parser("init", help="Create or update hosts// files.") - init_parser.add_argument("--name", required=True, help="Host name, e.g. vps2.") - init_parser.add_argument("--ip", required=True, help="Target host IP or hostname.") - init_parser.add_argument("--user", default="root", help="SSH user. Default: root.") - init_parser.add_argument("--disk", help="Override the probed disk device, e.g. /dev/sda.") - init_parser.add_argument("--boot-mode", choices=BOOT_MODE_CHOICES, help="Override the probed boot mode.") - init_parser.add_argument("--swap-size", help="Swap partition size. Default: 4G.") - init_parser.add_argument("--timezone", help="Time zone. Default for new hosts: UTC.") - init_parser.add_argument( - "--tailscale-openbao", - choices=("on", "off"), - help="Enable or disable OpenBao-backed Tailscale bootstrap. Default for new hosts: on.", - ) - init_parser.add_argument("--apply", action="store_true", help="Write files after confirmation.") - init_parser.add_argument("--yes", action="store_true", help="Skip the interactive confirmation prompt.") - init_parser.add_argument("--force", action="store_true", help="Proceed even if target files are dirty.") - init_parser.set_defaults(func=cmd_host_init) - - openbao_parser = subparsers.add_parser("openbao", help="Create host OpenBao bootstrap material.") - openbao_subparsers = openbao_parser.add_subparsers(dest="openbao_command") - - init_host_parser = openbao_subparsers.add_parser("init-host", help="Create policy, AppRole, and bootstrap files.") - init_host_parser.add_argument("--name", required=True, help="Host name, e.g. vps2.") - init_host_parser.add_argument("--namespace", default="it", help="OpenBao namespace. Default: it.") - init_host_parser.add_argument("--kv-mount", default="kv", help="KV v2 mount name. Default: kv.") - init_host_parser.add_argument("--secret-path", default="tailscale", help="Logical secret path. Default: tailscale.") - init_host_parser.add_argument("--field", default="CLIENT_SECRET", help="Secret field. Default: CLIENT_SECRET.") - init_host_parser.add_argument("--auth-path", default="auth/approle", help="AppRole auth mount. Default: auth/approle.") - init_host_parser.add_argument("--policy-name", help="Policy name. Default: tailscale-.") - init_host_parser.add_argument("--role-name", help="AppRole name. Default: tailscale-.") - init_host_parser.add_argument("--out", default="bootstrap", help="Bootstrap output directory. Default: ./bootstrap.") - init_host_parser.add_argument( - "--kv-mount-path", - help="Override the actual HCL policy path, e.g. kv/data/tailscale.", - ) - init_host_parser.add_argument("--cidr", action="append", default=[], help="Optional CIDR restriction. Repeatable.") - init_host_parser.add_argument("--apply", action="store_true", help="Execute the plan after confirmation.") - init_host_parser.add_argument("--yes", action="store_true", help="Skip the interactive confirmation prompt.") - init_host_parser.set_defaults(func=cmd_openbao_init_host) - - install_parser = subparsers.add_parser("install", help="Plan or run nixos-anywhere.") - install_subparsers = install_parser.add_subparsers(dest="install_command") - - install_plan_parser = install_subparsers.add_parser("plan", help="Print the nixos-anywhere command.") - add_install_arguments(install_plan_parser) - install_plan_parser.set_defaults(func=cmd_install_plan) - - install_run_parser = install_subparsers.add_parser("run", help="Execute the nixos-anywhere command.") - add_install_arguments(install_run_parser) - install_run_parser.add_argument("--apply", action="store_true", help="Actually run nixos-anywhere.") - install_run_parser.add_argument("--yes", action="store_true", help="Skip the interactive confirmation prompt.") - install_run_parser.set_defaults(func=cmd_install_run) - - verify_parser = subparsers.add_parser("verify", help="Verify a provisioned host.") - verify_subparsers = verify_parser.add_subparsers(dest="verify_command") - - verify_host_parser = verify_subparsers.add_parser("host", help="Check first-boot service health.") - verify_host_parser.add_argument("--name", required=True, help="Host name.") - verify_host_parser.add_argument("--ip", required=True, help="Target host IP or hostname.") - verify_host_parser.add_argument("--user", default="root", help="SSH user. Default: root.") - verify_host_parser.set_defaults(func=cmd_verify_host) - - colmena_parser = subparsers.add_parser("colmena", help="Check colmena host inventory.") - colmena_subparsers = colmena_parser.add_subparsers(dest="colmena_command") - - colmena_plan_parser = colmena_subparsers.add_parser("plan", help="Print the colmena target block or deploy command.") - colmena_plan_parser.add_argument("--name", required=True, help="Host name.") - colmena_plan_parser.add_argument("--ip", help="Target IP to use in a suggested snippet when missing.") - colmena_plan_parser.set_defaults(func=cmd_colmena_plan) - - return parser - - -def add_install_arguments(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--name", required=True, help="Host name.") - parser.add_argument("--ip", help="Target host IP. Defaults to the colmena inventory if present.") - parser.add_argument("--bootstrap-dir", default="bootstrap", help="Bootstrap directory. Default: ./bootstrap.") - parser.add_argument( - "--copy-host-keys", - choices=("on", "off"), - default="on", - help="Whether to pass --copy-host-keys. Default: on.", - ) - parser.add_argument( - "--generate-hardware-config", - choices=("on", "off"), - default="on", - help="Whether to pass --generate-hardware-config. Default: on.", - ) - - -def cmd_host_probe(args: argparse.Namespace) -> int: - facts = probe_host(args.ip, args.user) - if args.json: - print(json.dumps(facts.to_json(), indent=2, sort_keys=True)) - return 0 - - print(f"Host: {args.user}@{args.ip}") - print(f"Boot mode: {facts.boot_mode.upper()}") - print(f"Primary disk: {facts.primary_disk}") - print(f"Root source: {facts.root_source}") - print(f"Root partition: {facts.root_partition}") - print(f"Disk family: {facts.disk_family}") - print(f"Swap devices: {', '.join(facts.swap_devices) if facts.swap_devices else 'none'}") - print("") - print("Disk inventory:") - for row in facts.disk_rows: - model = row.get("MODEL") or "n/a" - print( - " " - + f"{row.get('NAME', '?')} size={row.get('SIZE', '?')} type={row.get('TYPE', '?')} " - + f"model={model} fstype={row.get('FSTYPE', '') or '-'} pttype={row.get('PTTYPE', '') or '-'}" - ) - return 0 - - -def cmd_host_init(args: argparse.Namespace) -> int: - repo_root = find_repo_root(Path.cwd()) - ensure_expected_repo_root(repo_root) - validate_host_name(args.name) - - host_dir = repo_root / "hosts" / args.name - config_path = host_dir / "configuration.nix" - disko_path = host_dir / "disko.nix" - hardware_path = host_dir / "hardware-configuration.nix" - - if not args.force: - ensure_git_paths_clean(repo_root, [config_path, disko_path, hardware_path]) - - host_dir.mkdir(parents=True, exist_ok=True) - - existing_config = parse_existing_configuration(config_path) if config_path.exists() else None - existing_disko = parse_existing_disko(disko_path) if disko_path.exists() else None - - repo_defaults = infer_repo_defaults(repo_root, skip_host=args.name) - facts = None - if not (args.disk and args.boot_mode): - facts = probe_host(args.ip, args.user) - - disk_device = args.disk or (facts.primary_disk if facts else None) - boot_mode = normalize_boot_mode(args.boot_mode or (facts.boot_mode if facts else None)) - if not disk_device or not boot_mode: - raise NodeiwestError("Unable to determine both disk and boot mode. Supply --disk and --boot-mode explicitly.") - - if existing_config is not None and existing_config.host_name != args.name: - raise NodeiwestError( - f"{config_path.relative_to(repo_root)} already declares hostName={existing_config.host_name!r}, not {args.name!r}." - ) - if existing_config is not None and existing_config.boot_mode != boot_mode: - raise NodeiwestError( - f"{config_path.relative_to(repo_root)} uses {existing_config.boot_mode.upper()} boot settings but the requested boot mode is {boot_mode.upper()}." - ) - if existing_disko is not None and existing_disko.boot_mode != boot_mode: - raise NodeiwestError( - f"{disko_path.relative_to(repo_root)} describes a {existing_disko.boot_mode.upper()} layout but the requested boot mode is {boot_mode.upper()}." - ) - - if existing_disko is not None and existing_disko.disk_device != disk_device and not args.yes: - print( - f"Existing disk device in {disko_path.relative_to(repo_root)} is {existing_disko.disk_device}; requested device is {disk_device}.", - file=sys.stderr, - ) - - swap_size = normalize_swap_size(args.swap_size or (existing_disko.swap_size if existing_disko else "4G")) - timezone = args.timezone or (existing_config.timezone if existing_config else "UTC") - tailscale_openbao = parse_on_off(args.tailscale_openbao, existing_config.tailscale_openbao if existing_config else True) - state_version = existing_config.state_version if existing_config else repo_defaults.state_version - user_ca_public_keys = existing_config.user_ca_public_keys if existing_config else repo_defaults.user_ca_public_keys - - if not user_ca_public_keys: - raise NodeiwestError( - "No SSH user CA public keys could be inferred from the repo. Add them to an existing host config first or create this host manually." - ) - - configuration_text = render_configuration( - host_name=args.name, - timezone=timezone, - boot_mode=boot_mode, - disk_device=disk_device, - tailscale_openbao=tailscale_openbao, - state_version=state_version, - user_ca_public_keys=user_ca_public_keys, - ) - disko_text = render_disko(boot_mode=boot_mode, disk_device=disk_device, swap_size=swap_size) - hardware_text = load_template("hardware-configuration.placeholder.nix") - - plans = [] - plans.extend(plan_file_update(config_path, configuration_text)) - plans.extend(plan_file_update(disko_path, disko_text)) - if hardware_path.exists(): - plans.extend(plan_file_update(hardware_path, hardware_path.read_text())) - else: - plans.extend(plan_file_update(hardware_path, hardware_text)) - - if not plans: - print(f"No changes required under hosts/{args.name}.") - else: - print(f"Planned updates for hosts/{args.name}:") - for plan in plans: - print("") - print(plan["summary"]) - if plan["diff"]: - print(plan["diff"]) - - flake_text = (repo_root / "flake.nix").read_text() - nixos_missing = not flake_has_nixos_configuration(flake_text, args.name) - colmena_missing = not flake_has_colmena_host(flake_text, args.name) - if nixos_missing or colmena_missing: - print("") - print("flake.nix additions required:") - if nixos_missing: - print(build_nixos_configuration_snippet(args.name)) - if colmena_missing: - print(build_colmena_host_snippet(args.name, args.ip)) - - if not args.apply: - print("") - print("Dry run only. Re-run with --apply to write these files.") - return 0 - - if plans and not args.yes: - if not confirm("Write the planned host files? [y/N] "): - raise NodeiwestError("Aborted before writing host files.") - - for plan in plans: - if plan["changed"]: - write_file_with_backup(plan["path"], plan["new_text"]) - rel_path = plan["path"].relative_to(repo_root) - print(f"Wrote {rel_path}") - - if not plans: - print("Nothing to write.") - return 0 - - -def cmd_openbao_init_host(args: argparse.Namespace) -> int: - repo_root = find_repo_root(Path.cwd()) - ensure_expected_repo_root(repo_root) - validate_host_name(args.name) - ensure_command_available("bao") - ensure_bao_authenticated() - - policy_name = args.policy_name or f"tailscale-{args.name}" - role_name = args.role_name or f"tailscale-{args.name}" - output_dir = resolve_path(repo_root, args.out) - role_id_path = output_dir / "var" / "lib" / "nodeiwest" / "openbao-approle-role-id" - secret_id_path = output_dir / "var" / "lib" / "nodeiwest" / "openbao-approle-secret-id" - - secret_data = bao_kv_get(args.namespace, args.kv_mount, args.secret_path) - fields = secret_data.get("data", {}) - if isinstance(fields.get("data"), dict): - fields = fields["data"] - if args.field not in fields: - raise NodeiwestError( - f"OpenBao secret {args.secret_path!r} in namespace {args.namespace!r} does not contain field {args.field!r}." - ) - - if args.kv_mount_path: - policy_content = render_openbao_policy(args.kv_mount_path) - else: - policy_content = derive_openbao_policy(args.namespace, args.kv_mount, args.secret_path) - - role_command = build_approle_write_command(args.auth_path, role_name, policy_name, args.cidr) - - print(f"Namespace: {args.namespace}") - print(f"KV mount: {args.kv_mount}") - print(f"Policy name: {policy_name}") - print(f"Role name: {role_name}") - print(f"Secret path: {args.secret_path}") - print(f"Field: {args.field}") - print(f"Bootstrap output: {output_dir}") - print("") - print("Policy content:") - print(policy_content.rstrip()) - print("") - print("AppRole command:") - print(shlex.join(role_command)) - print("") - print("Bootstrap files:") - print(f" {role_id_path}") - print(f" {secret_id_path}") - - if not args.apply: - print("") - print("Dry run only. Re-run with --apply to create the policy, AppRole, and bootstrap files.") - return 0 - - if not args.yes and not confirm("Create or update the OpenBao policy, AppRole, and bootstrap files? [y/N] "): - raise NodeiwestError("Aborted before OpenBao writes.") - - with tempfile.NamedTemporaryFile("w", delete=False) as handle: - handle.write(policy_content.rstrip() + "\n") - temp_policy_path = Path(handle.name) - - try: - bao_env = {"BAO_NAMESPACE": args.namespace} - run_command( - ["bao", "policy", "write", policy_name, str(temp_policy_path)], - cwd=repo_root, - env=bao_env, - next_fix="Check that your token can write policies in the selected namespace.", - ) - run_command( - role_command, - cwd=repo_root, - env=bao_env, - next_fix="Check that the AppRole auth mount exists and that your token can manage roles.", - ) - role_id = run_command( - ["bao", "read", "-field=role_id", f"{args.auth_path}/role/{role_name}/role-id"], - cwd=repo_root, - env=bao_env, - next_fix="Check that the AppRole was created successfully before fetching role_id.", - ).stdout.strip() - secret_id = run_command( - ["bao", "write", "-f", "-field=secret_id", f"{args.auth_path}/role/{role_name}/secret-id"], - cwd=repo_root, - env=bao_env, - next_fix="Check that the AppRole supports SecretIDs and that your token can generate them.", - ).stdout.strip() - finally: - temp_policy_path.unlink(missing_ok=True) - - role_id_path.parent.mkdir(parents=True, exist_ok=True) - write_secret_file(role_id_path, role_id + "\n") - write_secret_file(secret_id_path, secret_id + "\n") - - print("") - print("OpenBao bootstrap material written.") - print(f"Role ID: {role_id_path}") - print(f"Secret ID: {secret_id_path}") - print("") - print("Next step:") - print(f" nodeiwest install plan --name {args.name} --bootstrap-dir {shlex.quote(str(output_dir))}") - return 0 - - -def cmd_install_plan(args: argparse.Namespace) -> int: - repo_root = find_repo_root(Path.cwd()) - ensure_expected_repo_root(repo_root) - install_context = build_install_context(repo_root, args) - print_install_plan(install_context) - return 0 - - -def cmd_install_run(args: argparse.Namespace) -> int: - if not args.apply: - raise NodeiwestError("install run is destructive. Re-run with --apply to execute nixos-anywhere.") - - repo_root = find_repo_root(Path.cwd()) - ensure_expected_repo_root(repo_root) - install_context = build_install_context(repo_root, args) - ensure_ssh_reachable(install_context["ip"], "root") - print_install_plan(install_context) - if not args.yes and not confirm("Run nixos-anywhere now? [y/N] "): - raise NodeiwestError("Aborted before running nixos-anywhere.") - - print("") - stream_command( - install_context["command"], - cwd=repo_root, - next_fix="Recover via provider console or public SSH, then re-check the generated host files and bootstrap material.", - activity_label="Executing install", - ) - - print("") - print("Install completed. Verify first boot with:") - print(f" nodeiwest verify host --name {args.name} --ip {install_context['ip']}") - return 0 - - -def cmd_verify_host(args: argparse.Namespace) -> int: - validate_host_name(args.name) - services = [ - "vault-agent-tailscale", - "nodeiwest-tailscale-authkey-ready", - "tailscaled-autoconnect", - ] - - service_results: dict[str, subprocess.CompletedProcess[str]] = {} - for service in services: - service_results[service] = ssh_command( - args.user, - args.ip, - f"systemctl status --no-pager --lines=20 {shlex.quote(service)}", - check=False, - next_fix="Check public SSH reachability before retrying verification.", - ) - - tailscale_status = ssh_command( - args.user, - args.ip, - "tailscale status", - check=False, - next_fix="Check public SSH reachability before retrying verification.", - ) - - print(f"Verification target: {args.user}@{args.ip} ({args.name})") - print("") - for service in services: - state = classify_systemd_status(service_results[service]) - print(f"{service}: {state}") - print(f"tailscale status: {'healthy' if tailscale_status.returncode == 0 else 'error'}") - - causes = infer_verify_failures(service_results, tailscale_status) - if causes: - print("") - print("Likely causes:") - for cause in causes: - print(f" - {cause}") - - print("") - print("Service excerpts:") - for service in services: - print(f"[{service}]") - excerpt = summarize_text(service_results[service].stdout or service_results[service].stderr, 12) - print(excerpt or "(no output)") - print("") - print("[tailscale status]") - print(summarize_text(tailscale_status.stdout or tailscale_status.stderr, 12) or "(no output)") - return 0 - - -def cmd_colmena_plan(args: argparse.Namespace) -> int: - repo_root = find_repo_root(Path.cwd()) - ensure_expected_repo_root(repo_root) - validate_host_name(args.name) - - flake_text = (repo_root / "flake.nix").read_text() - target_host = lookup_colmena_target_host(flake_text, args.name) - if target_host: - print(f"colmena targetHost for {args.name}: {target_host}") - else: - if not args.ip: - raise NodeiwestError( - f"flake.nix does not define colmena.{args.name}.deployment.targetHost and no --ip was provided." - ) - print("Missing colmena host block. Add this to flake.nix:") - print(build_colmena_host_snippet(args.name, args.ip)) - print("") - print(f"Deploy command: nix run .#colmena -- apply --on {args.name}") - return 0 - - -def find_repo_root(start: Path) -> Path: - git_result = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], - cwd=start, - text=True, - capture_output=True, - ) - if git_result.returncode == 0: - return Path(git_result.stdout.strip()).resolve() - - current = start.resolve() - for candidate in [current, *current.parents]: - if (candidate / "flake.nix").exists() and (candidate / "modules" / "home.nix").exists(): - return candidate - raise NodeiwestError("Not inside the nix-nodeiwest repository. Run the helper from this flake checkout.") - - -def ensure_expected_repo_root(repo_root: Path) -> None: - required = [ - repo_root / "flake.nix", - repo_root / "modules" / "home.nix", - repo_root / "hosts", - ] - missing = [path for path in required if not path.exists()] - if missing: - formatted = ", ".join(str(path.relative_to(repo_root)) for path in missing) - raise NodeiwestError(f"Repository root is missing expected files: {formatted}") - - -def validate_host_name(name: str) -> None: - if not re.fullmatch(r"[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?", name): - raise NodeiwestError( - f"Invalid host name {name!r}. Use lowercase letters, digits, and hyphens only, without a trailing hyphen." - ) - - -def probe_host(ip: str, user: str) -> ProbeFacts: - lsblk_cmd = "lsblk -P -o NAME,SIZE,TYPE,MODEL,FSTYPE,PTTYPE,MOUNTPOINTS" - boot_cmd = "test -d /sys/firmware/efi && echo UEFI || echo BIOS" - root_cmd = "findmnt -no SOURCE /" - swap_cmd = "cat /proc/swaps" - - lsblk_output = ssh_command(user, ip, lsblk_cmd, next_fix="Check SSH access and that lsblk exists on the target.").stdout - boot_output = ssh_command(user, ip, boot_cmd, next_fix="Check SSH access and that /sys/firmware is readable.").stdout - root_output = ssh_command(user, ip, root_cmd, next_fix="Check SSH access and that findmnt exists on the target.").stdout - swap_output = ssh_command(user, ip, swap_cmd, next_fix="Check SSH access and that /proc/swaps is readable.").stdout - - disk_rows = parse_lsblk_output(lsblk_output) - disk_devices = [f"/dev/{row['NAME']}" for row in disk_rows if row.get("TYPE") == "disk"] - if not disk_devices: - raise NodeiwestError("No disk devices were found in the remote lsblk output.") - - root_source = root_output.strip() - if not root_source: - raise NodeiwestError("findmnt returned an empty root source; cannot determine the primary disk.") - root_partition = normalize_device(root_source) - primary_disk = disk_from_device(root_partition) - if primary_disk not in disk_devices: - if len(disk_devices) == 1: - primary_disk = disk_devices[0] - else: - raise NodeiwestError( - "Multiple candidate disks were found and the root source did not map cleanly to one of them. Re-run with --disk." - ) - - boot_mode = normalize_boot_mode(boot_output.strip()) - swap_devices = parse_swaps(swap_output) - - return ProbeFacts( - ip=ip, - user=user, - boot_mode=boot_mode, - primary_disk=primary_disk, - root_partition=root_partition, - root_source=root_source, - disk_family=classify_disk_family(primary_disk), - swap_devices=swap_devices, - disk_rows=disk_rows, - raw_outputs={ - "lsblk": lsblk_output, - "boot_mode": boot_output, - "root_source": root_output, - "swaps": swap_output, - }, - ) - - -def parse_lsblk_output(output: str) -> list[dict[str, str]]: - lines = [line.strip() for line in output.splitlines() if line.strip()] - if not lines: - raise NodeiwestError("Unexpected lsblk output: not enough lines to parse.") - - columns = ["NAME", "SIZE", "TYPE", "MODEL", "FSTYPE", "PTTYPE", "MOUNTPOINTS"] - rows: list[dict[str, str]] = [] - for line in lines: - tokens = shlex.split(line) - row = {} - for token in tokens: - if "=" not in token: - continue - key, value = token.split("=", 1) - row[key] = value - missing = [column for column in columns if column not in row] - if missing: - raise NodeiwestError( - f"Unexpected lsblk output: missing columns {', '.join(missing)} in line {line!r}." - ) - rows.append(row) - return rows - - -def normalize_boot_mode(value: str | None) -> str: - if not value: - raise NodeiwestError("Boot mode is missing.") - normalized = value.strip().lower() - if normalized not in BOOT_MODE_CHOICES: - raise NodeiwestError(f"Unsupported boot mode {value!r}. Expected one of: {', '.join(BOOT_MODE_CHOICES)}.") - return normalized - - -def normalize_device(value: str) -> str: - normalized = value.strip() - if not normalized.startswith("/dev/"): - raise NodeiwestError( - f"Unsupported root source {value!r}. Only plain /dev/* block devices are supported by the helper." - ) - return normalized - - -def disk_from_device(device: str) -> str: - name = Path(device).name - if re.fullmatch(r"nvme\d+n\d+p\d+", name) or re.fullmatch(r"mmcblk\d+p\d+", name): - base_name = re.sub(r"p\d+$", "", name) - return f"/dev/{base_name}" - if re.search(r"\d+$", name): - base_name = re.sub(r"\d+$", "", name) - return f"/dev/{base_name}" - return device - - -def classify_disk_family(device: str) -> str: - name = Path(device).name - if name.startswith("nvme"): - return "nvme" - if name.startswith("vd"): - return "vda" - if name.startswith("sd"): - return "sda" - return "other" - - -def parse_swaps(output: str) -> list[str]: - lines = [line.strip() for line in output.splitlines() if line.strip()] - if len(lines) <= 1: - return [] - return [line.split()[0] for line in lines[1:]] - - -def normalize_swap_size(value: str) -> str: - normalized = value.strip() - replacements = { - "KiB": "K", - "MiB": "M", - "GiB": "G", - "TiB": "T", - "PiB": "P", - } - for suffix, replacement in replacements.items(): - if normalized.endswith(suffix): - return normalized[: -len(suffix)] + replacement - return normalized - - -def parse_existing_configuration(path: Path) -> ExistingConfiguration: - text = path.read_text() - if "./disko.nix" not in text or "./hardware-configuration.nix" not in text: - raise NodeiwestError( - f"{path} does not match the supported configuration shape. Manual intervention is required." - ) - - host_name = extract_single_match(text, r'networking\.hostName\s*=\s*"([^"]+)";', path, "hostName") - timezone = extract_single_match(text, r'time\.timeZone\s*=\s*"([^"]+)";', path, "time.timeZone") - state_version = extract_single_match(text, r'system\.stateVersion\s*=\s*"([^"]+)";', path, "system.stateVersion") - user_ca_public_keys = extract_nix_string_list(text, r"nodeiwest\.ssh\.userCAPublicKeys\s*=\s*\[(.*?)\];", path) - tailscale_enable_text = extract_optional_match( - text, - r"nodeiwest\.tailscale\.openbao(?:\.enable\s*=\s*|\s*=\s*\{[^}]*enable\s*=\s*)(true|false);", - ) - if tailscale_enable_text is None: - raise NodeiwestError( - f"{path} does not contain a supported nodeiwest.tailscale.openbao.enable declaration." - ) - if 'boot.loader.efi.canTouchEfiVariables = true;' in text and 'device = "nodev";' in text: - boot_mode = "uefi" - elif re.search(r'boot\.loader\.grub\s*=\s*\{[^}]*device\s*=\s*"/dev/', text, re.S) or 'efiSupport = false;' in text: - boot_mode = "bios" - else: - raise NodeiwestError( - f"{path} has a boot loader configuration outside the helper's supported template shape." - ) - - return ExistingConfiguration( - host_name=host_name, - timezone=timezone, - boot_mode=boot_mode, - tailscale_openbao=(tailscale_enable_text == "true"), - user_ca_public_keys=user_ca_public_keys, - state_version=state_version, - managed=SUPPORTED_CONFIG_MARKER in text, - ) - - -def parse_existing_disko(path: Path) -> ExistingDisko: - text = path.read_text() - if 'type = "gpt";' not in text or 'format = "ext4";' not in text or 'type = "swap";' not in text: - raise NodeiwestError( - f"{path} does not match the supported single-disk ext4+swap disko shape. Manual intervention is required." - ) - disk_device = extract_single_match(text, r'device\s*=\s*lib\.mkDefault\s*"([^"]+)";', path, "disk device") - swap_size = extract_single_match(text, r'swap\s*=\s*\{.*?size\s*=\s*"([^"]+)";', path, "swap size", flags=re.S) - if 'type = "EF00";' in text and 'mountpoint = "/boot";' in text: - boot_mode = "uefi" - elif 'type = "EF02";' in text: - boot_mode = "bios" - else: - raise NodeiwestError( - f"{path} does not match the helper's supported UEFI or BIOS templates." - ) - - return ExistingDisko( - disk_device=disk_device, - boot_mode=boot_mode, - swap_size=swap_size, - managed=SUPPORTED_DISKO_MARKER in text, - ) - - -def infer_repo_defaults(repo_root: Path, skip_host: str | None = None) -> RepoDefaults: - hosts_dir = repo_root / "hosts" - state_versions: list[str] = [] - ca_key_sets: set[tuple[str, ...]] = set() - - for config_path in sorted(hosts_dir.glob("*/configuration.nix")): - if skip_host and config_path.parent.name == skip_host: - continue - try: - existing = parse_existing_configuration(config_path) - except NodeiwestError: - continue - state_versions.append(existing.state_version) - if existing.user_ca_public_keys: - ca_key_sets.add(tuple(existing.user_ca_public_keys)) - - state_version = most_common_value(state_versions) or DEFAULT_STATE_VERSION - if len(ca_key_sets) > 1: - raise NodeiwestError( - "Existing host configs define multiple different SSH user CA key lists. The helper will not guess which set to reuse." - ) - user_ca_public_keys = list(next(iter(ca_key_sets))) if ca_key_sets else [] - return RepoDefaults(state_version=state_version, user_ca_public_keys=user_ca_public_keys) - - -def most_common_value(values: list[str]) -> str | None: - if not values: - return None - counts: dict[str, int] = {} - for value in values: - counts[value] = counts.get(value, 0) + 1 - return sorted(counts.items(), key=lambda item: (-item[1], item[0]))[0][0] - - -def render_configuration( - *, - host_name: str, - timezone: str, - boot_mode: str, - disk_device: str, - tailscale_openbao: bool, - state_version: str, - user_ca_public_keys: list[str], -) -> str: - template = load_template("configuration.nix.tmpl") - boot_loader_block = render_boot_loader_block(boot_mode, disk_device) - rendered = template - rendered = rendered.replace("@@HOST_NAME@@", host_name) - rendered = rendered.replace("@@TIMEZONE@@", timezone) - rendered = rendered.replace("@@BOOT_LOADER_BLOCK@@", indent(boot_loader_block.rstrip(), " ")) - rendered = rendered.replace("@@SSH_CA_KEYS@@", render_nix_string_list(user_ca_public_keys, indent_level=2)) - rendered = rendered.replace("@@TAILSCALE_OPENBAO_ENABLE@@", render_nix_bool(tailscale_openbao)) - rendered = rendered.replace("@@STATE_VERSION@@", state_version) - return ensure_trailing_newline(rendered) - - -def render_boot_loader_block(boot_mode: str, disk_device: str) -> str: - if boot_mode == "uefi": - return """ -boot.loader.efi.canTouchEfiVariables = true; -boot.loader.grub = { - enable = true; - efiSupport = true; - device = "nodev"; -}; -""".strip("\n") - return f""" -boot.loader.grub = {{ - enable = true; - efiSupport = false; - device = "{escape_nix_string(disk_device)}"; -}}; -""".strip("\n") - - -def render_disko(*, boot_mode: str, disk_device: str, swap_size: str) -> str: - template_name = "disko-uefi-ext4.nix" if boot_mode == "uefi" else "disko-bios-ext4.nix" - rendered = load_template(template_name) - rendered = rendered.replace("@@DISK_DEVICE@@", escape_nix_string(disk_device)) - rendered = rendered.replace("@@SWAP_SIZE@@", escape_nix_string(swap_size)) - return ensure_trailing_newline(rendered) - - -def render_openbao_policy(policy_path: str) -> str: - rendered = load_template("openbao-policy.hcl.tmpl").replace("@@POLICY_PATH@@", policy_path) - return ensure_trailing_newline(rendered) - - -def load_template(name: str) -> str: - templates_dir = Path(os.environ.get("NODEIWEST_HELPER_TEMPLATES", Path(__file__).resolve().parent / "templates")) - template_path = templates_dir / name - if not template_path.exists(): - raise NodeiwestError(f"Missing helper template: {template_path}") - return template_path.read_text() - - -def render_nix_string_list(values: list[str], indent_level: int = 0) -> str: - if not values: - return "[ ]" - indent_text = " " * indent_level - lines = ["["] - for value in values: - lines.append(f'{indent_text} "{escape_nix_string(value)}"') - lines.append(f"{indent_text}]") - return "\n".join(lines) - - -def render_nix_bool(value: bool) -> str: - return "true" if value else "false" - - -def escape_nix_string(value: str) -> str: - return value.replace("\\", "\\\\").replace('"', '\\"') - - -def ensure_trailing_newline(text: str) -> str: - return text if text.endswith("\n") else text + "\n" - - -def indent(text: str, prefix: str) -> str: - return "\n".join(prefix + line if line else line for line in text.splitlines()) - - -def plan_file_update(path: Path, new_text: str) -> list[dict[str, Any]]: - if path.exists(): - old_text = path.read_text() - if old_text == new_text: - return [] - diff = unified_diff(path, old_text, new_text) - return [{ - "path": path, - "changed": True, - "new_text": new_text, - "summary": f"Update {path.name}", - "diff": diff, - }] - - diff = unified_diff(path, "", new_text) - return [{ - "path": path, - "changed": True, - "new_text": new_text, - "summary": f"Create {path.name}", - "diff": diff, - }] - - -def unified_diff(path: Path, old_text: str, new_text: str) -> str: - old_lines = old_text.splitlines() - new_lines = new_text.splitlines() - diff = difflib.unified_diff( - old_lines, - new_lines, - fromfile=str(path), - tofile=str(path), - lineterm="", - ) - return "\n".join(diff) - - -def write_file_with_backup(path: Path, text: str) -> None: - if path.exists(): - backup_path = backup_file(path) - print(f"Backed up {path.name} to {backup_path.name}") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(text) - - -def write_secret_file(path: Path, text: str) -> None: - if path.exists(): - backup_path = backup_file(path) - print(f"Backed up {path.name} to {backup_path.name}") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(text) - path.chmod(0o400) - - -def backup_file(path: Path) -> Path: - timestamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d%H%M%S") - backup_path = path.with_name(f"{path.name}.bak.{timestamp}") - shutil.copy2(path, backup_path) - return backup_path - - -def ensure_git_paths_clean(repo_root: Path, paths: list[Path]) -> None: - existing_paths = [path for path in paths if path.exists()] - if not existing_paths: - return - relative_paths = [str(path.relative_to(repo_root)) for path in existing_paths] - result = run_command( - ["git", "status", "--porcelain", "--", *relative_paths], - cwd=repo_root, - next_fix="Commit or stash local edits to the target host files, or re-run with --force if you intentionally want to overwrite them.", - ) - if result.stdout.strip(): - raise NodeiwestError( - "Refusing to modify host files with local git changes:\n" - + summarize_text(result.stdout, 20) - + "\nRe-run with --force to override this guard." - ) - - -def flake_has_nixos_configuration(flake_text: str, name: str) -> bool: - pattern = rf'^\s*{re.escape(name)}\s*=\s*mkHost\s+"{re.escape(name)}";' - return re.search(pattern, flake_text, re.M) is not None - - -def flake_has_colmena_host(flake_text: str, name: str) -> bool: - target_host = lookup_colmena_target_host(flake_text, name) - return target_host is not None - - -def lookup_colmena_target_host(flake_text: str, name: str) -> str | None: - pattern = re.compile( - rf'colmena\s*=\s*\{{.*?^\s*{re.escape(name)}\s*=\s*\{{.*?targetHost\s*=\s*"([^"]+)";', - re.S | re.M, - ) - match = pattern.search(flake_text) - return match.group(1) if match else None - - -def build_nixos_configuration_snippet(name: str) -> str: - return f' {name} = mkHost "{name}";' - - -def build_colmena_host_snippet(name: str, ip: str) -> str: - return ( - f" {name} = {{\n" - f" deployment = {{\n" - f' targetHost = "{ip}";\n' - f' targetUser = "root";\n' - f" tags = [\n" - f' "company"\n' - f" ];\n" - f" }};\n\n" - f" imports = [ ./hosts/{name}/configuration.nix ];\n" - f" }};" - ) - - -def ensure_command_available(name: str) -> None: - if shutil.which(name) is None: - raise NodeiwestError(f"Required command {name!r} is not available in PATH.") - - -def ensure_bao_authenticated() -> None: - run_command( - ["bao", "token", "lookup"], - next_fix="Run a bao login flow first and verify that `bao token lookup` succeeds.", - ) - - -def bao_kv_get(namespace: str, kv_mount: str, secret_path: str) -> dict[str, Any]: - result = run_command( - ["bao", "kv", "get", f"-mount={kv_mount}", "-format=json", secret_path], - env={"BAO_NAMESPACE": namespace}, - next_fix=( - "Check BAO_ADDR, BAO_NAMESPACE, the KV mount, and the logical secret path. " - "If the KV mount is not the default, re-run with --kv-mount." - ), - ) - try: - return json.loads(result.stdout) - except json.JSONDecodeError as exc: - raise NodeiwestError(f"Failed to parse `bao kv get` JSON output: {exc}") from exc - - -def derive_openbao_policy(namespace: str, kv_mount: str, secret_path: str) -> str: - result = run_command( - ["bao", "kv", "get", f"-mount={kv_mount}", "-output-policy", secret_path], - env={"BAO_NAMESPACE": namespace}, - next_fix=( - "Check BAO_ADDR, BAO_NAMESPACE, the KV mount, and the logical secret path. " - "If the KV mount is not the default, re-run with --kv-mount. " - "If policy derivation still does not match your mount layout, re-run with --kv-mount-path." - ), - ) - policy = result.stdout.strip() - if not policy: - raise NodeiwestError("`bao kv get -output-policy` returned an empty policy.") - return ensure_trailing_newline(policy) - - -def build_approle_write_command(auth_path: str, role_name: str, policy_name: str, cidrs: list[str]) -> list[str]: - command = [ - "bao", - "write", - f"{auth_path}/role/{role_name}", - f"token_policies={policy_name}", - "token_ttl=1h", - "token_max_ttl=24h", - "token_num_uses=0", - "secret_id_num_uses=0", - ] - if cidrs: - csv = ",".join(cidrs) - command.extend([ - f"token_bound_cidrs={csv}", - f"secret_id_bound_cidrs={csv}", - ]) - return command - - -def build_install_context(repo_root: Path, args: argparse.Namespace) -> dict[str, Any]: - validate_host_name(args.name) - flake_text = (repo_root / "flake.nix").read_text() - if not flake_has_nixos_configuration(flake_text, args.name): - raise NodeiwestError( - f"flake.nix does not define nixosConfigurations.{args.name}.\nAdd this block:\n{build_nixos_configuration_snippet(args.name)}" - ) - - ip = args.ip or lookup_colmena_target_host(flake_text, args.name) - if not ip: - raise NodeiwestError( - f"Could not determine an IP for {args.name}. Pass --ip or add a colmena targetHost.\n" - + build_colmena_host_snippet(args.name, "") - ) - - host_dir = repo_root / "hosts" / args.name - configuration_path = host_dir / "configuration.nix" - disko_path = host_dir / "disko.nix" - hardware_path = host_dir / "hardware-configuration.nix" - bootstrap_dir = resolve_path(repo_root, args.bootstrap_dir) - role_id_path = bootstrap_dir / "var" / "lib" / "nodeiwest" / "openbao-approle-role-id" - secret_id_path = bootstrap_dir / "var" / "lib" / "nodeiwest" / "openbao-approle-secret-id" - - required_paths = [configuration_path, disko_path, role_id_path, secret_id_path] - missing = [path for path in required_paths if not path.exists()] - if missing: - formatted = "\n".join(f" - {path}" for path in missing) - raise NodeiwestError(f"Install prerequisites are missing:\n{formatted}") - - if args.generate_hardware_config == "off" and not hardware_path.exists(): - raise NodeiwestError( - f"{hardware_path.relative_to(repo_root)} is missing and --generate-hardware-config=off was requested." - ) - - command = [ - "nix", - "run", - "github:nix-community/nixos-anywhere", - "--", - "--extra-files", - str(bootstrap_dir), - ] - if args.copy_host_keys == "on": - command.append("--copy-host-keys") - if args.generate_hardware_config == "on": - command.extend([ - "--generate-hardware-config", - "nixos-generate-config", - str(hardware_path), - ]) - command.extend([ - "--flake", - f".#{args.name}", - f"root@{ip}", - ]) - - return { - "ip": ip, - "command": command, - "configuration_path": configuration_path, - "disko_path": disko_path, - "hardware_path": hardware_path, - "role_id_path": role_id_path, - "secret_id_path": secret_id_path, - "colmena_missing": not flake_has_colmena_host(flake_text, args.name), - } - - -def print_install_plan(context: dict[str, Any]) -> None: - print("Install command:") - print(shlex.join(context["command"])) - print("") - print("Preflight checklist:") - print(" - provider snapshot taken") - print(" - application/data backup taken") - print(" - public SSH reachable") - print(" - host keys may change after install") - print("") - print("Validated files:") - print(f" - {context['configuration_path']}") - print(f" - {context['disko_path']}") - if context["hardware_path"].exists(): - print(f" - {context['hardware_path']}") - print(f" - {context['role_id_path']}") - print(f" - {context['secret_id_path']}") - if context["colmena_missing"]: - print("") - print("colmena host block is missing. Add this before the first deploy:") - print(build_colmena_host_snippet(Path(context["configuration_path"]).parent.name, context["ip"])) - - -def ensure_ssh_reachable(ip: str, user: str) -> None: - ssh_command( - user, - ip, - "true", - next_fix="Check public SSH reachability, host keys, and the target user before running nixos-anywhere.", - ) - - -def ssh_command( - user: str, - ip: str, - remote_command: str, - *, - check: bool = True, - next_fix: str | None = None, -) -> subprocess.CompletedProcess[str]: - return run_command( - [ - "ssh", - "-o", - "BatchMode=yes", - "-o", - "ConnectTimeout=10", - f"{user}@{ip}", - remote_command, - ], - check=check, - next_fix=next_fix or "Check SSH reachability and authentication before retrying.", - ) - - -def classify_systemd_status(result: subprocess.CompletedProcess[str]) -> str: - text = f"{result.stdout}\n{result.stderr}".lower() - if "active (running)" in text or "active (exited)" in text: - return "active" - if "failed" in text: - return "failed" - if "inactive" in text: - return "inactive" - return "unknown" - - -def infer_verify_failures( - service_results: dict[str, subprocess.CompletedProcess[str]], - tailscale_status: subprocess.CompletedProcess[str], -) -> list[str]: - messages: list[str] = [] - combined = "\n".join( - (result.stdout or "") + "\n" + (result.stderr or "") - for result in [*service_results.values(), tailscale_status] - ).lower() - - if any(path in combined for path in ["openbao-approle-role-id", "openbao-approle-secret-id", "no such file"]): - messages.append("Missing AppRole files on the host. Check /var/lib/nodeiwest/openbao-approle-role-id and ...secret-id.") - if any(fragment in combined for fragment in ["invalid secret id", "permission denied", "approle", "failed to authenticate"]): - messages.append("OpenBao AppRole authentication failed. Re-check the role, secret_id, namespace, and auth mount.") - if any(fragment in combined for fragment in ["CLIENT_SECRET", "timed out waiting for rendered tailscale auth key", "no data", "secret path"]): - messages.append("OpenBao rendered no Tailscale auth key. Check the secret path, KV mount path, and CLIENT_SECRET field.") - if tailscale_status.returncode != 0 or "logged out" in (tailscale_status.stdout or "").lower(): - messages.append("Tailscale autoconnect is blocked. Check tailscaled-autoconnect, the rendered auth key, and outbound access to Tailscale.") - - deduped: list[str] = [] - for message in messages: - if message not in deduped: - deduped.append(message) - return deduped - - -def summarize_text(text: str, lines: int) -> str: - cleaned = [line.rstrip() for line in text.splitlines() if line.strip()] - return "\n".join(cleaned[:lines]) - - -def resolve_path(repo_root: Path, value: str) -> Path: - path = Path(value) - return path if path.is_absolute() else (repo_root / path) - - -def parse_on_off(value: str | None, default: bool) -> bool: - if value is None: - return default - return value == "on" - - -def confirm(prompt: str) -> bool: - answer = input(prompt).strip().lower() - return answer in {"y", "yes"} - - -def extract_single_match( - text: str, - pattern: str, - path: Path, - label: str, - *, - flags: int = 0, -) -> str: - match = re.search(pattern, text, flags) - if not match: - raise NodeiwestError(f"Could not parse {label} from {path}; manual intervention is required.") - return match.group(1) - - -def extract_optional_match(text: str, pattern: str, *, flags: int = re.S) -> str | None: - match = re.search(pattern, text, flags) - return match.group(1) if match else None - - -def extract_nix_string_list(text: str, pattern: str, path: Path) -> list[str]: - match = re.search(pattern, text, re.S) - if not match: - raise NodeiwestError(f"Could not parse nodeiwest.ssh.userCAPublicKeys from {path}.") - values = re.findall(r'"((?:[^"\\]|\\.)*)"', match.group(1)) - return [value.replace('\\"', '"').replace("\\\\", "\\") for value in values] - - -def run_command( - command: list[str], - *, - cwd: Path | None = None, - env: dict[str, str] | None = None, - check: bool = True, - next_fix: str | None = None, -) -> subprocess.CompletedProcess[str]: - merged_env = os.environ.copy() - if env: - merged_env.update(env) - result = subprocess.run( - command, - cwd=str(cwd) if cwd else None, - env=merged_env, - text=True, - capture_output=True, - ) - if check and result.returncode != 0: - raise NodeiwestError(format_command_failure(command, result, next_fix)) - return result - - -def stream_command( - command: list[str], - *, - cwd: Path | None = None, - env: dict[str, str] | None = None, - next_fix: str | None = None, - activity_label: str | None = None, -) -> None: - merged_env = os.environ.copy() - if env: - merged_env.update(env) - indicator = BottomActivityIndicator(activity_label) if activity_label else None - process = subprocess.Popen( - command, - cwd=str(cwd) if cwd else None, - env=merged_env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - if process.stdout is None: - raise NodeiwestError(f"Failed to open output stream for command: {shlex.join(command)}") - - if indicator is not None: - indicator.start() - - stdout_fd = process.stdout.fileno() - os.set_blocking(stdout_fd, False) - - try: - while True: - if indicator is not None: - indicator.render() - - ready, _, _ = select.select([stdout_fd], [], [], 0.1) - if ready: - chunk = read_process_chunk(stdout_fd) - if chunk: - write_output_chunk(chunk) - if indicator is not None: - indicator.render(force=True) - continue - break - - if process.poll() is not None: - chunk = read_process_chunk(stdout_fd) - if chunk: - write_output_chunk(chunk) - continue - break - finally: - process.stdout.close() - if indicator is not None: - indicator.stop() - - return_code = process.wait() - if return_code != 0: - raise NodeiwestError( - f"Command failed: {shlex.join(command)}\nExit code: {return_code}\n" - + (f"Next likely fix: {next_fix}" if next_fix else "") - ) - - -class BottomActivityIndicator: - def __init__(self, label: str, stream: Any | None = None) -> None: - self.label = label - self.stream = stream or choose_activity_stream() - self.enabled = bool(self.stream and supports_ansi_status(self.stream)) - self.rows = 0 - self.frame_index = 0 - self.last_render_at = 0.0 - - def start(self) -> None: - if not self.enabled: - return - self.rows = shutil.get_terminal_size(fallback=(80, 24)).lines - if self.rows < 2: - self.enabled = False - return - self.stream.write("\033[?25l") - self.stream.write(f"\033[1;{self.rows - 1}r") - self.stream.flush() - self.render(force=True) - - def render(self, *, force: bool = False) -> None: - if not self.enabled: - return - now = time.monotonic() - if not force and (now - self.last_render_at) < 0.12: - return - - rows = shutil.get_terminal_size(fallback=(80, 24)).lines - if rows != self.rows and rows >= 2: - self.rows = rows - self.stream.write(f"\033[1;{self.rows - 1}r") - - frame = format_activity_frame(self.label, ACTIVITY_FRAMES[self.frame_index]) - self.frame_index = (self.frame_index + 1) % len(ACTIVITY_FRAMES) - self.stream.write("\0337") - self.stream.write(f"\033[{self.rows};1H\033[2K{frame}") - self.stream.write("\0338") - self.stream.flush() - self.last_render_at = now - - def stop(self) -> None: - if not self.enabled: - return - self.stream.write("\0337") - self.stream.write(f"\033[{self.rows};1H\033[2K") - self.stream.write("\0338") - self.stream.write("\033[r") - self.stream.write("\033[?25h") - self.stream.flush() - - -def choose_activity_stream() -> Any | None: - if getattr(sys.stderr, "isatty", lambda: False)(): - return sys.stderr - if getattr(sys.stdout, "isatty", lambda: False)(): - return sys.stdout - return None - - -def supports_ansi_status(stream: Any) -> bool: - return bool(getattr(stream, "isatty", lambda: False)() and os.environ.get("TERM", "") not in {"", "dumb"}) - - -def format_activity_frame(label: str, active_index: int) -> str: - blocks = [] - for index in range(4): - if index == active_index: - blocks.append("\033[38;5;220mâ–ˆ\033[0m") - else: - blocks.append("\033[38;5;208mâ–ˆ\033[0m") - return f"{''.join(blocks)} \033[1;37m{label}\033[0m" - - -def read_process_chunk(fd: int) -> bytes: - try: - return os.read(fd, 4096) - except BlockingIOError: - return b"" - - -def write_output_chunk(chunk: bytes) -> None: - if hasattr(sys.stdout, "buffer"): - sys.stdout.buffer.write(chunk) - sys.stdout.buffer.flush() - return - sys.stdout.write(chunk.decode(errors="replace")) - sys.stdout.flush() - - -def format_command_failure( - command: list[str], - result: subprocess.CompletedProcess[str], - next_fix: str | None, -) -> str: - pieces = [ - f"Command failed: {shlex.join(command)}", - f"Exit code: {result.returncode}", - ] - stdout = summarize_text(result.stdout or "", 20) - stderr = summarize_text(result.stderr or "", 20) - if stdout: - pieces.append(f"stdout:\n{stdout}") - if stderr: - pieces.append(f"stderr:\n{stderr}") - if next_fix: - pieces.append(f"Next likely fix: {next_fix}") - return "\n".join(pieces) - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/pkgs/helpers/default.nix b/pkgs/helpers/default.nix deleted file mode 100644 index 4476ed9..0000000 --- a/pkgs/helpers/default.nix +++ /dev/null @@ -1,32 +0,0 @@ -{ - lib, - writeShellApplication, - python3, - openbao, - openssh, - gitMinimal, - nix, -}: -writeShellApplication { - name = "nodeiwest"; - - runtimeInputs = [ - python3 - openbao - openssh - gitMinimal - nix - ]; - - text = '' - export NODEIWEST_HELPER_TEMPLATES=${./templates} - exec ${python3}/bin/python ${./cli.py} "$@" - ''; - - meta = with lib; { - description = "Safe VPS provisioning helper for the NodeiWest NixOS flake"; - license = licenses.mit; - mainProgram = "nodeiwest"; - platforms = platforms.unix; - }; -} diff --git a/pkgs/helpers/templates/configuration.nix.tmpl b/pkgs/helpers/templates/configuration.nix.tmpl deleted file mode 100644 index 3b5b2b9..0000000 --- a/pkgs/helpers/templates/configuration.nix.tmpl +++ /dev/null @@ -1,23 +0,0 @@ -{ lib, ... }: -{ - # Generated by nodeiwest host init. - imports = [ - ./disko.nix - ./hardware-configuration.nix - ]; - - networking.hostName = "@@HOST_NAME@@"; - networking.useDHCP = lib.mkDefault true; - - time.timeZone = "@@TIMEZONE@@"; - -@@BOOT_LOADER_BLOCK@@ - - nodeiwest.ssh.userCAPublicKeys = @@SSH_CA_KEYS@@; - - nodeiwest.tailscale.openbao = { - enable = @@TAILSCALE_OPENBAO_ENABLE@@; - }; - - system.stateVersion = "@@STATE_VERSION@@"; -} diff --git a/pkgs/helpers/templates/disko-bios-ext4.nix b/pkgs/helpers/templates/disko-bios-ext4.nix deleted file mode 100644 index b8ac109..0000000 --- a/pkgs/helpers/templates/disko-bios-ext4.nix +++ /dev/null @@ -1,41 +0,0 @@ -{ - lib, - ... -}: -{ - # Generated by nodeiwest host init. - # Replace the disk only if the provider exposes a different primary device. - disko.devices = { - disk.main = { - type = "disk"; - device = lib.mkDefault "@@DISK_DEVICE@@"; - content = { - type = "gpt"; - partitions = { - BIOS = { - priority = 1; - name = "BIOS"; - start = "1MiB"; - end = "2MiB"; - type = "EF02"; - }; - swap = { - size = "@@SWAP_SIZE@@"; - content = { - type = "swap"; - resumeDevice = true; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; -} diff --git a/pkgs/helpers/templates/disko-uefi-ext4.nix b/pkgs/helpers/templates/disko-uefi-ext4.nix deleted file mode 100644 index 3677816..0000000 --- a/pkgs/helpers/templates/disko-uefi-ext4.nix +++ /dev/null @@ -1,47 +0,0 @@ -{ - lib, - ... -}: -{ - # Generated by nodeiwest host init. - # Replace the disk only if the provider exposes a different primary device. - disko.devices = { - disk.main = { - type = "disk"; - device = lib.mkDefault "@@DISK_DEVICE@@"; - content = { - type = "gpt"; - partitions = { - ESP = { - priority = 1; - name = "ESP"; - start = "1MiB"; - end = "512MiB"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - swap = { - size = "@@SWAP_SIZE@@"; - content = { - type = "swap"; - resumeDevice = true; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; -} diff --git a/pkgs/helpers/templates/hardware-configuration.placeholder.nix b/pkgs/helpers/templates/hardware-configuration.placeholder.nix deleted file mode 100644 index 3f6bc7b..0000000 --- a/pkgs/helpers/templates/hardware-configuration.placeholder.nix +++ /dev/null @@ -1,5 +0,0 @@ -{ ... }: -{ - # Placeholder generated by nodeiwest host init. - # nixos-anywhere will replace this with the generated hardware config. -} diff --git a/pkgs/helpers/templates/openbao-policy.hcl.tmpl b/pkgs/helpers/templates/openbao-policy.hcl.tmpl deleted file mode 100644 index 5466886..0000000 --- a/pkgs/helpers/templates/openbao-policy.hcl.tmpl +++ /dev/null @@ -1,3 +0,0 @@ -path "@@POLICY_PATH@@" { - capabilities = ["read"] -} diff --git a/pkgs/helpers/tests/test_cli.py b/pkgs/helpers/tests/test_cli.py deleted file mode 100644 index b33609c..0000000 --- a/pkgs/helpers/tests/test_cli.py +++ /dev/null @@ -1,114 +0,0 @@ -from __future__ import annotations - -import importlib.util -import sys -import unittest -from unittest import mock -from pathlib import Path - - -REPO_ROOT = Path(__file__).resolve().parents[3] -CLI_PATH = REPO_ROOT / "pkgs" / "helpers" / "cli.py" - -spec = importlib.util.spec_from_file_location("nodeiwest_cli", CLI_PATH) -cli = importlib.util.module_from_spec(spec) -assert spec.loader is not None -sys.modules[spec.name] = cli -spec.loader.exec_module(cli) - - -class HelperCliTests(unittest.TestCase): - def test_format_activity_frame_highlights_one_block_and_keeps_label(self) -> None: - frame = cli.format_activity_frame("Executing install", 2) - - self.assertIn("Executing install", frame) - self.assertEqual(frame.count("â–ˆ"), 4) - self.assertEqual(frame.count("[38;5;220m"), 1) - self.assertEqual(frame.count("[38;5;208m"), 3) - - def test_supports_ansi_status_requires_tty_and_real_term(self) -> None: - tty_stream = mock.Mock() - tty_stream.isatty.return_value = True - dumb_stream = mock.Mock() - dumb_stream.isatty.return_value = True - pipe_stream = mock.Mock() - pipe_stream.isatty.return_value = False - - with mock.patch.dict(cli.os.environ, {"TERM": "xterm-256color"}, clear=False): - self.assertTrue(cli.supports_ansi_status(tty_stream)) - self.assertFalse(cli.supports_ansi_status(pipe_stream)) - - with mock.patch.dict(cli.os.environ, {"TERM": "dumb"}, clear=False): - self.assertFalse(cli.supports_ansi_status(dumb_stream)) - - def test_disk_from_device_supports_sd_and_nvme(self) -> None: - self.assertEqual(cli.disk_from_device("/dev/sda2"), "/dev/sda") - self.assertEqual(cli.disk_from_device("/dev/nvme0n1p2"), "/dev/nvme0n1") - - def test_lookup_colmena_target_host_reads_existing_inventory(self) -> None: - flake_text = (REPO_ROOT / "flake.nix").read_text() - self.assertEqual(cli.lookup_colmena_target_host(flake_text, "vps1"), "100.101.167.118") - - def test_parse_existing_vps1_configuration(self) -> None: - configuration = cli.parse_existing_configuration(REPO_ROOT / "hosts" / "vps1" / "configuration.nix") - self.assertEqual(configuration.host_name, "vps1") - self.assertEqual(configuration.boot_mode, "uefi") - self.assertTrue(configuration.tailscale_openbao) - self.assertEqual(configuration.state_version, "25.05") - self.assertTrue(configuration.user_ca_public_keys) - - def test_parse_existing_vps1_disko(self) -> None: - disko = cli.parse_existing_disko(REPO_ROOT / "hosts" / "vps1" / "disko.nix") - self.assertEqual(disko.disk_device, "/dev/sda") - self.assertEqual(disko.boot_mode, "uefi") - self.assertEqual(disko.swap_size, "4G") - - def test_render_bios_disko_uses_bios_partition(self) -> None: - rendered = cli.render_disko(boot_mode="bios", disk_device="/dev/vda", swap_size="8G") - self.assertIn('type = "EF02";', rendered) - self.assertIn('device = lib.mkDefault "/dev/vda";', rendered) - self.assertIn('size = "8G";', rendered) - - def test_parse_lsblk_output_reads_pairs_without_smearing_columns(self) -> None: - output = ( - 'NAME="sda" SIZE="11G" TYPE="disk" MODEL="QEMU HARDDISK" FSTYPE="" PTTYPE="gpt" MOUNTPOINTS=""\n' - 'NAME="sda1" SIZE="512M" TYPE="part" MODEL="" FSTYPE="vfat" PTTYPE="" MOUNTPOINTS="/boot"\n' - ) - rows = cli.parse_lsblk_output(output) - - self.assertEqual(rows[0]["NAME"], "sda") - self.assertEqual(rows[0]["SIZE"], "11G") - self.assertEqual(rows[0]["MODEL"], "QEMU HARDDISK") - self.assertEqual(rows[1]["NAME"], "sda1") - self.assertEqual(rows[1]["MOUNTPOINTS"], "/boot") - - def test_normalize_swap_size_accepts_gib_suffix(self) -> None: - self.assertEqual(cli.normalize_swap_size("4GiB"), "4G") - self.assertEqual(cli.normalize_swap_size("512MiB"), "512M") - self.assertEqual(cli.normalize_swap_size("8G"), "8G") - - def test_bao_kv_get_uses_explicit_kv_mount(self) -> None: - completed = mock.Mock() - completed.stdout = '{"data": {"data": {"CLIENT_ID": "x"}}}' - with mock.patch.object(cli, "run_command", return_value=completed) as run_command: - data = cli.bao_kv_get("it", "kv", "tailscale") - - self.assertEqual(data["data"]["data"]["CLIENT_ID"], "x") - command = run_command.call_args.args[0] - self.assertEqual(command, ["bao", "kv", "get", "-mount=kv", "-format=json", "tailscale"]) - self.assertEqual(run_command.call_args.kwargs["env"], {"BAO_NAMESPACE": "it"}) - - def test_derive_openbao_policy_uses_explicit_kv_mount(self) -> None: - completed = mock.Mock() - completed.stdout = 'path "kv/data/tailscale" { capabilities = ["read"] }\n' - with mock.patch.object(cli, "run_command", return_value=completed) as run_command: - policy = cli.derive_openbao_policy("it", "kv", "tailscale") - - self.assertIn('path "kv/data/tailscale"', policy) - command = run_command.call_args.args[0] - self.assertEqual(command, ["bao", "kv", "get", "-mount=kv", "-output-policy", "tailscale"]) - self.assertEqual(run_command.call_args.kwargs["env"], {"BAO_NAMESPACE": "it"}) - - -if __name__ == "__main__": - unittest.main()