Slice 2: agents and control plane run under systemd

- systemd/sdp-control-plane.service: plain host process on 186,
  listens on :3452, data dir ~/SDP/data. MemoryMax=512M,
  Restart=always, ReadWritePaths scoped to the data dir.
- systemd/sdp-agent-micro.service: plain host process on 92,
  default SDP_CP_URL=ws://172.18.139.186:3452/ws/agent. Operator
  can drop /etc/default/sdp-agent-micro to override. Depends on
  docker.service so that dockerd is up before the agent starts.
- systemd/sdp-agent-gateway.service: plain host process on 186,
  default SDP_CP_URL=ws://127.0.0.1:3452/ws/agent (loopback since
  both live on the same VM). Same env-file override pattern.
- All three use Type=simple, Restart=always, RestartSec=2s. The
  agents already reconnect on transient network drops, so
  restart-on-crash is the right policy.
- The agents talk to the host dockerd via /var/run/docker.sock to
  spawn the actual service containers (sdp-<repo>). Service
  containers are managed by docker, not systemd — only the
  long-running agents and the control plane are under systemd.
- scripts/deploy.sh: now a one-shot — scp's binaries, dashboard,
  and unit files; then, per service, systemctl stop, install the
  unit file, daemon-reload, enable, and restart, in the right order
  (control plane first on 186 so the gateway agent has something to
  dial). Prints status + last 10 journal lines per service so the
  user can see it came up.
- AGENTS.md, README.md: layout tree updated, deploy section
  rewritten, the systemd units documented alongside the agents
  and control plane.
This commit is contained in:
Achmad
2026-06-24 04:54:28 +00:00
parent f12d4f0b12
commit 574e6d207b
6 changed files with 144 additions and 14 deletions
+46 -7
View File
@@ -1,8 +1,10 @@
#!/usr/bin/env bash
# Push the built binaries and dashboard to both SDP VMs.
# Push the built binaries, dashboard, and systemd unit files to both
# SDP VMs, then enable + start the services.
#
# 92 (micro): ~/SDP/agent-micro
# 186 (gateway): ~/SDP/control-plane, ~/SDP/agent-gateway, ~/SDP/dashboard
# 92 (micro): ~/SDP/agent-micro, sdp-agent-micro.service
# 186 (gateway): ~/SDP/{control-plane,agent-gateway,dashboard},
# sdp-control-plane.service, sdp-agent-gateway.service
#
# Nginx is configured by hand on 186 (out of scope for this script).
# Run scripts/build.sh first.
@@ -28,15 +30,40 @@ SSH_186="sshpass -p $PASS_186 ssh -o StrictHostKeyChecking=no -o UserKnownHostsF
# scp command prefix for host 186. The password is handed to sshpass on the
# command line, host-key checking is disabled, known hosts are discarded
# (/dev/null), and ssh logging is limited to errors.
# NOTE(review): `sshpass -p` puts the password in the process argv, and the
# unquoted $PASS_186 will word-split if it contains whitespace — consider
# `sshpass -e` (SSHPASS env var) or key-based auth; confirm before changing.
SCP_186="sshpass -p $PASS_186 scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
# ponytail: Wipe-and-replace. The deploys are stateful on the VM only via
# SQLite + .log files in ~/SDP/data — we keep that. Binaries and the
# dashboard are replaced cleanly.
# SQLite + .log files in ~/SDP/data — we keep that. Binaries, dashboard,
# and unit files in /etc/systemd/system are replaced cleanly.
# Remote command run on each VM before copying: deletes ~/SDP/bin and
# ~/SDP/dashboard and recreates them empty. ~/SDP/data is not touched.
REMOTE_RESET='rm -rf ~/SDP/bin ~/SDP/dashboard && mkdir -p ~/SDP/bin ~/SDP/dashboard'
# install_unit <ssh-prefix-with-host> <unit-name>
#   Installs a systemd unit on a remote host. The unit file must already
#   have been copied to /tmp/<unit-name> on that host.
#
#   $1  command prefix that runs its last argument on the target host,
#       e.g. "sshpass -p ... ssh -o ... administrator@host". It is expanded
#       unquoted on purpose so it word-splits into command + arguments.
#   $2  unit file name, e.g. sdp-agent-micro.service
#
#   Remote steps, each sent as its own invocation of the prefix, in order:
#     1. stop the currently running unit (errors ignored — it may not exist)
#     2. install /tmp/<unit> into /etc/systemd/system as root:root, mode 644
#     3. systemctl daemon-reload so systemd sees the new file
#     4. systemctl enable so the unit starts at boot
#   The unit is NOT started here; the caller restarts it afterwards.
install_unit() {
  local remote="$1"
  local svc="$2"
  local -a steps=(
    "sudo systemctl stop $svc 2>/dev/null || true"
    "sudo install -m 644 -o root -g root /tmp/$svc /etc/systemd/system/$svc"
    "sudo systemctl daemon-reload"
    "sudo systemctl enable $svc"
  )
  local step
  for step in "${steps[@]}"; do
    # Intentionally unquoted: the prefix is a whole command line in one string.
    $remote "$step"
  done
}
# status_block <ssh-prefix> <unit>
#   Prints a short health summary for a unit on a remote host: a " status:"
#   heading followed by the first 3 lines of `systemctl status`, then a
#   " journal (last 10):" heading followed by the last 10 journal lines.
#
#   $1  command prefix that runs its last argument on the target host
#       (expanded unquoted on purpose so it word-splits into a command line)
#   $2  unit name, with or without the .service suffix
status_block() {
  local remote="$1" svc="$2"
  local show_status="echo ' status:'; sudo systemctl --no-pager --full status $svc | head -3"
  local show_journal="echo ' journal (last 10):'; sudo journalctl -u $svc -n 10 --no-pager"
  $remote "$show_status"
  $remote "$show_journal"
}
# ---- Host 92 (micro): ship the agent binary + unit file, then (re)start ----
echo "==> 92: $HOST_92"
# Wipe and recreate ~/SDP/bin and ~/SDP/dashboard on the host (REMOTE_RESET).
$SSH_92 "$HOST_92" "$REMOTE_RESET"
# Copy the agent binary into the freshly created bin directory.
$SCP_92 "$REPO_ROOT/bin/agent-micro" "$HOST_92:~/SDP/bin/agent-micro"
# Stage the unit file in /tmp; install_unit picks it up from /tmp/<unit>.
$SCP_92 "$REPO_ROOT/systemd/sdp-agent-micro.service" "$HOST_92:/tmp/sdp-agent-micro.service"
# Make sure the copied binary is executable.
$SSH_92 "$HOST_92" "chmod +x ~/SDP/bin/agent-micro"
echo " agent-micro copied"
# install_unit stops the old unit, installs the new file, reloads systemd and
# enables the unit; it does not start it, hence the explicit restart below.
install_unit "$SSH_92 $HOST_92" sdp-agent-micro.service
$SSH_92 "$HOST_92" "sudo systemctl restart sdp-agent-micro"
# Show the first status lines and the last 10 journal lines for the operator.
status_block "$SSH_92 $HOST_92" sdp-agent-micro
echo " agent-micro installed"
echo
echo "==> 186: $HOST_186"
@@ -44,8 +71,20 @@ $SSH_186 "$HOST_186" "$REMOTE_RESET"
# ---- Host 186 (gateway): ship both binaries, the dashboard and unit files ----
# Copy the control-plane and gateway-agent binaries into ~/SDP/bin.
$SCP_186 "$REPO_ROOT/bin/control-plane" "$HOST_186:~/SDP/bin/control-plane"
$SCP_186 "$REPO_ROOT/bin/agent-gateway" "$HOST_186:~/SDP/bin/agent-gateway"
# Recursive copy of the dashboard build; the trailing "/." copies the
# contents of dashboard/out rather than the directory itself.
$SCP_186 -r "$REPO_ROOT/dashboard/out/." "$HOST_186:~/SDP/dashboard/"
# Stage both unit files in /tmp; install_unit picks them up from /tmp/<unit>.
$SCP_186 "$REPO_ROOT/systemd/sdp-control-plane.service" "$HOST_186:/tmp/sdp-control-plane.service"
$SCP_186 "$REPO_ROOT/systemd/sdp-agent-gateway.service" "$HOST_186:/tmp/sdp-agent-gateway.service"
# Make sure both copied binaries are executable.
$SSH_186 "$HOST_186" "chmod +x ~/SDP/bin/control-plane ~/SDP/bin/agent-gateway"
echo " control-plane, agent-gateway, dashboard copied"
# Control plane first so the gateway agent's -cp URL has something to dial.
# install_unit only installs and enables; each unit is started by the
# explicit restart that follows it.
install_unit "$SSH_186 $HOST_186" sdp-control-plane.service
$SSH_186 "$HOST_186" "sudo systemctl restart sdp-control-plane"
status_block "$SSH_186 $HOST_186" sdp-control-plane
# Gateway agent second, once the control plane has been (re)started.
install_unit "$SSH_186 $HOST_186" sdp-agent-gateway.service
$SSH_186 "$HOST_186" "sudo systemctl restart sdp-agent-gateway"
status_block "$SSH_186 $HOST_186" sdp-agent-gateway
echo " control-plane, agent-gateway, dashboard installed"
echo
# Nginx is not touched by this script; remind the operator.
echo "done. (configure nginx by hand on 186; see AGENTS.md for the location block.)"