deploy fix bug

This commit is contained in:
ZuoZuo 2026-04-07 19:32:58 +08:00
parent 80364221f8
commit e2bfc7769b
2 changed files with 206 additions and 0 deletions

View File

@ -0,0 +1,167 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failed pipeline stages; -E lets ERR
# traps propagate into functions and subshells.
set -Eeuo pipefail

# Mandatory inputs. Each one is filled in from its command-line flag and is
# checked for emptiness once argument parsing has finished.
SERVICE="" RELEASE_ID=""
PACKAGE_URL="" SHA256_VALUE=""
HEALTH_URL="" UNIT_NAME="" DEPLOY_ROOT=""

# Tunables. A non-empty value inherited from the environment wins over the
# built-in default; the matching command-line flag overrides both.
: "${READY_TIMEOUT_SECONDS:=180}"     # how long the health check may keep polling
: "${DOWNLOAD_TIMEOUT_SECONDS:=600}"  # passed to curl --max-time for the package download
: "${POLL_INTERVAL_SECONDS:=2}"       # sleep between two health-check attempts
# Print the command-line synopsis to stdout.
# The heredoc is quoted, so nothing inside it is expanded; keep it in sync
# with the flags handled by the argument parser.
usage() {
  cat <<'EOF'
Usage:
deploy.sh \
--service <service> \
--release-id <release_id> \
--package-url <url> \
--sha256 <sha256> \
--health-url <url> \
--unit-name <systemd unit> \
--deploy-root <path> \
[--ready-timeout-seconds <seconds>] \
[--download-timeout-seconds <seconds>] \
[--poll-interval-seconds <seconds>]

Optional flags default to the READY_TIMEOUT_SECONDS, DOWNLOAD_TIMEOUT_SECONDS
and POLL_INTERVAL_SECONDS environment variables, or to 180, 600 and 2.
-h, --help  show this help and exit
EOF
}
# Write one timestamped line to stdout.
# All arguments are joined with spaces into a single message.
log() {
  local message="$*"
  printf '[%s] %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S %z')" \
    "${message}"
}
# Exit the script with status 1 unless the named command can be resolved.
#   $1 - command name to look up
require_cmd() {
  local wanted="$1"
  if command -v "${wanted}" >/dev/null 2>&1; then
    return 0
  fi
  echo "missing command: ${wanted}" >&2
  exit 1
}
# Guard for options that take a value: when the option is the last word on
# the command line, reading "$2" would trip `set -u` with a bare
# "unbound variable" message, so report the real problem instead.
#   $1 - option name being parsed
#   $2 - number of arguments still left (the caller's "$#")
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "missing value for $1" >&2
    usage
    exit 1
  fi
}

# Consume the command line two words at a time (flag + value).
while [[ $# -gt 0 ]]; do
  case "$1" in
    --service) require_value "$1" "$#"; SERVICE="$2"; shift 2 ;;
    --release-id) require_value "$1" "$#"; RELEASE_ID="$2"; shift 2 ;;
    --package-url) require_value "$1" "$#"; PACKAGE_URL="$2"; shift 2 ;;
    --sha256) require_value "$1" "$#"; SHA256_VALUE="$2"; shift 2 ;;
    --health-url) require_value "$1" "$#"; HEALTH_URL="$2"; shift 2 ;;
    --unit-name) require_value "$1" "$#"; UNIT_NAME="$2"; shift 2 ;;
    --deploy-root) require_value "$1" "$#"; DEPLOY_ROOT="$2"; shift 2 ;;
    --ready-timeout-seconds) require_value "$1" "$#"; READY_TIMEOUT_SECONDS="$2"; shift 2 ;;
    --download-timeout-seconds) require_value "$1" "$#"; DOWNLOAD_TIMEOUT_SECONDS="$2"; shift 2 ;;
    --poll-interval-seconds) require_value "$1" "$#"; POLL_INTERVAL_SECONDS="$2"; shift 2 ;;
    -h|--help) usage; exit 0 ;;
    *) echo "unknown arg: $1" >&2; usage; exit 1 ;;
  esac
done

# Every deployment parameter is mandatory; an empty one means it was never
# supplied (or was supplied as an empty string).
[[ -n "$SERVICE" && -n "$RELEASE_ID" && -n "$PACKAGE_URL" && -n "$SHA256_VALUE" && -n "$HEALTH_URL" && -n "$UNIT_NAME" && -n "$DEPLOY_ROOT" ]] || {
  usage
  exit 1
}

# READY_TIMEOUT_SECONDS is later evaluated inside $(( )), so accept only a
# plain non-negative decimal integer (no sign, no leading zeros that bash
# would read as octal, no expressions).
ready_timeout_pattern='^(0|[1-9][0-9]*)$'
if [[ ! "${READY_TIMEOUT_SECONDS}" =~ ${ready_timeout_pattern} ]]; then
  echo "invalid ready timeout (want a non-negative integer): ${READY_TIMEOUT_SECONDS}" >&2
  exit 1
fi
# External tools the script shells out to; fail before touching anything
# when one of them is unavailable.
require_cmd curl
require_cmd sha256sum
require_cmd tar
require_cmd systemctl
require_cmd mktemp

# Layout under DEPLOY_ROOT: releases/<release id>/ holds each unpacked
# release and the "current" symlink selects the active one.
release_dir="${DEPLOY_ROOT}/releases/${RELEASE_ID}"
current_link="${DEPLOY_ROOT}/current"
previous_target=""

# Scratch space for the download and extraction. The directory is created
# with mktemp so that every run gets its own private copy: with a fixed path,
# two overlapping runs for the same release would share files and the EXIT
# cleanup of the first to finish would delete the other's download.
staging_parent="/tmp/chatapp-deploy/${SERVICE}"
mkdir -p "${DEPLOY_ROOT}/releases" "${staging_parent}"
# Slashes in the release id are flattened so the template stays one path component.
staging_root="$(mktemp -d "${staging_parent}/${RELEASE_ID//\//_}.XXXXXX")"
staging_package="${staging_root}/${SERVICE}.tgz"
staging_extract="${staging_root}/extract"
mkdir -p "${staging_extract}"

# Remember what "current" resolves to right now so a failed deploy can be
# rolled back; best effort, an unresolvable link leaves this empty.
if [[ -L "${current_link}" ]]; then
  previous_target="$(readlink -f "${current_link}" || true)"
fi
# Remove the staging directory (downloaded archive and extracted tree).
cleanup() {
  rm -rf "${staging_root}"
}

# Run the cleanup whenever the script exits, on success or on failure.
trap 'cleanup' EXIT
# Point "current" back at the release that was active before this deploy and
# bring the service up on it again.
#   $1 - human-readable reason, only used for logging
# Best effort: systemctl failures are tolerated and an unhealthy rollback is
# only logged, because the caller exits with an error right afterwards.
rollback() {
  local rollback_reason="$1"
  log "rollback: ${rollback_reason}"

  if [[ -z "${previous_target}" || ! -d "${previous_target}" ]]; then
    # First deploy, or the old release directory has disappeared.
    log "rollback skipped: no previous release to restore"
    return 0
  fi

  # Build the link under a temporary name and rename it into place so
  # "current" is swapped in a single step.
  ln -sfn "${previous_target}" "${current_link}.tmp"
  mv -Tf "${current_link}.tmp" "${current_link}"

  /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1 || true
  # restart, not start: when the new release came up but never turned
  # healthy, the unit is still active and a plain "start" would do nothing,
  # leaving the broken process running behind the restored symlink.
  /usr/bin/systemctl restart "${UNIT_NAME}" || true

  if ! wait_ready "rollback"; then
    log "rollback target is still not ready"
  fi
}
# Poll HEALTH_URL until it answers successfully or the timeout runs out.
#   $1 - label for the log line (which phase is waiting)
# Returns 0 as soon as one request succeeds, 1 once READY_TIMEOUT_SECONDS
# have elapsed without a success.
wait_ready() {
  local label="$1"
  local give_up_at=$(( $(date +%s) + READY_TIMEOUT_SECONDS ))

  until [[ "$(date +%s)" -ge "${give_up_at}" ]]; do
    # -f turns HTTP error statuses into a non-zero exit; each probe is
    # capped at three seconds.
    if ! curl -fsS --max-time 3 "${HEALTH_URL}" >/dev/null; then
      sleep "${POLL_INTERVAL_SECONDS}"
      continue
    fi
    log "${label}: ready check passed"
    return 0
  done

  return 1
}
# Stop the systemd unit if it is currently active.
# Returns the exit status of "systemctl stop" when a stop was attempted.
stop_service() {
  if ! /usr/bin/systemctl is-active --quiet "${UNIT_NAME}"; then
    log "${UNIT_NAME} already inactive"
    return
  fi

  log "stopping ${UNIT_NAME} with SIGTERM"
  # The unit files already set KillSignal=SIGTERM, so stop triggers graceful shutdown.
  /usr/bin/systemctl stop "${UNIT_NAME}"
}
# Start the systemd unit; the function's status is that of "systemctl start".
start_service() {
  log "starting ${UNIT_NAME}"

  # Clear any failed state left by an earlier run; errors here are ignored.
  if ! /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1; then
    :
  fi

  /usr/bin/systemctl start "${UNIT_NAME}"
}
# --- 1. Fetch and verify the release archive --------------------------------
log "download package ${PACKAGE_URL}"
curl -fL --connect-timeout 10 --max-time "${DOWNLOAD_TIMEOUT_SECONDS}" -o "${staging_package}" "${PACKAGE_URL}"
# Checksum lines are "<hash><two spaces><file>"; the two-space form is the
# canonical text-mode format that every sha256sum implementation accepts.
printf '%s  %s\n' "${SHA256_VALUE}" "${staging_package}" | sha256sum -c -

# --- 2. Unpack into the release directory -----------------------------------
if [[ ! -d "${release_dir}" ]]; then
  # Extract first and populate a sibling ".partial" directory, then rename it
  # into place. release_dir therefore only ever exists fully populated; a
  # failed extraction or copy can no longer leave a half-empty directory that
  # later runs would silently "reuse".
  tar -C "${staging_extract}" -xzf "${staging_package}"
  partial_dir="${release_dir}.partial.$$"
  rm -rf "${partial_dir}"
  mkdir -p "${partial_dir}"
  if ! cp -R "${staging_extract}/." "${partial_dir}/" || ! mv -T "${partial_dir}" "${release_dir}"; then
    rm -rf "${partial_dir}"
    echo "failed to install release into ${release_dir}" >&2
    exit 1
  fi
else
  log "release dir already exists, reuse ${release_dir}"
fi

# The unit runs <deploy root>/current/bin/<service>, so refuse to switch to a
# release that does not ship that executable.
if [[ ! -x "${release_dir}/bin/${SERVICE}" ]]; then
  echo "release binary missing: ${release_dir}/bin/${SERVICE}" >&2
  exit 1
fi

# --- 3. Switch "current" to the new release ---------------------------------
# Create the link under a temporary name and rename it over the old one.
ln -sfn "${release_dir}" "${current_link}.tmp"
mv -Tf "${current_link}.tmp" "${current_link}"

# --- 4. Restart the service and wait for it to report healthy ---------------
# Any failure from here on restores the previous release before exiting.
if ! stop_service; then
  rollback "systemd stop failed"
  echo "systemd stop failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! start_service; then
  rollback "systemd start failed"
  echo "systemd start failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! wait_ready "deploy"; then
  rollback "service failed to become ready after deploy"
  echo "service failed to become ready: ${UNIT_NAME}" >&2
  exit 1
fi

log "deploy success: ${SERVICE}@${RELEASE_ID}"

View File

@ -59,6 +59,17 @@ def should_retry_without_output_cos(exc: TencentCloudSDKException) -> bool:
return code == "ResourceNotFound.RoleNotFound" and "TAT_QCSLinkedRoleInUploadInvocation" in message
def local_remote_deploy_script() -> str:
    """Return the text of the ``remote_deploy.sh`` file shipped beside this module.

    Trailing whitespace is stripped and the result ends in exactly one
    newline.

    Raises:
        RuntimeError: If the template file does not exist, or if it contains
            nothing but whitespace.
    """
    template_path = Path(__file__).with_name("remote_deploy.sh")

    try:
        with template_path.open(encoding="utf-8") as handle:
            content = handle.read()
    except FileNotFoundError as exc:
        raise RuntimeError(f"missing remote deploy template: {template_path}") from exc

    if content.strip() == "":
        raise RuntimeError(f"remote deploy template is empty: {template_path}")

    return f"{content.rstrip()}\n"
class CloudOperator:
def __init__(self, config: dict[str, Any], service_name: str, instance_id: str, release_id: str | None) -> None:
self.config = config
@ -105,6 +116,7 @@ class CloudOperator:
def deploy(self) -> None:
self.ensure_agent_online()
self.ensure_remote_deploy_script_installed()
self.ensure_systemd_unit_installed()
package_url, sha256 = self.resolve_release_package()
@ -215,6 +227,33 @@ class CloudOperator:
task = self.wait_for_tat(invocation_id)
self.ensure_task_success(task, f"ensure systemd unit {unit_name}")
def ensure_remote_deploy_script_installed(self) -> None:
    """Make sure the deploy script on the target matches the local template.

    The local template is base64-encoded and embedded in a shell command. On
    the target, that command decodes it into a temporary file with
    ``python3``, compares it to the configured ``tat.script_path`` with
    ``cmp`` and runs ``install -m 0755`` only when the destination is missing
    or differs. The command is submitted with ``run_tat_command``, awaited
    with ``wait_for_tat`` and checked with ``ensure_task_success``.

    Raises:
        RuntimeError: Propagated from ``local_remote_deploy_script`` when the
            local template is missing or empty.
        Exception: Whatever ``ensure_task_success`` raises for a failed task
            (not visible from this method).
    """
    # script_path names a file on the remote Linux host, so its parent must be
    # derived with POSIX rules no matter which OS runs this tool; the
    # host-flavoured Path would emit backslashes on Windows.
    from pathlib import PurePosixPath

    script_path = str(self.config["tat"]["script_path"])
    script_dir = str(PurePosixPath(script_path).parent)
    script_content_b64 = base64.b64encode(local_remote_deploy_script().encode("utf-8")).decode("ascii")
    # Every line of the template below carries the same indent so that
    # textwrap.dedent leaves the shell text (and the embedded Python heredoc,
    # whose terminator must sit at column 0) flush left.
    command = textwrap.dedent(
        f"""\
        set -Eeuo pipefail
        script_path={shlex.quote(script_path)}
        script_dir={shlex.quote(script_dir)}
        tmp_file="$(mktemp)"
        trap 'rm -f "$tmp_file"' EXIT
        mkdir -p "$script_dir"
        TMP_FILE="$tmp_file" python3 - <<'PY'
        import base64
        import os
        from pathlib import Path
        Path(os.environ["TMP_FILE"]).write_bytes(base64.b64decode("{script_content_b64}"))
        PY
        if [[ ! -f "$script_path" ]] || ! cmp -s "$tmp_file" "$script_path"; then
        install -m 0755 "$tmp_file" "$script_path"
        fi
        """
    )
    invocation_id = self.run_tat_command(command, f"ensure-deploy-script-{self.service_name}")
    task = self.wait_for_tat(invocation_id)
    self.ensure_task_success(task, f"ensure deploy script {script_path}")
def systemd_unit_content(self) -> str:
deploy_root = str(self.service_cfg["deploy_root"]).rstrip("/")
binary_path = f"{deploy_root}/current/bin/{self.service_name}"