deploy fix bug
This commit is contained in:
parent
80364221f8
commit
e2bfc7769b
167
ops-scripts/remote_deploy.sh
Normal file
167
ops-scripts/remote_deploy.sh
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Remote deploy script: downloads a release package, verifies its SHA-256,
# unpacks it under <deploy-root>/releases/<release-id>, repoints
# <deploy-root>/current at it, restarts the systemd unit and polls the health
# URL, switching back to the previous release when the new one fails.

set -Eeuo pipefail

# Mandatory settings; each one is filled in from its command-line flag.
SERVICE=""
RELEASE_ID=""
PACKAGE_URL=""
SHA256_VALUE=""
HEALTH_URL=""
UNIT_NAME=""
DEPLOY_ROOT=""

# Tunables: an environment variable of the same name overrides the default,
# and the matching --*-seconds flag overrides both.
READY_TIMEOUT_SECONDS="${READY_TIMEOUT_SECONDS:-180}"
DOWNLOAD_TIMEOUT_SECONDS="${DOWNLOAD_TIMEOUT_SECONDS:-600}"
POLL_INTERVAL_SECONDS="${POLL_INTERVAL_SECONDS:-2}"
|
||||||
|
|
||||||
|
#######################################
# Print the command-line synopsis.
# Outputs: writes the usage text to stdout
#######################################
usage() {
  cat <<'EOF'
Usage:
  deploy.sh \
    --service <service> \
    --release-id <release_id> \
    --package-url <url> \
    --sha256 <sha256> \
    --health-url <url> \
    --unit-name <systemd unit> \
    --deploy-root <path> \
    [--ready-timeout-seconds <seconds>] \
    [--download-timeout-seconds <seconds>] \
    [--poll-interval-seconds <seconds>]
EOF
}
|
||||||
|
|
||||||
|
#######################################
# Print a timestamped message.
# Arguments: all arguments are joined into the message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS +ZZZZ] <message>" on stdout
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S %z')" "$*"
}
|
||||||
|
|
||||||
|
#######################################
# Abort the script unless the named command can be resolved.
# Arguments: $1 - command name to look up
# Outputs:   "missing command: <name>" on stderr when it is absent
# Returns:   0 when found; exits the shell with status 1 otherwise
#######################################
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "missing command: $1" >&2
    exit 1
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Parse command-line flags into the global settings.
# Globals:   SERVICE, RELEASE_ID, PACKAGE_URL, SHA256_VALUE, HEALTH_URL,
#            UNIT_NAME, DEPLOY_ROOT, READY_TIMEOUT_SECONDS,
#            DOWNLOAD_TIMEOUT_SECONDS, POLL_INTERVAL_SECONDS (written)
# Arguments: the script's argument list
# Outputs:   usage text on -h/--help; a diagnostic on stderr plus usage text
#            for an unknown flag or a flag that is missing its value
# Returns:   0 on success; exits 0 on help and 1 on any parse error
#######################################
parse_args() {
  local flag
  while (( $# > 0 )); do
    flag="$1"
    case "${flag}" in
      -h|--help)
        usage
        exit 0
        ;;
      --service|--release-id|--package-url|--sha256|--health-url|--unit-name|--deploy-root|--ready-timeout-seconds|--download-timeout-seconds|--poll-interval-seconds)
        # Without this check a trailing flag would die on "$2" with an
        # opaque "unbound variable" message under `set -u`.
        if (( $# < 2 )); then
          echo "missing value for ${flag}" >&2
          usage
          exit 1
        fi
        ;;
      *)
        echo "unknown arg: ${flag}" >&2
        usage
        exit 1
        ;;
    esac

    case "${flag}" in
      --service) SERVICE="$2" ;;
      --release-id) RELEASE_ID="$2" ;;
      --package-url) PACKAGE_URL="$2" ;;
      --sha256) SHA256_VALUE="$2" ;;
      --health-url) HEALTH_URL="$2" ;;
      --unit-name) UNIT_NAME="$2" ;;
      --deploy-root) DEPLOY_ROOT="$2" ;;
      --ready-timeout-seconds) READY_TIMEOUT_SECONDS="$2" ;;
      --download-timeout-seconds) DOWNLOAD_TIMEOUT_SECONDS="$2" ;;
      --poll-interval-seconds) POLL_INTERVAL_SECONDS="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"
|
||||||
|
|
||||||
|
# Every core setting is mandatory; show the synopsis and stop if any is empty.
if [[ -z "$SERVICE" || -z "$RELEASE_ID" || -z "$PACKAGE_URL" || -z "$SHA256_VALUE" || -z "$HEALTH_URL" || -z "$UNIT_NAME" || -z "$DEPLOY_ROOT" ]]; then
  usage
  exit 1
fi

# READY_TIMEOUT_SECONDS is later evaluated inside $(( ... )), where bash
# treats non-numeric text as an expression and a leading zero as octal, so
# only a plain decimal integer is accepted.
if [[ ! "${READY_TIMEOUT_SECONDS}" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "invalid ready timeout seconds: ${READY_TIMEOUT_SECONDS}" >&2
  exit 1
fi

# The other two are handed to `curl --max-time` and `sleep`; fail here with a
# clear message rather than midway through a deploy.
if [[ ! "${DOWNLOAD_TIMEOUT_SECONDS}" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  echo "invalid download timeout seconds: ${DOWNLOAD_TIMEOUT_SECONDS}" >&2
  exit 1
fi
if [[ ! "${POLL_INTERVAL_SECONDS}" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  echo "invalid poll interval seconds: ${POLL_INTERVAL_SECONDS}" >&2
  exit 1
fi
|
||||||
|
|
||||||
|
require_cmd curl
require_cmd sha256sum
require_cmd tar
require_cmd systemctl
require_cmd mktemp

release_dir="${DEPLOY_ROOT}/releases/${RELEASE_ID}"
current_link="${DEPLOY_ROOT}/current"

mkdir -p "${DEPLOY_ROOT}/releases"

# Use a private, unpredictable scratch directory: a fixed path under /tmp can
# be pre-created (or symlinked) by another local user, and would be shared by
# two concurrent deploys of the same release.
staging_root="$(mktemp -d "${TMPDIR:-/tmp}/chatapp-deploy.XXXXXXXX")"
staging_package="${staging_root}/${SERVICE}.tgz"
staging_extract="${staging_root}/extract"
mkdir -p "${staging_extract}"

# Remember where `current` points now so a failed deploy can switch back.
previous_target=""
if [[ -L "${current_link}" ]]; then
  previous_target="$(readlink -f "${current_link}" || true)"
fi
|
||||||
|
|
||||||
|
#######################################
# Remove the staging directory and everything in it.
# Globals: staging_root (read)
#######################################
cleanup() {
  rm -rf -- "${staging_root}"
}
# Run on every exit path, successful or not.
trap cleanup EXIT
|
||||||
|
|
||||||
|
#######################################
# Point `current` back at the release that was live before this deploy and
# bring the unit up on it.
# Globals:   previous_target, current_link, UNIT_NAME (read)
# Arguments: $1 - human-readable reason, included in the log line
# Outputs:   progress messages via log
# Returns:   0; an unhealthy rollback target is logged, not treated as fatal
#######################################
rollback() {
  local rollback_reason="$1"
  log "rollback: ${rollback_reason}"

  if [[ -z "${previous_target}" || ! -d "${previous_target}" ]]; then
    log "rollback: no previous release to restore"
    return 0
  fi

  # Build the link under a temporary name and rename it over the live one.
  ln -sfn "${previous_target}" "${current_link}.tmp"
  mv -Tf "${current_link}.tmp" "${current_link}"

  /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1 || true
  # `restart`, not `start`: when the new release came up but never turned
  # healthy the unit is still active, and `start` on an active unit does
  # nothing, leaving the failed release running.
  /usr/bin/systemctl restart "${UNIT_NAME}" || true

  if ! wait_ready "rollback"; then
    log "rollback target is still not ready"
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Poll the health URL until it answers successfully or the timeout elapses.
# Globals:   HEALTH_URL, READY_TIMEOUT_SECONDS, POLL_INTERVAL_SECONDS (read)
# Arguments: $1 - phase label used as the log prefix
# Outputs:   "<phase>: ready check passed" via log on success
# Returns:   0 once a probe succeeds, 1 when the deadline passes first
#######################################
wait_ready() {
  local phase="$1"
  local started deadline
  started="$(date +%s)"
  deadline=$(( started + READY_TIMEOUT_SECONDS ))

  while (( $(date +%s) < deadline )); do
    # -f makes HTTP error statuses count as failures; each probe is capped
    # at 3 seconds.
    if curl -fsS --max-time 3 "${HEALTH_URL}" >/dev/null; then
      log "${phase}: ready check passed"
      return 0
    fi
    sleep "${POLL_INTERVAL_SECONDS}"
  done

  return 1
}
|
||||||
|
|
||||||
|
#######################################
# Stop the unit if it is currently active.
# Globals: UNIT_NAME (read)
# Outputs: a log line describing what was done
# Returns: the status of `systemctl stop`, or 0 when already inactive
#######################################
stop_service() {
  if ! /usr/bin/systemctl is-active --quiet "${UNIT_NAME}"; then
    log "${UNIT_NAME} already inactive"
    return 0
  fi

  log "stopping ${UNIT_NAME} with SIGTERM"
  # The unit files already set KillSignal=SIGTERM, so stop triggers graceful shutdown.
  /usr/bin/systemctl stop "${UNIT_NAME}"
}
|
||||||
|
|
||||||
|
#######################################
# Start the unit after clearing any recorded failed state.
# Globals: UNIT_NAME (read)
# Outputs: a log line announcing the start
# Returns: the status of `systemctl start`
#######################################
start_service() {
  log "starting ${UNIT_NAME}"
  # Best effort: a failure of reset-failed is deliberately ignored.
  /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1 || true
  /usr/bin/systemctl start "${UNIT_NAME}"
}
|
||||||
|
|
||||||
|
log "download package ${PACKAGE_URL}"
curl -fL --connect-timeout 10 --max-time "${DOWNLOAD_TIMEOUT_SECONDS}" \
  -o "${staging_package}" "${PACKAGE_URL}"
# Checksum lines are "<digest>  <file>": two spaces select text mode.
printf '%s  %s\n' "${SHA256_VALUE}" "${staging_package}" | sha256sum -c -

if [[ ! -d "${release_dir}" ]]; then
  # Fill a scratch directory next to the final location and rename it into
  # place only once it is complete. Creating release_dir up front would let a
  # failed unpack leave a partial tree that the next run silently reuses.
  release_incoming="${release_dir}.incoming.$$"
  rm -rf -- "${release_incoming}"
  mkdir -p "${release_incoming}"
  if ! {
    tar -C "${staging_extract}" -xzf "${staging_package}" \
      && cp -R "${staging_extract}/." "${release_incoming}/" \
      && mv -T "${release_incoming}" "${release_dir}"
  }; then
    rm -rf -- "${release_incoming}"
    echo "failed to unpack release into: ${release_dir}" >&2
    exit 1
  fi
else
  log "release dir already exists, reuse ${release_dir}"
fi

if [[ ! -x "${release_dir}/bin/${SERVICE}" ]]; then
  echo "release binary missing: ${release_dir}/bin/${SERVICE}" >&2
  exit 1
fi

# Switch `current` by renaming a freshly made link over the old one.
ln -sfn "${release_dir}" "${current_link}.tmp"
mv -Tf "${current_link}.tmp" "${current_link}"

if ! stop_service; then
  rollback "systemd stop failed"
  echo "systemd stop failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! start_service; then
  rollback "systemd start failed"
  echo "systemd start failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! wait_ready "deploy"; then
  rollback "service failed to become ready after deploy"
  echo "service failed to become ready: ${UNIT_NAME}" >&2
  exit 1
fi

log "deploy success: ${SERVICE}@${RELEASE_ID}"
|
||||||
@ -59,6 +59,17 @@ def should_retry_without_output_cos(exc: TencentCloudSDKException) -> bool:
|
|||||||
return code == "ResourceNotFound.RoleNotFound" and "TAT_QCSLinkedRoleInUploadInvocation" in message
|
return code == "ResourceNotFound.RoleNotFound" and "TAT_QCSLinkedRoleInUploadInvocation" in message
|
||||||
|
|
||||||
|
|
||||||
|
def local_remote_deploy_script() -> str:
    """Return the text of the ``remote_deploy.sh`` file that sits next to this module.

    Trailing whitespace is stripped and exactly one final newline is appended.

    Raises:
        RuntimeError: if the template file does not exist, or if it contains
            nothing but whitespace.
    """
    template_path = Path(__file__).with_name("remote_deploy.sh")
    try:
        content = template_path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise RuntimeError(f"missing remote deploy template: {template_path}") from exc
    if not content.strip():
        raise RuntimeError(f"remote deploy template is empty: {template_path}")
    return content.rstrip() + "\n"
|
||||||
|
|
||||||
|
|
||||||
class CloudOperator:
|
class CloudOperator:
|
||||||
def __init__(self, config: dict[str, Any], service_name: str, instance_id: str, release_id: str | None) -> None:
|
def __init__(self, config: dict[str, Any], service_name: str, instance_id: str, release_id: str | None) -> None:
|
||||||
self.config = config
|
self.config = config
|
||||||
@ -105,6 +116,7 @@ class CloudOperator:
|
|||||||
|
|
||||||
def deploy(self) -> None:
|
def deploy(self) -> None:
|
||||||
self.ensure_agent_online()
|
self.ensure_agent_online()
|
||||||
|
self.ensure_remote_deploy_script_installed()
|
||||||
self.ensure_systemd_unit_installed()
|
self.ensure_systemd_unit_installed()
|
||||||
package_url, sha256 = self.resolve_release_package()
|
package_url, sha256 = self.resolve_release_package()
|
||||||
|
|
||||||
@ -215,6 +227,33 @@ class CloudOperator:
|
|||||||
task = self.wait_for_tat(invocation_id)
|
task = self.wait_for_tat(invocation_id)
|
||||||
self.ensure_task_success(task, f"ensure systemd unit {unit_name}")
|
self.ensure_task_success(task, f"ensure systemd unit {unit_name}")
|
||||||
|
|
||||||
|
def ensure_remote_deploy_script_installed(self) -> None:
    """Install the local deploy script at the configured remote path if it differs.

    The script text is base64-encoded and embedded in a shell command. That
    command decodes it into a temporary file and, when the target is missing
    or its bytes differ, installs it at ``config["tat"]["script_path"]`` with
    mode 0755. The command is submitted through ``run_tat_command`` and its
    outcome is checked with ``ensure_task_success``.
    """
    script_path = str(self.config["tat"]["script_path"])
    script_dir = str(Path(script_path).parent)
    script_content_b64 = base64.b64encode(local_remote_deploy_script().encode("utf-8")).decode("ascii")
    # Every line of the template shares one base indent so that dedent()
    # leaves the heredoc terminator `PY` at column 0, where the shell needs it.
    command = textwrap.dedent(
        f"""\
        set -Eeuo pipefail
        script_path={shlex.quote(script_path)}
        script_dir={shlex.quote(script_dir)}
        tmp_file="$(mktemp)"
        trap 'rm -f "$tmp_file"' EXIT
        mkdir -p "$script_dir"
        TMP_FILE="$tmp_file" python3 - <<'PY'
        import base64
        import os
        from pathlib import Path
        Path(os.environ["TMP_FILE"]).write_bytes(base64.b64decode("{script_content_b64}"))
        PY
        if [[ ! -f "$script_path" ]] || ! cmp -s "$tmp_file" "$script_path"; then
          install -m 0755 "$tmp_file" "$script_path"
        fi
        """
    )
    invocation_id = self.run_tat_command(command, f"ensure-deploy-script-{self.service_name}")
    task = self.wait_for_tat(invocation_id)
    self.ensure_task_success(task, f"ensure deploy script {script_path}")
|
||||||
|
|
||||||
def systemd_unit_content(self) -> str:
|
def systemd_unit_content(self) -> str:
|
||||||
deploy_root = str(self.service_cfg["deploy_root"]).rstrip("/")
|
deploy_root = str(self.service_cfg["deploy_root"]).rstrip("/")
|
||||||
binary_path = f"{deploy_root}/current/bin/{self.service_name}"
|
binary_path = f"{deploy_root}/current/bin/{self.service_name}"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user