deploy fix bug
This commit is contained in:
parent
80364221f8
commit
e2bfc7769b
167
ops-scripts/remote_deploy.sh
Normal file
167
ops-scripts/remote_deploy.sh
Normal file
@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env bash
#
# Remote deploy helper.
#
# Downloads a release package, verifies its SHA-256, unpacks it under
# <deploy-root>/releases/<release-id>, repoints <deploy-root>/current,
# restarts the systemd unit and polls the health URL. When a step after the
# symlink switch fails, the previous release is restored.

# -E lets ERR traps propagate into functions; -e/-u/pipefail fail fast.
set -Eeuo pipefail

# Required settings; populated from command-line flags.
SERVICE=""
RELEASE_ID=""
PACKAGE_URL=""
SHA256_VALUE=""
HEALTH_URL=""
UNIT_NAME=""
DEPLOY_ROOT=""

# Tunables: the environment supplies the default, a flag may override it.
READY_TIMEOUT_SECONDS="${READY_TIMEOUT_SECONDS:-180}"
DOWNLOAD_TIMEOUT_SECONDS="${DOWNLOAD_TIMEOUT_SECONDS:-600}"
POLL_INTERVAL_SECONDS="${POLL_INTERVAL_SECONDS:-2}"
|
||||
|
||||
# Print command-line help to stdout.
# The heredoc delimiter is quoted so the trailing backslashes are printed
# literally and nothing is expanded.
usage() {
  cat <<'EOF'
Usage:
  deploy.sh \
    --service <service> \
    --release-id <release_id> \
    --package-url <url> \
    --sha256 <sha256> \
    --health-url <url> \
    --unit-name <systemd unit> \
    --deploy-root <path> \
    [--ready-timeout-seconds <seconds>] \
    [--download-timeout-seconds <seconds>] \
    [--poll-interval-seconds <seconds>]
EOF
}
|
||||
|
||||
# Write one timestamped line to stdout.
# Arguments: the message words; they are joined with single spaces.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S %z')" "$*"
}
|
||||
|
||||
# Abort the script when an external command is not available.
# Arguments: $1 - command name to look up.
# Outputs:   "missing command: <name>" on stderr when the lookup fails.
# Exits:     1 when the command is missing; otherwise returns 0.
require_cmd() {
  local cmd="$1"
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    printf 'missing command: %s\n' "${cmd}" >&2
    exit 1
  fi
}
|
||||
|
||||
# Parse command-line flags into the configuration globals.
while [[ $# -gt 0 ]]; do
  # First pass: classify the flag and make sure a value follows it.
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --service|--release-id|--package-url|--sha256|--health-url|--unit-name|--deploy-root|--ready-timeout-seconds|--download-timeout-seconds|--poll-interval-seconds)
      # Without this guard, `set -u` aborts on "$2" with an opaque
      # "unbound variable" error when the flag is the last argument.
      if [[ $# -lt 2 ]]; then
        echo "missing value for $1" >&2
        usage
        exit 1
      fi
      ;;
    *)
      echo "unknown arg: $1" >&2
      usage
      exit 1
      ;;
  esac

  # Second pass: store the value. Every flag that reaches here takes one.
  case "$1" in
    --service) SERVICE="$2" ;;
    --release-id) RELEASE_ID="$2" ;;
    --package-url) PACKAGE_URL="$2" ;;
    --sha256) SHA256_VALUE="$2" ;;
    --health-url) HEALTH_URL="$2" ;;
    --unit-name) UNIT_NAME="$2" ;;
    --deploy-root) DEPLOY_ROOT="$2" ;;
    --ready-timeout-seconds) READY_TIMEOUT_SECONDS="$2" ;;
    --download-timeout-seconds) DOWNLOAD_TIMEOUT_SECONDS="$2" ;;
    --poll-interval-seconds) POLL_INTERVAL_SECONDS="$2" ;;
  esac
  shift 2
done

# All seven core settings are mandatory.
if [[ -z "$SERVICE" || -z "$RELEASE_ID" || -z "$PACKAGE_URL" || -z "$SHA256_VALUE" || -z "$HEALTH_URL" || -z "$UNIT_NAME" || -z "$DEPLOY_ROOT" ]]; then
  usage
  exit 1
fi

# READY_TIMEOUT_SECONDS is evaluated inside $(( )), so it must be a plain
# decimal integer (no leading zero, which bash would read as octal).
if [[ ! "${READY_TIMEOUT_SECONDS}" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "invalid ready timeout (expected whole seconds): ${READY_TIMEOUT_SECONDS}" >&2
  exit 1
fi
|
||||
|
||||
# Fail early if any external tool the deploy depends on is missing.
for required_tool in curl sha256sum tar systemctl; do
  require_cmd "${required_tool}"
done
|
||||
|
||||
# Final location of this release and the symlink the service runs from.
release_dir="${DEPLOY_ROOT}/releases/${RELEASE_ID}"
current_link="${DEPLOY_ROOT}/current"

# Private scratch directory. mktemp gives an unpredictable, owner-only path,
# so other local users cannot pre-create or symlink it, and two concurrent
# runs cannot delete each other's staging area.
staging_root="$(mktemp -d "${TMPDIR:-/tmp}/chatapp-deploy.XXXXXX")"
staging_package="${staging_root}/${SERVICE}.tgz"
staging_extract="${staging_root}/extract"

# Resolved target of `current` before this deploy; empty on first deploy.
previous_target=""

mkdir -p "${DEPLOY_ROOT}/releases" "${staging_extract}"

if [[ -L "${current_link}" ]]; then
  # A dangling link must not abort the deploy, hence `|| true`.
  previous_target="$(readlink -f "${current_link}" || true)"
fi
|
||||
|
||||
# Remove the staging directory; runs on every exit path via the EXIT trap.
# `--` stops option parsing and `:?` refuses to run with an empty path.
cleanup() {
  rm -rf -- "${staging_root:?}"
}
trap cleanup EXIT
|
||||
|
||||
# Restore the previous release after a failed deploy.
# Globals:   previous_target, current_link, UNIT_NAME (read)
# Arguments: $1 - human-readable reason, used only for logging.
# Returns:   0; failures while restoring the service are logged, not raised.
rollback() {
  local rollback_reason="$1"
  log "rollback: ${rollback_reason}"

  if [[ -z "${previous_target}" || ! -d "${previous_target}" ]]; then
    log "rollback: no previous release to restore"
    return 0
  fi

  # Repoint `current` atomically: build the link aside, then rename over it.
  ln -sfn "${previous_target}" "${current_link}.tmp"
  mv -Tf "${current_link}.tmp" "${current_link}"

  /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1 || true
  # `restart`, not `start`: when the new release came up but never became
  # ready, the unit is still active and `start` would be a no-op, leaving
  # the bad release running. `restart` also starts an inactive unit.
  /usr/bin/systemctl restart "${UNIT_NAME}" || true

  if ! wait_ready "rollback"; then
    log "rollback target is still not ready"
  fi
}
|
||||
|
||||
# Poll the health URL until it answers successfully or the timeout elapses.
# Globals:   READY_TIMEOUT_SECONDS, POLL_INTERVAL_SECONDS, HEALTH_URL (read)
# Arguments: $1 - phase label used in the success log line.
# Returns:   0 once a probe succeeds, 1 when the deadline passes first.
wait_ready() {
  local phase="$1"
  # Declared separately so a failing `date` is not masked by `local`.
  local started deadline
  started="$(date +%s)"
  deadline=$(( started + READY_TIMEOUT_SECONDS ))

  while [[ "$(date +%s)" -lt "${deadline}" ]]; do
    # -f turns HTTP errors into a failure; each probe is capped at 3 seconds.
    if curl -fsS --max-time 3 "${HEALTH_URL}" >/dev/null; then
      log "${phase}: ready check passed"
      return 0
    fi
    sleep "${POLL_INTERVAL_SECONDS}"
  done

  return 1
}
|
||||
|
||||
# Stop the systemd unit if it is currently active.
# Globals: UNIT_NAME (read)
# Returns: status of `systemctl stop`, or 0 when the unit was not active.
stop_service() {
  if /usr/bin/systemctl is-active --quiet "${UNIT_NAME}"; then
    log "stopping ${UNIT_NAME} with SIGTERM"
    # The unit files already set KillSignal=SIGTERM, so stop triggers graceful shutdown.
    /usr/bin/systemctl stop "${UNIT_NAME}"
  else
    log "${UNIT_NAME} already inactive"
  fi
}
|
||||
|
||||
# Start the systemd unit.
# Globals: UNIT_NAME (read)
# Returns: status of `systemctl start`.
start_service() {
  log "starting ${UNIT_NAME}"
  # Best effort: clear any earlier failed state before starting.
  /usr/bin/systemctl reset-failed "${UNIT_NAME}" >/dev/null 2>&1 || true
  /usr/bin/systemctl start "${UNIT_NAME}"
}
|
||||
|
||||
# --- Fetch and verify the package ------------------------------------------
log "download package ${PACKAGE_URL}"
curl -fL --connect-timeout 10 --max-time "${DOWNLOAD_TIMEOUT_SECONDS}" \
  -o "${staging_package}" "${PACKAGE_URL}"
# Canonical checksum line: digest, two spaces, file name.
printf '%s  %s\n' "${SHA256_VALUE}" "${staging_package}" | sha256sum -c -

# --- Install the release directory ------------------------------------------
if [[ ! -d "${release_dir}" ]]; then
  tar -C "${staging_extract}" -xzf "${staging_package}"

  # Populate a sibling directory and rename it into place, so an interrupted
  # copy never leaves a half-filled release_dir that a later run would
  # treat as a complete release and reuse.
  release_partial="${release_dir}.partial.$$"
  rm -rf -- "${release_partial}"
  mkdir -p "${release_partial}"
  if ! cp -R "${staging_extract}/." "${release_partial}/"; then
    rm -rf -- "${release_partial}"
    echo "failed to populate release dir: ${release_dir}" >&2
    exit 1
  fi
  mv -T "${release_partial}" "${release_dir}"
else
  log "release dir already exists, reuse ${release_dir}"
fi

if [[ ! -x "${release_dir}/bin/${SERVICE}" ]]; then
  echo "release binary missing: ${release_dir}/bin/${SERVICE}" >&2
  exit 1
fi

# --- Switch `current` atomically: build the link aside, rename over it -----
ln -sfn "${release_dir}" "${current_link}.tmp"
mv -Tf "${current_link}.tmp" "${current_link}"

# --- Restart and verify; any failure from here on triggers a rollback ------
if ! stop_service; then
  rollback "systemd stop failed"
  echo "systemd stop failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! start_service; then
  rollback "systemd start failed"
  echo "systemd start failed: ${UNIT_NAME}" >&2
  exit 1
fi

if ! wait_ready "deploy"; then
  rollback "service failed to become ready after deploy"
  echo "service failed to become ready: ${UNIT_NAME}" >&2
  exit 1
fi

log "deploy success: ${SERVICE}@${RELEASE_ID}"
|
||||
@ -59,6 +59,17 @@ def should_retry_without_output_cos(exc: TencentCloudSDKException) -> bool:
|
||||
return code == "ResourceNotFound.RoleNotFound" and "TAT_QCSLinkedRoleInUploadInvocation" in message
|
||||
|
||||
|
||||
def local_remote_deploy_script() -> str:
    """Return the text of ``remote_deploy.sh`` stored next to this module.

    Trailing whitespace is stripped and exactly one final newline is appended.

    Raises:
        RuntimeError: if the template file does not exist, or if it contains
            nothing but whitespace.
    """
    template_path = Path(__file__).with_name("remote_deploy.sh")
    try:
        content = template_path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise RuntimeError(f"missing remote deploy template: {template_path}") from exc
    if not content.strip():
        raise RuntimeError(f"remote deploy template is empty: {template_path}")
    return content.rstrip() + "\n"
|
||||
|
||||
|
||||
class CloudOperator:
|
||||
def __init__(self, config: dict[str, Any], service_name: str, instance_id: str, release_id: str | None) -> None:
|
||||
self.config = config
|
||||
@ -105,6 +116,7 @@ class CloudOperator:
|
||||
|
||||
def deploy(self) -> None:
|
||||
self.ensure_agent_online()
|
||||
self.ensure_remote_deploy_script_installed()
|
||||
self.ensure_systemd_unit_installed()
|
||||
package_url, sha256 = self.resolve_release_package()
|
||||
|
||||
@ -215,6 +227,33 @@ class CloudOperator:
|
||||
task = self.wait_for_tat(invocation_id)
|
||||
self.ensure_task_success(task, f"ensure systemd unit {unit_name}")
|
||||
|
||||
def ensure_remote_deploy_script_installed(self) -> None:
    """Install the local deploy script at ``config["tat"]["script_path"]`` on the remote host.

    The script text is base64-encoded and embedded in a shell command that
    decodes it into a temporary file, then installs it with mode 0755 only
    when the target is missing or differs. The command is submitted through
    ``run_tat_command`` and its result is checked with ``ensure_task_success``.
    """
    script_path = str(self.config["tat"]["script_path"])
    script_dir = str(Path(script_path).parent)
    # Base64 keeps the script intact regardless of shell quoting.
    script_content_b64 = base64.b64encode(local_remote_deploy_script().encode("utf-8")).decode("ascii")
    # After dedent, the heredoc terminator `PY` must sit at column 0.
    command = textwrap.dedent(
        f"""\
        set -Eeuo pipefail
        script_path={shlex.quote(script_path)}
        script_dir={shlex.quote(script_dir)}
        tmp_file="$(mktemp)"
        trap 'rm -f "$tmp_file"' EXIT
        mkdir -p "$script_dir"
        TMP_FILE="$tmp_file" python3 - <<'PY'
        import base64
        import os
        from pathlib import Path
        Path(os.environ["TMP_FILE"]).write_bytes(base64.b64decode("{script_content_b64}"))
        PY
        if [[ ! -f "$script_path" ]] || ! cmp -s "$tmp_file" "$script_path"; then
          install -m 0755 "$tmp_file" "$script_path"
        fi
        """
    )
    invocation_id = self.run_tat_command(command, f"ensure-deploy-script-{self.service_name}")
    task = self.wait_for_tat(invocation_id)
    self.ensure_task_success(task, f"ensure deploy script {script_path}")
|
||||
|
||||
def systemd_unit_content(self) -> str:
|
||||
deploy_root = str(self.service_cfg["deploy_root"]).rstrip("/")
|
||||
binary_path = f"{deploy_root}/current/bin/{self.service_name}"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user