feat(cron): add failure destination support to failed cron jobs (#31059)
* feat(cron): add failure destination support with webhook mode and bestEffort handling
Extends PR #24789 failure alerts with features from PR #29145:
- Add webhook delivery mode for failure alerts (mode: 'webhook')
- Add accountId support for multi-account channel configurations
- Add bestEffort handling to skip alerts when job has bestEffort=true
- Add separate failureDestination config (global + per-job in delivery)
- Add duplicate prevention (avoids sending to the same target as the primary delivery)
- Add CLI flags: --failure-alert-mode, --failure-alert-account-id
- Add UI fields for new options in web cron editor
* fix(cron): merge failureAlert mode/accountId and preserve failureDestination on updates
- Fix mergeCronFailureAlert to merge mode and accountId fields
- Fix mergeCronDelivery to preserve failureDestination on updates
- Fix isSameDeliveryTarget to use 'announce' as default instead of 'none' to properly detect duplicates when delivery.mode is undefined
* fix(cron): validate webhook mode requires URL in resolveFailureDestination
When mode is 'webhook' but no 'to' URL is provided, return null instead of creating an invalid plan that silently fails later.
* fix(cron): fail closed on webhook mode without URL and make failureDestination fields clearable
- sendCronFailureAlert: fail closed when mode is webhook but URL is missing
- mergeCronDelivery: use per-key presence checks so callers can clear nested failureDestination fields via cron.update
Note: protocol:check shows missing internalEvents in Swift models - this is a pre-existing issue unrelated to these changes (upstream sync needed).
* fix(cron): use separate schema for failureDestination and fix type cast
- Create CronFailureDestinationSchema excluding after/cooldownMs fields
- Fix type cast in sendFailureNotificationAnnounce to use CronMessageChannel
* fix(cron): merge global failureDestination with partial job overrides
When job has partial failureDestination config, fall back to global config for unset fields instead of treating it as a full override.
* fix(cron): avoid forcing announce mode and clear inherited 'to' on mode change
- UI: only include mode in patch if explicitly set to non-default
- delivery.ts: clear inherited 'to' when job overrides mode, since URL semantics differ between announce and webhook modes
* fix(cron): preserve explicit 'to' on mode override and always include mode in UI patches
- delivery.ts: preserve job-level explicit 'to' when overriding mode
- UI: always include mode in failureAlert patch so users can switch between announce/webhook
* fix(cron): allow clearing accountId and treat undefined global mode as announce
- UI: always include accountId in patch so users can clear it
- delivery.ts: treat undefined global mode as announce when comparing for clearing inherited 'to'
* Cron: harden failure destination routing and add regression coverage
* Cron: resolve failure destination review feedback
* Cron: drop unrelated timeout assertions from conflict resolution
* Cron: format cron CLI regression test
* Cron: align gateway cron test mock types
---------
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -73,6 +73,11 @@ export function registerCronEditCommand(cron: Command) {
|
||||
)
|
||||
.option("--failure-alert-to <dest>", "Failure alert destination")
|
||||
.option("--failure-alert-cooldown <duration>", "Minimum time between alerts (e.g. 1h, 30m)")
|
||||
.option("--failure-alert-mode <mode>", "Failure alert delivery mode (announce or webhook)")
|
||||
.option(
|
||||
"--failure-alert-account-id <id>",
|
||||
"Account ID for failure alert channel (multi-account setups)",
|
||||
)
|
||||
.action(async (id, opts) => {
|
||||
try {
|
||||
if (opts.session === "main" && opts.message) {
|
||||
@@ -286,11 +291,15 @@ export function registerCronEditCommand(cron: Command) {
|
||||
const hasFailureAlertChannel = typeof opts.failureAlertChannel === "string";
|
||||
const hasFailureAlertTo = typeof opts.failureAlertTo === "string";
|
||||
const hasFailureAlertCooldown = typeof opts.failureAlertCooldown === "string";
|
||||
const hasFailureAlertMode = typeof opts.failureAlertMode === "string";
|
||||
const hasFailureAlertAccountId = typeof opts.failureAlertAccountId === "string";
|
||||
const hasFailureAlertFields =
|
||||
hasFailureAlertAfter ||
|
||||
hasFailureAlertChannel ||
|
||||
hasFailureAlertTo ||
|
||||
hasFailureAlertCooldown;
|
||||
hasFailureAlertCooldown ||
|
||||
hasFailureAlertMode ||
|
||||
hasFailureAlertAccountId;
|
||||
const failureAlertFlag =
|
||||
typeof opts.failureAlert === "boolean" ? opts.failureAlert : undefined;
|
||||
if (failureAlertFlag === false && hasFailureAlertFields) {
|
||||
@@ -322,6 +331,17 @@ export function registerCronEditCommand(cron: Command) {
|
||||
}
|
||||
failureAlert.cooldownMs = cooldownMs;
|
||||
}
|
||||
if (hasFailureAlertMode) {
|
||||
const mode = String(opts.failureAlertMode).trim().toLowerCase();
|
||||
if (mode !== "announce" && mode !== "webhook") {
|
||||
throw new Error("Invalid --failure-alert-mode (must be 'announce' or 'webhook').");
|
||||
}
|
||||
failureAlert.mode = mode;
|
||||
}
|
||||
if (hasFailureAlertAccountId) {
|
||||
const accountId = String(opts.failureAlertAccountId).trim();
|
||||
failureAlert.accountId = accountId ? accountId : undefined;
|
||||
}
|
||||
patch.failureAlert = failureAlert;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user