Outage Management Pipeline

An end-to-end network outage management pipeline. It validates the outage report, diagnoses the root cause across affected network segments while assessing the impact radius and number of affected subscribers in parallel, applies network rerouting to restore partial service, dispatches field technicians, executes full service restoration, and generates the incident report for SLA tracking.
{
  "Comment": "Outage management pipeline — validate, diagnose, assess impact, reroute, dispatch field tech, restore, and report",
  "StartAt": "ValidateOutageReport",
  "States": {
    "ValidateOutageReport": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${ValidateOutageReportFunctionArn}",
        "Payload.$": "$"
      },
      "ResultPath": "$.validation",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 3,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "IsReportValid"
    },
    "IsReportValid": {
      "Type": "Choice",
      "Choices": [
        {
          "Variable": "$.validation.Payload.isValid",
          "BooleanEquals": true,
          "Next": "InvestigateOutage"
        }
      ],
      "Default": "OutageFailed"
    },
    "InvestigateOutage": {
      "Type": "Parallel",
      "Branches": [
        {
          "StartAt": "DiagnoseRootCause",
          "States": {
            "DiagnoseRootCause": {
              "Type": "Task",
              "Resource": "arn:aws:states:::lambda:invoke",
              "Parameters": {
                "FunctionName": "${DiagnoseRootCauseFunctionArn}",
                "Payload.$": "$.validation.Payload"
              },
              "Retry": [
                {
                  "ErrorEquals": [
                    "Lambda.ServiceException",
                    "Lambda.AWSLambdaException",
                    "Lambda.SdkClientException",
                    "Lambda.TooManyRequestsException"
                  ],
                  "IntervalSeconds": 2,
                  "MaxAttempts": 3,
                  "BackoffRate": 2
                }
              ],
              "End": true
            }
          }
        },
        {
          "StartAt": "AssessImpactRadius",
          "States": {
            "AssessImpactRadius": {
              "Type": "Task",
              "Resource": "arn:aws:states:::lambda:invoke",
              "Parameters": {
                "FunctionName": "${AssessImpactRadiusFunctionArn}",
                "Payload.$": "$.validation.Payload"
              },
              "Retry": [
                {
                  "ErrorEquals": [
                    "Lambda.ServiceException",
                    "Lambda.AWSLambdaException",
                    "Lambda.SdkClientException",
                    "Lambda.TooManyRequestsException"
                  ],
                  "IntervalSeconds": 2,
                  "MaxAttempts": 3,
                  "BackoffRate": 2
                }
              ],
              "End": true
            }
          }
        }
      ],
      "ResultPath": "$.investigation",
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "ApplyNetworkReroute"
    },
    "ApplyNetworkReroute": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${ApplyNetworkRerouteFunctionArn}",
        "Payload.$": "$.investigation[1].Payload"
      },
      "ResultPath": "$.reroute",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 3,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "DispatchFieldTech"
    },
    "DispatchFieldTech": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${DispatchFieldTechFunctionArn}",
        "Payload.$": "$.reroute.Payload"
      },
      "ResultPath": "$.dispatch",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 3,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "RestoreService"
    },
    "RestoreService": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${RestoreServiceFunctionArn}",
        "Payload.$": "$.dispatch.Payload"
      },
      "ResultPath": "$.restore",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 3,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "GenerateIncidentReport"
    },
    "GenerateIncidentReport": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${GenerateIncidentReportFunctionArn}",
        "Payload.$": "$.restore.Payload"
      },
      "ResultPath": "$.report",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 3,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "OutageFailed",
          "ResultPath": "$.error"
        }
      ],
      "Next": "OutageResolved"
    },
    "OutageResolved": {
      "Type": "Succeed"
    },
    "OutageFailed": {
      "Type": "Fail",
      "Error": "OutageManagementFailed",
      "Cause": "Outage report could not be validated, diagnosed, or resolved through the management pipeline"
    }
  }
}
JSON
Expand
100%

Telecommunications teams can use patterns like this to build reliable, compliant, and scalable automation for network outage management, and can test and refine these flows locally with Thrubit to reduce cloud cost and speed up iteration.

Free Trial