Express Snippet Indexing

Fast-path indexing for short text snippets such as support articles, product descriptions, or FAQ entries. Skips OCR extraction and goes directly to embedding generation.

{
  "Comment": "Fast-path indexing for short text snippets such as support articles, product descriptions, or FAQ entries. Skips OCR extraction and goes directly to embedding generation, then verifies indexing with a native Bedrock retrieve call.",
  "StartAt": "SetSnippetContext",
  "States": {
    "SetSnippetContext": {
      "Type": "Pass",
      "Comment": "Copies the snippet fields from the execution input and adds the fixed express-mode flags. Every referenced input field must be present: a JSONPath that matches nothing fails this state.",
      "Parameters": {
        "documentId.$": "$.documentId",
        "documentType.$": "$.documentType",
        "title.$": "$.title",
        "sourceUri.$": "$.sourceUri",
        "namespace.$": "$.namespace",
        "contentOwner.$": "$.contentOwner",
        "tags.$": "$.tags",
        "expiryDate.$": "$.expiryDate",
        "qualityThreshold.$": "$.qualityThreshold",
        "knowledgeBaseId.$": "$.knowledgeBaseId",
        "snippetMode": "EXPRESS",
        "skipOcr": true,
        "skipExtractionStep": true
      },
      "Next": "ExpressIngestSnippet"
    },
    "ExpressIngestSnippet": {
      "Type": "Task",
      "Comment": "Invokes the function at IngestDocumentFunctionArn with the snippet context. No ResultPath is set, so the function's result replaces the state input handed to the next state.",
      "Resource": "${IngestDocumentFunctionArn}",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "SnippetIndexingFailed"
        }
      ],
      "Next": "ExpressGenerateSnippetEmbeddings"
    },
    "ExpressGenerateSnippetEmbeddings": {
      "Type": "Task",
      "Comment": "Invokes the function at GenerateEmbeddingsFunctionArn with the previous state's result. Its own result replaces the state input for the next state.",
      "Resource": "${GenerateEmbeddingsFunctionArn}",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "SnippetIndexingFailed"
        }
      ],
      "Next": "ExpressIndexSnippet"
    },
    "ExpressIndexSnippet": {
      "Type": "Task",
      "Comment": "Invokes the function at IndexToVectorStoreFunctionArn. Its result becomes the state input from which VerifySnippetIndexed reads $.knowledgeBaseId and $.title, so the function must return both fields.",
      "Resource": "${IndexToVectorStoreFunctionArn}",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "SnippetIndexingFailed"
        }
      ],
      "Next": "WaitForIndexPropagation"
    },
    "WaitForIndexPropagation": {
      "Type": "Wait",
      "Comment": "Fixed 2-second pause before the verification query; the state input passes through unchanged.",
      "Seconds": 2,
      "Next": "VerifySnippetIndexed"
    },
    "VerifySnippetIndexed": {
      "Type": "Task",
      "Comment": "Queries the knowledge base with the snippet title through the AWS SDK integration for the Bedrock Agent Runtime Retrieve API and stores the results and their count under $.indexVerification. SDK integrations return PascalCase fields, hence $.RetrievalResults. The execution role needs permission to call bedrock:Retrieve on the knowledge base.",
      "Resource": "arn:aws:states:::aws-sdk:bedrockagentruntime:retrieve",
      "Parameters": {
        "KnowledgeBaseId.$": "$.knowledgeBaseId",
        "RetrievalQuery": {
          "Text.$": "States.Format('{}', $.title)"
        }
      },
      "ResultSelector": {
        "retrievedChunks.$": "$.RetrievalResults",
        "chunkCount.$": "States.ArrayLength($.RetrievalResults)"
      },
      "ResultPath": "$.indexVerification",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "SnippetIndexingFailed"
        }
      ],
      "Next": "RouteByIndexHealth"
    },
    "RouteByIndexHealth": {
      "Type": "Choice",
      "Comment": "Continues to the catalog update only when the verification query returned at least one chunk; any other outcome fails the execution. An empty result is not an error, so it is not retried.",
      "Choices": [
        {
          "Variable": "$.indexVerification.chunkCount",
          "NumericGreaterThan": 0,
          "Next": "ExpressUpdateSnippetCatalog"
        }
      ],
      "Default": "SnippetIndexingFailed"
    },
    "ExpressUpdateSnippetCatalog": {
      "Type": "Task",
      "Comment": "Invokes the function at UpdateKnowledgeCatalogFunctionArn once verification has passed. Its input includes the $.indexVerification results.",
      "Resource": "${UpdateKnowledgeCatalogFunctionArn}",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Catch": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "Next": "SnippetIndexingFailed"
        }
      ],
      "Next": "NotifySnippetIndexed"
    },
    "NotifySnippetIndexed": {
      "Type": "Task",
      "Comment": "Invokes the function at NotifyIngestionStatusFunctionArn. This state has no Catch, so an error that outlasts the retries fails the execution with that error instead of routing to SnippetIndexingFailed.",
      "Resource": "${NotifyIngestionStatusFunctionArn}",
      "Retry": [
        {
          "ErrorEquals": [
            "States.ALL"
          ],
          "IntervalSeconds": 2,
          "MaxAttempts": 2,
          "BackoffRate": 2
        }
      ],
      "Next": "SnippetIndexingComplete"
    },
    "SnippetIndexingComplete": {
      "Type": "Succeed"
    },
    "SnippetIndexingFailed": {
      "Type": "Fail",
      "Error": "SnippetIndexingFailed",
      "Cause": "Express snippet indexing failed. Snippet may not be retrievable."
    }
  }
}
JSON
Expand
100%

AI teams can use patterns like this to build reliable, compliant, and scalable automation for knowledge-base indexing, and can test and refine these flows locally with Thrubit to reduce cloud cost and speed up iteration.