{
  "meta": {
    "instanceId": "b9f144fdc910a1e14e522063b576e7e28af8b611858295f590957fc8454b2836",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "1ef38f7c-61dd-419f-9b6d-c7ed5224d993",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        288,
        32
      ],
      "webhookId": "ceb53c60-2977-4d77-b27d-20bcd1f6ea47",
      "parameters": {
        "amount": 1
      },
      "typeVersion": 1.1
    },
    {
      "id": "7a2fbe88-a859-4143-b96d-de8d213d1006",
      "name": "Check Job Status",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        128,
        32
      ],
      "parameters": {
        "url": "=https://api.subworkflow.ai/v1/jobs/{{ $json.data.id }}",
        "options": {},
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "credential-id",
          "name": "httpHeaderAuth Credential"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "87fe7e38-78ec-4f3b-a7e8-331af339dc04",
      "name": "Get Dataset Items",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        832,
        -128
      ],
      "parameters": {
        "url": "=https://api.subworkflow.ai/v1/datasets/{{ $json.data.id }}/items?row=jpg&limit=10",
        "options": {
          "pagination": {
            "pagination": {
              "parameters": {
                "parameters": [
                  {
                    "name": "offset",
                    "value": "={{ ($response.body.offset ?? 0) + 10 }}"
                  }
                ]
              },
              "maxRequests": 5,
              "requestInterval": 500,
              "limitPagesFetched": true,
              "completeExpression": "={{ $response.body.data.length < 10 }}",
              "paginationCompleteWhen": "other"
            }
          }
        },
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "credential-id",
          "name": "httpHeaderAuth Credential"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "93ecd55d-9f01-4c6a-acb7-94184ad54781",
      "name": "Get Dataset",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        608,
        -128
      ],
      "parameters": {
        "url": "=https://api.subworkflow.ai/v1/datasets/{{ $json.data.datasetId }}",
        "options": {},
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "credential-id",
          "name": "httpHeaderAuth Credential"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "1e9fa5b0-ca47-4d44-8818-a7251173346f",
      "name": "When clicking ‘Execute workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -736,
        32
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "54c34849-bbbb-41e2-a182-1ec021939d5b",
      "name": "Download file",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -544,
        32
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "=1wS9U7MQDthj57CvEcqG_Llkr-ek6RqGA"
        },
        "options": {},
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "credential-id",
          "name": "googleDriveOAuth2Api Credential"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "39367d95-269a-47ed-a23f-4635e4d1fa4f",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        1232,
        -128
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "data"
      },
      "typeVersion": 1
    },
    {
      "id": "2b1ac563-43c5-4379-8ce2-d06f14897e42",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -832,
        -160
      ],
      "parameters": {
        "color": 7,
        "width": 672,
        "height": 416,
        "content": "### 1. Upload Binary File to Extract API\n[Extract API Documentation](https://docs.subworkflow.ai/api-reference/post-v1-extract)\n\nOur workflow starts with uploading our document to the SubworkflowAI service. There are actually 2 methods to do so but for this example, we'll focus on the Extract API. The Extract API does the minimal to split, convert and index pages of a document for easy retrieval. This API call fires off an async job so the response of this API is a \"job\" object."
      },
      "typeVersion": 1
    },
    {
      "id": "6e6fd235-f260-4733-a31f-54f04ae3efc3",
      "name": "Extract API",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -352,
        32
      ],
      "parameters": {
        "url": "https://api.subworkflow.ai/v1/extract",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "multipart-form-data",
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "file",
              "parameterType": "formBinaryData",
              "inputDataFieldName": "data"
            },
            {
              "name": "expiresInDays",
              "value": "0"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "credential-id",
          "name": "httpHeaderAuth Credential"
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.2,
      "waitBetweenTries": 5000
    },
    {
      "id": "3c7d2ca3-8496-4136-82a9-db84baefa658",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -128,
        -160
      ],
      "parameters": {
        "color": 7,
        "width": 608,
        "height": 416,
        "content": "### 2. Poll for Async \"Extract\" Job to Complete\n[Jobs API Documentation](https://docs.subworkflow.ai/api-reference/get-v1-jobs-id)\n\nWhilst the extract API is busy with our file, the \"job\" record associated with the task is how we track its progress. Use a loop to poll the job and conditional act on the \"status\" property - whilst the status is \"IN_PROGRESS\", we continue to poll and when we hit \"SUCCESS\" or \"ERROR\", we can break out of the loop."
      },
      "typeVersion": 1
    },
    {
      "id": "a0cfd795-01c0-4e10-ac86-497b300899e0",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        512,
        -368
      ],
      "parameters": {
        "color": 7,
        "width": 544,
        "height": 432,
        "content": "### 3. Fetch Resulting Dataset and Get Dataset Items\n[Datasets API Documentation](https://docs.subworkflow.ai/api-reference/get-v1-datasets)\n\nOnce the extract process is done, we can safely access the corresponding Dataset which is represents the original file. Note, we do not receive all pages back - this would be too memory intensive! Instead, Subworkflow holds on to the data until you explicitly request for them. To get the individual pages of document, call the DatasetItems API."
      },
      "typeVersion": 1
    },
    {
      "id": "1ede59b6-24af-4a33-bac4-50bc21d60a2f",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1072,
        -368
      ],
      "parameters": {
        "color": 7,
        "width": 640,
        "height": 432,
        "content": "### 4. Example VLM Use-Case - Document OCR\n[Learn more about the Gemini node](https://docs.n8n.io/integrations/builtin/app-nodes/n8n-nodes-langchain.googlegemini/)\n\nFinally, the DatasetItems API provides a \"Share link\" to publicly share the page data binary. You can use these links as image inputs for most multimodal LLMs to perform, as in this example, document OCR via VLM (Visual Language Model).\nThe benefit of using Subworkflow here is no additional binary data is further downloaded and/or passed between nodes helping to reduce the memory required to run the workflow."
      },
      "typeVersion": 1
    },
    {
      "id": "273556be-9941-46ea-985e-94795949a741",
      "name": "Job Complete?",
      "type": "n8n-nodes-base.if",
      "position": [
        -48,
        32
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "3b76a3df-2333-42ae-adc9-e82c3e6d8cf5",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.data.status }}",
              "rightValue": "SUCCESS"
            },
            {
              "id": "5ae3b589-fd55-43ea-8e94-4923dd2bbc5f",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.data.status }}",
              "rightValue": "ERROR"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "65566edc-f596-4aae-ae57-37393d41f7b9",
      "name": "Document OCR via VLM",
      "type": "@n8n/n8n-nodes-langchain.googleGemini",
      "position": [
        1440,
        -128
      ],
      "parameters": {
        "text": "Transcribe this image to Markdown",
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "models/gemini-2.5-flash",
          "cachedResultName": "models/gemini-2.5-flash"
        },
        "options": {},
        "resource": "image",
        "imageUrls": "={{ $json.share.url }}",
        "operation": "analyze"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "credential-id",
          "name": "googlePalmApi Credential"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "00a67bf0-7b51-4bd6-bc2d-dbb25b1ce907",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1744,
        -32
      ],
      "parameters": {
        "width": 416,
        "height": 96,
        "content": "### Next Steps\nLet's next check out the Search API!\nhttps://docs.subworkflow.ai/api-reference/post-v1-search"
      },
      "typeVersion": 1
    },
    {
      "id": "780ae573-6d15-4cbc-8c5f-0e1893c6a9bc",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1360,
        -784
      ],
      "parameters": {
        "width": 480,
        "height": 1232,
        "content": "[![banner](https://cdn.subworkflow.ai/marketing/banner-300x100.png#full-width)](https://subworkflow.ai?utm=n8n)\n## Working with Large Documents In Your VLM OCR Workflow\n\nDocument workflows are popular ways to use AI but what happens when your document is too large for your app or your AI to handle? Whether its context window or application memory that's grinding to a halt, [Subworkflow.ai](https://subworkflow.ai) is one approach to keep you going.\n\n### Prequisites\n1. You'll need a Subworkflow.ai API key to use the Subworkflow.ai service.\n2. Add the API key as a header auth credential. More details in the official docs - [https://docs.subworkflow.ai/category/api-reference](https://docs.subworkflow.ai/category/api-reference)\n\n### How it Works\n1. Import your document into your n8n workflow\n2. Upload it to the Subworkflow.ai service via the **Extract API** using the HTTP node. This endpoint takes files up to 100mb.\n3. Once uploaded, this will trigger an `Extract` job on the service's side and the response is a \"job\" record to track progress.\n4. Poll Subworkflow.ai's `Jobs` endpoint and keep polling until the job is finished. You can use the \"IF\" node looping back unto itself to achieve this in n8n.\n5. Once the job is done, the `Dataset` of the uploaded document is ready for retrieval. Use the `Datasets` and `DatasetItems` API to retrieve whatever you need to complete your AI task.\n6. In this example, all pages are retrieved and run through a multimodal LLM to parse into markdown. A well-known process when parsing data tables or graphics are required.\n\n### How to use\n* Integrate Subworkflow's Extract API seemlessly into your existing document workflows to support larger documents from 100mb+ to up to 5000 pages.\n\n### Customising the workflow\n* Sometimes you don't want the entire document back especially if the document is quite large (think 500+ pages!), instead, use query parameters on the `DatasetItems` API to pick individual pages or a range of pages to reduce the load.\n\n### Need Help?\n* **Official API documentation** - [https://docs.subworkflow.ai/category/api-reference](https://docs.subworkflow.ai/category/api-reference)\n* **Join the discord** - [https://discord.gg/RCHeCPJnYw](RCHeCPJnYw)"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "Wait": {
      "main": [
        [
          {
            "node": "Job Complete?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out": {
      "main": [
        [
          {
            "node": "Document OCR via VLM",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract API": {
      "main": [
        [
          {
            "node": "Job Complete?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Dataset": {
      "main": [
        [
          {
            "node": "Get Dataset Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download file": {
      "main": [
        [
          {
            "node": "Extract API",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Job Complete?": {
      "main": [
        [
          {
            "node": "Get Dataset",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Check Job Status",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Job Status": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Dataset Items": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Execute workflow’": {
      "main": [
        [
          {
            "node": "Download file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}