A simple example of how you can use the Node.js processor with a connection to Azure Cognitive Services, specifically Form Recognizer. Before going further, review the Form Recognizer service description and usage: Invoice data extraction – Document Intelligence (formerly Form Recognizer) - Azure AI services | Microsoft Learn
In my case I want to use the prebuilt invoice model as well as a custom model for additional fields on the invoice. I also want to use the same endpoint for extracting data from invoices (prebuilt-invoice + custom) and from generic documents with the prebuilt-document model.
The attachment is stored in Xeelo, so I first download the attachment content using the Xeelo Attachment Download connector and then pass the data to the Azure service. I want to run the data extraction for the prebuilt-invoice and custom models in parallel, so I use const responseAll = await Promise.all([promise1, promise2]);
where the promises come from my own function, which posts the data to Azure and then polls for the result in a loop with a one-second delay.
The Azure credentials (key) as well as the endpoint URL of my subscription are stored in general variables as sensitive data so that I do not expose them by accident.
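For orientation, the code below assumes that the ${#AzureOCR_Key} and ${#AzureOCR_URL} placeholders are substituted by Xeelo with those general variables at runtime, and that the processor receives its input in an inputData array. The exact structure of inputData depends on how the previous Xeelo steps are mapped, so treat this sketch as illustrative only:

// illustrative only: assumed processor input after the Attachment Download step
const exampleInputData = [{
    DocumentContent: "JVBERi0xLjcK...", // base64 content of the downloaded attachment
    ModelID: "prebuilt-invoice"         // any other value falls through to the prebuilt-document branch
}];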
// functions
// helper: resolve after the given number of milliseconds (skip if the processor runtime already provides sleep)
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function getOCRData(stringURL){
    const Key = "${#AzureOCR_Key}";
    // upload the file to Azure OCR (document content is passed as base64)
    const responsePOST = await fetch(stringURL, {
        method: "POST",
        body: JSON.stringify({"base64Source": inputData[0].DocumentContent}),
        //body: JSON.stringify({"urlSource": DocumentUrl}), // alternative: analyze a publicly reachable URL
        headers: {
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Key": Key
        }
    });
    if(!responsePOST.ok){
        log.error("Upload of document into Azure OCR was not successful with the following error:");
        const resp = await responsePOST.json();
        log.error(JSON.stringify(resp.error?.message));
        log.error(JSON.stringify(resp.error?.innererror?.message));
        throw new Error("Azure OCR document upload failed.");
    }
    // extract the callback URL of the analyze operation from the upload response
    const CallbackURL = responsePOST.headers.get("Operation-Location");
    let OCRDataStatus = "new";
    let OCRData = null;
    // poll the operation for the OCR result in a loop with a one-second delay
    while(OCRDataStatus != "succeeded"){
        const responseGET = await fetch(CallbackURL, {
            method: "GET",
            headers: {
                "Ocp-Apim-Subscription-Key": Key
            }
        });
        if(!responseGET.ok){
            log.error("Document fetch from Azure OCR was not successful.");
        }
        OCRData = await responseGET.json();
        OCRDataStatus = OCRData.status;
        if(OCRDataStatus == "failed"){
            log.error("Azure OCR analysis failed: " + JSON.stringify(OCRData.error));
            throw new Error("Azure OCR analysis failed.");
        }
        if(OCRDataStatus != "succeeded"){
            log.warn("Azure OCR status: " + OCRDataStatus);
            await sleep(1000);
        }
    }
    return OCRData;
}

// main code
const EndpointURL = "${#AzureOCR_URL}";
const ModelID = inputData[0].ModelID;
let OCRData = {};
if(ModelID == "prebuilt-invoice"){
    // run the prebuilt invoice model and the custom model in parallel
    const AzureURL = EndpointURL + "/formrecognizer/documentModels/prebuilt-invoice:analyze?api-version=2023-07-31&features=keyValuePairs";
    const OCRDataPromise = getOCRData(AzureURL);
    const AzureURLExt = EndpointURL + "/formrecognizer/documentModels/Invoice_Extended_v12:analyze?api-version=2023-07-31";
    const OCRData2Promise = getOCRData(AzureURLExt);
    const OCRDataResult = await Promise.all([OCRDataPromise, OCRData2Promise]);
    OCRData = OCRDataResult[0];
    const OCRData2 = OCRDataResult[1];
    // merge the documents extracted by the custom model into the prebuilt-invoice result
    if(OCRData2.analyzeResult?.documents?.length > 0){
        OCRData.analyzeResult.documents.push(...OCRData2.analyzeResult.documents);
    }
} else {
    // any other model ID goes through the generic prebuilt-document model
    const AzureURL = EndpointURL + "/formrecognizer/documentModels/prebuilt-document:analyze?api-version=2023-07-31";
    OCRData = await getOCRData(AzureURL);
}
return [{
    "JSON": OCRData
}];
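The processor therefore returns the full analyze result, with the custom model's documents merged in, under the JSON key. As a rough sketch of how a follow-up step could read individual values out of it: the docType value and the field names below (VendorName, InvoiceTotal) are taken from the prebuilt-invoice schema as I understand it, and fields from the custom model appear in the merged documents, so verify both against your actual payload.

// illustrative only: reading values from the returned analyze result
const documents = OCRData.analyzeResult?.documents ?? [];
const invoiceDoc = documents.find(d => d.docType === "invoice"); // prebuilt-invoice document
const vendorName = invoiceDoc?.fields?.VendorName?.content;
const invoiceTotal = invoiceDoc?.fields?.InvoiceTotal?.content;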