Part 2: Form Table Extraction



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient, FormTrainingClient

# Read the Form Recognizer key and endpoint from a local JSON file. The
# context manager closes the file handle as soon as parsing is done.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_url = 'https://templates.invoicehome.com/invoice-template-us-neat-750px.png'
form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))
# Start the content (layout) recognition and wait for the service to finish.
poller = form_recognizer_client.begin_recognize_content_from_url(form_url)
form_result = poller.result()

# Report every table on every page: its dimensions first, then each cell's
# text, bounding box and confidence.
for page in form_result:
    for table in page.tables:
        print('Column Count: {0}'.format(table.column_count))
        print('Row Count: {0}'.format(table.row_count))
        for cell in table.cells:
            print('Cell Value: {0}'.format(cell.text))
            print('Location: {0}'.format(cell.bounding_box))
            print('Confidence Score: {0}'.format(cell.confidence))

Part 3: Receipt Data Extraction



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
import time
from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the Form Recognizer key and endpoint from a local JSON file. The
# context manager closes the file handle as soon as parsing is done.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']
form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

receipt_url = 'https://media-cdn.tripadvisor.com/media/photo-s/13/f7/7a/75/receipt.jpg'
poller = form_recognizer_client.begin_recognize_receipts_from_url(receipt_url)

# Wait on the poller itself rather than sleeping a fixed 3 seconds and
# checking the status once: a slower analysis used to print nothing at all.
result = poller.result()
for receipt in result:
    print(receipt.form_type)
    for name, field in receipt.fields.items():
        if name == 'Items':
            # 'Items' holds a list of line items, each a mapping of
            # sub-field name -> field; print them one item at a time.
            print('Purchase Item')
            for indx, item in enumerate(field.value or []):
                print('\tItem #{0}'.format(indx + 1))
                # Distinct name for the sub-field so the enclosing loop
                # variable `item` is not rebound while it is being iterated.
                for item_name, item_field in item.value.items():
                    print('\t{0}: {1} Confidence: {2}'.format(item_name, item_field.value, item_field.confidence))
        else:
            print('{0}: {1} - Confidence {2}'.format(name, field.value, field.confidence))

Part 4: Business Card Info Extraction



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the Form Recognizer key and endpoint from a local JSON file. The
# context manager closes the file handle as soon as parsing is done.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']
form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

business_card_url = 'https://cdn.shopify.com/s/files/1/0256/9879/5601/products/improved_paul_allen_1_1600_1_1512x.jpg?v=1579733484'
# Start the business-card recognition and wait for the service to finish.
poller = form_recognizer_client.begin_recognize_business_cards_from_url(business_card_url)
business_cards = poller.result()

form_type = business_cards[0].form_type
# Inspection of the recognized field names (useful in an interactive session;
# the value is discarded when run as a script). The dict method is keys();
# the former `.key` attribute does not exist and raised AttributeError.
business_cards[0].fields.keys()

def extract_business_card_field_value(business_card, field_name):
    """Print each value of one business-card field with its confidence.

    Args:
        business_card: Recognized card exposing a ``fields`` mapping of
            field name -> field object (with ``value`` and ``confidence``).
        field_name: Name of the field to print.

    The field's ``value`` is iterated as a list of entries. For
    'ContactNames' every entry's ``value`` is itself a mapping of name part
    -> field, and each part is printed under its own heading; for any other
    field each entry is printed as ``value | confidence``.

    Nothing is returned or raised for missing data: 'Field is not found' is
    printed when the field is absent, and 'Nothing is returned' when the
    field has no value or an entry lacks the expected attributes.
    """
    try:
        field = business_card.fields.get(field_name)
        if field is None:
            # dict.get() never raises KeyError, so absence is checked here.
            print('Field is not found')
            return

        entries = field.value
        if entries is None:
            # Iterating None would raise TypeError, which is not handled below.
            print('Nothing is returned')
            return

        # Exact comparison: `in ('ContactNames')` was a substring test on a
        # plain string and also matched names such as 'Names' or 'Contact'.
        if field_name == 'ContactNames':
            for contact in entries:
                for part_name, part in contact.value.items():
                    print(str(part_name) + ':')
                    print(part.value, '|', part.confidence)
        else:
            for entry in entries:
                print(entry.value, '|', entry.confidence)
    except AttributeError:
        print('Nothing is returned')

# Names of all fields recognized on the first business card.
business_fields = business_cards[0].fields.keys()

# For each field: a heading (name plus a 25-dash rule), the field's values,
# then a blank separator line.
for field_name in business_fields:
    print(field_name, '-' * 25, sep='\n')
    extract_business_card_field_value(business_cards[0], field_name)
    print()

Part 5: Invoice Data Extraction



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the Form Recognizer key and endpoint from a local JSON file. The
# context manager closes the file handle as soon as parsing is done.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']
form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

invoice_url = 'http://www.latextemplates.com/wp-content/uploads/2012/11/invoice.png'
# Start the invoice recognition and wait for the service to finish.
poller = form_recognizer_client.begin_recognize_invoices_from_url(invoice_url)
result = poller.result()

# Inspection of the recognized field names (useful in an interactive session;
# the value is discarded when run as a script).
result[0].fields.keys()

def extract_invoice_field_value(invoice, field_name):
    """Print one invoice field as ``value|confidence``.

    Args:
        invoice: Recognized invoice exposing a ``fields`` mapping of field
            name -> field object (with ``value`` and ``confidence``).
        field_name: Name of the field to print.

    For 'Items' the field value is iterated as a list of line items whose
    own ``value`` is a mapping of column name -> field; each column is
    printed tab-indented under its own heading. Any other field is printed
    under a heading made of its name and a 25-dash rule.

    'Nothing is found' is printed when an attribute lookup fails, e.g. when
    the field is absent from the mapping.
    """
    rule = '-' * 25
    try:
        if field_name != 'Items':
            # Heading goes out before the lookup, so it is shown even when
            # the field turns out to be missing.
            print(field_name)
            print(rule)
            field = invoice.fields.get(field_name)
            print('|'.join((str(field.value), str(field.confidence))))
            return

        for line_item in invoice.fields.get('Items').value:
            for column, cell in line_item.value.items():
                print('\t' + str(column))
                print(rule)
                print('\t' + '|'.join((str(cell.value), str(cell.confidence))))
                print()
    except AttributeError:
        print('Nothing is found')

# Once the operation reports success, print every recognized field of every
# invoice in the result.
if poller.status() == 'succeeded':
    for invoice in result:
        for field_key in invoice.fields.keys():
            extract_invoice_field_value(invoice, field_key)

Part 6: Driver License/State ID Info Extraction



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

def extract_id_field_value(id_card, field_name):
    """Print one identity-document field: heading, value and confidence.

    Args:
        id_card: Recognized document exposing a ``fields`` mapping of field
            name -> field object (with ``value`` and ``confidence``).
        field_name: Name of the field to print.

    The heading (field name plus a 25-dash rule) is printed first. If an
    attribute lookup then fails, e.g. because the field is absent from the
    mapping, 'Nothing returned' is printed instead of the remaining lines.
    """
    try:
        print(field_name)
        print('-' * 25)
        field = id_card.fields.get(field_name)
        print(f'Field Value: {field.value}')
        print(f'Confidence Score: {field.confidence}')
        print()
    except AttributeError:
        print('Nothing returned')

# Read the Form Recognizer key and endpoint from a local JSON file. The
# context manager closes the file handle as soon as parsing is done.
with open('credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']
form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

driver_license_url = 'https://gray-kolo-prod.cdn.arcpublishing.com/resizer/5Gp4BHAD5XTGGPRAAvXetQAu3YY=/1200x675/smart/filters:quality(85)/cloudfront-us-east-1.images.arcpublishing.com/gray/KUKKLVRFP5BKJLNLGH2NWSEPRA.jpg'
poller = form_recognizer_client.begin_recognize_identity_documents_from_url(driver_license_url)

# Wait for the operation to complete. Checking poller.status() right after
# starting it almost never saw 'succeeded', so the script printed nothing.
result = poller.result()
for form in result:
    # Use each document's own field names rather than those of result[0]:
    # this avoids an IndexError on an empty result and mismatched keys when
    # several documents are returned.
    for field_name in form.fields.keys():
        extract_id_field_value(form, field_name)