Part 2: Form Table Extraction
Buy Me a Coffee? Your support is much appreciated!
Source Code:
# Part 2 script: recognize the layout of a form image and print every table
# cell together with its bounding box and confidence score.
import json

from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient, FormTrainingClient

# Read the API key and endpoint. The context manager closes the credential
# file deterministically instead of leaving the handle to the garbage collector.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_url = 'https://templates.invoicehome.com/invoice-template-us-neat-750px.png'

form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))
poller = form_recognizer_client.begin_recognize_content_from_url(form_url)
# result() waits for the long-running operation and returns one entry per page.
form_result = poller.result()

for page in form_result:
    for table in page.tables:
        print('Column Count: {0}'.format(table.column_count))
        print('Row Count: {0}'.format(table.row_count))
        for cell in table.cells:
            print('Cell Value: {0}'.format(cell.text))
            print('Location: {0}'.format(cell.bounding_box))
            print('Confidence Score: {0}'.format(cell.confidence))
Part 3: Receipt Data Extraction
Buy Me a Coffee? Your support is much appreciated!
Source Code:
# Part 3 script: recognize a receipt image and print every extracted field,
# expanding the 'Items' field into its individual line items.
import json
import time

from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the API key and endpoint; `with` closes the credential file promptly.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

receipt_url = 'https://media-cdn.tripadvisor.com/media/photo-s/13/f7/7a/75/receipt.jpg'
poller = form_recognizer_client.begin_recognize_receipts_from_url(receipt_url)

# Wait for the operation itself rather than sleeping a fixed 3 seconds and
# then checking the status once: with the fixed sleep, any analysis that took
# longer was silently skipped and the script printed nothing.
result = poller.result()

for receipt in result:
    print(receipt.form_type)
    for name, field in receipt.fields.items():
        if name == 'Items':
            print('Purchase Item')
            for indx, item in enumerate(field.value):
                print('\tItem #{0}'.format(indx + 1))
                # Distinct name for the sub-field so the enclosing loop
                # variable `item` is not rebound while it is being iterated.
                for item_name, item_field in item.value.items():
                    print('\t{0}: {1} Confidence: {2}'.format(
                        item_name, item_field.value, item_field.confidence))
        else:
            print('{0}: {1} - Confidence {2}'.format(name, field.value, field.confidence))
Part 4: Business Card Info Extraction
Buy Me a Coffee? Your support is much appreciated!
Source Code:
# Part 4 script: recognize a business card image and print every extracted
# field with its confidence score.
import json

from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the API key and endpoint; `with` closes the credential file promptly.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

business_card_url = 'https://cdn.shopify.com/s/files/1/0256/9879/5601/products/improved_paul_allen_1_1600_1_1512x.jpg?v=1579733484'
poller = form_recognizer_client.begin_recognize_business_cards_from_url(business_card_url)
business_cards = poller.result()

form_type = business_cards[0].form_type
# The original also evaluated `business_cards[0].fields.key` here, which
# raises AttributeError on the fields mapping and aborted the script.


def extract_business_card_field_value(business_card, field_name):
    """Print the value(s) and confidence of one business card field.

    Args:
        business_card: A recognized business card exposing a ``fields``
            mapping of field name to field object.
        field_name: Name of the field to print.

    'ContactNames' entries are expanded one level: each sub-field name is
    printed, followed by its value and confidence. Every other field is
    printed as one ``value | confidence`` line per entry.
    Prints 'Field is not found' when the card has no such field, and
    'Nothing is returned' when the field carries no usable value.
    """
    field = business_card.fields.get(field_name)
    if field is None:
        print('Field is not found')
        return

    try:
        # Exact comparison: `in ('ContactNames')` was a substring test against
        # a plain string, so names such as 'Contact' matched as well.
        if field_name == 'ContactNames':
            for contact in field.value:
                for key, part in contact.value.items():
                    print(str(key) + ':')
                    print(part.value, '|', part.confidence)
        else:
            for entry in field.value:
                print(entry.value, '|', entry.confidence)
    except (AttributeError, TypeError):
        # TypeError covers a field whose value is None (not iterable).
        print('Nothing is returned')


business_fields = business_cards[0].fields.keys()
for field_name in business_fields:
    print(field_name)
    print('-'*25)
    extract_business_card_field_value(business_cards[0], field_name)
    print()
Part 5: Invoice Data Extraction
Buy Me a Coffee? Your support is much appreciated!
Source Code:
# Part 5 script: recognize an invoice image and print every extracted field,
# expanding the 'Items' field into its individual line items.
import json

from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient

# Read the API key and endpoint; `with` closes the credential file promptly.
with open('./Azure-Form-Recognizer/credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

invoice_url = 'http://www.latextemplates.com/wp-content/uploads/2012/11/invoice.png'
poller = form_recognizer_client.begin_recognize_invoices_from_url(invoice_url)
# result() waits for the operation to finish and raises if it failed.
result = poller.result()


def extract_invoice_field_value(invoice, field_name):
    """Print one invoice field with its confidence score.

    Args:
        invoice: A recognized invoice exposing a ``fields`` mapping of field
            name to field object.
        field_name: Name of the field to print.

    For 'Items', every sub-field of every line item is printed (name, a
    separator, then ``value|confidence``). Any other field is printed as its
    name, a separator, then ``value|confidence``.
    Prints 'Nothing is found' when the field (or a sub-field) is missing.
    """
    try:
        if field_name == 'Items':
            for item in invoice.fields.get('Items').value:
                for key, sub_field in item.value.items():
                    print('\t' + str(key))
                    print('-'*25)
                    print('\t' + str(sub_field.value) + '|' + str(sub_field.confidence))
                    print()
        else:
            print(field_name)
            print('-'*25)
            field = invoice.fields.get(field_name)  # None when absent
            print(str(field.value) + '|' + str(field.confidence))
    except AttributeError:
        print('Nothing is found')


# No status check is needed once result() has returned. The original also
# evaluated `result[0].fields.keys()` as a bare expression, which did nothing
# except raise IndexError when no invoice was recognized.
for invoice in result:
    for field_key in invoice.fields.keys():
        extract_invoice_field_value(invoice, field_key)
Part 6: Driver License/State Id Info Extraction
Buy Me a Coffee? Your support is much appreciated!
Source Code:
# Part 6 script: recognize a driver license / state ID image and print every
# extracted field with its confidence score.
import json

from azure.core.exceptions import ResourceNotFoundError
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient


def extract_id_field_value(id_card, field_name):
    """Print one identity-document field with its confidence score.

    Args:
        id_card: A recognized identity document exposing a ``fields`` mapping
            of field name to field object.
        field_name: Name of the field to print.

    Prints the field name and a separator first, then the value and
    confidence, or 'Nothing returned' when the document has no such field.
    """
    print(field_name)
    print('-'*25)
    field = id_card.fields.get(field_name)
    if field is None:
        print('Nothing returned')
        return
    print('Field Value: {0}'.format(field.value))
    print('Confidence Score: {0}'.format(field.confidence))
    print()


# Read the API key and endpoint; `with` closes the credential file promptly.
# NOTE(review): the other parts load './Azure-Form-Recognizer/credential.json';
# confirm which working directory this script is meant to run from.
with open('credential.json') as credential_file:
    credentials = json.load(credential_file)
API_KEY = credentials['API_KEY']
ENDPOINT = credentials['ENDPOINT']

form_recognizer_client = FormRecognizerClient(ENDPOINT, AzureKeyCredential(API_KEY))

driver_license_url = 'https://gray-kolo-prod.cdn.arcpublishing.com/resizer/5Gp4BHAD5XTGGPRAAvXetQAu3YY=/1200x675/smart/filters:quality(85)/cloudfront-us-east-1.images.arcpublishing.com/gray/KUKKLVRFP5BKJLNLGH2NWSEPRA.jpg'
poller = form_recognizer_client.begin_recognize_identity_documents_from_url(driver_license_url)

# Wait for the operation to finish. The original tested
# `poller.status() == 'succeeded'` immediately after starting it, with no
# wait at all, so the check normally failed and nothing was printed.
result = poller.result()

for form in result:
    # Use each document's own field names instead of reusing the first
    # document's keys (which also raised IndexError on an empty result).
    for field_name in form.fields.keys():
        extract_id_field_value(form, field_name)