"""DefaultAzureCredential will use the values from these environment
variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.identity import DefaultAzureCredential
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
credential = DefaultAzureCredential()
document_analysis_client = DocumentAnalysisClient(endpoint, credential)
DocumentAnalysisClient
DocumentAnalysisClient
提供通过 begin_analyze_document
和 begin_analyze_document_from_url
API 使用预生成模型和自定义模型分析输入文档的操作。
model_id
使用 参数选择要分析的模型类型。 在此处查看支持模型的完整列表。
DocumentAnalysisClient
还提供通过 begin_classify_document
和 begin_classify_document_from_url
API 对文档进行分类的操作。
自定义分类模型可对输入文件中的每一页进行分类,以识别其中的文档,还可以识别输入文件中的多个文档或单个文档的多个实例。
此处提供了示例代码片段,以使用 DocumentAnalysisClient 进行说明。
有关分析文档的详细信息,包括支持的功能、区域设置和文档类型,请参阅 服务文档。
DocumentModelAdministrationClient
DocumentModelAdministrationClient
提供操作以实现以下目的:
生成自定义模型以分析通过标记自定义文档指定的特定字段。 返回一个 DocumentModelDetails
,指示模型可以分析的文档类型 () ,以及每个字段的估计置信度。 有关更详细的说明,请参阅 服务文档 。
从现有模型集合创建组合模型。
管理在帐户中创建的模型。
列出操作或获取在过去 24 小时内创建的特定模型操作。
将自定义模型从一个表单识别器资源复制到另一个资源。
生成和管理自定义分类模型,以便对应用程序中处理的文档进行分类。
请注意,也可以使用图形用户界面(如 Document Intelligence Studio)生成模型。
此处提供了示例代码片段,以使用 DocumentModelAdministrationClient 进行说明。
长期运行的操作
长时间运行的操作包括发送到服务以启动操作的初始请求,然后每隔一段时间轮询服务以确定操作是否已完成或失败,以及是否成功获取结果。
分析文档、生成模型或复制/撰写模型的方法建模为长时间运行的操作。
客户端公开返回 begin_<method-name>
LROPoller
或 AsyncLROPoller
的方法。 调用方应通过对从 begin_<method-name>
方法返回的轮询器对象调用 result()
来等待操作完成。
提供了示例代码片段来说明如何使用长时间运行的操作 。
以下部分提供了几个代码片段,涵盖了一些最常见的文档智能任务,包括:
使用常规文档模型
使用预生成模型
生成自定义模型
使用自定义模型分析文档
对文档进行分类
从文档中提取文本、选择标记、文本样式和表结构及其边界区域坐标。
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
document_analysis_client = DocumentAnalysisClient(
endpoint=endpoint, credential=AzureKeyCredential(key)
with open(path_to_sample_documents, "rb") as f:
poller = document_analysis_client.begin_analyze_document(
"prebuilt-layout", document=f
result = poller.result()
for idx, style in enumerate(result.styles):
print(
"Document contains {} content".format(
"handwritten" if style.is_handwritten else "no handwritten"
for page in result.pages:
print("----Analyzing layout from page #{}----".format(page.page_number))
print(
"Page has width: {} and height: {}, measured with unit: {}".format(
page.width, page.height, page.unit
for line_idx, line in enumerate(page.lines):
words = line.get_words()
print(
"...Line # {} has word count {} and text '{}' within bounding polygon '{}'".format(
line_idx,
len(words),
line.content,
line.polygon,
for word in words:
print(
"......Word '{}' has a confidence of {}".format(
word.content, word.confidence
for selection_mark in page.selection_marks:
print(
"...Selection mark is '{}' within bounding polygon '{}' and has a confidence of {}".format(
selection_mark.state,
selection_mark.polygon,
selection_mark.confidence,
for table_idx, table in enumerate(result.tables):
print(
"Table # {} has {} rows and {} columns".format(
table_idx, table.row_count, table.column_count
for region in table.bounding_regions:
print(
"Table # {} location on page: {} is {}".format(
table_idx,
region.page_number,
region.polygon,
for cell in table.cells:
print(
"...Cell[{}][{}] has content '{}'".format(
cell.row_index,
cell.column_index,
cell.content,
for region in cell.bounding_regions:
print(
"...content on page {} is within bounding polygon '{}'".format(
region.page_number,
region.polygon,
print("----------------------------------------")
使用常规文档模型
使用文档智能服务提供的常规文档模型分析文档中的键值对、表、样式和选择标记。
通过传入 model_id="prebuilt-document"
begin_analyze_document
方法选择“常规文档模型”:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
document_analysis_client = DocumentAnalysisClient(
endpoint=endpoint, credential=AzureKeyCredential(key)
with open(path_to_sample_documents, "rb") as f:
poller = document_analysis_client.begin_analyze_document(
"prebuilt-document", document=f
result = poller.result()
for style in result.styles:
if style.is_handwritten:
print("Document contains handwritten content: ")
print(",".join([result.content[span.offset:span.offset + span.length] for span in style.spans]))
print("----Key-value pairs found in document----")
for kv_pair in result.key_value_pairs:
if kv_pair.key:
print(
"Key '{}' found within '{}' bounding regions".format(
kv_pair.key.content,
kv_pair.key.bounding_regions,
if kv_pair.value:
print(
"Value '{}' found within '{}' bounding regions\n".format(
kv_pair.value.content,
kv_pair.value.bounding_regions,
for page in result.pages:
print("----Analyzing document from page #{}----".format(page.page_number))
print(
"Page has width: {} and height: {}, measured with unit: {}".format(
page.width, page.height, page.unit
for line_idx, line in enumerate(page.lines):
words = line.get_words()
print(
"...Line # {} has {} words and text '{}' within bounding polygon '{}'".format(
line_idx,
len(words),
line.content,
line.polygon,
for word in words:
print(
"......Word '{}' has a confidence of {}".format(
word.content, word.confidence
for selection_mark in page.selection_marks:
print(
"...Selection mark is '{}' within bounding polygon '{}' and has a confidence of {}".format(
selection_mark.state,
selection_mark.polygon,
selection_mark.confidence,
for table_idx, table in enumerate(result.tables):
print(
"Table # {} has {} rows and {} columns".format(
table_idx, table.row_count, table.column_count
for region in table.bounding_regions:
print(
"Table # {} location on page: {} is {}".format(
table_idx,
region.page_number,
region.polygon,
for cell in table.cells:
print(
"...Cell[{}][{}] has content '{}'".format(
cell.row_index,
cell.column_index,
cell.content,
for region in cell.bounding_regions:
print(
"...content on page {} is within bounding polygon '{}'\n".format(
region.page_number,
region.polygon,
print("----------------------------------------")
在此处阅读有关模型提供prebuilt-document
的功能的详细信息。
使用预生成模型
使用文档智能服务提供的预生成模型,从所选文档类型(如收据、发票、名片、标识文档和美国 W-2 税务文档)中提取字段。
例如,若要分析销售收据中的字段,请使用通过传入 model_id="prebuilt-receipt"
begin_analyze_document
方法提供的预生成收据模型:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
document_analysis_client = DocumentAnalysisClient(
endpoint=endpoint, credential=AzureKeyCredential(key)
with open(path_to_sample_documents, "rb") as f:
poller = document_analysis_client.begin_analyze_document(
"prebuilt-receipt", document=f, locale="en-US"
receipts = poller.result()
for idx, receipt in enumerate(receipts.documents):
print(f"--------Analysis of receipt #{idx + 1}--------")
print(f"Receipt type: {receipt.doc_type if receipt.doc_type else 'N/A'}")
merchant_name = receipt.fields.get("MerchantName")
if merchant_name:
print(
f"Merchant Name: {merchant_name.value} has confidence: "
f"{merchant_name.confidence}"
transaction_date = receipt.fields.get("TransactionDate")
if transaction_date:
print(
f"Transaction Date: {transaction_date.value} has confidence: "
f"{transaction_date.confidence}"
if receipt.fields.get("Items"):
print("Receipt items:")
for idx, item in enumerate(receipt.fields.get("Items").value):
print(f"...Item #{idx + 1}")
item_description = item.value.get("Description")
if item_description:
print(
f"......Item Description: {item_description.value} has confidence: "
f"{item_description.confidence}"
item_quantity = item.value.get("Quantity")
if item_quantity:
print(
f"......Item Quantity: {item_quantity.value} has confidence: "
f"{item_quantity.confidence}"
item_price = item.value.get("Price")
if item_price:
print(
f"......Individual Item Price: {item_price.value} has confidence: "
f"{item_price.confidence}"
item_total_price = item.value.get("TotalPrice")
if item_total_price:
print(
f"......Total Item Price: {item_total_price.value} has confidence: "
f"{item_total_price.confidence}"
subtotal = receipt.fields.get("Subtotal")
if subtotal:
print(f"Subtotal: {subtotal.value} has confidence: {subtotal.confidence}")
tax = receipt.fields.get("TotalTax")
if tax:
print(f"Total tax: {tax.value} has confidence: {tax.confidence}")
tip = receipt.fields.get("Tip")
if tip:
print(f"Tip: {tip.value} has confidence: {tip.confidence}")
total = receipt.fields.get("Total")
if total:
print(f"Total: {total.value} has confidence: {total.confidence}")
print("--------------------------------------")
不限于收据! 有几个预生成模型可供选择,每个模型都有自己的一组受支持字段。 在此处查看其他支持的预生成模型。
生成自定义模型
根据自己的文档类型生成自定义模型。 生成的模型可用于分析其训练所基于的文档类型的值。
提供用于存储训练文档的 Azure 存储 Blob 容器的容器 SAS URL。
有关设置容器和所需文件结构的更多详细信息,请参阅 服务文档。
from azure.ai.formrecognizer import (
DocumentModelAdministrationClient,
ModelBuildMode,
from azure.core.credentials import AzureKeyCredential
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
container_sas_url = os.environ["CONTAINER_SAS_URL"]
document_model_admin_client = DocumentModelAdministrationClient(
endpoint, AzureKeyCredential(key)
poller = document_model_admin_client.begin_build_document_model(
ModelBuildMode.TEMPLATE,
blob_container_url=container_sas_url,
description="my model description",
model = poller.result()
print(f"Model ID: {model.model_id}")
print(f"Description: {model.description}")
print(f"Model created on: {model.created_on}")
print(f"Model expires on: {model.expires_on}")
print("Doc types the model can recognize:")
for name, doc_type in model.doc_types.items():
print(
f"Doc Type: '{name}' built with '{doc_type.build_mode}' mode which has the following fields:"
for field_name, field in doc_type.field_schema.items():
print(
f"Field: '{field_name}' has type '{field['type']}' and confidence score "
f"{doc_type.field_confidence[field_name]}"
使用自定义模型分析文档
分析文档字段、表格、选择标记等。 这些模型是使用你自己的数据训练的,因此它们是针对你的文档定制的。
为了获得最佳结果,应仅分析生成自定义模型时使用的相同文档类型的文档。
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)
document_analysis_client = DocumentAnalysisClient(
endpoint=endpoint, credential=AzureKeyCredential(key)
# Make sure your document's type is included in the list of document types the custom model can analyze
with open(path_to_sample_documents, "rb") as f:
poller = document_analysis_client.begin_analyze_document(
model_id=model_id, document=f
result = poller.result()
for idx, document in enumerate(result.documents):
print(f"--------Analyzing document #{idx + 1}--------")
print(f"Document has type {document.doc_type}")
print(f"Document has document type confidence {document.confidence}")
print(f"Document was analyzed with model with ID {result.model_id}")
for name, field in document.fields.items():
field_value = field.value if field.value else field.content
print(
f"......found field of type '{field.value_type}' with value '{field_value}' and with confidence {field.confidence}"
# iterate over tables, lines, and selection marks on each page
for page in result.pages:
print(f"\nLines found on page {page.page_number}")
for line in page.lines:
print(f"...Line '{line.content}'")
for word in page.words:
print(f"...Word '{word.content}' has a confidence of {word.confidence}")
if page.selection_marks:
print(f"\nSelection marks found on page {page.page_number}")
for selection_mark in page.selection_marks:
print(
f"...Selection mark is '{selection_mark.state}' and has a confidence of {selection_mark.confidence}"
for i, table in enumerate(result.tables):
print(f"\nTable {i + 1} can be found on page:")
for region in table.bounding_regions:
print(f"...{region.page_number}")
for cell in table.cells:
print(
f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'"
print("-----------------------------------")
或者,文档 URL 也可用于使用 begin_analyze_document_from_url
方法分析文档。
document_url = "<url_of_the_document>"
poller = document_analysis_client.begin_analyze_document_from_url(model_id=model_id, document_url=document_url)
result = poller.result()
管理附加到帐户的自定义模型。
from azure.ai.formrecognizer import DocumentModelAdministrationClient
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import ResourceNotFoundError
endpoint = "https://<my-custom-subdomain>.cognitiveservices.azure.com/"
credential = AzureKeyCredential("<api_key>")
document_model_admin_client = DocumentModelAdministrationClient(endpoint, credential)
account_details = document_model_admin_client.get_resource_details()
print("Our account has {} custom models, and we can have at most {} custom models".format(
account_details.custom_document_models.count, account_details.custom_document_models.limit
# Here we get a paged list of all of our models
models = document_model_admin_client.list_document_models()
print("We have models with the following ids: {}".format(
", ".join([m.model_id for m in models])
# Replace with the custom model ID from the "Build a model" sample
model_id = "<model_id from the Build a Model sample>"
custom_model = document_model_admin_client.get_document_model(model_id=model_id)
print("Model ID: {}".format(custom_model.model_id))
print("Description: {}".format(custom_model.description))
print("Model created on: {}\n".format(custom_model.created_on))
# Finally, we will delete this model by ID
document_model_admin_client.delete_document_model(model_id=custom_model.model_id)
document_model_admin_client.get_document_model(model_id=custom_model.model_id)
except ResourceNotFoundError:
print("Successfully deleted model with id {}".format(custom_model.model_id))
文档智能支持更复杂的分析功能。 根据文档提取方案,可以启用和禁用这些可选功能。
以下加载项功能适用于 2023-07-31 (GA) 及更高版本:
条形码/QR 码
font/style
高分辨率模式
language
请注意,某些加载项功能会产生额外费用。 请参阅定价: https://azure.microsoft.com/pricing/details/ai-document-intelligence/。
表单识别器客户端库将引发 Azure Core 中定义的异常。
文档智能服务引发的错误代码和消息可在 服务文档中找到。
此库使用标准 日志记录 库进行日志记录。
有关 HTTP 会话 (URL、标头等的基本信息,) 是在级别记录的 INFO
。
可以使用 关键字 (keyword) 参数在客户端上或按操作logging_enable
启用详细DEBUG
级别日志记录,包括请求/响应正文和未实现的标头。
请参阅此处提供示例的完整 SDK 日志记录文档。
可选关键字 (keyword) 参数可以在客户端和按操作级别传入。
azure-core 参考文档 介绍了重试、日志记录、传输协议等的可用配置。
更多示例代码
有关演示 表单识别器 Python API 中使用的常见模式的多个代码片段,请参阅示例自述文件。
有关 Azure AI 文档智能的更多文档,请参阅有关 docs.microsoft.com 的文档智能文档 。
本项目欢迎贡献和建议。 大多数贡献要求你同意贡献者许可协议 (CLA),并声明你有权(并且确实有权)授予我们使用你的贡献的权利。 有关详细信息,请访问 cla.microsoft.com。
提交拉取请求时,CLA 机器人将自动确定你是否需要提供 CLA,并相应地修饰 PR(例如标签、注释)。 直接按机器人提供的说明操作。 只需使用 CLA 对所有存储库执行一次这样的操作。
此项目采用了 Microsoft 开放源代码行为准则。 有关详细信息,请参阅行为准则常见问题解答,或如果有任何其他问题或意见,请与 联系。
即将发布:在整个 2024 年,我们将逐步淘汰作为内容反馈机制的“GitHub 问题”,并将其取代为新的反馈系统。 有关详细信息,请参阅:https://aka.ms/ContentUserFeedback。
提交和查看相关反馈