在 Python 中设置 Tagged PDF Structure Element 属性
Structure elements 定义 PDF 文档的语义层次结构,例如章节、标题、段落或表格。通过设置 title、language、alternative_text、actual_text 和 expansion_text 等属性,您可以提升 PDF 对屏幕阅读器等辅助技术的可访问性和语义意义。
以下代码片段展示了如何设置标记 PDF 文档的结构元素属性:
- 创建一个新的标记 PDF 文档。
- 设置文档元数据。
- 创建结构元素。
- 设置可访问性属性。
- 保存 Tagged PDF。
import aspose.pdf as ap
import sys
from os import path
def set_properties(outfile):
    """Create a tagged PDF whose header element carries descriptive properties.

    A section element is appended to the root structure element and a header
    element (created with the argument ``1``) is appended to that section.
    The header then receives its text plus the ``title``, ``language``,
    ``alternative_text``, ``expansion_text`` and ``actual_text`` properties
    before the document is saved.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    with ap.Document() as pdf:
        # Entry point for working with the tagged content of the document
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Tagged Pdf Document")
        content.set_language("en-US")

        # Structure tree: root -> section -> header
        root = content.root_element
        section = content.create_sect_element()
        root.append_child(section, True)

        heading = content.create_header_element(1)
        section.append_child(heading, True)
        heading.set_text("The Header")

        # Descriptive properties of the header element
        heading.title = "Title"
        heading.language = "en-US"
        heading.alternative_text = "Alternative Text"
        heading.expansion_text = "Expansion Text"
        heading.actual_text = "Actual Text"

        # Write the tagged PDF
        pdf.save(outfile)
设置文本结构元素
为了设置 Tagged PDF 文档的文本结构元素,Aspose.PDF 提供了段落元素类。以下代码片段展示了如何设置 Tagged PDF 文档的文本结构元素:
import aspose.pdf as ap
import sys
from os import path
def set_text_elements(outfile):
    """Create a tagged PDF containing a single paragraph element.

    The paragraph receives the text ``"Paragraph."`` and is appended to the
    root structure element before the document is saved.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    with ap.Document() as pdf:
        # Entry point for working with the tagged content of the document
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Tagged Pdf Document")
        content.set_language("en-US")

        root = content.root_element

        # The text is assigned first, then the paragraph joins the tree
        paragraph = content.create_paragraph_element()
        paragraph.set_text("Paragraph.")
        root.append_child(paragraph, True)

        # Write the tagged PDF
        pdf.save(outfile)
设置文本块结构元素
此 Python 示例使用 Aspose.PDF 创建一个带有结构化层次标题和段落的 Tagged PDF,提升文档的语义和可访问性特性。
- 创建一个新的标记 PDF 文档。
- 设置文档元数据。
- 访问根结构元素。
- 创建多级标题。
- 将标题追加到根结构。
- 创建一个段落元素。
- 将段落追加到根结构。
- 保存 Tagged PDF。
以下代码片段展示了如何设置 Tagged PDF 文档的文本块结构元素:
import aspose.pdf as ap
import sys
from os import path
def set_text_block_elements(outfile):
    """Create a tagged PDF with six header elements and one paragraph.

    Header elements are created with the arguments 1 through 6, given their
    texts, and appended to the root structure element in that order. A
    paragraph holding placeholder text is appended after them, then the
    document is saved.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    header_texts = (
        "H1. Header of Level 1",
        "H2. Header of Level 2",
        "H3. Header of Level 3",
        "H4. Header of Level 4",
        "H5. Header of Level 5",
        "H6. Header of Level 6",
    )
    # Adjacent literals are concatenated into one paragraph string
    body_text = (
        "P. Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
        "Aenean nec lectus ac sem faucibus imperdiet. Sed ut erat ac magna ullamcorper hendrerit."
        " Cras pellentesque libero semper, gravida magna sed, luctus leo. "
        "Fusce lectus odio, laoreet nec ullamcorper ut, molestie eu elit. "
        "Interdum et malesuada fames ac ante ipsum primis in faucibus. "
        "Aliquam lacinia sit amet elit ac consectetur. "
        "Donec cursus condimentum ligula, vitae volutpat sem tristique eget. "
        "Nulla in consectetur massa. Vestibulum vitae lobortis ante. "
        "Nulla ullamcorper pellentesque justo rhoncus accumsan. "
        "Mauris ornare eu odio non lacinia. Aliquam massa leo, rhoncus ac iaculis eget, tempus et magna. "
        "Sed non consectetur elit. Sed vulputate, quam sed lacinia luctus, ipsum nibh fringilla purus, "
        "vitae posuere risus odio id massa. Cras sed venenatis lacus."
    )

    with ap.Document() as pdf:
        # Entry point for working with the tagged content of the document
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Tagged Pdf Document")
        content.set_language("en-US")

        root = content.root_element

        # Three passes: create every header, set every text, append every header
        headers = [content.create_header_element(level) for level in range(1, 7)]
        for header, text in zip(headers, header_texts):
            header.set_text(text)
        for header in headers:
            root.append_child(header, True)

        # Paragraph placed after the headers
        paragraph = content.create_paragraph_element()
        paragraph.set_text(body_text)
        root.append_child(paragraph, True)

        # Write the tagged PDF
        pdf.save(outfile)
设置内联结构元素
使用 Aspose.PDF for Python via .NET 在 Tagged PDF 的标题和段落中创建内联文本元素(/Span)。
以下代码片段展示了如何设置 Tagged PDF 文档的内联结构元素:
import aspose.pdf as ap
import sys
from os import path
def set_inline_elements(outfile):
    """Create a tagged PDF whose headers and paragraph are built from spans.

    Six header elements (created with the arguments 1 through 6) are appended
    to the root structure element; each then receives two span children. A
    paragraph with the text ``"P. "`` is appended to the root and receives
    ten span children holding placeholder sentences. The document is saved
    at the end.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    # (first span text, second span text) for each header, in header order
    header_span_texts = (
        ("H1. ", "Level 1 Header"),
        ("H2. ", "Level 2 Header"),
        ("H3. ", "Level 3 Header"),
        ("H4. ", "Level 4 Header"),
        ("H5. ", "Level 5 Header"),
        ("H6. ", "Level 6 Header"),
    )
    # One span per entry, appended to the paragraph in this order
    paragraph_span_texts = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
        "Aenean nec lectus ac sem faucibus imperdiet. ",
        "Sed ut erat ac magna ullamcorper hendrerit. ",
        "Cras pellentesque libero semper, gravida magna sed, luctus leo. ",
        "Fusce lectus odio, laoreet nec ullamcorper ut, molestie eu elit. ",
        "Interdum et malesuada fames ac ante ipsum primis in faucibus. ",
        "Aliquam lacinia sit amet elit ac consectetur. Donec cursus condimentum ligula, vitae volutpat sem tristique eget. ",
        "Nulla in consectetur massa. Vestibulum vitae lobortis ante. Nulla ullamcorper pellentesque justo rhoncus accumsan. ",
        "Mauris ornare eu odio non lacinia. Aliquam massa leo, rhoncus ac iaculis eget, tempus et magna. Sed non consectetur elit. ",
        "Sed vulputate, quam sed lacinia luctus, ipsum nibh fringilla purus, vitae posuere risus odio id massa. Cras sed venenatis lacus.",
    )

    with ap.Document() as pdf:
        # Entry point for working with the tagged content of the document
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Tagged Pdf Document")
        content.set_language("en-US")

        root = content.root_element

        # All headers are created first and then attached to the root
        headers = [content.create_header_element(level) for level in range(1, 7)]
        for header in headers:
            root.append_child(header, True)

        # Each header gets its two spans, one after the other
        for header, span_texts in zip(headers, header_span_texts):
            for text in span_texts:
                span = content.create_span_element()
                span.set_text(text)
                header.append_child(span, True)

        # The paragraph has its own text and is attached before its spans
        paragraph = content.create_paragraph_element()
        paragraph.set_text("P. ")
        root.append_child(paragraph, True)

        for text in paragraph_span_texts:
            span = content.create_span_element()
            span.set_text(text)
            paragraph.append_child(span, True)

        # Write the tagged PDF
        pdf.save(outfile)
设置自定义标签名称
使用 Aspose.PDF for Python 为带标签的 PDF 设置结构和内联元素的自定义标签名称。
以下代码片段展示了如何设置自定义标签名称:
- 创建一个新的标记 PDF 文档。
- 设置文档元数据。
- 创建一个 section 元素。
- 创建带有自定义标签的段落元素。
- 创建带有自定义标签的内联 span 元素。
- 保存 Tagged PDF。
import aspose.pdf as ap
import sys
from os import path
def set_tag_name(outfile):
    """Create a tagged PDF whose paragraphs and spans use custom tag names.

    A section element is appended to the root structure element. Four
    paragraph elements receive text and a tag via ``set_tag`` and are
    appended to the section; four span elements receive text and a tag and
    are appended one per paragraph. The document is saved at the end.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    # (text, tag) for each paragraph and for the span placed inside it
    paragraph_specs = (
        ("P1. ", "P1"),
        ("P2. ", "Para"),
        ("P3. ", "Para"),
        ("P4. ", "Paragraph"),
    )
    span_specs = (
        ("Span 1.", "SPAN"),
        ("Span 2.", "Sp"),
        ("Span 3.", "Sp"),
        ("Span 4.", "TheSpan"),
    )

    with ap.Document() as pdf:
        # Entry point for working with the tagged content of the document
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Tagged Pdf Document")
        content.set_language("en-US")

        # Section that holds all paragraphs
        section = content.create_sect_element()
        content.root_element.append_child(section, True)

        # Paragraphs: create all, set all texts, set all tags, then attach all
        paragraphs = [content.create_paragraph_element() for _ in paragraph_specs]
        for paragraph, (text, _) in zip(paragraphs, paragraph_specs):
            paragraph.set_text(text)
        for paragraph, (_, tag) in zip(paragraphs, paragraph_specs):
            paragraph.set_tag(tag)
        for paragraph in paragraphs:
            section.append_child(paragraph, True)

        # Spans follow the same phases; each lands in its own paragraph
        spans = [content.create_span_element() for _ in span_specs]
        for span, (text, _) in zip(spans, span_specs):
            span.set_text(text)
        for span, (_, tag) in zip(spans, span_specs):
            span.set_tag(tag)
        for paragraph, span in zip(paragraphs, spans):
            paragraph.append_child(span, True)

        # Write the tagged PDF
        pdf.save(outfile)
将结构元素添加到元素中
此功能在 19.4 版或更高版本中受支持。
使用 Aspose.PDF for Python via .NET 在标记 PDF 中创建链接和图像元素。
下面的代码片段演示了如何在 Tagged PDF 文档的段落元素中添加链接结构元素(其内容可以是文本、span 或图像):
import aspose.pdf as ap
import sys
from os import path
def set_elements(imagefile, outfile):
    """Create a tagged PDF with five paragraphs, each holding a link element.

    Every link element points at ``http://google.com`` and receives an
    alternate description. The links differ in content:

    1. plain text set on the link;
    2. a single span child;
    3. a span, text set on the link itself, then a second span;
    4. one long text set on the link;
    5. a figure child built from ``imagefile``, with a placement attribute
       set on the link's layout attributes.

    Args:
        imagefile: Image passed to the figure element's ``set_image``.
        outfile: Destination handed to ``document.save``.
    """
    with ap.Document() as pdf:
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Link Elements Example")
        content.set_language("en-US")

        # Root structure element of the document
        root = content.root_element

        def new_link():
            # Shared prologue: paragraph under the root, link under the
            # paragraph, hyperlink target assigned last.
            paragraph = content.create_paragraph_element()
            root.append_child(paragraph, True)
            link = content.create_link_element()
            paragraph.append_child(link, True)
            link.hyperlink = ap.WebHyperlink("http://google.com")
            return link

        # 1) Link with text set directly on it
        text_link = new_link()
        text_link.set_text("Google")
        text_link.alternate_descriptions = "Link to Google"

        # 2) Link whose content is a single span
        span_link = new_link()
        label = content.create_span_element()
        label.set_text("Google")
        span_link.append_child(label, True)
        span_link.alternate_descriptions = "Link to Google"

        # 3) Link mixing spans with its own text; the link text is set
        #    between the two span insertions
        mixed_link = new_link()
        first_span = content.create_span_element()
        first_span.set_text("G")
        second_span = content.create_span_element()
        second_span.set_text("Google")
        mixed_link.append_child(first_span, True)
        mixed_link.set_text("-")
        mixed_link.append_child(second_span, True)
        mixed_link.alternate_descriptions = "Link to Google"

        # 4) Link with one long text
        long_link = new_link()
        long_link.set_text(
            "The multiline link: Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google Google"
        )
        long_link.alternate_descriptions = "Link to Google (multiline)"

        # 5) Link whose content is a figure
        image_link = new_link()
        figure = content.create_figure_element()
        figure.set_image(imagefile, 1200)
        figure.alternative_text = "Google icon"

        # Placement attribute stored in the link's layout attribute set
        layout_attributes = image_link.attributes.get_attributes(
            ap.logicalstructure.AttributeOwnerStandard.LAYOUT
        )
        placement = ap.logicalstructure.StructureAttribute(
            ap.logicalstructure.AttributeKey.PLACEMENT
        )
        placement.set_name_value(ap.logicalstructure.AttributeName.PLACEMENT_BLOCK)
        layout_attributes.set_attribute(placement)

        image_link.append_child(figure, True)
        image_link.alternate_descriptions = "Link to Google"

        # Write the tagged PDF
        pdf.save(outfile)
设置链接结构元素
Aspose.PDF for Python via .NET API 还允许您添加链接结构元素。
下面的代码片段演示了如何向 Tagged PDF 文档中添加链接结构元素:
import aspose.pdf as ap
import sys
from os import path
def add_link_element(outfile):
    """Create a tagged PDF with four paragraphs that combine text and spans.

    Each paragraph is appended to the root structure element and then mixes
    its own ``set_text`` call with span children in a different order: text
    before the spans, between them, after them, and after spans that
    themselves contain child spans.

    NOTE(review): despite its name, this function creates no link elements;
    it only builds paragraph and span elements.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    with ap.Document() as pdf:
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Text Elements Example")
        content.set_language("en-US")

        # Root structure element of the document
        root = content.root_element

        # Paragraph 1: paragraph text first, then two spans
        para_a = content.create_paragraph_element()
        root.append_child(para_a, True)
        a_first = content.create_span_element()
        a_first.set_text("Span_11")
        a_second = content.create_span_element()
        a_second.set_text(" and Span_12.")
        para_a.set_text("Paragraph with ")
        para_a.append_child(a_first, True)
        para_a.append_child(a_second, True)

        # Paragraph 2: paragraph text set between the two spans
        para_b = content.create_paragraph_element()
        root.append_child(para_b, True)
        b_first = content.create_span_element()
        b_first.set_text("Span_21")
        b_second = content.create_span_element()
        b_second.set_text("Span_22.")
        para_b.append_child(b_first, True)
        para_b.set_text(" and ")
        para_b.append_child(b_second, True)

        # Paragraph 3: both spans first, paragraph text last
        para_c = content.create_paragraph_element()
        root.append_child(para_c, True)
        c_first = content.create_span_element()
        c_first.set_text("Span_31")
        c_second = content.create_span_element()
        c_second.set_text(" and Span_32")
        para_c.append_child(c_first, True)
        para_c.append_child(c_second, True)
        para_c.set_text(".")

        # Paragraph 4: spans that contain child spans
        para_d = content.create_paragraph_element()
        root.append_child(para_d, True)

        # Outer span with its own text set before the child span is attached
        d_outer_1 = content.create_span_element()
        d_inner_1 = content.create_span_element()
        d_inner_1.set_text("Span_411, ")
        d_outer_1.set_text("Span_41, ")
        d_outer_1.append_child(d_inner_1, True)

        # Outer span with its own text set after the child span is attached
        d_outer_2 = content.create_span_element()
        d_inner_2 = content.create_span_element()
        d_inner_2.set_text("Span 421 and ")
        d_outer_2.append_child(d_inner_2, True)
        d_outer_2.set_text("Span_42")

        para_d.append_child(d_outer_1, True)
        para_d.append_child(d_outer_2, True)
        para_d.set_text(".")

        # Write the tagged PDF
        pdf.save(outfile)
设置 Note 结构元素
Aspose.PDF for Python via .NET API 还允许您在标记的 PDF 文档中添加注释元素(Note)。下面的代码片段展示了如何在 Tagged PDF 文档中添加 note 元素:
import aspose.pdf as ap
import sys
from os import path
def set_note_element(outfile):
    """Create a tagged PDF with one paragraph that contains three note elements.

    The first note keeps whatever ID it was created with; the second and
    third are given the explicit IDs ``note_002`` and ``note_003`` through
    ``set_id``. The document is saved at the end.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    # (note text, explicit ID or None to leave the ID untouched)
    note_specs = (
        ("Note with auto generate ID. ", None),
        ("Note with ID = 'note_002'. ", "note_002"),
        ("Note with ID = 'note_003'. ", "note_003"),
    )

    with ap.Document() as pdf:
        content = pdf.tagged_content
        content.set_title("Sample of Note Elements")
        content.set_language("en-US")

        # Paragraph that hosts the notes
        paragraph = content.create_paragraph_element()
        content.root_element.append_child(paragraph, True)

        # Each note is attached to the paragraph before its text and ID are set
        for text, note_id in note_specs:
            note = content.create_note_element()
            paragraph.append_child(note, True)
            note.set_text(text)
            if note_id is not None:
                note.set_id(note_id)

        # Notes carried over from the original sample (not verified here):
        # - calling set_id("note_002") on the third note is expected to raise
        #   Aspose.Pdf.Tagged.TaggedException, because a structure element
        #   with ID='note_002' already exists;
        # - calling clear_id() on a note element is said to make the
        #   resulting document non-compliant with PDF/UA.

        # Write the tagged PDF
        pdf.save(outfile)
如何设置语言和标题
Aspose.PDF for Python via .NET API 还允许您根据 PDF/UA 规范为文档设置语言和标题。语言既可以为整个文档设置,也可以为其各个结构元素单独设置。以下代码片段展示了如何在 Tagged PDF Document 中设置语言和标题:
- 创建一个新的标记 PDF 文档。
- 设置文档标题和语言。
- 创建一个标题元素。
- 添加具有特定语言的段落。
- 保存 Tagged PDF。
import aspose.pdf as ap
import sys
from os import path
def set_language_and_title(outfile):
    """Create a tagged PDF whose paragraphs declare different languages.

    The document title and language are set on the tagged content. A header
    element is appended to the root structure element without a language of
    its own, followed by four paragraphs whose ``language`` property is set
    to ``en-US``, ``de-DE``, ``fr-FR`` and ``es-ES`` respectively. The
    document is saved at the end.

    Args:
        outfile: Destination handed to ``document.save``.
    """
    # (paragraph text, language code) in the order they are appended
    phrases = (
        ("Hello, World!", "en-US"),  # English
        ("Hallo Welt!", "de-DE"),  # German
        ("Bonjour le monde!", "fr-FR"),  # French
        ("¡Hola Mundo!", "es-ES"),  # Spanish
    )

    with ap.Document() as pdf:
        content = pdf.tagged_content

        # Document-level title and language
        content.set_title("Example Tagged Document")
        content.set_language("en-US")

        # Header: no language is assigned to this element itself
        heading = content.create_header_element(1)
        heading.set_text("Phrase on different languages")
        content.root_element.append_child(heading, True)

        # One paragraph per phrase, each tagged with its own language
        for text, language in phrases:
            paragraph = content.create_paragraph_element()
            paragraph.set_text(text)
            paragraph.language = language
            content.root_element.append_child(paragraph, True)

        # Write the tagged PDF
        pdf.save(outfile)
相关 Tagged PDF 主题
- 创建 Tagged PDF:在更新属性之前生成结构元素。
- 从已标记的 PDF 中提取标记内容:检查现有的结构节点和元数据。
- 在标记 PDF 中处理表格:如果您需要对表格结构应用可访问性属性。