266 lines
11 KiB
Python
266 lines
11 KiB
Python
|
|
# pyright: reportImportCycles=false
|
||
|
|
# pyright: reportPrivateUsage=false
|
||
|
|
|
||
|
|
"""|Document| and closely related objects."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from typing import IO, TYPE_CHECKING, Iterator, List, Sequence
|
||
|
|
|
||
|
|
from docx.blkcntnr import BlockItemContainer
|
||
|
|
from docx.enum.section import WD_SECTION
|
||
|
|
from docx.enum.text import WD_BREAK
|
||
|
|
from docx.section import Section, Sections
|
||
|
|
from docx.shared import ElementProxy, Emu, Inches, Length
|
||
|
|
from docx.text.run import Run
|
||
|
|
|
||
|
|
if TYPE_CHECKING:
|
||
|
|
import docx.types as t
|
||
|
|
from docx.comments import Comment, Comments
|
||
|
|
from docx.oxml.document import CT_Body, CT_Document
|
||
|
|
from docx.parts.document import DocumentPart
|
||
|
|
from docx.settings import Settings
|
||
|
|
from docx.styles.style import ParagraphStyle, _TableStyle
|
||
|
|
from docx.table import Table
|
||
|
|
from docx.text.paragraph import Paragraph
|
||
|
|
|
||
|
|
|
||
|
|
class Document(ElementProxy):
    """WordprocessingML (WML) document.

    Not intended to be constructed directly. Use :func:`docx.Document` to open or create
    a document.
    """

    def __init__(self, element: CT_Document, part: DocumentPart):
        super().__init__(element)
        self._element = element
        self._part = part
        # -- the `_Body` proxy is created lazily by the `_body` property and cached here --
        self.__body = None

    def add_comment(
        self,
        runs: Run | Sequence[Run],
        text: str | None = "",
        author: str = "",
        initials: str | None = "",
    ) -> Comment:
        """Add a comment to the document, anchored to the specified runs.

        `runs` can be a single `Run` object or a non-empty sequence of `Run` objects. Only the
        first and last run of a sequence are used, it's just more convenient to pass a whole
        sequence when that's what you have handy, like `paragraph.runs` for example. When `runs`
        contains a single `Run` object, that run serves as both the first and last run.

        A comment can be anchored only on an even run boundary, meaning the text the comment
        "references" must be a non-zero integer number of consecutive runs. The runs need not be
        _contiguous_ per se, like the first can be in one paragraph and the last in the next
        paragraph, but all runs between the first and the last will be included in the reference.

        The comment reference range is delimited by placing a `w:commentRangeStart` element before
        the first run and a `w:commentRangeEnd` element after the last run. This is why only the
        first and last run are required and why a single run can serve as both first and last.
        Word works out which text to highlight in the UI based on these range markers.

        `text` allows the contents of a simple comment to be provided in the call, providing for
        the common case where a comment is a single phrase or sentence without special formatting
        such as bold or italics. More complex comments can be added using the returned `Comment`
        object in much the same way as a `Document` or (table) `Cell` object, using methods like
        `.add_paragraph()`, `.add_run()`, etc.

        The `author` and `initials` parameters allow that metadata to be set for the comment.
        `author` is a required attribute on a comment and is the empty string by default.
        `initials` is optional on a comment and may be omitted by passing |None|, but Word adds an
        `initials` attribute by default and we follow that convention by using the empty string
        when no `initials` argument is provided.

        Raises |IndexError| if `runs` is an empty sequence; in that case no comment is added to
        the document.
        """
        # -- normalize `runs` to a sequence of runs --
        runs = [runs] if isinstance(runs, Run) else runs

        # -- An empty sequence has no first or last run to anchor to. Indexing it below would
        # -- raise a bare "index out of range"; raise the same exception type here (so existing
        # -- callers are unaffected) but with a message that names the actual problem. This
        # -- happens before the comment is created so the document is left unchanged.
        if not runs:
            raise IndexError("`runs` must be a Run or a non-empty sequence of Run objects")

        first_run = runs[0]
        last_run = runs[-1]

        # -- Note that comments can only appear in the document part --
        comment = self.comments.add_comment(text=text, author=author, initials=initials)

        # -- let the first run orchestrate placement of the comment range start and end --
        first_run.mark_comment_range(last_run, comment.comment_id)

        return comment

    def add_heading(self, text: str = "", level: int = 1) -> Paragraph:
        """Return a heading paragraph newly added to the end of the document.

        The heading paragraph will contain `text` and have its paragraph style
        determined by `level`. If `level` is 0, the style is set to `Title`. If `level`
        is 1 (or omitted), `Heading 1` is used. Otherwise the style is set to `Heading
        {level}`. Raises |ValueError| if `level` is outside the range 0-9.
        """
        if not 0 <= level <= 9:
            raise ValueError("level must be in range 0-9, got %d" % level)
        # -- level 0 maps to the "Title" style, every other level to "Heading {level}" --
        style = "Title" if level == 0 else "Heading %d" % level
        return self.add_paragraph(text, style)

    def add_page_break(self) -> Paragraph:
        """Return a newly added |Paragraph| object containing only a page break."""
        paragraph = self.add_paragraph()
        paragraph.add_run().add_break(WD_BREAK.PAGE)
        return paragraph

    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
        """Return paragraph newly added to the end of the document.

        The paragraph is populated with `text` and has paragraph style `style`.

        `text` can contain tab (``\\t``) characters, which are converted to the
        appropriate XML form for a tab. `text` can also include newline (``\\n``) or
        carriage return (``\\r``) characters, each of which is converted to a line
        break.
        """
        return self._body.add_paragraph(text, style)

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ):
        """Return new picture shape added in its own paragraph at end of the document.

        The picture contains the image at `image_path_or_stream`, scaled based on
        `width` and `height`. If neither width nor height is specified, the picture
        appears at its native size. If only one is specified, it is used to compute a
        scaling factor that is then applied to the unspecified dimension, preserving the
        aspect ratio of the image. The native size of the picture is calculated using
        the dots-per-inch (dpi) value specified in the image file, defaulting to 72 dpi
        if no value is specified, as is often the case.
        """
        # -- the picture gets a fresh paragraph and run of its own at the end of the body --
        run = self.add_paragraph().add_run()
        return run.add_picture(image_path_or_stream, width, height)

    def add_section(self, start_type: WD_SECTION = WD_SECTION.NEW_PAGE) -> Section:
        """Return a |Section| object newly added at the end of the document.

        The optional `start_type` argument must be a member of the :ref:`WdSectionStart`
        enumeration, and defaults to ``WD_SECTION.NEW_PAGE`` if not provided.
        """
        new_sectPr = self._element.body.add_section_break()
        new_sectPr.start_type = start_type
        return Section(new_sectPr, self._part)

    def add_table(self, rows: int, cols: int, style: str | _TableStyle | None = None):
        """Add a table having row and column counts of `rows` and `cols` respectively.

        `style` may be a table style object or a table style name. If `style` is |None|,
        the table inherits the default table style of the document.

        The newly added table is returned. Its width is taken from `_block_width`, the space
        between the margins of the last section.
        """
        table = self._body.add_table(rows, cols, self._block_width)
        table.style = style
        return table

    @property
    def comments(self) -> Comments:
        """A |Comments| object providing access to comments added to the document."""
        return self._part.comments

    @property
    def core_properties(self):
        """A |CoreProperties| object providing Dublin Core properties of document."""
        return self._part.core_properties

    @property
    def inline_shapes(self):
        """The |InlineShapes| collection for this document.

        An inline shape is a graphical object, such as a picture, contained in a run of
        text and behaving like a character glyph, being flowed like other text in a
        paragraph.
        """
        return self._part.inline_shapes

    def iter_inner_content(self) -> Iterator[Paragraph | Table]:
        """Generate each `Paragraph` or `Table` in this document in document order."""
        return self._body.iter_inner_content()

    @property
    def paragraphs(self) -> List[Paragraph]:
        """The |Paragraph| instances in the document, in document order.

        Note that paragraphs within revision marks such as ``<w:ins>`` or ``<w:del>`` do
        not appear in this list.
        """
        return self._body.paragraphs

    @property
    def part(self) -> DocumentPart:
        """The |DocumentPart| object of this document."""
        return self._part

    def save(self, path_or_stream: str | IO[bytes]) -> None:
        """Save this document to `path_or_stream`.

        `path_or_stream` can be either a path to a filesystem location (a string) or a
        file-like object.
        """
        self._part.save(path_or_stream)

    @property
    def sections(self) -> Sections:
        """|Sections| object providing access to each section in this document."""
        return Sections(self._element, self._part)

    @property
    def settings(self) -> Settings:
        """A |Settings| object providing access to the document-level settings."""
        return self._part.settings

    @property
    def styles(self):
        """A |Styles| object providing access to the styles in this document."""
        return self._part.styles

    @property
    def tables(self) -> List[Table]:
        """All |Table| instances in the document, in document order.

        Note that only tables appearing at the top level of the document appear in this
        list; a table nested inside a table cell does not appear. A table within
        revision marks such as ``<w:ins>`` or ``<w:del>`` will also not appear in the
        list.
        """
        return self._body.tables

    @property
    def _block_width(self) -> Length:
        """A |Length| object specifying the space between margins in last section."""
        section = self.sections[-1]
        # -- when the section provides no (truthy) value, fall back to an 8.5-inch page
        # -- width and 1-inch left and right margins
        page_width = section.page_width or Inches(8.5)
        left_margin = section.left_margin or Inches(1)
        right_margin = section.right_margin or Inches(1)
        return Emu(page_width - left_margin - right_margin)

    @property
    def _body(self) -> _Body:
        """The |_Body| instance containing the content for this document."""
        # -- created on first access, then the same instance is reused --
        if self.__body is None:
            self.__body = _Body(self._element.body, self)
        return self.__body
|
||
|
|
|
||
|
|
|
||
|
|
class _Body(BlockItemContainer):
    """Proxy for the `<w:body>` element of this document.

    Its primary role is to act as a container for document content.
    """

    def __init__(self, body_elm: CT_Body, parent: t.ProvidesStoryPart):
        super().__init__(body_elm, parent)
        # -- keep a direct reference to the body element for `clear_content()` --
        self._body = body_elm

    def clear_content(self) -> _Body:
        """Clear this body of all content and return this same |_Body| instance.

        Section properties for the main document story, if present, are preserved.
        """
        body_elm = self._body
        body_elm.clear_content()
        return self
|