abo/news_agent/venv/lib/python3.12/site-packages/docx/document.py

# pyright: reportImportCycles=false
# pyright: reportPrivateUsage=false

"""|Document| and closely related objects."""

from __future__ import annotations

from typing import IO, TYPE_CHECKING, Iterator, List, Sequence

from docx.blkcntnr import BlockItemContainer
from docx.enum.section import WD_SECTION
from docx.enum.text import WD_BREAK
from docx.section import Section, Sections
from docx.shared import ElementProxy, Emu, Inches, Length
from docx.text.run import Run

if TYPE_CHECKING:
    import docx.types as t
    from docx.comments import Comment, Comments
    from docx.oxml.document import CT_Body, CT_Document
    from docx.parts.document import DocumentPart
    from docx.settings import Settings
    from docx.styles.style import ParagraphStyle, _TableStyle
    from docx.table import Table
    from docx.text.paragraph import Paragraph


class Document(ElementProxy):
    """WordprocessingML (WML) document.

    Not intended to be constructed directly. Use :func:`docx.Document` to open or create
    a document.
    """

    def __init__(self, element: CT_Document, part: DocumentPart):
        super(Document, self).__init__(element)
        self._element = element
        self._part = part
        self.__body = None

    def add_comment(
        self,
        runs: Run | Sequence[Run],
        text: str | None = "",
        author: str = "",
        initials: str | None = "",
    ) -> Comment:
        """Add a comment to the document, anchored to the specified runs.

        `runs` can be a single `Run` object or a non-empty sequence of `Run` objects. Only the
        first and last run of a sequence are used, it's just more convenient to pass a whole
        sequence when that's what you have handy, like `paragraph.runs` for example. When `runs`
        contains a single `Run` object, that run serves as both the first and last run.

        A comment can be anchored only on an even run boundary, meaning the text the comment
        "references" must be a non-zero integer number of consecutive runs. The runs need not be
        _contiguous_ per se, like the first can be in one paragraph and the last in the next
        paragraph, but all runs between the first and the last will be included in the reference.

        The comment reference range is delimited by placing a `w:commentRangeStart` element before
        the first run and a `w:commentRangeEnd` element after the last run. This is why only the
        first and last run are required and why a single run can serve as both first and last.
        Word works out which text to highlight in the UI based on these range markers.

        `text` allows the contents of a simple comment to be provided in the call, providing for
        the common case where a comment is a single phrase or sentence without special formatting
        such as bold or italics. More complex comments can be added using the returned `Comment`
        object in much the same way as a `Document` or (table) `Cell` object, using methods like
        `.add_paragraph()`, .add_run()`, etc.

        The `author` and `initials` parameters allow that metadata to be set for the comment.
        `author` is a required attribute on a comment and is the empty string by default.
        `initials` is optional on a comment and may be omitted by passing |None|, but Word adds an
        `initials` attribute by default and we follow that convention by using the empty string
        when no `initials` argument is provided.
        """
        # -- normalize `runs` to a sequence of runs --
        runs = [runs] if isinstance(runs, Run) else runs
        first_run = runs[0]
        last_run = runs[-1]

        # -- Note that comments can only appear in the document part --
        comment = self.comments.add_comment(text=text, author=author, initials=initials)

        # -- let the first run orchestrate placement of the comment range start and end --
        first_run.mark_comment_range(last_run, comment.comment_id)

        return comment

    def add_heading(self, text: str = "", level: int = 1):
        """Return a heading paragraph newly added to the end of the document.

        The heading paragraph will contain `text` and have its paragraph style
        determined by `level`. If `level` is 0, the style is set to `Title`. If `level`
        is 1 (or omitted), `Heading 1` is used. Otherwise the style is set to `Heading
        {level}`. Raises |ValueError| if `level` is outside the range 0-9.
        """
        if not 0 <= level <= 9:
            raise ValueError("level must be in range 0-9, got %d" % level)
        style = "Title" if level == 0 else "Heading %d" % level
        return self.add_paragraph(text, style)

    def add_page_break(self):
        """Return newly |Paragraph| object containing only a page break."""
        paragraph = self.add_paragraph()
        paragraph.add_run().add_break(WD_BREAK.PAGE)
        return paragraph

    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
        """Return paragraph newly added to the end of the document.

        The paragraph is populated with `text` and having paragraph style `style`.

        `text` can contain tab (``\\t``) characters, which are converted to the
        appropriate XML form for a tab. `text` can also include newline (``\\n``) or
        carriage return (``\\r``) characters, each of which is converted to a line
        break.
        """
        return self._body.add_paragraph(text, style)

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ):
        """Return new picture shape added in its own paragraph at end of the document.

        The picture contains the image at `image_path_or_stream`, scaled based on
        `width` and `height`. If neither width nor height is specified, the picture
        appears at its native size. If only one is specified, it is used to compute a
        scaling factor that is then applied to the unspecified dimension, preserving the
        aspect ratio of the image. The native size of the picture is calculated using
        the dots-per-inch (dpi) value specified in the image file, defaulting to 72 dpi
        if no value is specified, as is often the case.
        """
        run = self.add_paragraph().add_run()
        return run.add_picture(image_path_or_stream, width, height)

    def add_section(self, start_type: WD_SECTION = WD_SECTION.NEW_PAGE):
        """Return a |Section| object newly added at the end of the document.

        The optional `start_type` argument must be a member of the :ref:`WdSectionStart`
        enumeration, and defaults to ``WD_SECTION.NEW_PAGE`` if not provided.
        """
        new_sectPr = self._element.body.add_section_break()
        new_sectPr.start_type = start_type
        return Section(new_sectPr, self._part)

    def add_table(self, rows: int, cols: int, style: str | _TableStyle | None = None):
        """Add a table having row and column counts of `rows` and `cols` respectively.

        `style` may be a table style object or a table style name. If `style` is |None|,
        the table inherits the default table style of the document.
        """
        table = self._body.add_table(rows, cols, self._block_width)
        table.style = style
        return table

    @property
    def comments(self) -> Comments:
        """A |Comments| object providing access to comments added to the document."""
        return self._part.comments

    @property
    def core_properties(self):
        """A |CoreProperties| object providing Dublin Core properties of document."""
        return self._part.core_properties

    @property
    def inline_shapes(self):
        """The |InlineShapes| collection for this document.

        An inline shape is a graphical object, such as a picture, contained in a run of
        text and behaving like a character glyph, being flowed like other text in a
        paragraph.
        """
        return self._part.inline_shapes

    def iter_inner_content(self) -> Iterator[Paragraph | Table]:
        """Generate each `Paragraph` or `Table` in this document in document order."""
        return self._body.iter_inner_content()

    @property
    def paragraphs(self) -> List[Paragraph]:
        """The |Paragraph| instances in the document, in document order.

        Note that paragraphs within revision marks such as ``<w:ins>`` or ``<w:del>`` do
        not appear in this list.
        """
        return self._body.paragraphs

    @property
    def part(self) -> DocumentPart:
        """The |DocumentPart| object of this document."""
        return self._part

    def save(self, path_or_stream: str | IO[bytes]):
        """Save this document to `path_or_stream`.

        `path_or_stream` can be either a path to a filesystem location (a string) or a
        file-like object.
        """
        self._part.save(path_or_stream)

    @property
    def sections(self) -> Sections:
        """|Sections| object providing access to each section in this document."""
        return Sections(self._element, self._part)

    @property
    def settings(self) -> Settings:
        """A |Settings| object providing access to the document-level settings."""
        return self._part.settings

    @property
    def styles(self):
        """A |Styles| object providing access to the styles in this document."""
        return self._part.styles

    @property
    def tables(self) -> List[Table]:
        """All |Table| instances in the document, in document order.

        Note that only tables appearing at the top level of the document appear in this
        list; a table nested inside a table cell does not appear. A table within
        revision marks such as ``<w:ins>`` or ``<w:del>`` will also not appear in the
        list.
        """
        return self._body.tables

    @property
    def _block_width(self) -> Length:
        """A |Length| object specifying the space between margins in last section."""
        section = self.sections[-1]
        page_width = section.page_width or Inches(8.5)
        left_margin = section.left_margin or Inches(1)
        right_margin = section.right_margin or Inches(1)
        return Emu(page_width - left_margin - right_margin)

    @property
    def _body(self) -> _Body:
        """The |_Body| instance containing the content for this document."""
        if self.__body is None:
            self.__body = _Body(self._element.body, self)
        return self.__body


class _Body(BlockItemContainer):
    """Proxy for `<w:body>` element in this document.

    It's primary role is a container for document content.
    """

    def __init__(self, body_elm: CT_Body, parent: t.ProvidesStoryPart):
        super(_Body, self).__init__(body_elm, parent)
        self._body = body_elm

    def clear_content(self) -> _Body:
        """Return this |_Body| instance after clearing it of all content.

        Section properties for the main document story, if present, are preserved.
        """
        self._body.clear_content()
        return self
初始提交 2025-07-16 15:34:54 +08:00			`# pyright: reportImportCycles=false`
			`# pyright: reportPrivateUsage=false`

			`"""\|Document\| and closely related objects."""`

			`from __future__ import annotations`

			`from typing import IO, TYPE_CHECKING, Iterator, List, Sequence`

			`from docx.blkcntnr import BlockItemContainer`
			`from docx.enum.section import WD_SECTION`
			`from docx.enum.text import WD_BREAK`
			`from docx.section import Section, Sections`
			`from docx.shared import ElementProxy, Emu, Inches, Length`
			`from docx.text.run import Run`

			`if TYPE_CHECKING:`
			`import docx.types as t`
			`from docx.comments import Comment, Comments`
			`from docx.oxml.document import CT_Body, CT_Document`
			`from docx.parts.document import DocumentPart`
			`from docx.settings import Settings`
			`from docx.styles.style import ParagraphStyle, _TableStyle`
			`from docx.table import Table`
			`from docx.text.paragraph import Paragraph`


			`class Document(ElementProxy):`
			`"""WordprocessingML (WML) document.`

			Not intended to be constructed directly. Use :func:`docx.Document` to open or create
			`a document.`
			`"""`

			`def __init__(self, element: CT_Document, part: DocumentPart):`
			`super(Document, self).__init__(element)`
			`self._element = element`
			`self._part = part`
			`self.__body = None`

			`def add_comment(`
			`self,`
			`runs: Run \| Sequence[Run],`
			`text: str \| None = "",`
			`author: str = "",`
			`initials: str \| None = "",`
			`) -> Comment:`
			`"""Add a comment to the document, anchored to the specified runs.`

			`runs` can be a single `Run` object or a non-empty sequence of `Run` objects. Only the
			`first and last run of a sequence are used, it's just more convenient to pass a whole`
			sequence when that's what you have handy, like `paragraph.runs` for example. When `runs`
			contains a single `Run` object, that run serves as both the first and last run.

			`A comment can be anchored only on an even run boundary, meaning the text the comment`
			`"references" must be a non-zero integer number of consecutive runs. The runs need not be`
			`_contiguous_ per se, like the first can be in one paragraph and the last in the next`
			`paragraph, but all runs between the first and the last will be included in the reference.`

			The comment reference range is delimited by placing a `w:commentRangeStart` element before
			the first run and a `w:commentRangeEnd` element after the last run. This is why only the
			`first and last run are required and why a single run can serve as both first and last.`
			`Word works out which text to highlight in the UI based on these range markers.`

			`text` allows the contents of a simple comment to be provided in the call, providing for
			`the common case where a comment is a single phrase or sentence without special formatting`
			such as bold or italics. More complex comments can be added using the returned `Comment`
			object in much the same way as a `Document` or (table) `Cell` object, using methods like
			`.add_paragraph()`, .add_run()`, etc.

			The `author` and `initials` parameters allow that metadata to be set for the comment.
			`author` is a required attribute on a comment and is the empty string by default.
			`initials` is optional on a comment and may be omitted by passing \|None\|, but Word adds an
			`initials` attribute by default and we follow that convention by using the empty string
			when no `initials` argument is provided.
			`"""`
			# -- normalize `runs` to a sequence of runs --
			`runs = [runs] if isinstance(runs, Run) else runs`
			`first_run = runs[0]`
			`last_run = runs[-1]`

			`# -- Note that comments can only appear in the document part --`
			`comment = self.comments.add_comment(text=text, author=author, initials=initials)`

			`# -- let the first run orchestrate placement of the comment range start and end --`
			`first_run.mark_comment_range(last_run, comment.comment_id)`

			`return comment`

			`def add_heading(self, text: str = "", level: int = 1):`
			`"""Return a heading paragraph newly added to the end of the document.`

			The heading paragraph will contain `text` and have its paragraph style
			determined by `level`. If `level` is 0, the style is set to `Title`. If `level`
			is 1 (or omitted), `Heading 1` is used. Otherwise the style is set to `Heading
			{level}`. Raises \|ValueError\| if `level` is outside the range 0-9.
			`"""`
			`if not 0 <= level <= 9:`
			`raise ValueError("level must be in range 0-9, got %d" % level)`
			`style = "Title" if level == 0 else "Heading %d" % level`
			`return self.add_paragraph(text, style)`

			`def add_page_break(self):`
			`"""Return newly \|Paragraph\| object containing only a page break."""`
			`paragraph = self.add_paragraph()`
			`paragraph.add_run().add_break(WD_BREAK.PAGE)`
			`return paragraph`

			`def add_paragraph(self, text: str = "", style: str \| ParagraphStyle \| None = None) -> Paragraph:`
			`"""Return paragraph newly added to the end of the document.`

			The paragraph is populated with `text` and having paragraph style `style`.

			`text` can contain tab (``\\t``) characters, which are converted to the
			appropriate XML form for a tab. `text` can also include newline (``\\n``) or
			carriage return (``\\r``) characters, each of which is converted to a line
			`break.`
			`"""`
			`return self._body.add_paragraph(text, style)`

			`def add_picture(`
			`self,`
			`image_path_or_stream: str \| IO[bytes],`
			`width: int \| Length \| None = None,`
			`height: int \| Length \| None = None,`
			`):`
			`"""Return new picture shape added in its own paragraph at end of the document.`

			The picture contains the image at `image_path_or_stream`, scaled based on
			`width` and `height`. If neither width nor height is specified, the picture
			`appears at its native size. If only one is specified, it is used to compute a`
			`scaling factor that is then applied to the unspecified dimension, preserving the`
			`aspect ratio of the image. The native size of the picture is calculated using`
			`the dots-per-inch (dpi) value specified in the image file, defaulting to 72 dpi`
			`if no value is specified, as is often the case.`
			`"""`
			`run = self.add_paragraph().add_run()`
			`return run.add_picture(image_path_or_stream, width, height)`

			`def add_section(self, start_type: WD_SECTION = WD_SECTION.NEW_PAGE):`
			`"""Return a \|Section\| object newly added at the end of the document.`

			The optional `start_type` argument must be a member of the :ref:`WdSectionStart`
			enumeration, and defaults to ``WD_SECTION.NEW_PAGE`` if not provided.
			`"""`
			`new_sectPr = self._element.body.add_section_break()`
			`new_sectPr.start_type = start_type`
			`return Section(new_sectPr, self._part)`

			`def add_table(self, rows: int, cols: int, style: str \| _TableStyle \| None = None):`
			"""Add a table having row and column counts of `rows` and `cols` respectively.

			`style` may be a table style object or a table style name. If `style` is \|None\|,
			`the table inherits the default table style of the document.`
			`"""`
			`table = self._body.add_table(rows, cols, self._block_width)`
			`table.style = style`
			`return table`

			`@property`
			`def comments(self) -> Comments:`
			`"""A \|Comments\| object providing access to comments added to the document."""`
			`return self._part.comments`

			`@property`
			`def core_properties(self):`
			`"""A \|CoreProperties\| object providing Dublin Core properties of document."""`
			`return self._part.core_properties`

			`@property`
			`def inline_shapes(self):`
			`"""The \|InlineShapes\| collection for this document.`

			`An inline shape is a graphical object, such as a picture, contained in a run of`
			`text and behaving like a character glyph, being flowed like other text in a`
			`paragraph.`
			`"""`
			`return self._part.inline_shapes`

			`def iter_inner_content(self) -> Iterator[Paragraph \| Table]:`
			"""Generate each `Paragraph` or `Table` in this document in document order."""
			`return self._body.iter_inner_content()`

			`@property`
			`def paragraphs(self) -> List[Paragraph]:`
			`"""The \|Paragraph\| instances in the document, in document order.`

			Note that paragraphs within revision marks such as ``<w:ins>`` or ``<w:del>`` do
			`not appear in this list.`
			`"""`
			`return self._body.paragraphs`

			`@property`
			`def part(self) -> DocumentPart:`
			`"""The \|DocumentPart\| object of this document."""`
			`return self._part`

			`def save(self, path_or_stream: str \| IO[bytes]):`
			"""Save this document to `path_or_stream`.

			`path_or_stream` can be either a path to a filesystem location (a string) or a
			`file-like object.`
			`"""`
			`self._part.save(path_or_stream)`

			`@property`
			`def sections(self) -> Sections:`
			`"""\|Sections\| object providing access to each section in this document."""`
			`return Sections(self._element, self._part)`

			`@property`
			`def settings(self) -> Settings:`
			`"""A \|Settings\| object providing access to the document-level settings."""`
			`return self._part.settings`

			`@property`
			`def styles(self):`
			`"""A \|Styles\| object providing access to the styles in this document."""`
			`return self._part.styles`

			`@property`
			`def tables(self) -> List[Table]:`
			`"""All \|Table\| instances in the document, in document order.`

			`Note that only tables appearing at the top level of the document appear in this`
			`list; a table nested inside a table cell does not appear. A table within`
			revision marks such as ``<w:ins>`` or ``<w:del>`` will also not appear in the
			`list.`
			`"""`
			`return self._body.tables`

			`@property`
			`def _block_width(self) -> Length:`
			`"""A \|Length\| object specifying the space between margins in last section."""`
			`section = self.sections[-1]`
			`page_width = section.page_width or Inches(8.5)`
			`left_margin = section.left_margin or Inches(1)`
			`right_margin = section.right_margin or Inches(1)`
			`return Emu(page_width - left_margin - right_margin)`

			`@property`
			`def _body(self) -> _Body:`
			`"""The \|_Body\| instance containing the content for this document."""`
			`if self.__body is None:`
			`self.__body = _Body(self._element.body, self)`
			`return self.__body`


			`class _Body(BlockItemContainer):`
			"""Proxy for `<w:body>` element in this document.

			`It's primary role is a container for document content.`
			`"""`

			`def __init__(self, body_elm: CT_Body, parent: t.ProvidesStoryPart):`
			`super(_Body, self).__init__(body_elm, parent)`
			`self._body = body_elm`

			`def clear_content(self) -> _Body:`
			`"""Return this \|_Body\| instance after clearing it of all content.`

			`Section properties for the main document story, if present, are preserved.`
			`"""`
			`self._body.clear_content()`
			`return self`