Source code for penman.codec

# -*- coding: utf-8 -*-

"""
Serialization of PENMAN graphs.
"""
from pathlib import Path
from typing import IO, Iterable, Iterator, List, Optional, Union

from penman import layout
from penman._format import (
    format,
    format_triples,
)
from penman._parse import (
    iterparse,
    parse,
    parse_triples,
)
from penman.graph import Graph
from penman.model import Model
from penman.tree import Tree
from penman.types import (
    BasicTriple,
    Variable,
)

# "Utility" types; not Penman-specific

FileOrFilename = Union[str, Path, IO[str]]



[docs]
class PENMANCodec(object):
    """
    An encoder/decoder for PENMAN-serialized graphs.
    """

    def __init__(self, model: Optional[Model] = None):
        # Use a freshly constructed default Model when none is supplied.
        self.model = Model() if model is None else model


[docs]
    def decode(self, s: str) -> Graph:
        """
        Read the single PENMAN-serialized graph in *s* as a Graph.

        The string is parsed into a tree, which is then interpreted
        with this codec's model.

        Args:
            s: a string containing a single PENMAN-serialized graph
        Returns:
            The :class:`~penman.graph.Graph` object described by *s*.
        Example:
            >>> from penman.codec import PENMANCodec
            >>> codec = PENMANCodec()
            >>> codec.decode('(b / bark-01 :ARG0 (d / dog))')
            <Graph object (top=b) at ...>
        """
        return layout.interpret(parse(s), self.model)



[docs]
    def iterdecode(self, lines: Union[Iterable[str], str]) -> Iterator[Graph]:
        """
        Lazily decode every graph found in *lines*.

        Each tree produced by the parser is interpreted with this
        codec's model and yielded as soon as it is available.

        Args:
            lines: a string or open file with PENMAN-serialized graphs
        Returns:
            The :class:`~penman.graph.Graph` objects described in
            *lines*.
        """
        yield from (
            layout.interpret(parsed, self.model)
            for parsed in iterparse(lines)
        )



[docs]
    def iterparse(self, lines: Union[Iterable[str], str]) -> Iterator[Tree]:
        """
        Lazily parse every tree found in *lines*.

        No model is applied here; the trees are yielded exactly as
        the module-level parser produces them.

        Args:
            lines: a string or open file with PENMAN-serialized graphs
        Returns:
            The :class:`~penman.tree.Tree` objects described in
            *lines*.
        """
        trees = iterparse(lines)
        yield from trees



[docs]
    def parse(self, s: str) -> Tree:
        """
        Build the tree structure for the PENMAN-notation string *s*.

        Unlike :meth:`decode`, the result is not interpreted with the
        codec's model; the tree is returned as parsed.

        Args:
            s: a string containing a single PENMAN-serialized graph
        Returns:
            The tree structure described by *s*.
        Example:
            >>> from penman.codec import PENMANCodec
            >>> codec = PENMANCodec()
            >>> codec.parse('(b / bark-01 :ARG0 (d / dog))')  # noqa
            Tree(('b', [('/', 'bark-01'), (':ARG0', ('d', [('/', 'dog')]))]))
        """
        tree = parse(s)
        return tree



[docs]
    def parse_triples(self, s: str) -> List[BasicTriple]:
        """
        Parse a triple conjunction from *s*.

        Args:
            s: a string containing a triple conjunction
        Returns:
            the list of triples described by *s*
        """
        triples = parse_triples(s)
        return triples



[docs]
    def encode(
        self,
        g: Graph,
        top: Optional[Variable] = None,
        indent: Union[int, None] = -1,
        compact: bool = False,
    ) -> str:
        """
        Write the graph *g* as a PENMAN-notation string.

        The graph is first configured into a tree (using *top* and
        this codec's model) and the tree is then passed to
        :meth:`format`.

        Args:
            g: the Graph object
            top: if given, the node to use as the top in serialization
            indent: how to indent formatted strings
            compact: if ``True``, put initial attributes on the first line
        Returns:
            the PENMAN-serialized string of the Graph *g*
        Example:
            >>> from penman.graph import Graph
            >>> from penman.codec import PENMANCodec
            >>> codec = PENMANCodec()
            >>> codec.encode(Graph([('h', 'instance', 'hi')]))
            '(h / hi)'

        """
        configured = layout.configure(g, top=top, model=self.model)
        # Go through self.format so subclass overrides are honored.
        serialized = self.format(configured, indent=indent, compact=compact)
        return serialized



[docs]
    def format(
        self,
        tree: Tree,
        indent: Union[int, None] = -1,
        compact: bool = False,
    ) -> str:
        """
        Render *tree* as a PENMAN string.

        Args:
            tree: the tree to serialize
            indent: how to indent formatted strings
            compact: passed through unchanged to the module-level
                formatter
        Returns:
            the string produced by the module-level formatter
        """
        options = {'indent': indent, 'compact': compact}
        return format(tree, **options)



[docs]
    def format_triples(
        self,
        triples: Iterable[BasicTriple],
        indent: bool = True,
    ) -> str:
        """
        Serialize *triples* as a triple conjunction.

        Args:
            triples: an iterable of triples
            indent: how to indent formatted strings
        Returns:
            the serialized triple conjunction of *triples*
        Example:
            >>> from penman.codec import PENMANCodec
            >>> codec = PENMANCodec()
            >>> codec.format_triples([('a', ':instance', 'alpha'),
            ...                       ('a', ':ARG0', 'b'),
            ...                       ('b', ':instance', 'beta')])
            ...
            'instance(a, alpha) ^\\nARG0(a, b) ^\\ninstance(b, beta)'

        """
        conjunction = format_triples(triples, indent=indent)
        return conjunction




# The following are for the top-level API. They are renamed when they
# are imported into __init__.py. They are named with the leading
# underscore here so they are not included as part of penman.codec's
# public API.


def _decode(s: str, model: Optional[Model] = None) -> Graph:
    """
    Read the single PENMAN-serialized graph in *s* as a Graph.

    A temporary :class:`PENMANCodec` is built with *model* and used
    to decode the string.

    Args:
        s: a string containing a single PENMAN-serialized graph
        model: the model used for interpreting the graph
    Returns:
        the Graph object described by *s*
    Example:
        >>> import penman
        >>> penman.decode('(b / bark-01 :ARG0 (d / dog))')
        <Graph object (top=b) at ...>

    """
    return PENMANCodec(model=model).decode(s)


def _iterdecode(
    lines: Union[Iterable[str], str],
    model: Optional[Model] = None,
) -> Iterator[Graph]:
    """
    Lazily decode every graph found in *lines*.

    A temporary :class:`PENMANCodec` is built with *model*; its
    graphs are yielded one at a time.

    Args:
        lines: a string or open file with PENMAN-serialized graphs
        model: the model used for interpreting the graph
    Returns:
        The :class:`~penman.graph.Graph` objects described in
        *lines*.
    Example:
        >>> import penman
        >>> for g in penman.iterdecode('(a / alpha) (b / beta)'):
        ...     print(repr(g))
        <Graph object (top=a) at ...>
        <Graph object (top=b) at ...>
    """
    graphs = PENMANCodec(model=model).iterdecode(lines)
    yield from graphs


def _encode(
    g: Graph,
    top: Optional[Variable] = None,
    model: Optional[Model] = None,
    indent: Union[int, bool, None] = -1,
    compact: bool = False,
) -> str:
    """
    Serialize the graph *g* from *top* to PENMAN notation.

    A temporary :class:`PENMANCodec` is built with *model* and its
    :meth:`~PENMANCodec.encode` method does the work.

    Args:
        g: the Graph object
        top: if given, the node to use as the top in serialization
        model: the model used for interpreting the graph
        indent: how to indent formatted strings; passed unchanged to
            :meth:`PENMANCodec.encode`, so ``None`` is accepted here
            as it is there
        compact: if ``True``, put initial attributes on the first line
    Returns:
        the PENMAN-serialized string of the Graph *g*
    Example:
        >>> import penman
        >>> from penman.graph import Graph
        >>> penman.encode(Graph([('h', 'instance', 'hi')]))
        '(h / hi)'

    """
    codec = PENMANCodec(model=model)
    return codec.encode(g, top=top, indent=indent, compact=compact)


def _load(
    source: FileOrFilename,
    model: Optional[Model] = None,
    encoding: Optional[str] = None,
) -> List[Graph]:
    """
    Deserialize a list of PENMAN-encoded graphs from *source*.

    Args:
        source: a filename or file-like object to read from
        model: the model used for interpreting the graph
        encoding: the character encoding passed to :func:`open` when
            *source* is a filename; it is not used when *source* is
            an already-open file-like object
    Returns:
        a list of Graph objects
    Raises:
        TypeError: if *source* is neither a ``str``/``Path`` nor an
            object with a ``read`` attribute
    """
    codec = PENMANCodec(model=model)
    if isinstance(source, (str, Path)):
        # We opened the file, so we are responsible for closing it.
        with open(source, encoding=encoding) as fh:
            return list(codec.iterdecode(fh))
    # Validate explicitly rather than with ``assert``, which is
    # stripped when Python runs with -O.
    if not hasattr(source, 'read'):
        raise TypeError(
            'source must be a filename or a file-like object, not '
            f'{type(source).__name__}'
        )
    # The caller owns an already-open stream; do not close it here.
    return list(codec.iterdecode(source))


def _loads(string: str, model: Optional[Model] = None) -> List[Graph]:
    """
    Collect every PENMAN-encoded graph in *string* into a list.

    A temporary :class:`PENMANCodec` is built with *model* and all
    graphs it decodes from *string* are gathered eagerly.

    Args:
        string: a string containing graph data
        model: the model used for interpreting the graph
    Returns:
        a list of Graph objects
    """
    return [*PENMANCodec(model=model).iterdecode(string)]


def _dump(
    graphs: Iterable[Graph],
    file: FileOrFilename,
    model: Optional[Model] = None,
    indent: Union[int, bool, None] = -1,
    compact: bool = False,
    encoding: Optional[str] = None,
) -> None:
    """
    Serialize each graph in *graphs* to PENMAN and write to *file*.

    Args:
        graphs: an iterable of Graph objects
        file: a filename or file-like object to write to
        model: the model used for interpreting the graph
        indent: how to indent formatted strings; passed unchanged to
            :meth:`PENMANCodec.encode`, so ``None`` is accepted here
            as it is there
        compact: if ``True``, put initial attributes on the first line
        encoding: the character encoding passed to :func:`open` when
            *file* is a filename; it is not used when *file* is an
            already-open file-like object
    Raises:
        TypeError: if *file* is neither a ``str``/``Path`` nor an
            object with a ``write`` attribute
    """
    codec = PENMANCodec(model=model)
    if isinstance(file, (str, Path)):
        # We opened the file, so we are responsible for closing it.
        with open(file, 'w', encoding=encoding) as fh:
            _dump_stream(fh, graphs, codec, indent, compact)
        return
    # Validate explicitly rather than with ``assert``, which is
    # stripped when Python runs with -O.
    if not hasattr(file, 'write'):
        raise TypeError(
            'file must be a filename or a file-like object, not '
            f'{type(file).__name__}'
        )
    # The caller owns an already-open stream; do not close it here.
    _dump_stream(file, graphs, codec, indent, compact)


def _dump_stream(fh, gs, codec, indent, compact):
    """
    Write each graph in *gs* to *fh* as it is encoded.

    Graphs are encoded one at a time with *codec* and printed
    immediately. A blank line is printed between consecutive graphs
    but not before the first or after the last. Nothing is written
    when *gs* is empty.
    """
    for position, graph in enumerate(gs):
        serialized = codec.encode(graph, indent=indent, compact=compact)
        if position:
            # Separator line between this graph and the previous one.
            print(file=fh)
        print(serialized, file=fh)


def _dumps(
    graphs: Iterable[Graph],
    model: Optional[Model] = None,
    indent: Union[int, bool, None] = -1,
    compact: bool = False,
) -> str:
    """
    Serialize each graph in *graphs* to the PENMAN format.

    Every graph is encoded with a temporary :class:`PENMANCodec`
    built from *model*. The encoded strings are joined with a blank
    line between them and no separator is added after the last one.

    Args:
        graphs: an iterable of Graph objects
        model: the model used for interpreting the graph
        indent: how to indent formatted strings; passed unchanged to
            :meth:`PENMANCodec.encode`, so ``None`` is accepted here
            as it is there
        compact: if ``True``, put initial attributes on the first line
    Returns:
        the string of serialized graphs (empty if *graphs* is empty)
    """
    codec = PENMANCodec(model=model)
    strings = [codec.encode(g, indent=indent, compact=compact) for g in graphs]
    return '\n\n'.join(strings)