Source code for penman.graph

# -*- coding: utf-8 -*-

"""
Data structures for Penman graphs and triples.
"""

from typing import (Union, Optional, Mapping, List, Dict, Set, NamedTuple)
from collections import defaultdict
import copy

from penman.exceptions import GraphError
from penman.types import (
    Variable,
    Constant,
    Role,
    Target,
    BasicTriple,
    Triples,
)
from penman.epigraph import Epidata


CONCEPT_ROLE = ':instance'


[docs]class Triple(NamedTuple):
    """
    A relation between nodes or between a node and an constant.

    Args:
        source: the source variable of the triple
        role: the edge label between the source and target
        target: the target variable or constant
    """

    source: Variable
    """The source variable of the triple."""

    role: Role
    """The edge label between the source and target."""

    target: Target
    """The target variable or constant."""


[docs]class Edge(Triple):
    """A relation between nodes."""

    target: Variable
    """The target variable."""


[docs]class Attribute(Triple):
    """A relation between a node and a constant."""

    target: Constant
    """The target constant."""


[docs]class Graph(object):
    """
    A basic class for modeling a rooted, directed acyclic graph.

    A Graph is defined by a list of triples, which can be divided into
    two parts: a list of graph edges where both the source and target
    are variables (node identifiers), and a list of node attributes
    where only the source is a variable and the target is a
    constant. The raw triples are available via the :attr:`triples`
    attribute, while the :meth:`edges` and :meth:`attributes` methods
    return only those that are edges between nodes or between a node
    and a constant, respectively.

    Args:
        triples: an iterable of triples (:class:`Triple` or 3-tuples)
        top: the variable of the top node; if unspecified, the source
            of the first triple is used
        epidata: a mapping of triples to epigraphical markers
        metadata: a mapping of metadata types to descriptions
    Example:
        >>> Graph([('b', ':instance', 'bark'),
        ...        ('d', ':instance', 'dog'),
        ...        ('b', ':ARG1', 'd')])
    """

    def __init__(self,
                 triples: Triples = None,
                 top: Variable = None,
                 epidata: Mapping[BasicTriple, Epidata] = None,
                 metadata: Mapping[str, str] = None):
        if not triples:
            triples = []
        if not epidata:
            epidata = {}
        if not metadata:
            metadata = {}

        # the following (a) creates a new list (b) validates that
        # they are triples, and (c) ensures roles begin with :
        self.triples = [(src, _ensure_colon(role), tgt)
                        for src, role, tgt in triples]
        self._top = top
        self.epidata = dict(epidata)
        self.metadata = dict(metadata)

    def __repr__(self):
        return '<{} object (top={}) at {}>'.format(
            self.__class__.__name__,
            self.top,
            id(self)
        )

    def __str__(self):
        triples = '[{}]'.format(',\n   '.join(map(repr, self.triples)))
        epidata = '{{{}}}'.format(',\n    '.join(
            map('{0[0]!r}: {0[1]!r}'.format, self.epidata.items())))
        return 'Graph(\n  {},\n  epidata={})'.format(triples, epidata)

    def __eq__(self, other):
        if not isinstance(other, Graph):
            return NotImplemented
        return (self.top == other.top
                and len(self.triples) == len(other.triples)
                and set(self.triples) == set(other.triples))

    def __or__(self, other):
        if isinstance(other, Graph):
            g = copy.deepcopy(self)
            g.metadata.clear()
            g |= other
            return g
        else:
            return NotImplemented

    def __ior__(self, other):
        if isinstance(other, Graph):
            new = set(other.triples) - set(self.triples)
            self.triples.extend(t for t in other.triples if t in new)
            for t in new:
                if t in other.epidata:
                    self.epidata[t] = list(other.epidata[t])
            self.epidata.update(other.epidata)
            return self
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, Graph):
            g = copy.deepcopy(self)
            g.metadata.clear()
            g -= other
            return g
        else:
            return NotImplemented

    def __isub__(self, other):
        if isinstance(other, Graph):
            removed = set(other.triples)
            self.triples[:] = [t for t in self.triples if t not in removed]
            for t in removed:
                if t in self.epidata:
                    del self.epidata[t]
            possible_variables = set(v for t in self.triples for v in t[::2])
            if self._top not in possible_variables:
                self._top = None
            return self
        else:
            return NotImplemented

    @property
    def top(self) -> Union[Variable, None]:
        """
        The top variable.
        """
        top = self._top
        if top is None and len(self.triples) > 0:
            top = self.triples[0][0]  # implicit top
        return top

    @top.setter
    def top(self, top: Union[Variable, None]):
        if top is not None and top not in self.variables():
            raise GraphError('top must be a valid node')
        self._top = top  # check if top is valid variable?

[docs]    def variables(self) -> Set[Variable]:
        """
        Return the set of variables (nonterminal node identifiers).
        """
        vs = set(src for src, _, _ in self.triples)
        if self._top is not None:
            vs.add(self._top)
        return vs

[docs]    def edges(self,
              source: Optional[Variable] = None,
              role: Role = None,
              target: Variable = None) -> List[Edge]:
        """
        Return edges filtered by their *source*, *role*, or *target*.

        Edges don't include terminal triples (concepts or attributes).
        """
        variables = self.variables()
        return [Edge(*t)
                for t in self._filter_triples(source, role, target)
                if t[2] in variables]

[docs]    def attributes(self,
                   source: Optional[Variable] = None,
                   role: Role = None,
                   target: Constant = None) -> List[Attribute]:
        """
        Return attributes filtered by their *source*, *role*, or *target*.

        Attributes don't include triples where the target is a nonterminal.
        """
        variables = self.variables()
        return [Attribute(*t)
                for t in self._filter_triples(source, role, target)
                if t[1] == CONCEPT_ROLE or t[2] not in variables]

    def _filter_triples(self,
                        source: Optional[Variable],
                        role: Optional[Role],
                        target: Optional[Target]) -> List[BasicTriple]:
        """
        Filter triples based on their source, role, and/or target.
        """
        if source is role is target is None:
            triples = list(self.triples)
        else:
            triples = [
                t for t in self.triples
                if ((source is None or source == t[0])
                    and (role is None or role == t[1])
                    and (target is None or target == t[2]))
            ]
        return triples

[docs]    def reentrancies(self) -> Dict[Variable, int]:
        """
        Return a mapping of variables to their re-entrancy count.

        A re-entrancy is when more than one edge selects a node as its
        target. These graphs are rooted, so the top node always has an
        implicit entrancy. Only nodes with re-entrancies are reported,
        and the count is only for the entrant edges beyond the first.
        Also note that these counts are for the interpreted graph, not
        for the linearized form, so inverted edges are always
        re-entrant.
        """
        entrancies: Dict[Variable, int] = defaultdict(int)
        if self.top is not None:
            entrancies[self.top] += 1  # implicit entrancy to top
        for t in self.edges():
            entrancies[t.target] += 1
        return dict((v, cnt - 1) for v, cnt in entrancies.items() if cnt >= 2)


def _ensure_colon(role):
    if not role.startswith(':'):
        return ':' + role
    return role