# Source code for penman.model

# -*- coding: utf-8 -*-

"""
Semantic models for interpreting graphs.
"""

from typing import (
    cast, Tuple, List, Dict, Set, Iterable, Mapping, Any, Optional)
import re
from collections import defaultdict
import random

from penman.exceptions import ModelError
from penman.types import (
    Variable,
    Role,
    Constant,
    BasicTriple
)
from penman.tree import Branch
from penman.graph import CONCEPT_ROLE


# Input form of one reification definition, as unpacked by
# ``Model.__init__``: (role, concept, source_role, target_role).
_ReificationSpec = Tuple[Role, Constant, Role, Role]
# Stored form of a reification definition; ``Model.reifications`` maps
# each role to a list of these: (concept, source_role, target_role).
_Reified = Tuple[Constant, Role, Role]
# Result of ``Model.reify``: the source, concept, and target triples
# of the reified node, in that order.
_Reification = Tuple[BasicTriple, BasicTriple, BasicTriple]


class Model(object):
    """
    A semantic model for Penman graphs.

    The model defines things like valid roles and transformations.

    Args:
        top_variable: the variable of the graph's top
        top_role: the role linking the graph's top to the top node
        concept_role: the role associated with node concepts
        roles: a mapping of roles to associated data; the keys are
            used as regular-expression alternatives when checking
            whether a role is defined by the model
        normalizations: a mapping of roles to normalized roles
        reifications: a list of 4-tuples used to define reifications;
            each is ``(role, concept, source_role, target_role)``
    """

    def __init__(self,
                 top_variable: Variable = 'top',
                 top_role: Role = ':TOP',
                 concept_role: Role = CONCEPT_ROLE,
                 roles: Optional[Mapping[Role, Any]] = None,
                 normalizations: Optional[Mapping[Role, Role]] = None,
                 reifications: Optional[Iterable[_ReificationSpec]] = None):
        self.top_variable = top_variable
        self.top_role = top_role
        self.concept_role = concept_role

        # Copy the mappings so the model does not share state with
        # the objects passed in by the caller.
        self.roles = dict(roles) if roles else {}
        self.normalizations = dict(normalizations) if normalizations else {}

        # The role keys are joined unescaped, so each one acts as a
        # regular-expression alternative; the top and concept roles
        # are always included.
        self._role_re = re.compile(
            '^({})$'.format(
                '|'.join(list(self.roles) + [top_role, concept_role])))

        # Group the reification definitions by role, preserving the
        # order in which they were given.
        reifs: Dict[Role, List[_Reified]] = defaultdict(list)
        if reifications:
            for role, concept, source, target in reifications:
                reifs[role].append((concept, source, target))
        # Store a plain dict so that looking up an unknown role later
        # does not silently create an entry.
        self.reifications = dict(reifs)

    def __eq__(self, other):
        """
        Return ``True`` if *other* is a model with the same settings.

        ``NotImplemented`` is returned if *other* is not a
        :class:`Model`.
        """
        if not isinstance(other, Model):
            return NotImplemented
        return (self.top_variable == other.top_variable
                and self.top_role == other.top_role
                and self.concept_role == other.concept_role
                and self.roles == other.roles
                and self.normalizations == other.normalizations
                and self.reifications == other.reifications)

    @classmethod
    def from_dict(cls, d):
        """
        Instantiate a model from a dictionary.

        The items of *d* are passed as keyword arguments to the
        class constructor.
        """
        return cls(**d)

    def has_role(self, role: Role) -> bool:
        """
        Return ``True`` if *role* is defined by the model.

        If *role* is not in the model but a single deinversion of
        *role* is in the model, then ``True`` is returned. Otherwise
        ``False`` is returned, even if something like
        :meth:`canonicalize_role` could return a valid role.
        """
        return (self._has_role(role)
                or (role.endswith('-of') and self._has_role(role[:-3])))

    def _has_role(self, role: Role) -> bool:
        """Return ``True`` if *role* matches a role pattern exactly."""
        # fullmatch() rather than match(): the pattern's '$' alone
        # would also accept a role followed by a trailing newline.
        return self._role_re.fullmatch(role) is not None

    def is_role_inverted(self, role: Role) -> bool:
        """
        Return ``True`` if *role* is inverted.

        A role is considered inverted if it ends with ``-of`` and is
        not itself a role defined by the model.
        """
        return not self._has_role(role) and role.endswith('-of')

    def invert_role(self, role: Role) -> Role:
        """
        Invert *role*.

        An inverted role has its final ``-of`` removed; any other role
        has ``-of`` appended.
        """
        if self.is_role_inverted(role):
            return role[:-3]
        return role + '-of'

    def invert(self, triple: BasicTriple) -> BasicTriple:
        """
        Invert *triple*.

        This will invert or deinvert a triple regardless of its
        current state. :meth:`deinvert` will deinvert a triple only if
        it is already inverted. Unlike :meth:`canonicalize`, this will
        not perform multiple inversions or replace the role with a
        normalized form.
        """
        source, role, target = triple
        inverse = self.invert_role(role)
        # casting is just for the benefit of the type checker; it does
        # not actually check that target is a valid variable type
        target = cast(Variable, target)
        return (target, inverse, source)

    def deinvert(self, triple: BasicTriple) -> BasicTriple:
        """
        De-invert *triple* if it is inverted.

        Unlike :meth:`invert`, this only inverts a triple if the model
        considers it to be already inverted, otherwise it is left
        alone. Unlike :meth:`canonicalize`, this will not normalize
        multiple inversions or replace the role with a normalized
        form.
        """
        if self.is_role_inverted(triple[1]):
            triple = self.invert(triple)
        return triple

    def canonicalize_role(self, role: Role) -> Role:
        """
        Canonicalize *role*.

        Role canonicalization will do the following:

        * Ensure the role starts with `':'` (the role ``/`` is left
          as it is)

        * Normalize multiple inversions (e.g., ``ARG0-of-of`` becomes
          ``ARG0``), but it does *not* change the direction of the role

        * Replace the resulting role with a normalized form if one is
          defined in the model
        """
        if role != '/' and not role.startswith(':'):
            role = ':' + role
        role = self._canonicalize_inversion(role)
        role = self.normalizations.get(role, role)
        return role

    def _canonicalize_inversion(self, role: Role) -> Role:
        """Collapse repeated inversions of *role*, keeping direction."""
        invert = self.invert_role
        if not self._has_role(role):
            # Inverting twice strips one redundant '-of-of' pair per
            # pass; repeat until the role stops changing.
            while True:
                prev = role
                inverse = invert(role)
                role = invert(inverse)
                if prev == role:
                    break
        return role

    def canonicalize(self, triple: BasicTriple) -> BasicTriple:
        """
        Canonicalize *triple*.

        See :meth:`canonicalize_role` for a description of how the
        role is canonicalized. Unlike :meth:`invert`, this does not
        swap the source and target of *triple*.
        """
        source, role, target = triple
        canonical = self.canonicalize_role(role)
        return (source, canonical, target)

    def is_reifiable(self, triple: BasicTriple) -> bool:
        """Return ``True`` if the role of *triple* can be reified."""
        return triple[1] in self.reifications

    def reify(self,
              triple: BasicTriple,
              variables: Optional[Set[Variable]] = None) -> _Reification:
        """
        Return the three triples that reify *triple*.

        Note that, unless *variables* is given, the node variable
        for the reified node is not necessarily valid for the target
        graph. When incorporating the reified triples, this variable
        should then be replaced.

        If the role of *triple* does not have a defined reification, a
        :exc:`ModelError` is raised. If the role has more than one
        defined reification, the first one is used.

        Args:
            triple: the triple to reify
            variables: a set of variables that should not be used for
                the reified node's variable
        Returns:
            The 3-tuple of triples that reify *triple*.
        Raises:
            ModelError: if the role of *triple* has no defined
                reification
        """
        source, role, target = triple
        if role not in self.reifications:
            raise ModelError("'{}' cannot be reified".format(role))
        concept, source_role, target_role = next(iter(self.reifications[role]))

        # Start from '_' and, when it is taken, try '_2', '_3', ...
        # until a variable not in *variables* is found.
        var = '_'
        if variables:
            i = 2
            while var in variables:
                var = '_{}'.format(i)
                i += 1

        return ((var, source_role, source),
                (var, CONCEPT_ROLE, concept),
                (var, target_role, target))

    def original_order(self, branch: Branch) -> bool:
        """Branch sorting key that does not change the order."""
        # a constant key leaves a stable sort's input order untouched
        return True

    def canonical_order(self, branch: Branch) -> Tuple[bool, str, int]:
        """
        Branch sorting key that finds a canonical order.

        The key is ``(inverted, name, number)`` where *number* is the
        integer value of any digits at the end of the role (``0`` if
        there are none) and *name* is the role without those digits.
        """
        role, _, _ = branch
        m = re.match(r'(.*\D)(\d+)$', role)
        if m:
            rolename = m.group(1)
            roleno = int(m.group(2))
        else:
            rolename, roleno = role, 0
        return (self.is_role_inverted(role), rolename, roleno)

    def random_order(self, branch: Branch) -> float:
        """Branch sorting key that randomizes the order."""
        return random.random()