Source code for penman.model

# -*- coding: utf-8 -*-

"""
Semantic models for interpreting graphs.
"""

from typing import (
    cast, Optional, Tuple, List, Dict, Set, Iterable, Mapping, Any)
import re
from collections import defaultdict

from penman.exceptions import ModelError
from penman.types import (
    Variable,
    Role,
    Constant,
    BasicTriple
)
from penman.graph import CONCEPT_ROLE


# A reification definition as accepted by Model(reifications=...).
# Model.__init__ unpacks each one as (role, concept, source, target), where
# the last two are the roles used for the source and target in Model.reify().
_ReificationSpec = Tuple[Role, Constant, Role, Role]
# One entry of the per-role lists stored in Model.reifications:
# (concept, source role, target role).
_Reified = Tuple[Constant, Role, Role]
# The 3-tuple of triples returned by Model.reify().
_Reification = Tuple[BasicTriple, BasicTriple, BasicTriple]


class Model(object):
    """
    A semantic model for Penman graphs.

    The model defines things like valid roles and transformations.

    Args:
        top_variable: the variable of the graph's top
        top_role: the role linking the graph's top to the top node
        concept_role: the role associated with node concepts
        roles: a mapping of roles to associated data; the keys are
            joined, unescaped, into a regular expression, so they act
            as patterns when testing whether a role is defined
        normalizations: a mapping of roles to normalized roles
        reifications: a list of 4-tuples
            `(role, concept, source_role, target_role)` used to define
            reifications
    """

    def __init__(self,
                 top_variable: Variable = 'top',
                 top_role: Role = ':TOP',
                 concept_role: Role = CONCEPT_ROLE,
                 roles: Optional[Mapping[Role, Any]] = None,
                 normalizations: Optional[Mapping[Role, Role]] = None,
                 reifications: Optional[Iterable[_ReificationSpec]] = None):
        self.top_variable = top_variable
        self.top_role = top_role
        self.concept_role = concept_role

        # Copy the mappings so later changes to the caller's objects do
        # not affect the model; empty or missing mappings become {}.
        self.roles = dict(roles) if roles else {}
        # One alternation over every defined role plus the top and
        # concept roles; see _has_role() for how it is applied.
        self._role_re = re.compile(
            '^({})$'.format(
                '|'.join(list(self.roles) + [top_role, concept_role])))

        self.normalizations = dict(normalizations) if normalizations else {}

        # Group the reification definitions by the role they reify,
        # preserving the order in which they were given.
        reifs: Dict[Role, List[_Reified]] = defaultdict(list)
        if reifications:
            for role, concept, source, target in reifications:
                reifs[role].append((concept, source, target))
        # Store a plain dict so lookups of unknown roles do not silently
        # create entries.
        self.reifications = dict(reifs)

    def __eq__(self, other):
        if not isinstance(other, Model):
            return NotImplemented
        return (self.top_variable == other.top_variable
                and self.top_role == other.top_role
                and self.concept_role == other.concept_role
                and self.roles == other.roles
                and self.normalizations == other.normalizations
                and self.reifications == other.reifications)

    @classmethod
    def from_dict(cls, d):
        """
        Instantiate a model from a dictionary.

        The items of *d* are passed as keyword arguments to the
        constructor.
        """
        return cls(**d)

    def has_role(self, role: Role) -> bool:
        """
        Return `True` if *role* is defined by the model.

        If *role* is not in the model but a single deinversion of
        *role* is in the model, then `True` is returned. Otherwise
        `False` is returned, even if something like
        :meth:`canonicalize_role` could return a valid role.
        """
        return (self._has_role(role)
                or (role.endswith('-of') and self._has_role(role[:-3])))

    def _has_role(self, role: Role) -> bool:
        # fullmatch() rather than match(): with match(), the trailing
        # '$' of the pattern also matches just before a final newline,
        # so a role such as ':TOP\n' would wrongly be accepted.
        return self._role_re.fullmatch(role) is not None

    def is_role_inverted(self, role: Role) -> bool:
        """
        Return `True` if *role* is inverted.

        A role that is itself defined by the model is never considered
        inverted, even if it ends with `-of`.
        """
        return not self._has_role(role) and role.endswith('-of')

    def invert_role(self, role: Role) -> Role:
        """
        Invert *role*.

        An inverted role has its final `-of` removed; any other role
        has `-of` appended.
        """
        if self.is_role_inverted(role):
            return role[:-3]
        return role + '-of'

    def invert(self, triple: BasicTriple) -> BasicTriple:
        """
        Invert *triple*.

        This will invert or deinvert a triple regardless of its
        current state. :meth:`deinvert` will deinvert a triple only if
        it is already inverted. Unlike :meth:`canonicalize`, this will
        not perform multiple inversions or replace the role with a
        normalized form.
        """
        source, role, target = triple
        inverse = self.invert_role(role)
        # casting is just for the benefit of the type checker; it does
        # not actually check that target is a valid variable type
        target = cast(Variable, target)
        return (target, inverse, source)

    def deinvert(self, triple: BasicTriple) -> BasicTriple:
        """
        De-invert *triple* if it is inverted.

        Unlike :meth:`invert`, this only inverts a triple if the model
        considers it to be already inverted, otherwise it is left
        alone. Unlike :meth:`canonicalize`, this will not normalize
        multiple inversions or replace the role with a normalized
        form.
        """
        if self.is_role_inverted(triple[1]):
            triple = self.invert(triple)
        return triple

    def canonicalize_role(self, role: Role) -> Role:
        """
        Canonicalize *role*.

        Role canonicalization will do the following:

        * Ensure the role starts with `':'`

        * Normalize multiple inversions (e.g., `ARG0-of-of` becomes
          `ARG0`), but it does *not* change the direction of the role

        * Replace the resulting role with a normalized form if one is
          defined in the model
        """
        # '/' is left without a ':' prefix
        if role != '/' and not role.startswith(':'):
            role = ':' + role
        role = self._canonicalize_inversion(role)
        role = self.normalizations.get(role, role)
        return role

    def _canonicalize_inversion(self, role: Role) -> Role:
        # Roles defined by the model are returned untouched. Otherwise
        # invert the role twice per pass, which cancels one pair of
        # '-of' suffixes, and stop when a pass no longer changes it.
        invert = self.invert_role
        if not self._has_role(role):
            while True:
                prev = role
                role = invert(invert(role))
                if prev == role:
                    break
        return role

    def canonicalize(self, triple: BasicTriple) -> BasicTriple:
        """
        Canonicalize *triple*.

        See :meth:`canonicalize_role` for a description of how the
        role is canonicalized. Unlike :meth:`invert`, this does not
        swap the source and target of *triple*.
        """
        source, role, target = triple
        canonical = self.canonicalize_role(role)
        return (source, canonical, target)

    def is_reifiable(self, triple: BasicTriple) -> bool:
        """Return `True` if the role of *triple* can be reified."""
        return triple[1] in self.reifications

    def reify(self,
              triple: BasicTriple,
              variables: Optional[Set[Variable]] = None) -> _Reification:
        """
        Return the three triples that reify *triple*.

        Note that, unless *variables* is given, the node variable for
        the reified node is not necessarily valid for the target
        graph. When incorporating the reified triples, this variable
        should then be replaced.

        If the role of *triple* does not have a defined reification, a
        :exc:`ModelError` is raised.

        Args:
            triple: the triple to reify
            variables: a set of variables that should not be used for
                the reified node's variable
        Returns:
            The 3-tuple of triples that reify *triple*.
        Raises:
            ModelError: if the role of *triple* has no reification
        """
        source, role, target = triple
        if role not in self.reifications:
            raise ModelError("'{}' cannot be reified".format(role))
        # Only the first reification defined for the role is used.
        concept, source_role, target_role = self.reifications[role][0]

        # Start from '_' and, if it is taken, try '_2', '_3', ... until
        # a variable not in *variables* is found.
        var = '_'
        if variables:
            i = 2
            while var in variables:
                var = '_{}'.format(i)
                i += 1

        # NOTE(review): this uses the module-level CONCEPT_ROLE rather
        # than self.concept_role -- confirm that is intended for models
        # constructed with a non-default concept_role.
        return ((var, source_role, source),
                (var, CONCEPT_ROLE, concept),
                (var, target_role, target))