Source code for penman.model

# -*- coding: utf-8 -*-

"""
Semantic models for interpreting graphs.
"""

from collections import defaultdict
import random
import re
from typing import (
    cast, Tuple, List, Dict, Set, Iterable, Mapping, Any, Optional
)

from penman.exceptions import ModelError
from penman.types import (
    Variable,
    Role,
    Constant,
    BasicTriple
)
from penman.tree import Branch
from penman.graph import CONCEPT_ROLE


# One reification definition as passed to ``Model(reifications=...)``:
# (role, concept, source role, target role)
_ReificationSpec = Tuple[Role, Constant, Role, Role]
# A reification as stored per role in ``Model.reifications``:
# (concept, source role, target role)
_Reified = Tuple[Constant, Role, Role]
# The three triples returned by ``Model.reify()``
_Reification = Tuple[BasicTriple, BasicTriple, BasicTriple]


class Model:
    """
    A semantic model for Penman graphs.

    The model defines things like valid roles and transformations.

    The keys of *roles* are joined, unescaped, into a single regular
    expression, so a key may be a pattern that matches a family of
    roles rather than one literal role.

    Args:
        top_variable: the variable of the graph's top
        top_role: the role linking the graph's top to the top node
        concept_role: the role associated with node concepts
        roles: a mapping of roles to associated data
        normalizations: a mapping of roles to normalized roles
        reifications: a list of 4-tuples used to define reifications;
            each is (role, concept, source role, target role)
    """

    def __init__(self,
                 top_variable: Variable = 'top',
                 top_role: Role = ':TOP',
                 concept_role: Role = CONCEPT_ROLE,
                 roles: Optional[Mapping[Role, Any]] = None,
                 normalizations: Optional[Mapping[Role, Role]] = None,
                 reifications: Optional[Iterable[_ReificationSpec]] = None):
        self.top_variable = top_variable
        self.top_role = top_role
        self.concept_role = concept_role

        # Copy the mappings so later changes to the caller's objects do
        # not leak into the model.
        self.roles = dict(roles) if roles else {}
        self.normalizations = dict(normalizations) if normalizations else {}

        # The top and concept roles are always valid, in addition to
        # whatever *roles* defines.
        self._role_re = re.compile(
            '^({})$'.format(
                '|'.join(list(self.roles) + [top_role, concept_role])))

        # Group reifications by the role they reify, keeping the order
        # in which they were given.
        reifs: Dict[Role, List[_Reified]] = defaultdict(list)
        if reifications:
            for role, concept, source, target in reifications:
                reifs[role].append((concept, source, target))
        # Store a plain dict so lookups of unknown roles do not create
        # empty entries.
        self.reifications = dict(reifs)

    def __eq__(self, other):
        if not isinstance(other, Model):
            return NotImplemented
        return (self.top_variable == other.top_variable
                and self.top_role == other.top_role
                and self.concept_role == other.concept_role
                and self.roles == other.roles
                and self.normalizations == other.normalizations
                and self.reifications == other.reifications)

    @classmethod
    def from_dict(cls, d: Mapping[str, Any]) -> 'Model':
        """
        Instantiate a model from a dictionary.

        The items of *d* are passed as keyword arguments to the
        constructor.
        """
        return cls(**d)

    def has_role(self, role: Role) -> bool:
        """
        Return ``True`` if *role* is defined by the model.

        If *role* is not in the model but a single deinversion of
        *role* is in the model, then ``True`` is returned. Otherwise
        ``False`` is returned, even if something like
        :meth:`canonicalize_role` could return a valid role.
        """
        return (self._has_role(role)
                or (role.endswith('-of') and self._has_role(role[:-3])))

    def _has_role(self, role: Role) -> bool:
        """Return ``True`` if *role* itself matches a defined role."""
        # fullmatch() rather than match(): with match(), the trailing
        # '$' of the pattern also matches just before a final newline,
        # so a role such as ':ARG0\n' would wrongly be accepted.
        return self._role_re.fullmatch(role) is not None

    def is_role_inverted(self, role: Role) -> bool:
        """
        Return ``True`` if *role* is inverted.

        A role that ends in ``-of`` but is itself defined by the model
        is not considered inverted.
        """
        return not self._has_role(role) and role.endswith('-of')

    def invert_role(self, role: Role) -> Role:
        """
        Invert *role*.

        An inverted role has its final ``-of`` removed; any other role
        has ``-of`` appended.
        """
        if self.is_role_inverted(role):
            return role[:-3]
        return role + '-of'

    def invert(self, triple: BasicTriple) -> BasicTriple:
        """
        Invert *triple*.

        This will invert or deinvert a triple regardless of its
        current state. :meth:`deinvert` will deinvert a triple only if
        it is already inverted. Unlike :meth:`canonicalize`, this will
        not perform multiple inversions or replace the role with a
        normalized form.
        """
        source, role, target = triple
        inverse = self.invert_role(role)
        # casting is just for the benefit of the type checker; it does
        # not actually check that target is a valid variable type
        target = cast(Variable, target)
        return (target, inverse, source)

    def deinvert(self, triple: BasicTriple) -> BasicTriple:
        """
        De-invert *triple* if it is inverted.

        Unlike :meth:`invert`, this only inverts a triple if the model
        considers it to be already inverted, otherwise it is left
        alone. Unlike :meth:`canonicalize`, this will not normalize
        multiple inversions or replace the role with a normalized
        form.
        """
        if self.is_role_inverted(triple[1]):
            triple = self.invert(triple)
        return triple

    def canonicalize_role(self, role: Role) -> Role:
        """
        Canonicalize *role*.

        Role canonicalization will do the following:

        * Ensure the role starts with `':'`

        * Normalize multiple inversions (e.g., ``ARG0-of-of`` becomes
          ``ARG0``), but it does *not* change the direction of the role

        * Replace the resulting role with a normalized form if one is
          defined in the model
        """
        # '/' is left without a ':' prefix.
        if role != '/' and not role.startswith(':'):
            role = ':' + role
        role = self._canonicalize_inversion(role)
        role = self.normalizations.get(role, role)
        return role

    def _canonicalize_inversion(self, role: Role) -> Role:
        """Collapse repeated ``-of`` suffixes, preserving direction."""
        invert = self.invert_role
        if not self._has_role(role):
            # Inverting twice removes one pair of '-of' suffixes when
            # there is one to remove; stop once a double inversion no
            # longer changes the role.
            while True:
                prev = role
                inverse = invert(role)
                role = invert(inverse)
                if prev == role:
                    break
        return role

    def canonicalize(self, triple: BasicTriple) -> BasicTriple:
        """
        Canonicalize *triple*.

        See :meth:`canonicalize_role` for a description of how the
        role is canonicalized. Unlike :meth:`invert`, this does not
        swap the source and target of *triple*.
        """
        source, role, target = triple
        canonical = self.canonicalize_role(role)
        return (source, canonical, target)

    def is_reifiable(self, triple: BasicTriple) -> bool:
        """Return ``True`` if the role of *triple* can be reified."""
        return triple[1] in self.reifications

    def reify(self,
              triple: BasicTriple,
              variables: Optional[Set[Variable]] = None) -> _Reification:
        """
        Return the three triples that reify *triple*.

        Note that, unless *variables* is given, the node variable for
        the reified node is not necessarily valid for the target
        graph. When incorporating the reified triples, this variable
        should then be replaced.

        If the role of *triple* does not have a defined reification, a
        :exc:`ModelError` is raised. If more than one reification is
        defined for the role, the first one is used.

        Args:
            triple: the triple to reify
            variables: a set of variables that should not be used for
                the reified node's variable
        Returns:
            The 3-tuple of triples that reify *triple*.
        """
        source, role, target = triple
        if role not in self.reifications:
            raise ModelError("'{}' cannot be reified".format(role))
        concept, source_role, target_role = self.reifications[role][0]

        # Start from '_' and, if it is taken, try '_2', '_3', ... until
        # an unused variable is found. *variables* is not modified.
        var = '_'
        if variables:
            i = 2
            while var in variables:
                var = '_{}'.format(i)
                i += 1

        return ((var, source_role, source),
                (var, CONCEPT_ROLE, concept),
                (var, target_role, target))

    def original_order(self, branch: Branch) -> bool:
        """Branch sorting key that does not change the order."""
        # A constant key leaves a stable sort's input order untouched.
        return True

    def canonical_order(self, branch: Branch) -> Tuple[bool, str, int]:
        """
        Branch sorting key that finds a canonical order.

        The key is (is the role inverted, role name without a trailing
        number, trailing number or 0), so non-inverted roles come first
        and numbered roles sort numerically rather than textually.
        """
        role, _, _ = branch
        m = re.match(r'(.*\D)(\d+)$', role)
        if m:
            rolename = m.group(1)
            roleno = int(m.group(2))
        else:
            rolename, roleno = role, 0
        return (self.is_role_inverted(role), rolename, roleno)

    def random_order(self, branch: Branch) -> float:
        """Branch sorting key that randomizes the order."""
        return random.random()