"""
Tree and graph transformations.
"""
import logging
from typing import Dict, List, Optional, Set, Tuple
from penman.epigraph import Epidata, Epidatum
from penman.exceptions import ModelError
from penman.graph import CONCEPT_ROLE, Graph
from penman.layout import (
POP,
Pop,
Push,
appears_inverted,
get_pushed_variable,
)
from penman.model import Model
from penman.surface import Alignment, RoleAlignment, alignments
from penman.tree import Tree, is_atomic
from penman.types import BasicTriple, Node, Target, Variable
logger = logging.getLogger(__name__)
[docs]
def canonicalize_roles(t: Tree, model: Model) -> Tree:
"""
Normalize roles in *t* so they are canonical according to *model*.
This is a tree transformation instead of a graph transformation
because the orientation of the pure graph's triples is not decided
until the graph is configured into a tree.
Args:
t: a :class:`~penman.tree.Tree` object
model: a model defining role normalizations
Returns:
A new :class:`~penman.tree.Tree` object with canonicalized
roles.
Example:
>>> from penman.codec import PENMANCodec
>>> from penman.models.amr import model
>>> from penman.transform import canonicalize_roles
>>> codec = PENMANCodec()
>>> t = codec.parse('(c / chapter :domain-of 7)')
>>> t = canonicalize_roles(t, model)
>>> print(codec.format(t))
(c / chapter
:mod 7)
"""
if model is None:
model = Model()
tree = Tree(_canonicalize_node(t.node, model), metadata=t.metadata)
logger.info('Canonicalized roles: %s', tree)
return tree
def _canonicalize_node(node: Node, model: Model) -> Node:
var, edges = node
canonical_edges = []
for edge in edges:
role, tgt = edge
# alignments aren't parsed off yet, so handle them superficially
role, tilde, alignment = role.partition('~')
if not is_atomic(tgt):
tgt = _canonicalize_node(tgt, model)
canonical_role = model.canonicalize_role(role) + tilde + alignment
canonical_edges.append((canonical_role, tgt))
return (var, canonical_edges)
[docs]
def reify_edges(g: Graph, model: Model) -> Graph:
"""
Reify all edges in *g* that have reifications in *model*.
Args:
g: a :class:`~penman.graph.Graph` object
model: a model defining reifications
Returns:
A new :class:`~penman.graph.Graph` object with reified edges.
Example:
>>> from penman.codec import PENMANCodec
>>> from penman.models.amr import model
>>> from penman.transform import reify_edges
>>> codec = PENMANCodec(model=model)
>>> g = codec.decode('(c / chapter :mod 7)')
>>> g = reify_edges(g, model)
>>> print(codec.encode(g))
(c / chapter
:ARG1-of (_ / have-mod-91
:ARG2 7))
"""
vars = g.variables()
if model is None:
model = Model()
new_epidata = dict(g.epidata)
new_triples: List[BasicTriple] = []
for triple in g.triples:
if model.is_role_reifiable(triple[1]):
in_triple, node_triple, out_triple = model.reify(triple, vars)
if appears_inverted(g, triple):
in_triple, out_triple = out_triple, in_triple
new_triples.extend((in_triple, node_triple, out_triple))
var = node_triple[0]
vars.add(var)
# manage epigraphical markers
new_epidata[in_triple] = [Push(var)]
old_epis = new_epidata.pop(triple) if triple in new_epidata else []
node_epis, out_epis = _edge_markers(old_epis)
new_epidata[node_triple] = node_epis
new_epidata[out_triple] = out_epis
# we don't know where to put the final POP without configuring
# the tree; maybe this should be a tree operation?
else:
new_triples.append(triple)
g = Graph(new_triples, epidata=new_epidata, metadata=g.metadata)
logger.info('Reified edges: %s', g)
return g
[docs]
def dereify_edges(g: Graph, model: Model) -> Graph:
"""
Dereify edges in *g* that have reifications in *model*.
Args:
g: a :class:`~penman.graph.Graph` object
Returns:
A new :class:`~penman.graph.Graph` object with dereified
edges.
Example:
>>> from penman.codec import PENMANCodec
>>> from penman.models.amr import model
>>> from penman.transform import dereify_edges
>>> codec = PENMANCodec(model=model)
>>> g = codec.decode(
... '(c / chapter'
... ' :ARG1-of (_ / have-mod-91'
... ' :ARG2 7))')
>>> g = dereify_edges(g, model)
>>> print(codec.encode(g))
(c / chapter
:mod 7)
"""
if model is None:
model = Model()
agenda = _dereify_agenda(g, model)
new_epidata = dict(g.epidata)
new_triples: List[BasicTriple] = []
for triple in g.triples:
var = triple[0]
if var in agenda:
first, dereified, epidata = agenda[var]
# only insert at the first triple so the dereification
# appears in the correct location
if triple == first:
new_triples.append(dereified)
new_epidata[dereified] = epidata
if triple in new_epidata:
del new_epidata[triple]
else:
new_triples.append(triple)
g = Graph(new_triples, epidata=new_epidata, metadata=g.metadata)
logger.info('Dereified edges: %s', g)
return g
[docs]
def reify_attributes(g: Graph) -> Graph:
"""
Reify all attributes in *g*.
Args:
g: a :class:`~penman.graph.Graph` object
Returns:
A new :class:`~penman.graph.Graph` object with reified
attributes.
Example:
>>> from penman.codec import PENMANCodec
>>> from penman.models.amr import model
>>> from penman.transform import reify_attributes
>>> codec = PENMANCodec(model=model)
>>> g = codec.decode('(c / chapter :mod 7)')
>>> g = reify_attributes(g)
>>> print(codec.encode(g))
(c / chapter
:mod (_ / 7))
"""
variables = g.variables()
new_epidata = dict(g.epidata)
new_triples: List[BasicTriple] = []
i = 2
for triple in g.triples:
source, role, target = triple
if role != CONCEPT_ROLE and target not in variables:
# get unique var for new node
var = '_'
while var in variables:
var = f'_{i}'
i += 1
variables.add(var)
role_triple = (source, role, var)
node_triple = (var, CONCEPT_ROLE, target)
new_triples.extend((role_triple, node_triple))
# manage epigraphical markers
old_epis = new_epidata.pop(triple) if triple in new_epidata else []
role_epis, node_epis = _attr_markers(old_epis)
new_epidata[role_triple] = role_epis + [Push(var)]
new_epidata[node_triple] = node_epis + [POP]
else:
new_triples.append(triple)
g = Graph(new_triples, epidata=new_epidata, metadata=g.metadata)
logger.info('Reified attributes: %s', g)
return g
[docs]
def indicate_branches(g: Graph, model: Model) -> Graph:
"""
Insert TOP triples in *g* indicating the tree structure.
Note:
This depends on *g* containing the epigraphical layout markers
from parsing; it will not work with programmatically
constructed Graph objects or those whose epigraphical data
were removed.
Args:
g: a :class:`~penman.graph.Graph` object
model: a model defining the TOP role
Returns:
A new :class:`~penman.graph.Graph` object with TOP roles
indicating tree branches.
Example:
>>> from penman.codec import PENMANCodec
>>> from penman.models.amr import model
>>> from penman.transform import indicate_branches
>>> codec = PENMANCodec(model=model)
>>> g = codec.decode('''
... (w / want-01
... :ARG0 (b / boy)
... :ARG1 (g / go-02
... :ARG0 b))''')
>>> g = indicate_branches(g, model)
>>> print(codec.encode(g))
(w / want-01
:TOP b
:ARG0 (b / boy)
:TOP g
:ARG1 (g / go-02
:ARG0 b))
"""
new_triples: List[BasicTriple] = []
for t in g.triples:
push = next(
(epi for epi in g.epidata.get(t, []) if isinstance(epi, Push)),
None,
)
if push is not None:
if push.variable == t[2]:
new_triples.append((t[0], model.top_role, t[2]))
elif push.variable == t[0]:
assert isinstance(t[2], str)
new_triples.append((t[2], model.top_role, t[0]))
new_triples.append(t)
g = Graph(new_triples, epidata=g.epidata, metadata=g.metadata)
logger.info('Indicated branches: %s', g)
return g
_SplitMarkers = Tuple[Optional[Push], List[Pop], Epidata, Epidata]
def _reified_markers(epidata: Epidata) -> _SplitMarkers:
"""
Return epigraphical markers broken down by function.
When a relation is reified the original triple disappears so its
epigraphical data needs to be moved and sometimes altered.
Consider the following, which has surface alignment markers::
(a :role~1 b~2)
Under edge reification, the desired outcome is::
(a :ARG1-of (_ / role-label~1 :ARG2 b~2))
Under attribute reification, it is::
(a :role~1 (_ / b~2))
"""
push = None
pops = []
role_epis = []
other_epis = []
for epi in epidata:
if isinstance(epi, Push):
push = epi
elif isinstance(epi, Pop):
pops.append(epi)
elif epi.mode == 1:
role_epis.append(epi)
else:
other_epis.append(epi)
return push, pops, role_epis, other_epis
def _edge_markers(epidata: Epidata) -> Tuple[Epidata, Epidata]:
push, pops, role_epis, other_epis = _reified_markers(epidata)
# role markers on the original triple need to be converted to
# target markers, if possible
node_epis: List[Epidatum] = []
for epi in role_epis:
if isinstance(epi, RoleAlignment):
node_epis.append(Alignment(epi.indices, prefix=epi.prefix))
else:
pass # discard things we can't convert
# other markers on the original triple get grouped for the
# new outgoing triple
out_epis = other_epis
if push:
out_epis.append(push)
out_epis.extend(pops)
return node_epis, out_epis
_Dereification = Dict[
Variable,
Tuple[
BasicTriple, # inverted triple of reification
BasicTriple, # dereified triple
List[Epidatum],
],
] # computed epidata
def _dereify_agenda(g: Graph, model: Model) -> _Dereification:
alns = alignments(g)
agenda: _Dereification = {}
fixed: Set[Target] = set([g.top])
inst: Dict[Variable, BasicTriple] = {}
other: Dict[Variable, List[BasicTriple]] = {}
for triple in g.triples:
var, role, tgt = triple
if role == CONCEPT_ROLE:
inst[var] = triple
else:
fixed.add(tgt)
if var not in other:
other[var] = [triple]
else:
other[var].append(triple)
for var, instance in inst.items():
if (
var not in fixed
and len(other.get(var, [])) == 2
and model.is_concept_dereifiable(instance[2])
):
# passed initial checks
# now figure out which other edge is the first one
first, second = other[var]
if get_pushed_variable(g, second) == var:
first, second = second, first
try:
dereified = model.dereify(instance, first, second)
except ModelError:
pass
else:
# migrate epidata
epidata: List[Epidatum] = []
if instance in alns:
aln = alns[instance]
epidata.append(
RoleAlignment(aln.indices, prefix=aln.prefix)
)
epidata.extend(
epi
for epi in g.epidata[second]
if not isinstance(epi, RoleAlignment)
)
agenda[var] = (first, dereified, epidata)
return agenda
def _attr_markers(epidata: Epidata) -> Tuple[Epidata, Epidata]:
_, pops, role_epis, other_epis = _reified_markers(epidata)
node_epis = other_epis
node_epis.extend(pops)
return role_epis, node_epis