Source code for ebu_tt_live.bindings.validation.base

"""
This file contains all the pyxb helpers needed for enabling a concise semantic validation approach.
"""
import copy
import logging
import re
import six

from pyxb.namespace import ExpandedName

from ebu_tt_live.errors import SemanticValidationError
from ebu_tt_live.strings import DOC_SYNTACTIC_VALIDATION_SUCCESSFUL, ERR_SEMANTIC_ID_UNIQUENESS
from pyxb.binding.basis import NonElementContent, ElementContent

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Logger registered under the fixed name 'document_logger'. It is not used in
# the code visible here; presumably meant for document-level messages
# (confirm against callers).
document_logger = logging.getLogger('document_logger')


class SemanticValidationMixin(object):
    """
    Mixin providing the boilerplate for semantic validation.

    It offers no-op hook methods that subclasses override for the traversal and
    copy phases, a ``_setAttribute`` wrapper that can fire per-attribute
    callbacks, and a few small helpers for inspecting attribute values.
    """

    # These dictionaries exist to override attribute setters (used in
    # contextual parsing). Keys are the tuples returned by
    # ``attr_en.uriTuple()``; values are callables.
    _attr_en_pre = {}
    _attr_en_post = {}

    def _setAttribute(self, attr_en, value_lex):
        """
        Set an attribute through the parent class, firing registered hooks.

        A callable registered in ``_attr_en_pre`` for this attribute is called
        as ``hook(self, attr_en, value_lex)`` before delegating; one registered
        in ``_attr_en_post`` is called as ``hook(self, result)`` afterwards.

        :param attr_en: attribute name object exposing ``uriTuple()``
        :param value_lex: value handed on unchanged to the parent implementation
        :return: whatever the parent ``_setAttribute`` returned
        """
        hook_key = attr_en.uriTuple()

        before_hooks = self._attr_en_pre
        if hook_key in before_hooks:
            before_hooks[hook_key](self, attr_en, value_lex)

        attribute_use = super(SemanticValidationMixin, self)._setAttribute(attr_en, value_lex)

        after_hooks = self._attr_en_post
        if hook_key in after_hooks:
            after_hooks[hook_key](self, attribute_use)

        return attribute_use

    def _semantic_before_traversal(self, dataset, element_content=None, parent_binding=None):
        """
        Semantic validation preprocess hook; does nothing by default.

        :param dataset: semantic context object
        :param element_content: the element itself
        :param parent_binding: optional, unused by this default implementation
        """
        return None

    def _semantic_after_traversal(self, dataset, element_content=None, parent_binding=None):
        """
        Semantic validation postprocess hook; does nothing by default.

        :param dataset: semantic context object
        :param element_content: the element itself
        :param parent_binding: optional, unused by this default implementation
        """
        return None

    def _do_link_copy_with_copied_parent(self, dataset, element_content, parent_binding):
        """
        Attach the copy of this element to the copy of its parent.

        Both copies are looked up in ``dataset['instance_mapping']``. When the
        element declaration is plural the copy is appended to the copied parent,
        otherwise it is assigned to the attribute named after the declaration's
        local name.

        :param dataset: semantic context object holding ``instance_mapping``
        :param element_content: object exposing ``elementDeclaration``
        :param parent_binding: the original (not copied) parent
        """
        copied_self = dataset['instance_mapping'][self]
        # Link with parent
        copied_parent = dataset['instance_mapping'][parent_binding]

        declaration = element_content.elementDeclaration
        if not declaration.isPlural():
            setattr(copied_parent, declaration.name().localName(), copied_self)
            return

        copied_parent.append(copied_self)

    def _semantic_before_copy(self, dataset, element_content=None):
        """
        Hook for checks before attempting to copy an element; no-op by default.

        :param dataset: semantic context object
        :param element_content: optional, unused by this default implementation
        :return: None
        """
        return None

    def _semantic_before_subtree_copy(self, copied_instance, dataset, element_content=None):
        """
        Hook available to the copying operation; no-op by default.

        :param copied_instance: unused by this default implementation
        :param dataset: semantic context object
        :param element_content: optional, unused by this default implementation
        :return: None
        """
        return None

    def _semantic_after_subtree_copy(self, copied_instance, dataset, element_content=None):
        """
        Hook available to the copying operation; no-op by default.

        :param copied_instance: unused by this default implementation
        :param dataset: semantic context object
        :param element_content: optional, unused by this default implementation
        :return: None
        """
        return None

    def _semantic_attributes_missing(self, attr_names):
        """
        Report which of the given attributes have no value on the binding.

        :param attr_names: attribute names to inspect with ``getattr``
        :return: list of those names whose value is ``None``, in input order
        """
        missing = []
        for name in attr_names:
            if getattr(self, name) is None:
                missing.append(name)
        return missing

    def _semantic_attributes_present(self, attr_names):
        """
        Report which of the given attributes have a value on the binding.

        :param attr_names: attribute names to inspect with ``getattr``
        :return: list of those names whose value is not ``None``, in input order
        """
        present = []
        for name in attr_names:
            if getattr(self, name) is not None:
                present.append(name)
        return present

    def _semantic_copy(self, dataset):
        """
        Copy this element within a copying context.

        Classes may customise this to make a smarter copy using the context;
        the default ignores ``dataset`` and makes a shallow copy.

        :param dataset: semantic context object (unused here)
        :return: cloned element
        """
        clone = copy.copy(self)
        return clone

    def merge(self, other_elem, dataset):
        """
        Try and merge the contents of 2 elements of the same type.

        Not implemented at this level.

        :param other_elem: the element to merge with
        :param dataset: semantic context object
        :raise NotImplementedError: always
        """
        raise NotImplementedError()

    def _find_deconflicted_elem_by_id(self, elem_id, dataset):
        """
        Map an original element id to the element recorded for it.

        The original element is fetched with
        ``dataset['tt_element'].get_element_by_id`` and then used as the key
        into ``dataset['instance_mapping']``.

        :param elem_id: id of the original element
        :param dataset: semantic context object
        :return: the mapped element
        """
        original = dataset['tt_element'].get_element_by_id(elem_id)
        return dataset['instance_mapping'][original]

    def _semantic_deconflicted_ids(self, attr_name, dataset):
        """
        Translate the id reference(s) stored in an attribute to the mapped ids.

        :param attr_name: name of the attribute holding one id or several ids
        :param dataset: semantic context object
        :return: ``None`` when the attribute is ``None``; a single id when the
            attribute is a text string; otherwise a list of ids
        """
        referenced = getattr(self, attr_name)
        if referenced is None:
            return None

        if isinstance(referenced, six.text_type):
            # A single reference: resolve it and hand back just its id.
            return self._find_deconflicted_elem_by_id(elem_id=referenced, dataset=dataset).id

        return [
            self._find_deconflicted_elem_by_id(elem_id=one_id, dataset=dataset).id
            for one_id in referenced
        ]

    def get_attribute_value(self, att_name):
        """
        Look up an attribute value by a local or ``prefix:name`` style name.

        :param att_name: attribute name, optionally containing a ``:``
        :return: value of the attribute named by the local-name part
        """
        name_parts = att_name.split(':')
        expanded = ExpandedName(*name_parts)
        # NOTE: Strictly the attribute map should be consulted here, which needs the fully qualified name and
        # therefore the namespace. Corners are cut: local and namespaced attributes of the same name are not mixed,
        # so taking just the local name is fairly safe, but this is not a fully XML compliant lookup.
        return getattr(self, expanded.localName())


class SemanticDocumentMixin(SemanticValidationMixin):
    """
    Document-level mixin that chains semantic validation after the syntactic
    validation step inside ``validateBinding``.
    """

    # Callable used to build the semantic validator; it is invoked as
    # ``_validator_class(root_element=self)``. Left unset (None) at this level.
    _validator_class = None

    def _semantic_before_validation(self):
        """
        Hook run before the syntactic validation starts; the user may execute
        custom code here. Does nothing by default.
        """
        return None

    def validateBinding(self, **extra_kwargs):
        """Check whether the binding content matches its content model.

        When validation is enabled the steps run in this order: the
        ``_semantic_before_validation`` hook, the syntactic validation
        (``_validateBinding_vx``), then the semantic validator's ``proceed``
        with ``extra_kwargs`` passed through.

        @return: C{True} if validation was not performed due to settings,
            otherwise a dictionary with the keys C{success} and
            C{semantic_dataset}.
        @raise pyxb.BatchContentValidationError: complex content does not match model
        @raise pyxb.SimpleTypeValueError: attribute or simple content fails to satisfy constraints
        """
        if not self._performValidation():
            return True

        # Step1: Before
        self._semantic_before_validation()
        # Step2: DFS of syntactic validation
        self._validateBinding_vx()
        # Step3: DFS of semantic validation
        semantic_validator = self._validator_class(root_element=self)
        semantic_dataset = semantic_validator.proceed(**extra_kwargs)

        outcome = {
            "success": True,
            "semantic_dataset": semantic_dataset
        }
        return outcome


class IDMixin(object):
    """
    Making sure the IDs are collected and maintained appropriately.

    The methods read and write ``self.id``, which the class using this mixin is
    expected to provide.
    """

    # Raw string: in a normal string literal ``\.`` is an invalid escape
    # sequence that newer Python versions warn about. The compiled pattern is
    # unchanged. It matches ids of the form produced by the template below.
    _re_ebu_id_deconflict = re.compile(r'SEQ([0-9]+)\.(.*)')
    _tp_ebu_id_deconflict = 'SEQ{sequence_number}.{original_id}'

    def deconflict_id(self, seq_num):
        """
        Prefix the element id with a sequence number, in place.

        The id is rewritten as ``SEQ<seq_num>.<original id>``. Nothing happens
        when the id is ``None``.

        :param seq_num: value substituted for the sequence number in the new id
        """
        if self.id is not None:
            self.id = self._tp_ebu_id_deconflict.format(
                sequence_number=seq_num,
                original_id=self.id
            )

    def _semantic_register_id(self, dataset):
        """
        Record this element under its id in ``dataset['elements_by_id']``.

        Elements whose id is ``None`` are not registered.

        :param dataset: semantic context object holding ``elements_by_id``
        :raise SemanticValidationError: if the id is already registered
        """
        ebid = dataset['elements_by_id']
        if self.id is not None:
            if self.id in ebid:
                raise SemanticValidationError(
                    ERR_SEMANTIC_ID_UNIQUENESS.format(
                        id=self.id
                    )
                )
            ebid[self.id] = self