Source code for ebu_tt_live.bindings.validation.base

"""
This file contains all the pyxb helpers needed for enabling a concise semantic validation approach.
"""
import copy
import logging
import re
import six

from pyxb.namespace import ExpandedName

from ebu_tt_live.errors import SemanticValidationError
from ebu_tt_live.strings import DOC_SYNTACTIC_VALIDATION_SUCCESSFUL, ERR_SEMANTIC_ID_UNIQUENESS
from pyxb.binding.basis import NonElementContent, ElementContent

log = logging.getLogger(__name__)
document_logger = logging.getLogger('document_logger')


class SemanticValidationMixin(object):
    """
    This mixin contains the necessary boilerplate to enable semantic validation as well as the
    _setAttribute hooks that help populate the context object with useful data.
    """

    # These dictionaries exist to override attribute setters. Used in contextual parsing
    _attr_en_pre = {}
    _attr_en_post = {}

    def _setAttribute(self, attr_en, value_lex):
        uri_tuple = attr_en.uriTuple()
        if uri_tuple in self._attr_en_pre:
            self._attr_en_pre[uri_tuple](self, attr_en, value_lex)
        au = super(SemanticValidationMixin, self)._setAttribute(attr_en, value_lex)
        if uri_tuple in self._attr_en_post:
            self._attr_en_post[uri_tuple](self, au)
        return au

    def _semantic_before_traversal(self, dataset, element_content=None, parent_binding=None):
        """
        Semantic validation preprocess hook.

        :param dataset: semantic context object
        :param element_content: the element itself
        """
        pass

    def _semantic_after_traversal(self, dataset, element_content=None, parent_binding=None):
        """
        Semantic validation postprocess hook.

        :param dataset: semantic context object
        :param element_content: the element itself
        """
        pass

    def _semantic_before_copy(self, dataset, element_content=None):
        """
        Meant for checks before attempting to copy an element.

        :param dataset:
        :param element_content:
        :return:
        """
        pass

    def _semantic_before_subtree_copy(self, copied_instance, dataset, element_content=None):
        """
        Hook called during the copy operation, before the element's subtree is copied.

        :param copied_instance: the copied element
        :param dataset:
        :param element_content:
        :return:
        """
        pass

    def _semantic_after_subtree_copy(self, copied_instance, dataset, element_content=None):
        """
        Hook called during the copy operation, after the element's subtree has been copied.

        :param copied_instance: the copied element
        :param dataset:
        :param element_content:
        :return:
        """
        pass

    def _semantic_attributes_missing(self, attr_names):
        """
        Return those attributes from attr_names that have no value defined on the binding.

        :param attr_names: names of the attributes to check on the element
        :return: list of attribute names whose value is missing
        """
        result = [attr for attr in attr_names if getattr(self, attr) is None]
        return result

    def _semantic_attributes_present(self, attr_names):
        """
        Return those attributes from attr_names that have a value defined on the binding.

        :param attr_names: names of the attributes to check on the element
        :return: list of attribute names whose value is present
        """
        result = [attr for attr in attr_names if getattr(self, attr) is not None]
        return result

    def _semantic_copy(self, dataset):
        """
        This copy function is more powerful than a plain copy because it receives an extra copying
        context (dataset) with which a smarter copy can be made. Subclasses may customise it.
        The default is a shallow copy.

        :param dataset:
        :return: cloned element
        """
        return copy.copy(self)

    def merge(self, other_elem, dataset):
        """
        Try to merge the contents of two elements of the same type.

        :param other_elem:
        :param dataset:
        :return:
        """
        raise NotImplementedError()

    def _find_deconflicted_elem_by_id(self, elem_id, dataset):
        old_elem = dataset['tt_element'].get_element_by_id(elem_id)
        new_elem = dataset['instance_mapping'][old_elem]
        return new_elem

    def _semantic_deconflicted_ids(self, attr_name, dataset):
        """
        Looks up the referenced styles/regions in the conversion mapping and returns the new
        idref string(s).

        :param attr_name:
        :param dataset:
        :return:
        """
        old_elem_ids = getattr(self, attr_name)
        if old_elem_ids is None:
            return None
        if isinstance(old_elem_ids, six.text_type):
            new_elem = self._find_deconflicted_elem_by_id(elem_id=old_elem_ids, dataset=dataset)
            return new_elem.id
        else:
            new_elem_ids = []
            for elem_id in old_elem_ids:
                new_elem = self._find_deconflicted_elem_by_id(elem_id=elem_id, dataset=dataset)
                new_elem_ids.append(new_elem.id)
            return new_elem_ids

    def get_attribute_value(self, att_name):
        """
        This function is a handy extension that allows us to look up attribute values easily,
        regardless of whether they are local or namespaced attribute names. I did not find its
        equivalent in PyXB.

        :param att_name:
        :return:
        """
        attr_en = ExpandedName(*att_name.split(':'))
        # NOTE: At this point we should go to the attribute map and locate the attribute, but for that the
        # namespace has to be located too because its fully qualified name is required... etc. Cutting corners
        # here: since we don't mix local and namespaced attributes of the same name, it is fairly safe to just
        # take the localName bit. But this is not a fully XML-compliant way to support all possibilities in
        # all cases.
        return getattr(self, attr_en.localName())

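# Illustrative sketch, not part of this module: a binding subclass could register _setAttribute
# hooks keyed by the attribute's URI tuple. The function and class names below are hypothetical
# and only show how _attr_en_pre/_attr_en_post are meant to be wired.
#
#     def _pre_set_begin(self, attr_en, value_lex):
#         # runs before the attribute value is assigned
#         log.debug('setting %s to %s', attr_en, value_lex)
#
#     def _post_set_begin(self, attr_use):
#         # runs after assignment, with the value returned by the parent _setAttribute
#         pass
#
#     class timed_element_type(SemanticValidationMixin):
#         _attr_en_pre = {ExpandedName(None, 'begin').uriTuple(): _pre_set_begin}
#         _attr_en_post = {ExpandedName(None, 'begin').uriTuple(): _post_set_begin}
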
class SemanticDocumentMixin(SemanticValidationMixin):

    _validator_class = None

    def _semantic_before_validation(self):
        """
        Hook that runs before PyXB starts its syntactic validation; the user may execute
        custom code here.
        """
        pass

    def validateBinding(self, **extra_kwargs):
        """
        Check whether the binding content matches its content model.

        @return: C{True} if validation was not performed due to settings, otherwise a result
            dictionary with C{success} and C{semantic_dataset} keys.
        @raise pyxb.BatchContentValidationError: complex content does not match model  # Wondering about this...
        @raise pyxb.SimpleTypeValueError: attribute or simple content fails to satisfy constraints
        """
        if self._performValidation():
            # Step 1: before-validation hook
            self._semantic_before_validation()
            # Step 2: DFS of syntactic validation
            self._validateBinding_vx()
            # Step 3: DFS of semantic validation
            validator = self._validator_class(root_element=self)
            result = validator.proceed(**extra_kwargs)
            return {
                "success": True,
                "semantic_dataset": result
            }
        return True

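# Illustrative usage, not part of this module (the document object below is hypothetical): for a
# binding whose class mixes in SemanticDocumentMixin and sets _validator_class, extra keyword
# arguments are forwarded to the validator's proceed() call.
#
#     result = doc.validateBinding()
#     if result is not True:                        # True means validation was skipped by settings
#         semantic_dataset = result['semantic_dataset']
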
class IDMixin(object):
    """
    Making sure the IDs are collected and maintained appropriately.
    """

    _re_ebu_id_deconflict = re.compile(r'SEQ([0-9]+)\.(.*)')
    _tp_ebu_id_deconflict = 'SEQ{sequence_number}.{original_id}'

    def deconflict_id(self, seq_num):
        if self.id is not None:
            self.id = self._tp_ebu_id_deconflict.format(
                sequence_number=seq_num,
                original_id=self.id
            )

    def _semantic_register_id(self, dataset):
        ebid = dataset['elements_by_id']
        if self.id is not None:
            if self.id in ebid:
                raise SemanticValidationError(
                    ERR_SEMANTIC_ID_UNIQUENESS.format(
                        id=self.id
                    )
                )
            ebid[self.id] = self
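
# Illustrative sketch, not part of this module (the element below is hypothetical): deconflict_id()
# prefixes the ID with the sequence number using _tp_ebu_id_deconflict, and _re_ebu_id_deconflict
# can split such an ID back into its parts.
#
#     elem.id = 'style1'
#     elem.deconflict_id(seq_num=3)                            # elem.id becomes 'SEQ3.style1'
#     IDMixin._re_ebu_id_deconflict.match(elem.id).groups()    # ('3', 'style1')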