Source code for ihm.reference

"""Classes for providing extra information about an :class:`ihm.Entity`"""

# Handle different naming of urllib in Python 2/3
try:
    import urllib.request as urlreq
except ImportError:    # pragma: no cover
    import urllib2
    import contextlib

    class CompatRequest(object):
        """Empty namespace object that stands in for the Python 3
           ``urllib.request`` module when only ``urllib2`` is available."""
        pass

    # Python 2's urlopen is not a context manager, so wrap it
    @contextlib.contextmanager
    def urlopen(*args, **keys):
        """Context-manager wrapper around ``urllib2.urlopen``.

           All arguments are passed through unchanged. The opened handle
           is yielded and is closed when the ``with`` block exits."""
        # Open *before* entering the try block. If the open itself fails
        # there is no handle to close, and the original exception must
        # propagate rather than being masked by an UnboundLocalError
        # raised from the finally clause.
        fh = urllib2.urlopen(*args, **keys)
        try:
            yield fh
        finally:
            fh.close()
    # Provide Python-3-like urllib.request.urlopen
    urlreq = CompatRequest()
    urlreq.urlopen = urlopen
import sys


class Reference(object):
    """Common base for objects that attach extra information to an
       :class:`ihm.Entity`.

       Do not instantiate this class directly. Use one of its subclasses,
       such as :class:`Sequence` or :class:`UniProtSequence`; those objects
       are typically handed to the :class:`ihm.Entity` constructor."""


class Sequence(Reference):
    """Reference to the sequence of an :class:`ihm.Entity` in a sequence
       database.

       Convenience subclasses exist for common databases (for example
       :class:`UniProtSequence`). Objects of this class are typically
       passed to the :class:`ihm.Entity` constructor.

       The correspondence between the database sequence and the entity
       sequence is described by :attr:`alignments`.

       :param str db_name: The name of the database.
       :param str db_code: The name of the sequence in the database.
       :param str accession: The database accession.
       :param str sequence: The complete sequence, as a string of
              one-letter codes.
       :param str details: Longer text describing the sequence.
    """

    def __init__(self, db_name, db_code, accession, sequence, details=None):
        self.db_name = db_name
        self.db_code = db_code
        self.accession = accession
        self.sequence = sequence
        self.details = details

        #: All alignments between the reference and entity sequences, as
        #: :class:`Alignment` objects. If none are provided, a simple 1:1
        #: alignment is assumed.
        self.alignments = []

    def _get_alignments(self):
        """Return the alignments to use for this reference.

           The user-supplied :attr:`alignments` list is returned as-is when
           it is non-empty. Otherwise a one-element list holding a default
           :class:`Alignment` is returned; that default object is created
           on first use and reused on later calls."""
        if self.alignments:
            return self.alignments
        # Create the fallback alignment lazily and cache it on the instance
        if not hasattr(self, '_default_alignment'):
            self._default_alignment = Alignment()
        return [self._default_alignment]


class UniProtSequence(Sequence):
    """Point to the sequence of an :class:`ihm.Entity` in UniProt.

       These objects are typically passed to the :class:`ihm.Entity`
       constructor. The database name is fixed to ``UNP``.

       :param str db_code: The UniProt name (e.g. NUP84_YEAST)
       :param str accession: The UniProt accession (e.g. P52891)

       See :class:`Sequence` for a description of the remaining parameters.
    """
    _db_name = 'UNP'

    def __init__(self, db_code, accession, sequence, details=None):
        super(UniProtSequence, self).__init__(
            self._db_name, db_code, accession, sequence, details)

    def __str__(self):
        return "<ihm.reference.UniProtSequence(%r)>" % self.accession

    @classmethod
    def from_accession(cls, accession):
        """Create :class:`UniProtSequence` from just an accession.

           This is done by querying the UniProt web API, so requires
           network access. The FASTA header line (expected to look like
           ``>sp|ACCESSION|CODE description`` or ``>tr|...``) provides
           the database code and the details text; the remainder of the
           response provides the sequence.

           :param str accession: The UniProt accession (e.g. P52891)
           :return: A new object of this class.
           :raises ValueError: if the first line of the response is not
                   a recognized UniProt FASTA header.
        """
        # urlopen returns bytes
        if sys.version_info[0] >= 3:
            def decode(t):
                return t.decode('ascii')
        else:
            def decode(t):  # pragma: no cover
                return t

        url = 'https://www.uniprot.org/uniprot/%s.fasta' % accession
        with urlreq.urlopen(url) as fh:
            header = decode(fh.readline())
            spl = header.split('|')
            if len(spl) < 3 or spl[0] not in ('>sp', '>tr'):
                raise ValueError("Cannot parse UniProt header %s" % header)
            # Third field is "CODE free-text description"
            cd = spl[2].split(None, 1)
            code = cd[0]
            details = cd[1].rstrip('\r\n') if len(cd) > 1 else None
            # Drop every whitespace character (not only '\n') so that a
            # response with CRLF line endings does not leave stray '\r'
            # characters inside the sequence; this matches the '\r\n'
            # handling applied to the header above.
            seq = ''.join(decode(fh.read()).split())
            return cls(code, accession, seq, details)


class Alignment(object):
    """A sequence range that aligns between the database and the entity.

       This describes part of the sequence in the sequence database
       (:class:`Sequence`) and in the :class:`ihm.Entity`. The two ranges
       must be the same length and have the same primary sequence (any
       differences must be described with :class:`SeqDif` objects).

       :param int db_begin: The first residue in the database sequence
              that is used (defaults to the entire sequence).
       :param int db_end: The last residue in the database sequence
              that is used (or None, the default, to use the entire
              sequence).
       :param int entity_begin: The first residue in the
              :class:`~ihm.Entity` sequence that is taken from the
              reference (defaults to the entire entity sequence).
       :param int entity_end: The last residue in the
              :class:`~ihm.Entity` sequence that is taken from the
              reference (or None, the default, to use the entire
              sequence).
       :param seq_dif: Single-point mutations made to the sequence.
              The items are copied into a new list, so the caller's
              container is never modified through this object.
       :type seq_dif: Sequence of :class:`SeqDif` objects.
    """

    # The seq_dif default is an immutable empty tuple rather than a list,
    # so there is no mutable object shared between calls.
    def __init__(self, db_begin=1, db_end=None, entity_begin=1,
                 entity_end=None, seq_dif=()):
        self.db_begin, self.db_end = db_begin, db_end
        self.entity_begin, self.entity_end = entity_begin, entity_end
        # Each instance owns its own list, independent of the argument
        self.seq_dif = list(seq_dif)


class SeqDif(object):
    """Describe a single difference between a reference sequence and an
       entity sequence. See :class:`Alignment`.

       :param int seq_id: The residue index in the entity sequence.
       :param db_monomer: The monomer type (as a :class:`~ihm.ChemComp`
              object) in the reference sequence.
       :type db_monomer: :class:`ihm.ChemComp`
       :param monomer: The monomer type (as a :class:`~ihm.ChemComp`
              object) in the entity sequence.
       :type monomer: :class:`ihm.ChemComp`
       :param str details: Descriptive text for the sequence difference.
    """

    def __init__(self, seq_id, db_monomer, monomer, details=None):
        # Plain value holder: store each argument under the same name
        self.seq_id = seq_id
        self.db_monomer = db_monomer
        self.monomer = monomer
        self.details = details