Source code for ihm.location

"""Classes for tracking external data used by mmCIF models.
"""

import os


class Location(object):
    """Identifies the location where a resource can be found.

       Do not use this class itself, but one of its subclasses. Typically
       the resource may be found in a file (either on the local disk or at a
       DOI) - for this use one of the subclasses of :class:`FileLocation`.
       Alternatively the resource may be found in an experiment-specific
       database such as PDB or EMDB - for this use :class:`DatabaseLocation`
       or one of its subclasses. A Location may be passed to

       - a :class:`~ihm.dataset.Dataset` to point to where an
         experimental dataset may be found;
       - an :class:`~ihm.model.Ensemble` to point to coordinates for an
         entire ensemble, for example as a DCD file;
       - a :class:`ihm.model.LocalizationDensity` to point to an external
         localization density, for example in MRC format;
       - :data:`ihm.System.locations` to point to other files relating
         to the modeling in general, such as a modeling control script
         (:class:`WorkflowFileLocation`) or a command script for a
         visualization package such as ChimeraX
         (:class:`VisualizationFileLocation`);
       - a :class:`ihm.protocol.Step` or :class:`ihm.analysis.Step` to
         describe an individual modeling step;
       - or a :class:`~ihm.startmodel.StartingModel` to describe how
         a starting model was constructed.

       :param str details: Additional details about the dataset, if known.
    """

    # 'details' can differ without affecting dataset equality
    _eq_keys = []
    _allow_duplicates = False

    def __init__(self, details=None):
        self.details = details

    # Locations compare equal iff they are the same class, have the
    # same attributes, and allow_duplicates=False
    def _eq_vals(self):
        """Return the value that stands for this object in comparisons.

           With `_allow_duplicates` set this is the object's identity, so
           the object only ever equals itself; otherwise it is a tuple of
           the class followed by each attribute named in `_eq_keys`.
        """
        if self._allow_duplicates:
            return id(self)
        else:
            return tuple([self.__class__]
                         + [getattr(self, x) for x in self._eq_keys])

    def __eq__(self, other):
        """Compare by `_eq_vals`; never equal to None or unrelated objects."""
        # Anything that cannot provide _eq_vals (None, str, int, ...) is not
        # comparable with us. Returning NotImplemented lets Python fall back
        # to its default (identity) comparison rather than raising
        # AttributeError from the other._eq_vals() lookup.
        if not hasattr(other, '_eq_vals'):
            return NotImplemented
        return self._eq_vals() == other._eq_vals()

    def __hash__(self):
        # Must stay consistent with __eq__, so hash the same value
        return hash(self._eq_vals())
class DatabaseLocation(Location):
    """A dataset held in an official database (PDB, EMDB, PRIDE, etc.).

       A subclass specific to the database should generally be used -
       for example, :class:`PDBLocation`, :class:`EMDBLocation`, or
       :class:`PRIDELocation` - although this base class can be used
       directly for "other" databases not currently supported by the
       IHM dictionary.

       :param str db_code: The accession code inside the database.
       :param str version: The version of the dataset in the database.
       :param str details: Additional details about the dataset, if known.
    """

    # Database name; the database-specific subclasses override this
    db_name = 'Other'

    # Two database locations are equal if they name the same database,
    # accession code and version
    _eq_keys = Location._eq_keys + ['db_name', 'access_code', 'version']

    def __init__(self, db_code, version=None, details=None):
        super(DatabaseLocation, self).__init__(details)
        self.version = version
        # The constructor argument is stored under a different name
        self.access_code = db_code

    def __str__(self):
        module_name = self.__module__
        class_name = self.__class__.__name__
        code = repr(self.access_code)
        return "<%s.%s(%s)>" % (module_name, class_name, code)
class EMDBLocation(DatabaseLocation):
    """A resource stored in the EMDB database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "EMDB"
class PDBLocation(DatabaseLocation):
    """A resource stored in the PDB database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "PDB"
class PDBDevLocation(DatabaseLocation):
    """A resource stored in the PDB-Dev database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "PDB-Dev"
class ModelArchiveLocation(DatabaseLocation):
    """A resource stored in Model Archive.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "MODEL ARCHIVE"
class BMRBLocation(DatabaseLocation):
    """A resource stored in the BMRB database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "BMRB"
class MassIVELocation(DatabaseLocation):
    """A resource stored in the MassIVE database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "MASSIVE"
class EMPIARLocation(DatabaseLocation):
    """A resource stored in the EMPIAR database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "EMPIAR"
class SASBDBLocation(DatabaseLocation):
    """A resource stored in the SASBDB database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "SASBDB"
class PRIDELocation(DatabaseLocation):
    """A resource stored in the PRIDE database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "PRIDE"
class JPOSTLocation(DatabaseLocation):
    """A resource stored in the JPOST database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "jPOSTrepo"
class BioGRIDLocation(DatabaseLocation):
    """A resource stored in the BioGRID database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "BioGRID"
class ProXLLocation(DatabaseLocation):
    """A resource stored in the ProXL database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "ProXL"
class IProXLocation(DatabaseLocation):
    """A resource stored in the iProX database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "iProX"
class AlphaFoldDBLocation(DatabaseLocation):
    """A resource stored in the AlphaFoldDB database.

       The parameters are described in :class:`DatabaseLocation`, and
       the usage of these objects is discussed in :class:`Location`.
    """

    db_name = "AlphaFoldDB"
class FileLocation(Location):
    """Base class for a single externally stored file or directory.

       :param str path: the location of the file or directory (this can
              be `None` if `repo` is set, to refer to the entire repository)
       :param repo: object that describes the repository containing the
              file, or `None` if it is stored on the local disk
       :type repo: :class:`Repository`
       :param str details: optional description of the file
    """

    # Subclasses set this to a description of the kind of file
    content_type = None

    # File locations are equal if repository, path and content type match
    _eq_keys = Location._eq_keys + ['repo', 'path', 'content_type']

    def __init__(self, path, repo=None, details=None):
        super(FileLocation, self).__init__(details)
        self.repo = repo
        if not repo:
            # Local file: it must exist so that its size can be recorded
            if not os.path.exists(path):
                raise ValueError("%s does not exist" % path)
            self.file_size = os.stat(path).st_size
            # Keep the absolute path, so that a later change of working
            # directory does not invalidate it
            self.path = os.path.abspath(path)
        else:
            # File in a repository: the path is stored as given, and the
            # size is unknown since the file is not local
            self.path = path
            self.file_size = None

    def __str__(self):
        module_name = self.__module__
        class_name = self.__class__.__name__
        shown_path = repr(self.path)
        return "<%s.%s(%s)>" % (module_name, class_name, shown_path)
class InputFileLocation(FileLocation):
    """A file, stored externally, that is used as input.

       The parameters are described in :class:`FileLocation`, and the
       usage of these objects is discussed in :class:`Location`.

       For example, this class would be used by any
       :class:`~ihm.dataset.Dataset` that isn't stored in
       a domain-specific database.
    """

    content_type = "Input data or restraints"
class OutputFileLocation(FileLocation):
    """A file, stored externally, that is used for output.

       The parameters are described in :class:`FileLocation`, and the
       usage of these objects is discussed in :class:`Location`.

       For example, this can point to an externally-stored
       :class:`model ensemble <ihm.model.Ensemble>` or a
       :class:`localization density <ihm.model.LocalizationDensity>`.
    """

    content_type = 'Modeling or post-processing output'
class WorkflowFileLocation(FileLocation):
    """A file, stored externally, that controls the workflow (e.g. a script).

       The parameters are described in :class:`FileLocation`, and the
       usage of these objects is discussed in :class:`Location`.

       These objects are typically used to give more information on how
       a :class:`~ihm.startmodel.StartingModel` was generated, how an
       individual :class:`ihm.protocol.Step` or :class:`ihm.analysis.Step`
       was performed, or to describe the overall modeling (by addition
       to :data:`ihm.System.locations`). This can be useful to capture fine
       details of the modeling that aren't covered by the mmCIF dictionary,
       and to allow models to be precisely reproduced.
    """

    content_type = 'Modeling workflow or script'
class VisualizationFileLocation(FileLocation):
    """A file, stored externally, that is used for visualization.

       The parameters are described in :class:`FileLocation`, and the
       usage of these objects is discussed in :class:`Location`.
    """

    content_type = 'Visualization script'
class Repository(object):
    """A repository containing modeling files, i.e. a collection of related
       files at a remote, public location. This can include code repositories
       such as GitHub, file archival services such as Zenodo, or any other
       service that provides a DOI, such as the supplementary information for
       a publication.

       This can also be used if the script plus related files are part of a
       repository, which has been archived somewhere with a DOI.
       This will be used to construct permanent references to files
       used in this modeling, even if they haven't been uploaded to
       a database such as PDB or EMDB.

       See :meth:`ihm.System.update_locations_in_repositories`.

       See also :class:`FileLocation`.

       :param str doi: the Digital Object Identifier for the repository
       :param str root: the path on the local disk to the top-level
              directory of the repository, or `None` if files in this
              repository aren't checked out.
       :param str url: If given, a location that this repository can be
              downloaded from.
       :param str top_directory: If given, prefix all paths for files in this
              repository with this value. This is useful when the archived
              version of the repository is found in a subdirectory at the
              URL or DOI (for example, GitHub repositories archived at
              Zenodo get placed in a subdirectory named for the repository
              and git hash).
       :param str details: Additional text describing this repository
    """

    reference_type = 'DOI'

    # Two repositories compare equal if their DOIs and URLs are the same
    def __eq__(self, other):
        # A repository is routinely compared with None (a FileLocation on
        # the local disk has repo=None), so anything that is not a
        # Repository must be rejected rather than probed for .doi/.url
        if not isinstance(other, Repository):
            return NotImplemented
        return self.doi == other.doi and self.url == other.url

    def __hash__(self):
        # Consistent with __eq__: same DOI and URL give the same hash
        return hash((self.doi, self.url))

    def __str__(self):
        return "<ihm.location.Repository(%r)>" % self.doi

    def __init__(self, doi, root=None, url=None, top_directory=None,
                 details=None):
        # todo: DOI should be optional (could also use URL, local path)
        self.doi = doi
        self.url, self.top_directory = url, top_directory
        self.details = details
        if root is None:
            # Not checked out locally; always define the attribute so that
            # _update_in_repos can test for it
            self._root = None
        else:
            # Store absolute path in case the working directory changes later
            self._root = os.path.abspath(root)

    reference = property(lambda self: self.doi)

    def __get_reference_provider(self):
        # The reference (DOI) may be None; 'in' on None would raise TypeError
        if self.reference and 'zenodo' in self.reference:
            return 'Zenodo'
        return None
    reference_provider = property(__get_reference_provider)

    def __get_refers_to(self):
        if self.url:
            return 'Archive' if self.url.endswith(".zip") else 'File'
        return 'Other'
    refers_to = property(__get_refers_to)

    @staticmethod
    def _update_in_repos(fileloc, repos):
        """If the given FileLocation maps to somewhere within one of the
           passed repositories, update it to reflect that.

           :param fileloc: object with `repo` and `path` attributes; it is
                  modified in place. Nothing is done if `repo` is already
                  set.
           :param repos: iterable of :class:`Repository` objects to search.
                  Repositories without a local root are ignored.
        """
        if fileloc.repo:
            return
        orig_path = fileloc.path
        # A path that leaves the repository is exactly '..' or starts with
        # '..' plus a separator; a plain startswith('..') test would also
        # reject files inside the repository such as '..hidden/file'
        outside_prefix = os.pardir + os.sep
        for repo in repos:
            root = getattr(repo, '_root', None)
            if root is None:
                # No local checkout, so no local file can be inside it
                continue
            try:
                relpath = os.path.relpath(orig_path, root)
            except ValueError:
                # Raised on Windows when the two paths are on different
                # drives; the file cannot be inside this repository
                continue
            if relpath == os.pardir or relpath.startswith(outside_prefix):
                continue
            # Prefer the shortest paths if multiple repositories can match
            if fileloc.repo is None or len(fileloc.path) > len(relpath):
                fileloc.repo = repo
                fileloc.path = relpath

    def _get_full_path(self, path):
        """Prefix the given path with our top-level directory"""
        return os.path.join(self.top_directory or "", path)