Source code for pbxplore.structure.PDB

#! /usr/bin/env python
# -*- coding: utf-8 -*-



# Standard modules
import os
import gzip

# Local module
from .structure import Atom, Chain

# =============================================================================
# Data
# =============================================================================
# File extensions accepted for PDB and PDBx/mmCIF files.
# They are matched against the file name with str.endswith(), which is
# case-sensitive, hence the explicit lower/upper-case variants.
# NOTE(review): '.ENT4' looks like a typo of '.ENT' -- TODO confirm. It is
# kept so that anything accepted so far is still accepted.
PDB_EXTENSIONS = ('.pdb', '.PDB', '.pdb.gz', '.pdb.GZ', '.PDB.gz', '.PDB.GZ',
                  '.ent', '.ENT', '.ENT4')
PDBx_EXTENSIONS = ('.cif', '.CIF', '.cif.gz', '.cif.GZ', '.CIF.gz', '.CIF.GZ')


class PDB:
    """
    Class to read PDB and PDBx/mmCIF files.

    The file is parsed when the object is created; the chains found are
    stored, in file order, in the ``chains`` attribute.

    Parameters
    ----------
    name : str
        Path to the structure file. The parser is selected from the file
        extension (``PDB_EXTENSIONS`` or ``PDBx_EXTENSIONS``).

    Attributes
    ----------
    filename : str
        Path given at construction.
    chains : list
        Chain objects read from the file.

    Raises
    ------
    IOError
        If the path is not an existing file or if its extension is not
        a supported one.
    """
    def __init__(self, name):
        """
        Default constructor for PDB file.

        Parameters
        ----------
        name : str
            Path to the PDB or PDBx/mmCIF file to read.

        Raises
        ------
        IOError
            If `name` is not an existing file or has an unsupported extension.
        """
        self.filename = name
        self.chains = []
        # check that file exists
        if not os.path.isfile(self.filename):
            raise IOError("Cannot read {}: does not exist or is not a file."
                          .format(self.filename))
        # select the parser from the file extension
        if self.filename.endswith(PDB_EXTENSIONS):
            self.__read_PDB()
        elif self.filename.endswith(PDBx_EXTENSIONS):
            self.__read_PDBx()
        else:
            raise IOError("File extension is not a valid one. "
                          "Corrects one are {}".format(", ".join(PDB_EXTENSIONS + PDBx_EXTENSIONS)))

    def _open_file(self):
        """
        Open the structure file in text mode.

        Files whose name ends with '.gz' or '.GZ' are opened through gzip,
        any other file with the built-in open().

        Returns
        -------
        file object
            Text-mode handle on ``self.filename``; the caller must close it.
        """
        if self.filename.endswith(('.gz', '.GZ')):
            # for compressed file
            return gzip.open(self.filename, 'rt')
        return open(self.filename, 'rt')

    def __read_PDB(self):
        """
        Read PDB file.

        Only lines whose record name (columns 1-6) is MODEL, ATOM, TER or
        ENDMDL are used. Each completed chain is appended to ``self.chains``.
        """
        # create new chain
        chain = Chain()
        # get chains from file
        # A PDB file can have several models
        # that can have several chains themselves.
        # The context manager closes the file even if parsing raises.
        with self._open_file() as f_in:
            for line in f_in:
                flag = line[0:6].strip()
                if flag == "MODEL":
                    chain.set_model(line.split()[1])
                if flag == "ATOM":
                    atom = Atom.read_from_PDB(line)
                    # store current chain and clean object
                    # NOTE(review): set_model() is only called on MODEL lines,
                    # so the fresh Chain created here (and after TER below)
                    # is not given the current model number -- confirm
                    # this is intended.
                    if chain.size() != 0 and chain.name != atom.chain:
                        self.chains.append(chain)
                        chain = Chain()
                    # append structure with atom
                    chain.add_atom(atom)
                # store chain after end of model or chain
                if chain.size() != 0 and flag in ["TER", "ENDMDL"]:
                    self.chains.append(chain)
                    chain = Chain()
        # store last chain (file without trailing TER/ENDMDL)
        if chain.size() != 0:
            self.chains.append(chain)

    def __read_PDBx(self):
        """
        Read PDBx/mmCIF file

        The file is read in two steps: the ``_atom_site.`` field names and
        the lines starting with 'ATOM' are collected first, then the atoms
        are built and grouped into chains appended to ``self.chains``.
        """
        # get chains from file
        # A PDBx file can have several models
        # that can have several chains themselves.
        atom_fields = []
        atom_coordinates = []
        # The context manager closes the file even if reading raises.
        with self._open_file() as f_in:
            for line in f_in:
                item = line.strip()
                # then store atom field definitions
                if item.startswith("_atom_site."):
                    atom_fields.append(item.replace("_atom_site.", ""))
                # then store atom coordinates
                # (only once at least one field definition has been seen)
                if atom_fields and item.startswith('ATOM'):
                    atom_coordinates.append(item)
        # separate all chains and store atoms
        chain = Chain()
        for atom_line in atom_coordinates:
            atom = Atom.read_from_PDBx(atom_line, atom_fields)
            # once the chain holds exactly one atom, tag the chain with
            # the model of the atom being read
            if chain.size() == 1:
                chain.set_model(atom.model)
            # store current chain when chain name changed
            if chain.size() != 0 and chain.name != atom.chain:
                # store model number only if there is more than one model
                # NOTE(review): the model is blanked whenever the next chain
                # belongs to the same model, which also happens in
                # multi-model files -- confirm this is intended.
                if chain.model == atom.model:
                    chain.set_model("")
                self.chains.append(chain)
                chain = Chain()
            # store current chain when model number changed
            if chain.size() != 0 and chain.model != atom.model:
                self.chains.append(chain)
                chain = Chain()
            # append structure with atom
            chain.add_atom(atom)
        # store last chain
        if chain.size() != 0:
            self.chains.append(chain)

    def get_chains(self):
        """
        Give chains, one at a time.

        Returns
        -------
        generator
            Chains in PDB structure.
        """
        for chain in self.chains:
            yield chain

    @property
    def nb_chains(self):
        """
        Give the number of chains

        Returns
        -------
        int
            Number of chains stored in ``self.chains``.
        """
        return len(self.chains)