Source code for builder.chimera

from protlego.definitions import logger
from moleculekit.projections.metricdistance import MetricDistance
from protlego.structural.clusters import *
from protlego.structural.hh_networks import *
import os
from moleculekit.support import string_to_tempfile
import urllib.request

[docs]def get_SCOP_domain(domain): """ :param domain: str. The SCOPe domain to download as pdb :return: """ logger.info(f'Attempting to download domain {domain} from the SCOP server') url = f'https://scop.berkeley.edu/astral/pdbstyle/ver=2.07&id={domain}&output=text' connected = False while not connected: try: response = urllib.request.urlopen(url) text = response.read() except Exception as e: import time logger.warning(f'Failed to connect to SCOP with error {e}. Sleeping 5s and retrying.') time.sleep(5) continue connected = True filepath = string_to_tempfile(text.decode('ascii'), 'pdb') logger.info(f"File downloaded as {filepath}") return filepath
[docs]def get_FUZZLE_hhs(domain): """ :param domain: str. The domain to download from Fuzzle as hhs :return: filepath: path where the file is located. """ logger.info(f'Attempting to download hhs file for {domain} from the FUZZLE server') url = f'https://fuzzle.uni-bayreuth.de/hhs/scop95_2.07.psi.hhs/{domain}.hhs' connected = False while not connected: try: response = urllib.request.urlopen(url) text = response.read() except Exception as e: import time logger.warning(f'Failed to connect to FUZZLE with error {e}. Sleeping 5s and retrying.') time.sleep(5) continue connected = True filepath = string_to_tempfile(text.decode('ascii'), 'hhs') logger.info(f"File downloaded as {filepath}") return filepath
[docs]class Chimera(Molecule): def add_crossover(self, crossover: int): start = self.resid[0] end = self.resid[-1] self.reps.add(sel=f"protein and resid '{start}' to '{crossover}'", style='NewCartoon', color=1) self.reps.add(sel=f"protein and resid '{crossover+1}' to '{end}'", style='NewCartoon', color=7) def __str__(self): # TODO fix the naming of parents. return str(f"Protein with:" f"\nNumber of residues: {self.numResidues}" f"\nNumber of atoms: {self.numAtoms}") @property def sasa(self): return
[docs] def calc_resid_dist(self, resid1: int, resid2: int) -> np.array: """ Returns the distance between the alpha carbons of two residues" :param resid1:index of residue 1 in the PDB structure :param resid2:index of residue 2 in the PDB structure :return: np.array. """ coord1 = self.get("coords", sel=f'resid {resid1} and name CA') coord2 = self.get("coords", sel=f'resid {resid2} and name CA') return cdist(coord1, coord2)
[docs] def compute_hydrophobic_clusters(self, chain: str = 'A', sel: str = "protein and not backbone and noh and resname ILE VAL LEU", cutoff_area: float = 10): """ :param chain: Chain in the PDB to compute the hydrophobic clusters. Examples: "A", "A B C". Default: "A" :param sel: VMD selection on which to compute the clusters. Default is every sidechain heavy atom ILE, VAL and LEU residues. "protein and not backbone and noh and resname ILE VAL LEU" :return: A representation for each cluster """ clusters = None # Removing previous visualizations [self.reps.remove(index) for index, rep in reversed(list(enumerate(self.reps.replist)))] resids = np.unique(self.get("resid", sel=f"{sel} and chain {chain}")) dims = len(resids) indices = self.get("index", sel=f"{sel} and chain {chain}") dims_indices = len(self.get("index", sel=f"protein and chain {chain}")) logger.info("Initializing final output") contacts = np.zeros((dims, dims)) atoms_to_atoms = np.zeros((dims_indices, dims_indices)) logger.info("Computing clusters") for index in indices: a = Atom(index, self) if not a.neighbor_indices.any(): continue _, contacts = fill_matrices(a, self, atoms_to_atoms, contacts, indices, resids) graph = create_graph(contacts, resids, cutoff_area=cutoff_area) comp, _ = label_components(graph) if comp.a.any(): clusters = add_clusters(self, graph, comp) else: logger.warning("There are not residues in contact for this selection") return clusters
[docs] def compute_hydrogen_networks(self, sidechain_only: bool = True): """ Analyzes the hydrogen bonds following the Baker Hubbard algorithm and clusters them. (Two residues are in the same cluster if there's a path of hydrogen networks between them). This function adds protons to the protein. between them :param sidechain_only: The whole residue or sidechain only. Note: Computing networks including backbone leads to very large clusters harder to visualize. :return: A representation for each network """ # Remove previous structural representations bonds = None [self.reps.remove(index) for index, rep in reversed(list(enumerate(self.reps.replist)))] # [self.reps.remove(index) for index, rep in reversed(list(enumerate(self.reps.replist))) if # rep.style == 'VDW'] newmol = proteinPrepare(self) newmol.write(f"/tmp/structure.pdb") t = md.load(f"/tmp/structure.pdb") os.remove(f"/tmp/structure.pdb") hbonds = md.baker_hubbard(t, sidechain_only=sidechain_only) graph = make_graph_hh(hbonds, t) comp, _ = label_components(graph) if comp.a.any(): bonds = add_networks(self, graph, comp) else: logging.warning("No Hydrogen Bonds found") return bonds
def compute_salt_bridges(self): salts = [] [self.reps.remove(index) for index, rep in reversed(list(enumerate(self.reps.replist)))] metr = MetricDistance('sidechain and acidic and element O', 'sidechain and basic and element N', metric="contacts", threshold=3.2, pbc=False) try: data = metr.project(self) except: logger.error("Molecule has no basic or acidic residues") raise if len(np.shape(data)) > 1: data = data[0].copy() # handling NMR structures mapping = metr.getMapping(self) self.reps.add(sel='protein', style='NewCartoon', color=8) if mapping[data].atomIndexes.values.any(): for bond in mapping[data].atomIndexes.values: print(bond[0],bond[1]) resid1 = self.get("resid", sel=f"same residue as index {bond[0]}")[0] resid2 = self.get("resid", sel=f"same residue as index {bond[1]}")[0] if [resid1, resid2] not in salts: salts.append([resid1, resid2]) self.reps.add(f"protein and resid {resid1}", style="Licorice", color="1") self.reps.add(f"protein and resid {resid2}", style="Licorice", color="0") else: logger.warning("No salt bridges found in this protein") return salts