from protlego.database.data import Result, Hit
from protlego.definitions import logger
from graph_tool.all import *
from protlego.database.data import fetch_id
from protlego.network.colors import *
import os
from protlego.builder.chimera import Chimera
from protlego.builder.builder import Builder
import numpy as np
from typing import Tuple
[docs]class Network():
"""
Graph constructor and visualizer
Examples
--------
hits = fetch_subspace(*args)
g = Network(hits)
g.create_graph()
g.plot_graph(labels=['ids','domains','folds'])
g.get_fragments()
g.view_vertex(v1)
g.view_edge(v1,v2)
g.view_component(n)
"""
def __init__(self, hits: Result) -> None:
self.hits = hits
self.graph = None
self.comp = None
self.degrees = None
def _add_new_vertex_universe(self, graph: Graph, o: Hit, boolean: int) -> Vertex:
v = graph.add_vertex()
if boolean == 0:
graph.vp.cluster[v] = o.q_cluster
graph.vp.domain[v] = o.query
graph.vp.scopclass[v] = o.q_sufam_id.split('.')[0]
graph.vp.fold[v] = o.q_fold_id
graph.vp.sufam[v] = o.q_sufam_id
graph.vp.fam[v] = o.q_scop_id
graph.vp.start[v].append(o.q_start)
graph.vp.end[v].append(o.q_end)
else:
graph.vp.cluster[v] = o.s_cluster
graph.vp.domain[v] = o.sbjct
graph.vp.scopclass[v] = o.s_sufam_id.split('.')[0]
graph.vp.fold[v] = o.s_fold_id
graph.vp.sufam[v] = o.s_sufam_id
# Then the 400s in a different folder
graph.vp.fam[v] = o.s_scop_id
graph.vp.start[v].append(o.s_start)
graph.vp.end[v].append(o.s_end)
return v
[docs] def create_network(self) -> Graph:
"""
Creates a network based on the hits. It draws a node for every unique cluster
and linkes two cluster when they have a fragment in common.
:return: graph. A Graph object
"""
graph = Graph(directed=False)
# Nodes properties
graph.vp.cluster = graph.new_vertex_property("string")
graph.vp.domain = graph.new_vertex_property("string")
graph.vp.scopclass = graph.new_vertex_property("string")
graph.vp.fold = graph.new_vertex_property("string")
graph.vp.sufam = graph.new_vertex_property("string")
graph.vp.fam = graph.new_vertex_property("string")
graph.vp.start = graph.new_vertex_property("vector<int>")
graph.vp.end = graph.new_vertex_property("vector<int>")
# Edge properties
graph.ep.prob = graph.new_edge_property("int")
graph.ep.id = graph.new_edge_property("int")
graph.ep.ident = graph.new_edge_property("int")
graph.ep.no = graph.new_edge_property("int")
graph.ep.cols = graph.new_edge_property("int")
graph.ep.rmsd = graph.new_edge_property("float")
for o in self.hits.hits:
# vertex 1
if o.query == o.sbjct: continue # do not include links to themselves
vertices_in_v1 = find_vertex(graph, graph.vp.cluster, o.q_cluster)
if vertices_in_v1:
v1 = vertices_in_v1[0]
graph.vp.start[v1].append(o.q_start)
graph.vp.end[v1].append(o.q_end)
else:
v1 = self._add_new_vertex_universe(graph, o, 0)
# Vertex2
vertices_in_v2 = find_vertex(graph, graph.vp.cluster, o.s_cluster)
if vertices_in_v2:
v2 = vertices_in_v2[0]
graph.vp.start[v2].append(o.s_start)
graph.vp.end[v2].append(o.s_end)
else:
v2 = self._add_new_vertex_universe(graph, o, 1)
# Add edge
ae = graph.edge(v1, v2)
if not ae:
ae = graph.add_edge(v1, v2)
graph.ep.prob[ae] = o.prob
graph.ep.id[ae] = o.id
graph.ep.ident[ae] = o.ident
graph.ep.cols[ae] = o.cols
graph.ep.no[ae] = o.no
graph.ep.rmsd[ae] = o.rmsd_tm_pair
# save the position
pos = sfdp_layout(graph)
graph.vp.pos = graph.new_vertex_property("vector<double>")
graph.vp.pos = pos
self.graph = graph
return graph
[docs] def plot_graph(self, graph, fill, **keyword_parameters):
"""
This function plots the graph
with customized labels
fill = ['fam','sufam','fold','class']
keyword_parameters = labels, output (filename)
labels can be: "domain","fam","sufam","fold","scopclass"
"""
if fill == 'class':
graph.vertex_properties['class_color'] = graph.new_vertex_property("string")
for v in graph.vertices():
graph.vertex_properties['class_color'][v] = fillcolors['class_color'][graph.vp.scopclass[v]]
if fill == 'fold' or fill == 'sufam' or fill == 'fam':
graph.vertex_properties[f'{fill}_color'] = graph.new_vertex_property("string")
for v in graph.vertices():
graph.vertex_properties[f'{fill}_color'][v] = fillcolors[f'{fill}_color'][
graph.vertex_properties[fill][v]]
if not keyword_parameters:
graph_draw(graph, graph.vp.pos, output_size=(1500, 1500),
vertex_fill_color=graph.vertex_properties[f"{fill}_color"])
if 'labels' in keyword_parameters:
graph_draw(graph, graph.vp.pos, output_size=(1500, 1500),
vertex_fill_color=graph.vertex_properties[f"{fill}_color"],
vertex_text=graph.vertex_properties[keyword_parameters['labels']])
if 'output' in keyword_parameters:
graph_draw(graph, graph.vp.pos, output_size=(1500, 1500),
vertex_fill_color=graph.vertex_properties[f"{fill}_color"],
vertex_font_size=8, output=keyword_parameters['output'])
@property
def fragments(self):
"""
This function creates and prints out the number of fragments in the graph
:rtype : integer, number of fragments
"""
if not self.graph:
logger.info(" You need to create a network first before computing its sizes."
" Call create_network(). Exiting...")
return
self.comp, hist = label_components(self.graph)
self.numFrags = max(self.comp.a) + 1
print("There are ", self.numFrags, " fragments")
return self.comp
def vertex_of_fragment(self, frag: int) -> list:
if not self.comp:
_ = self.fragments()
frag = [i for i, x in enumerate(self.comp.a) if x == frag]
return frag
[docs] def get_representative_domain(self, frag: int) -> int:
"""
:param frag:
:return:
"""
if not self.comp:
_ = self.fragments()
degrees=[]
for vertex in self.graph.vertices():
v1 = self.graph.vertex(vertex)
degrees.append(v1.out_degree())
degree = [(x, i) for i, x in enumerate(degrees) if self.comp.a[i] == frag]
most_connected_vertices = [x[1] for x in degree if x[0] == max(degree)[0]]
# if there are several vertices equally connected, select that one
# whose length is the closest to the average.
if len(most_connected_vertices) > 1:
target = self.sizes[frag][0]
verts_len = []
for vertex in most_connected_vertices:
start = int(round(np.mean(self.graph.vp.start[vertex])))
end = int(round(np.mean(self.graph.vp.end[vertex])))
length = int(end) - int(start)
verts_len.append(length)
vertice = most_connected_vertices[min(range(len(verts_len)), key=lambda i: abs(verts_len[i] - target))]
else:
vertice = most_connected_vertices[0]
return vertice
@property
def sizes(self):
"""
Computes the average size for each fragment.
:return: list of tuples with the average and the stds of sizes (float)
"""
if not self.comp:
_ = self.fragments
sizes = []
for fragment_index in range(max(self.comp.a) + 1):
frag_sizes = []
frag = [i for i, x in enumerate(self.comp.a) if x == fragment_index]
vfilt = self.graph.new_vertex_property('bool')
for i in frag:
vfilt[i] = True
sub = GraphView(self.graph, vfilt)
sizes.append((np.mean([self.graph.ep.cols[edge] for edge in sub.edges()]),
np.std([self.graph.ep.cols[edge] for edge in sub.edges()])))
return sizes
[docs] def show_vertex(self, vertex: Graph.vertex) -> Chimera:
"""
Shows the protein that corresponds to that specific vertex with the
fragment colored in red
:param vertex: A Graph.vertex object. The domain to be shown,
:return: A Chimera object with an internal representation of the fragment
"""
graph = self.graph
domain = graph.vp.domain[vertex]
start = int(round(np.mean(graph.vp.start[vertex])))
end = int(round(np.mean(graph.vp.end[vertex])))
domain_path = Chimera.get_SCOP_domain(domain)
mol = Chimera(filename=domain_path, validateElements=False)
mol.renumberResidues()
mol.reps.add(sel='protein', style='NewCartoon', color=8)
mol.reps.add(sel=f"protein and resid '{start}' to '{end}'", style='NewCartoon', color=1)
mol.view(name=domain)
return mol
[docs] def show_edge(self, edge: Graph.edge) -> Tuple[Chimera, Chimera]:
"""
Shows an alignment of the two domains that the edge links with their respective
fragment they have in common colored in red
:param edge:
:return:
"""
graph = self.graph
id = graph.ep.id[edge]
hit = fetch_id(id)
a = Builder(hit)
aln = a.get_alignment(hit.query, hit.no)
a._get_pairs(aln)
a.superimpose_structures(aln) # made with global alignment
query_mol = a.qaPDB[0].copy()
sbjct_mol = a.saPDB[0].copy()
qstart = query_mol.get("resid", sel=f"protein and name CA and same residue as index {a.global_qpairs[0][0]}")[0]
qend = query_mol.get("resid", sel=f"protein and name CA and same residue as index {a.global_qpairs[0][-1]}")[0]
sstart = sbjct_mol.get("resid", sel=f"protein and name CA and same residue as index {a.global_spairs[0][0]}")[0]
send = sbjct_mol.get("resid", sel=f"protein and name CA and same residue as index {a.global_spairs[0][-1]}")[0]
query_mol.reps.add(sel='protein', style='NewCartoon', color=8)
query_mol.reps.add(sel=f"protein and resid '{qstart}' to '{qend}'", style='NewCartoon', color=1)
sbjct_mol.reps.add(sel='protein', style='NewCartoon', color=8)
sbjct_mol.reps.add(sel=f"protein and resid '{sstart}' to '{send}'", style='NewCartoon', color=1)
query_mol.view()
sbjct_mol.view()
print(hit.query,hit.sbjct,qstart,qend,sstart,send)
return query_mol, sbjct_mol
[docs] def show_component(self, fragment: int):
"""
Aligns the fragment(s) of all domains belonging to a component, provided the component
has less than 50 vertices.
Note that all domains in the same component present a fragment
in common in different protein environments
:param fragment:
:return:
"""
graph = self.graph
if not self.comp:
comp, _ = label_components(graph)
else:
comp = self.comp
vfilt = graph.new_vertex_property('bool')
frag = [i for i, x in enumerate(comp.a) if x == fragment]
for i in frag:
vfilt[i] = True
sub = GraphView(graph, vfilt)
if sub.num_vertices() > 50:
raise RuntimeError("The component to visualize is too large")
# TODO