Skip to content
Snippets Groups Projects
Commit 92a795c3 authored by Javier González-Delgado's avatar Javier González-Delgado
Browse files

Add option to only analyze an arbitrary sequence subset

parent 5365b706
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:29767a81 tags: %% Cell type:code id:29767a81 tags:
``` python ``` python
# Load notebooks with required functions # Load notebooks with required functions
import ipynb import ipynb
from ipynb.fs.full.get_coordinates import * from ipynb.fs.full.get_coordinates import *
from ipynb.fs.full.get_contacts import * from ipynb.fs.full.get_contacts import *
# Load required libraries # Load required libraries
import numpy as np import numpy as np
import os import os
from tqdm import tqdm from tqdm import tqdm
from joblib import Parallel, delayed from joblib import Parallel, delayed
from functools import partial from functools import partial
import mdtraj as md import mdtraj as md
import itertools import itertools
import pandas as pd import pandas as pd
import warnings #Optional import warnings #Optional
warnings.filterwarnings("ignore") #Optional warnings.filterwarnings("ignore") #Optional
``` ```
%% Cell type:code id:32f62d90 tags: %% Cell type:code id:32f62d90 tags:
``` python ``` python
def wcontact_matrix(thresholds, num_cores = 1, prot_name = None, save_to = None, pdb_folder = None, xtc_file = None, top_file = None, start = None, end = None, subsequence = None, select_chain = None, name_variable = '__main__'):
    """Build the per-conformation contact matrix of an ensemble.

    For every conformation, coordinates are obtained with ``get_coordinates``
    and turned into a contact vector with ``get_contacts``; the vectors are
    stacked into a DataFrame with one row per conformation.

    Parameters
    ----------
    thresholds
        Forwarded unchanged to ``get_contacts``.
    num_cores
        Number of joblib worker processes (``n_jobs``).
    prot_name
        Prefix of the output file; required whenever ``save_to`` is given.
    save_to
        Output directory. When ``None`` the DataFrame is returned instead of
        being written to ``<save_to>/<prot_name>_wcontmatrix.txt``.
    pdb_folder
        Directory whose entries are the conformations (one per entry).
        Mutually exclusive with ``xtc_file`` / ``top_file``.
    xtc_file, top_file
        Trajectory and topology files loaded with ``mdtraj.load_xtc``. A
        ``.gro`` topology is first converted to an mdtraj topology object.
    start, end, subsequence, select_chain
        Forwarded to ``get_coordinates`` as ``res_start``, ``res_end``,
        ``seq_subset`` and ``which_chain`` respectively.
    name_variable
        Value compared against this module's ``__name__``; the parallel
        computation only runs when both are equal.

    Returns
    -------
    pandas.DataFrame or None
        The contact matrix when ``save_to`` is ``None``; otherwise ``None``
        (the matrix is written to disk).

    Raises
    ------
    SystemExit
        If the output arguments or the input-source arguments are
        inconsistent.
    """
    # A destination folder without a file prefix is the only invalid output
    # combination: save_to = None simply means "return the DataFrame".
    if save_to is not None and prot_name is None:
        raise SystemExit('Please set save_to = None or prot_name != None and save_to != None.')

    from_pdb_folder = pdb_folder is not None and xtc_file is None and top_file is None
    from_trajectory = pdb_folder is None and xtc_file is not None and top_file is not None

    if from_pdb_folder:
        traj_file = None
        conf_list = os.listdir(pdb_folder)
        N_conformations = len(conf_list)  # Number of conformations
    elif from_trajectory:
        if top_file.endswith(".gro"):
            # Close the .gro reader as soon as the topology has been extracted.
            with md.formats.GroTrajectoryFile(top_file) as gro_reader:
                top_file = gro_reader.topology
        traj_file = md.load_xtc(xtc_file, top = top_file)
        N_conformations = len(traj_file)
        conf_list = np.arange(N_conformations)
    else:
        raise SystemExit('Please set pdb_folder != None and xtc_file = top_file = None, or pdb_folder = None and xtc_file != None, top_file != None.')

    def comp_function(conf_comp, thresholds_comp, pdb_data_comp, traj_data_comp, start_comp, end_comp, subset_comp, sel_chain):
        # Contacts of a single conformation.
        coordinates = get_coordinates(conf_name = conf_comp, pdb = pdb_data_comp, traj = traj_data_comp, res_start = start_comp, res_end = end_comp, seq_subset = subset_comp, which_chain = sel_chain)
        return get_contacts(coordinates, thresholds_comp)

    it_function = partial(comp_function, thresholds_comp = thresholds, pdb_data_comp = pdb_folder, traj_data_comp = traj_file, start_comp = start, end_comp = end, subset_comp = subsequence, sel_chain = select_chain)

    # The first conformation fixes the row width; it is evaluated outside the
    # error wrapper, so a failure here propagates to the caller.
    N_pairs = len(it_function(conf_list[0]))

    def it_function_error(conf):
        # Best effort: a conformation that cannot be processed yields a NaN
        # row. Only ordinary errors are absorbed, so Ctrl-C still interrupts.
        try:
            return it_function(conf)
        except Exception:
            return np.repeat(np.nan, N_pairs)

    # NOTE(review): the original indentation was not recoverable; everything
    # below is assumed to sit inside this guard -- confirm against the notebook.
    if __name__ == name_variable:
        os.environ['PYTHONWARNINGS'] = 'ignore'
        wcont_matrix = Parallel(n_jobs = num_cores, prefer = 'processes')(delayed(it_function_error)(i) for i in tqdm(conf_list))
        wcont_data = pd.DataFrame(np.reshape(np.asarray(wcont_matrix), [len(conf_list), N_pairs]))

        if save_to is None:
            return wcont_data
        # prot_name is guaranteed to be set here by the check at the top.
        wcont_data.to_csv('_'.join(['/'.join([save_to, prot_name]), 'wcontmatrix.txt']), header = None, index = None, sep = ' ')
``` ```
%% Cell type:code id:6046b36d tags: %% Cell type:code id:6046b36d tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment