From b2efa2511e4fceaf98ac8d5bd2df86603b9e5e66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez-Delgado?= <jgonzalezd@laas.fr> Date: Wed, 8 Nov 2023 14:05:06 +0000 Subject: [PATCH] Add option to only analyze an arbitrary sequence subset --- wario/get_coordinates.ipynb | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/wario/get_coordinates.ipynb b/wario/get_coordinates.ipynb index 09c3727..849ad6e 100644 --- a/wario/get_coordinates.ipynb +++ b/wario/get_coordinates.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "a3eaebb3", "metadata": {}, "outputs": [], @@ -21,12 +21,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "23d430e2", "metadata": {}, "outputs": [], "source": [ - "def get_coordinates(conf_name, pdb = None, traj = None, res_start = None, res_end = None, which_chain = None):\n", + "def get_coordinates(conf_name, pdb = None, traj = None, res_start = None, res_end = None, seq_subset = None, which_chain = None):\n", " \n", " aa_list = list([\"ALA\", \"ARG\", \"ASN\", \"ASP\", \"CYS\", \"GLN\", \"GLU\",\"GLY\",\"HIS\", \"ILE\", \"LEU\", \"LYS\", \"MET\", \"PHE\",\"PRO\", \"SER\", \"THR\", \"TRP\", \"TYR\", \"VAL\"])\n", " \n", @@ -102,17 +102,30 @@ " \n", " # Correct division by 10\n", " df.loc[:,['coor_x', 'coor_y', 'coor_z']] = df.loc[:,['coor_x', 'coor_y', 'coor_z']]*10\n", - " \n", - " if res_start is not None:\n", + " \n", + " \n", + " if seq_subset is not None and res_start is None and res_end is None: \n", " \n", - " df = df.loc[df.Position >= res_start]\n", - " L = len(np.unique(df.Position))\n", + " df = df.loc[np.isin(df.Position, seq_subset)]\n", " \n", - " if res_end is not None:\n", + " if seq_subset is not None and (res_start is not None or res_end is not None):\n", + " print('\\nseq_subset has been ignored as res_start or res_end are not set to None. Set res_start and res_end to None to use seq_subset.\\n')\n", + "\n", + " if res_start is None:\n", " \n", - " df = df.loc[df.Position <= res_end]\n", - " L = len(np.unique(df.Position))\n", + " res_start = np.min(df.Position)\n", + " \n", + " if res_end is None:\n", + " \n", + " res_end = np.max(df.Position)\n", + " \n", + " if seq_subset is None:\n", + " \n", + " seq_subset = np.arange(res_start, res_end + 1, 1)\n", " \n", + " L = len(np.unique(df.Position))\n", + " \n", + " \n", " # Build reference systems\n", " \n", " basis_angles = np.array([1.917213, 1.921843, 2.493444])\n", @@ -173,8 +186,10 @@ " d = {item: idx for idx, item in enumerate(aa_list)}\n", " aa_index = np.array([d.get(item) for item in aa_seq])\n", " aa_pairs = np.concatenate([aa_index[pos_pairs[:,0]][:,None],aa_index[pos_pairs[:,1]][:, None]], axis = 1)\n", + " pos_pairs = np.concatenate([seq_subset[pos_pairs[:,0]][:,None],seq_subset[pos_pairs[:,1]][:, None]], axis = 1)\n", + "\n", " positions_and_frames = np.concatenate([relative_pairwise_positions, relative_pairwise_or1,\n", - " relative_pairwise_or2, aa_pairs], axis = 1) \n", + " relative_pairwise_or2, aa_pairs, pos_pairs], axis = 1) \n", " \n", " return positions_and_frames" ] -- GitLab