From b2efa2511e4fceaf98ac8d5bd2df86603b9e5e66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Gonz=C3=A1lez-Delgado?= <jgonzalezd@laas.fr>
Date: Wed, 8 Nov 2023 14:05:06 +0000
Subject: [PATCH] Add option to only analyze an arbitrary sequence subset

---
 wario/get_coordinates.ipynb | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/wario/get_coordinates.ipynb b/wario/get_coordinates.ipynb
index 09c3727..849ad6e 100644
--- a/wario/get_coordinates.ipynb
+++ b/wario/get_coordinates.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "a3eaebb3",
    "metadata": {},
    "outputs": [],
@@ -21,12 +21,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "23d430e2",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def get_coordinates(conf_name, pdb = None, traj = None, res_start = None, res_end = None, which_chain = None):\n",
+    "def get_coordinates(conf_name, pdb = None, traj = None, res_start = None, res_end = None, seq_subset = None, which_chain = None):\n",
     "    \n",
     "    aa_list = list([\"ALA\", \"ARG\", \"ASN\", \"ASP\", \"CYS\", \"GLN\", \"GLU\",\"GLY\",\"HIS\", \"ILE\", \"LEU\", \"LYS\", \"MET\", \"PHE\",\"PRO\", \"SER\", \"THR\", \"TRP\", \"TYR\", \"VAL\"])\n",
     "    \n",
@@ -102,17 +102,30 @@
     "        \n",
     "        # Correct division by 10\n",
     "        df.loc[:,['coor_x', 'coor_y', 'coor_z']] = df.loc[:,['coor_x', 'coor_y', 'coor_z']]*10\n",
-    "               \n",
-    "    if res_start is not None:\n",
+    "     \n",
+    "     \n",
+    "    if seq_subset is not None and res_start is None and res_end is None:   \n",
     "        \n",
-    "        df = df.loc[df.Position >= res_start]\n",
-    "        L = len(np.unique(df.Position))\n",
+    "        df = df.loc[np.isin(df.Position, seq_subset)]\n",
     "        \n",
-    "    if res_end is not None:\n",
+    "    if seq_subset is not None and (res_start is not None or res_end is not None):\n",
+    "        print('\\nseq_subset has been ignored as res_start or res_end are not set to None. Set res_start and res_end to None to use seq_subset.\\n')\n",
+    "\n",
+    "    if res_start is None:\n",
     "        \n",
-    "        df = df.loc[df.Position <= res_end]\n",
-    "        L = len(np.unique(df.Position))\n",
+    "        res_start = np.min(df.Position)\n",
+    "    \n",
+    "    if res_end is None:\n",
+    "        \n",
+    "        res_end = np.max(df.Position)\n",
+    "         \n",
+    "    if seq_subset is None:\n",
+    "        \n",
+    "        seq_subset = np.arange(res_start, res_end + 1, 1)\n",
     "        \n",
+    "    L = len(np.unique(df.Position))\n",
+    " \n",
+    "    \n",
     "    # Build reference systems\n",
     " \n",
     "    basis_angles = np.array([1.917213, 1.921843, 2.493444])\n",
@@ -173,8 +186,10 @@
     "    d = {item: idx for idx, item in enumerate(aa_list)}\n",
     "    aa_index = np.array([d.get(item) for item in aa_seq])\n",
     "    aa_pairs = np.concatenate([aa_index[pos_pairs[:,0]][:,None],aa_index[pos_pairs[:,1]][:, None]], axis = 1)\n",
+    "    pos_pairs = np.concatenate([seq_subset[pos_pairs[:,0]][:,None],seq_subset[pos_pairs[:,1]][:, None]], axis = 1)\n",
+    "\n",
     "    positions_and_frames = np.concatenate([relative_pairwise_positions, relative_pairwise_or1,\n",
-    "                                           relative_pairwise_or2, aa_pairs], axis = 1)        \n",
+    "                                           relative_pairwise_or2, aa_pairs, pos_pairs], axis = 1)        \n",
     "    \n",
     "    return positions_and_frames"
    ]
-- 
GitLab