septum-mec/actions/stimulus-lfp-response/data/10-calculate-stimulus-lfp-r...

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "19:43:24 [I] klustakwik KlustaKwik2 version 0.2.6\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:24: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
      "Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`\n"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import spatial_maps as sp\n",
    "import septum_mec.analysis.data_processing as dp\n",
    "import septum_mec.analysis.registration\n",
    "import expipe\n",
    "import os\n",
    "import pathlib\n",
    "import numpy as np\n",
    "import exdir\n",
    "import pandas as pd\n",
    "import optogenetics as og\n",
    "import quantities as pq\n",
    "import shutil\n",
    "from distutils.dir_util import copy_tree\n",
    "import scipy\n",
    "import scipy.signal as ss\n",
    "\n",
    "from scipy.signal import find_peaks\n",
    "from scipy.interpolate import interp1d\n",
    "from matplotlib import mlab\n",
    "\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "from tqdm._tqdm_notebook import tqdm_notebook\n",
    "tqdm_notebook.pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_loader = dp.Data()\n",
    "actions = data_loader.actions\n",
    "project = data_loader.project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#############################\n",
    "\n",
    "perform_zscore = True\n",
    "\n",
    "if not perform_zscore:\n",
    "    zscore_str = \"-no-zscore\"\n",
    "else:\n",
    "    zscore_str = \"\"\n",
    "\n",
    "#################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = pathlib.Path('output/stimulus-lfp-response' + zscore_str)\n",
    "(output / 'data').mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "identify_neurons = actions['identify-neurons']\n",
    "sessions = pd.read_csv(identify_neurons.data_path('sessions'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "channel_groups = []\n",
    "for i, row in sessions.iterrows():\n",
    "    for ch in range(8):\n",
    "        row['channel_group'] = ch\n",
    "        channel_groups.append(row.to_dict())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "channel_groups = pd.DataFrame(channel_groups)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_lim(action_id):\n",
    "    stim_times = data_loader.stim_times(action_id)\n",
    "    if stim_times is None:\n",
    "        return [0, np.inf]\n",
    "    stim_times = np.array(stim_times)\n",
    "    return [stim_times.min(), stim_times.max()]\n",
    "\n",
    "def get_mask(lfp, lim):\n",
    "    return (lfp.times >= lim[0]) & (lfp.times <= lim[1])\n",
    "\n",
    "def zscore(a):\n",
    "    return (a - a.mean()) / a.std()\n",
    "\n",
    "def compute_stim_freq(action_id):\n",
    "    stim_times = data_loader.stim_times(action_id)\n",
    "    if stim_times is None:\n",
    "        return np.nan\n",
    "    stim_times = np.array(stim_times)\n",
    "    return 1 / np.mean(np.diff(stim_times))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def signaltonoise(a, axis=0, ddof=0):\n",
    "    a = np.asanyarray(a)\n",
    "    m = a.mean(axis)\n",
    "    sd = a.std(axis=axis, ddof=ddof)\n",
    "    return np.where(sd == 0, 0, m / sd)\n",
    "\n",
    "\n",
    "def select_and_clean(anas, width=500, threshold=2):\n",
    "    anas = np.array(anas)\n",
    "\n",
    "    for ch in range(anas.shape[1]):\n",
    "        idxs, = np.where(abs(anas[:, ch]) > threshold)\n",
    "        for idx in idxs:\n",
    "            anas[idx-width:idx+width, ch] = 0 # TODO AR model prediction\n",
    "    return anas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_energy(p, f, f1, f2):\n",
    "    if np.isnan(f1):\n",
    "        return np.nan\n",
    "    mask = (f > f1) & (f < f2)\n",
    "    df = f[1] - f[0]\n",
    "    return np.sum(p[mask]) * df\n",
    "\n",
    "\n",
    "def compute_band_power(p, f, f1, f2):\n",
    "    if np.isnan(f1) or np.all(np.isnan(p)):\n",
    "        return [np.nan] * 2\n",
    "    from scipy.integrate import simps\n",
    "    dx = f[1] - f[0]\n",
    "    mask = (f > f1) & (f < f2)\n",
    "    # Compute the absolute power by approximating the area under the curve\n",
    "    band_power = simps(p[mask], dx=dx)\n",
    "    total_power = simps(p, dx=dx)\n",
    "    rel_power = band_power / total_power\n",
    "    return band_power, rel_power"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_theta_peak(p, f, f1, f2):\n",
    "    if np.all(np.isnan(p)):\n",
    "        return np.nan, np.nan\n",
    "    mask = (f > f1) & (f < f2)\n",
    "    p_m = p[mask]\n",
    "    f_m = f[mask]\n",
    "    peaks, _ = find_peaks(p_m)\n",
    "    idx = np.argmax(p_m[peaks])\n",
    "    return f_m[peaks[idx]], p_m[peaks[idx]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_half_width(power, freq, max_power, max_frequency, band, band_width=1):\n",
    "    if np.isnan(max_power):\n",
    "        return [np.nan] * 3\n",
    "    \n",
    "    # estimate baseline power\n",
    "    low_baseline_mask = (freq > band[0] - band_width) & (freq < band[0])\n",
    "    high_baseline_mask = (freq > band[1]) & (freq < band[1] + band_width)\n",
    "    baseline = np.mean(np.concatenate([power[low_baseline_mask], power[high_baseline_mask]]))\n",
    "    p = power - baseline\n",
    "    m_p = max_power - baseline\n",
    "    m_f = max_frequency\n",
    "    f = freq\n",
    "    \n",
    "    # estimate half width\n",
    "    m_p_half = m_p / 2\n",
    "    half_p = p - m_p_half\n",
    "    idx_f = np.where(f <= m_f)[0].max()\n",
    "    idxs_p1, = np.where(np.diff(half_p[:idx_f + 1] > 0) == 1)\n",
    "    if len(idxs_p1) == 0:\n",
    "        return [np.nan] * 3\n",
    "    m1 = idxs_p1.max()\n",
    "    idxs_p2, = np.where(np.diff(half_p[idx_f:] > 0) == 1)\n",
    "    if len(idxs_p2) == 0:\n",
    "        return [np.nan] * 3\n",
    "    m2 = idxs_p2.min() + idx_f\n",
    "#     assert p[m1] < m_p_half < p[m1+1], (p[m1], m_p_half, p[m1+1])\n",
    "#     assert p[m2] > m_p_half > p[m2+1], (p[m2], m_p_half, p[m2+1])\n",
    "    \n",
    "    f1 = interp1d([half_p[m1], half_p[m1 + 1]], [f[m1], f[m1 + 1]])(0)\n",
    "    f2 = interp1d([half_p[m2], half_p[m2 + 1]], [f[m2], f[m2 + 1]])(0)\n",
    "    return f1, f2, m_p_half + baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_stim_peak(p, f, s_f):\n",
    "    if np.isnan(s_f):\n",
    "        return np.nan\n",
    "    return interp1d(f, p)(s_f)\n",
    "\n",
    "\n",
    "def compute_relative_peak(power, freq, max_power, band, band_width=1):\n",
    "    # estimate baseline power\n",
    "    low_baseline_mask = (freq > band[0] - band_width) & (freq < band[0])\n",
    "    high_baseline_mask = (freq > band[1]) & (freq < band[1] + band_width)\n",
    "    baseline = np.mean(np.concatenate([power[low_baseline_mask], power[high_baseline_mask]]))\n",
    "    return (max_power - baseline) / abs(baseline)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta_band_f1, theta_band_f2 = 6, 10 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "psd_data, freq_data = {}, {}\n",
    "\n",
    "def process(row, perform_zscore):\n",
    "    action_id = row['action']\n",
    "    channel_group = row['channel_group']\n",
    "    name =  f'{action_id}_{channel_group}'\n",
    "    lfp = data_loader.lfp(action_id, channel_group)\n",
    "    clean_lfp = select_and_clean(lfp)\n",
    "    snls = signaltonoise(clean_lfp)\n",
    "    best_channel = np.argmax(snls)\n",
    "    snl = snls[best_channel]\n",
    "    \n",
    "    lim = get_lim(action_id)\n",
    "    \n",
    "    mask = get_mask(lfp, lim)\n",
    "    if perform_zscore:\n",
    "        signal = zscore(clean_lfp[mask, best_channel].ravel())\n",
    "    else:\n",
    "        signal = clean_lfp[mask, best_channel].ravel()\n",
    "    \n",
    "    window = 6 * lfp.sampling_rate.magnitude\n",
    "    \n",
    "#     p_xx, freq = mlab.psd(signal, Fs=lfp.sampling_rate.magnitude, NFFT=NFFT)\n",
    "    freq, p_xx = ss.welch(signal, fs=lfp.sampling_rate.magnitude, nperseg=window, nfft=scipy.fftpack.next_fast_len(window))\n",
    "    p_xx = 10 * np.log10(p_xx)\n",
    "    \n",
    "    theta_f, theta_p_max = find_theta_peak(p_xx, freq, theta_band_f1, theta_band_f2)\n",
    "    \n",
    "    theta_bandpower, theta_relpower = compute_band_power(p_xx, freq, theta_band_f1, theta_band_f2)\n",
    "    \n",
    "    theta_relpeak = compute_relative_peak(p_xx, freq, theta_p_max, [theta_band_f1, theta_band_f2])\n",
    "        \n",
    "    theta_half_f1, theta_half_f2, theta_half_power = compute_half_width(p_xx, freq, theta_p_max, theta_f, [theta_band_f1, theta_band_f2])\n",
    "    \n",
    "    theta_half_width = theta_half_f2 - theta_half_f1\n",
    "    \n",
    "    psd_data.update({name: p_xx})\n",
    "    freq_data.update({name: freq})\n",
    "\n",
    "    \n",
    "    # stim\n",
    "    \n",
    "    stim_freq = compute_stim_freq(action_id)\n",
    "    \n",
    "    stim_p_max = compute_stim_peak(p_xx, freq, stim_freq)\n",
    "    \n",
    "    stim_half_f1, stim_half_f2, stim_half_power = compute_half_width(p_xx, freq, stim_p_max, stim_freq, [stim_freq - 1, stim_freq + 1])\n",
    "    \n",
    "    stim_half_width = stim_half_f2 - stim_half_f1\n",
    "    \n",
    "    stim_bandpower, stim_relpower = compute_band_power(p_xx, freq, stim_freq - 1, stim_freq + 1)\n",
    "    \n",
    "    stim_relpeak = compute_relative_peak(p_xx, freq, stim_p_max, [stim_freq - 1, stim_freq + 1])\n",
    "    \n",
    "    result = pd.Series({\n",
    "        'signal_to_noise': snl,\n",
    "        'best_channel': best_channel,\n",
    "        'theta_freq': theta_f,\n",
    "        'theta_peak': theta_p_max,\n",
    "        'theta_bandpower': theta_bandpower,\n",
    "        'theta_relpower': theta_relpower,\n",
    "        'theta_relpeak': theta_relpeak,\n",
    "        'theta_half_f1': theta_half_f1, \n",
    "        'theta_half_f2': theta_half_f2,\n",
    "        'theta_half_width': theta_half_width,\n",
    "        'stim_freq': stim_freq,\n",
    "        'stim_p_max': stim_p_max,\n",
    "        'stim_half_f1': stim_half_f1, \n",
    "        'stim_half_f2': stim_half_f2,\n",
    "        'stim_half_width': stim_half_width,\n",
    "        'stim_bandpower': stim_bandpower,\n",
    "        'stim_relpower': stim_relpower,\n",
    "        'stim_relpeak': stim_relpeak,\n",
    "    })\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0a39fef20b724ef186ce6ce5299b3719",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=696), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in greater\n",
      "  if __name__ == '__main__':\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in less\n",
      "  if __name__ == '__main__':\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in greater\n",
      "  # Remove the CWD from sys.path while we load stuff.\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in less\n",
      "  # Remove the CWD from sys.path while we load stuff.\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/numpy/core/fromnumeric.py:3257: RuntimeWarning: Mean of empty slice.\n",
      "  out=out, **kwargs)\n",
      "/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/numpy/core/_methods.py:161: RuntimeWarning: invalid value encountered in true_divide\n",
      "  ret = ret.dtype.type(ret / rcount)\n"
     ]
    }
   ],
   "source": [
    "results = channel_groups.merge(\n",
    "    channel_groups.progress_apply(process, perform_zscore=perform_zscore, axis=1), \n",
    "    left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(psd_data).to_feather(output / 'data' / 'psd.feather')\n",
    "pd.DataFrame(freq_data).to_feather(output / 'data' / 'freqs.feather')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Save to expipe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "action = project.require_action(\"stimulus-lfp-response\" + zscore_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "action.modules['parameters'] = {\n",
    "    'window': 6,\n",
    "    'theta_band_f1': theta_band_f1,\n",
    "    'theta_band_f2': theta_band_f2\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "action.data['results'] = 'results.csv'\n",
    "results.to_csv(action.data_path('results'), index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "copy_tree(output, str(action.data_path()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "septum_mec.analysis.registration.store_notebook(action, \"10-calculate-stimulus-lfp-response.ipynb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}