auto commit actions/calculate-statistics

Mikkel Elle Lepperød 2019-10-08 16:00:21 +02:00
parent 26de363501
commit 84e7d41d57
5 changed files with 16737 additions and 0 deletions


@@ -0,0 +1,6 @@
registered: '2019-10-04T08:11:18'
data:
units: units.csv
results: results.csv
notebook: 10_calculate_spatial_statistics.ipynb
html: 10_calculate_spatial_statistics.html

File diff suppressed because it is too large


@@ -0,0 +1,488 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"08:27:22 [I] klustakwik KlustaKwik2 version 0.2.6\n"
]
}
],
"source": [
"import os\n",
"import expipe\n",
"import pathlib\n",
"import numpy as np\n",
"import spatial_maps.stats as stats\n",
"import septum_mec.analysis.data_processing as dp\n",
"import head_direction.head as head\n",
"import spatial_maps as sp\n",
"import septum_mec.analysis.registration\n",
"import speed_cells.speed as spd\n",
"import septum_mec.analysis.spikes as spikes\n",
"import re\n",
"import joblib\n",
"import multiprocessing\n",
"import shutil\n",
"import psutil\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import septum_mec\n",
"import scipy.ndimage.measurements\n",
"from distutils.dir_util import copy_tree\n",
"\n",
"from tqdm import tqdm_notebook as tqdm\n",
"from tqdm._tqdm_notebook import tqdm_notebook\n",
"tqdm_notebook.pandas()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"max_speed = 1, # m/s only used for speed score\n",
"min_speed = 0.02, # m/s only used for speed score\n",
"position_sampling_rate = 100 # for interpolation\n",
"position_low_pass_frequency = 6 # for low pass filtering of position\n",
"\n",
"box_size = 1.0\n",
"bin_size = 0.02\n",
"smoothing_low = 0.03\n",
"smoothing_high = 0.06"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"project_path = dp.project_path()\n",
"\n",
"project = expipe.get_project(project_path)\n",
"actions = project.actions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>action</th>\n",
" <th>channel_group</th>\n",
" <th>unit_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1834-150319-3</td>\n",
" <td>0</td>\n",
" <td>71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1834-150319-3</td>\n",
" <td>0</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1834-120319-4</td>\n",
" <td>0</td>\n",
" <td>85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1834-120319-1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1834-120319-2</td>\n",
" <td>0</td>\n",
" <td>39</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" action channel_group unit_name\n",
"0 1834-150319-3 0 71\n",
"1 1834-150319-3 0 75\n",
"2 1834-120319-4 0 85\n",
"3 1834-120319-1 0 1\n",
"4 1834-120319-2 0 39"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"identify_neurons = actions['identify-neurons']\n",
"units = pd.read_csv(identify_neurons.data_path('units'))\n",
"units.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"data_loader = dp.Data(\n",
" position_sampling_rate=position_sampling_rate, \n",
" position_low_pass_frequency=position_low_pass_frequency,\n",
" box_size=box_size, bin_size=bin_size\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"first_row = units[units['action'] == '1849-060319-3'].iloc[0]\n",
"#first_row = sessions.iloc[50]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/elephant/statistics.py:835: UserWarning: Instantaneous firing rate approximation contains negative values, possibly caused due to machine precision errors.\n",
" warnings.warn(\"Instantaneous firing rate approximation contains \"\n"
]
},
{
"data": {
"text/plain": [
"average_rate 3.095328\n",
"speed_score -0.063922\n",
"out_field_mean_rate 1.837642\n",
"in_field_mean_rate 5.122323\n",
"max_field_mean_rate 8.882211\n",
"max_rate 23.006163\n",
"sparsity 0.468122\n",
"selectivity 7.306812\n",
"interspike_interval_cv 3.970863\n",
"burst_event_ratio 0.397921\n",
"bursty_spike_ratio 0.676486\n",
"gridness -0.459487\n",
"border_score 0.078474\n",
"information_rate 0.965845\n",
"head_mean_ang 5.788704\n",
"head_mean_vec_len 0.043321\n",
"spacing 0.624971\n",
"orientation 22.067900\n",
"dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def process(row):\n",
" action_id = row['action']\n",
" channel_id = row['channel_group']\n",
" unit_id = row['unit_name']\n",
" \n",
" # common values for all units == faster calculations\n",
" x, y, t, speed = map(data_loader.tracking(action_id).get, ['x', 'y', 't', 'v'])\n",
" ang, ang_t = map(data_loader.head_direction(action_id).get, ['a', 't'])\n",
" occupancy_map = data_loader.occupancy(action_id)\n",
" xbins, ybins = data_loader.spatial_bins\n",
" box_size_, bin_size_ = data_loader.box_size_, data_loader.bin_size_\n",
" prob_dist = data_loader.prob_dist(action_id)\n",
" \n",
" smooth_low_occupancy_map = sp.maps.smooth_map(occupancy_map, bin_size=bin_size_, smoothing=smoothing_low)\n",
" smooth_high_occupancy_map = sp.maps.smooth_map(occupancy_map, bin_size=bin_size_, smoothing=smoothing_high)\n",
" \n",
" spike_times = data_loader.spike_train(action_id, channel_id, unit_id)\n",
"\n",
" # common\n",
" spike_map = sp.maps._spike_map(x, y, t, spike_times, xbins, ybins)\n",
"\n",
" smooth_low_spike_map = sp.maps.smooth_map(spike_map, bin_size=bin_size_, smoothing=smoothing_low)\n",
" smooth_high_spike_map = sp.maps.smooth_map(spike_map, bin_size=bin_size_, smoothing=smoothing_high)\n",
"\n",
" smooth_low_rate_map = smooth_low_spike_map / smooth_low_occupancy_map\n",
" smooth_high_rate_map = smooth_high_spike_map / smooth_high_occupancy_map\n",
"\n",
" # find fields with laplace\n",
" fields_laplace = sp.separate_fields_by_laplace(smooth_high_rate_map)\n",
" fields = fields_laplace.copy() # to be cleaned by Ismakov\n",
" fields_areas = scipy.ndimage.measurements.sum(\n",
" np.ones_like(fields), fields, index=np.arange(fields.max() + 1))\n",
" fields_area = fields_areas[fields]\n",
" fields[fields_area < 9.0] = 0\n",
"\n",
" # find fields with Ismakov-method\n",
" fields_ismakov, radius = sp.separate_fields_by_distance(smooth_high_rate_map)\n",
" fields_ismakov_real = fields_ismakov * bin_size\n",
" approved_fields = []\n",
"\n",
" # remove fields not found by both methods\n",
" for point in fields_ismakov:\n",
" field_id = fields[tuple(point)]\n",
" approved_fields.append(field_id)\n",
"\n",
" for field_id in np.arange(1, fields.max() + 1):\n",
" if not field_id in approved_fields:\n",
" fields[fields == field_id] = 0\n",
"\n",
" # varying statistics\n",
" average_rate = len(spike_times) / (t.max() - t.min())\n",
"\n",
" max_rate = smooth_low_rate_map.max()\n",
"\n",
" out_field_mean_rate = smooth_low_rate_map[np.where(fields == 0)].mean()\n",
" in_field_mean_rate = smooth_low_rate_map[np.where(fields != 0)].mean()\n",
" max_field_mean_rate = smooth_low_rate_map[np.where(fields == 1)].mean()\n",
"\n",
" interspike_interval = np.diff(spike_times)\n",
" interspike_interval_cv = interspike_interval.std() / interspike_interval.mean()\n",
"\n",
" autocorrelogram = sp.autocorrelation(smooth_high_rate_map)\n",
" peaks = sp.fields.find_peaks(autocorrelogram)\n",
" real_peaks = peaks * bin_size\n",
" autocorrelogram_box_size = box_size * autocorrelogram.shape[0] / smooth_high_rate_map.shape[0]\n",
" spacing, orientation = sp.spacing_and_orientation(real_peaks, autocorrelogram_box_size)\n",
" orientation *= 180 / np.pi\n",
"\n",
" selectivity = stats.selectivity(smooth_low_rate_map, prob_dist)\n",
"\n",
" sparsity = stats.sparsity(smooth_low_rate_map, prob_dist)\n",
"\n",
" gridness = sp.gridness(smooth_high_rate_map)\n",
"\n",
" border_score = sp.border_score(smooth_high_rate_map, fields_laplace)\n",
"\n",
" information_rate = stats.information_rate(smooth_high_rate_map, prob_dist)\n",
"\n",
" single_spikes, bursts, bursty_spikes = spikes.find_bursts(spike_times, threshold=0.01)\n",
" burst_event_ratio = np.sum(bursts) / (np.sum(single_spikes) + np.sum(bursts))\n",
" bursty_spike_ratio = np.sum(bursty_spikes) / (np.sum(bursty_spikes) + np.sum(single_spikes))\n",
" mean_spikes_per_burst = np.sum(bursty_spikes) / np.sum(bursts)\n",
"\n",
" speed_score = spd.speed_correlation(\n",
" speed, t, spike_times, min_speed=min_speed, max_speed=max_speed)\n",
"\n",
" ang_bin, ang_rate = head.head_direction_rate(spike_times, ang, ang_t)\n",
"\n",
" head_mean_ang, head_mean_vec_len = head.head_direction_score(ang_bin, ang_rate)\n",
"\n",
" result = pd.Series({\n",
" 'average_rate': average_rate,\n",
" 'speed_score': speed_score,\n",
" 'out_field_mean_rate': out_field_mean_rate,\n",
" 'in_field_mean_rate': in_field_mean_rate,\n",
" 'max_field_mean_rate': max_field_mean_rate,\n",
" 'max_rate': max_rate,\n",
" 'sparsity': sparsity,\n",
" 'selectivity': selectivity,\n",
" 'interspike_interval_cv': float(interspike_interval_cv),\n",
" 'burst_event_ratio': burst_event_ratio,\n",
" 'bursty_spike_ratio': bursty_spike_ratio,\n",
" 'gridness': gridness,\n",
" 'border_score': border_score,\n",
" 'information_rate': information_rate,\n",
" 'head_mean_ang': head_mean_ang,\n",
" 'head_mean_vec_len': head_mean_vec_len,\n",
" 'spacing': spacing,\n",
" 'orientation': orientation\n",
" })\n",
" return result\n",
" \n",
"process(first_row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "837fde7fe486422bac67341ff512a4e1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1281), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mikkel/apps/expipe-project/spatial-maps/spatial_maps/stats.py:13: RuntimeWarning: invalid value encountered in log2\n",
" return (np.nansum(np.ravel(tmp_rate_map * np.log2(tmp_rate_map/avg_rate) *\n",
"/home/mikkel/apps/expipe-project/spatial-maps/spatial_maps/stats.py:13: RuntimeWarning: divide by zero encountered in log2\n",
" return (np.nansum(np.ravel(tmp_rate_map * np.log2(tmp_rate_map/avg_rate) *\n",
"/home/mikkel/apps/expipe-project/spatial-maps/spatial_maps/stats.py:13: RuntimeWarning: invalid value encountered in multiply\n",
" return (np.nansum(np.ravel(tmp_rate_map * np.log2(tmp_rate_map/avg_rate) *\n",
"/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:56: RuntimeWarning: Mean of empty slice.\n",
"/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/numpy/core/_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n",
"/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:57: RuntimeWarning: Mean of empty slice.\n",
"/home/mikkel/.virtualenvs/expipe/lib/python3.6/site-packages/ipykernel_launcher.py:82: RuntimeWarning: invalid value encountered in long_scalars\n"
]
}
],
"source": [
"results = units.merge(\n",
" units.progress_apply(process, axis=1), \n",
" left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"output_path = pathlib.Path(\"output\") / \"calculate-statistics\"\n",
"output_path.mkdir(exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"units.to_csv(output_path / \"units.csv\", index=False)\n",
"results.to_csv(output_path / \"results.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Store results in Expipe action"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"statistics_action = project.require_action(\"calculate-statistics\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"statistics_action.data[\"units\"] = \"units.csv\"\n",
"statistics_action.data[\"results\"] = \"results.csv\"\n",
"copy_tree(output_path, str(statistics_action.data_path()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"septum_mec.analysis.registration.store_notebook(statistics_action, \"10_calculate_spatial_statistics.ipynb\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large

File diff suppressed because it is too large