In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import os
import pathlib
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import re
import shutil
import pandas as pd
import scipy.stats
from functools import reduce
import statsmodels
import seaborn as sns
import exdir
import expipe
from distutils.dir_util import copy_tree
import septum_mec
import spatial_maps as sp
import head_direction.head as head
import septum_mec.analysis.data_processing as dp
import septum_mec.analysis.registration
from septum_mec.analysis.plotting import violinplot, savefig, despine

from septum_mec.analysis.statistics import load_data_frames, make_paired_tables, make_statistics_table
In [3]:
project_path = dp.project_path()
project = expipe.get_project(project_path)
actions = project.actions

output_path = pathlib.Path("output") / "comparisons-allcells"
(output_path / "statistics").mkdir(exist_ok=True, parents=True)
(output_path / "figures").mkdir(exist_ok=True, parents=True)

Load cell statistics and shuffling quantiles

In [ ]:
 
In [4]:
data, labels, colors, queries = load_data_frames()
Number of sessions above threshold 194
Number of animals 4
Number of individual gridcells 139
Number of gridcell recordings 230
In [5]:
data.keys()
Out[5]:
Index(['action', 'baseline', 'entity', 'frequency', 'i', 'ii', 'session',
       'stim_location', 'stimulated', 'tag', 'date', 'entity_date', 'Hz11',
       'Hz30', 'channel_group', 'max_depth_delta', 'max_dissimilarity',
       'unit_id', 'unit_idnum', 'unit_name', 'average_rate', 'speed_score',
       'out_field_mean_rate', 'in_field_mean_rate', 'max_field_mean_rate',
       'max_rate', 'sparsity', 'selectivity', 'interspike_interval_cv',
       'burst_event_ratio', 'bursty_spike_ratio', 'gridness', 'border_score',
       'information_rate', 'information_specificity', 'head_mean_ang',
       'head_mean_vec_len', 'spacing', 'orientation', 'field_area',
       'theta_score', 'unit_day', 't_e_peak', 'p_e_peak', 't_i_peak',
       'p_i_peak', 'border_score_threshold', 'gridness_threshold',
       'head_mean_ang_threshold', 'head_mean_vec_len_threshold',
       'information_rate_threshold', 'speed_score_threshold', 'specificity',
       'half_width', 'peak_to_trough', 'average_firing_rate', 'bs', 'bs_stim',
       'bs_ctrl', 'ns_inhibited', 'ns_not_inhibited', 'gridcell',
       'bs_not_gridcell', 'label', 'label_num', 'query', 'color', 'cell_type'],
      dtype='object')

Calculate statistics

In [6]:
columns = [
    'average_rate', 
#     'spatial_average_rate',
    'gridness', 
    'sparsity', 
    'selectivity',
    'information_rate',
    'information_specificity',
    'max_rate',  
    'interspike_interval_cv', 
    'burst_event_ratio',  
    'in_field_mean_rate', 
    'out_field_mean_rate',
    'max_field_mean_rate',
    'specificity', 
    'speed_score', 
    'spacing', 
    'field_area', 
    'head_mean_vec_len', 
    'border_score'
]
In [7]:
results, labels = make_paired_tables(data, columns)
In [8]:
results['gridcell']['average_rate']
Out[8]:
entity unit_idnum channel_group date Baseline I 11 Hz Baseline II 30 Hz
51 1833 8 0 20719 7.900527 NaN NaN NaN
85 1833 13 0 20719 NaN 5.415361 6.456882 9.526984
86 1833 14 0 20719 NaN 16.681112 16.498883 NaN
58 1833 23 0 200619 16.171078 NaN NaN NaN
127 1833 26 0 200619 NaN NaN 6.990117 6.953565
... ... ... ... ... ... ... ... ...
139 1849 835 4 150319 NaN NaN NaN 3.680061
43 1849 851 5 60319 23.736187 NaN NaN NaN
65 1849 932 7 280219 5.661950 NaN NaN NaN
74 1849 937 7 280219 NaN 9.485790 NaN NaN
105 1849 939 7 280219 NaN NaN 12.721755 NaN

137 rows × 8 columns

In [9]:
# TODO: control for information rate

Create nice table

In [10]:
stat = {}
stat_values = {}
for cell_type, result in results.items():
    stat[cell_type], stat_values[cell_type] = make_statistics_table(result, labels)
In [11]:
stat['gridcell'].T
Out[11]:
Baseline I 11 Hz Baseline II 30 Hz LMM Baseline I - 11 Hz LMM Baseline I - Baseline II LMM Baseline I - 30 Hz LMM 11 Hz - Baseline II LMM 11 Hz - 30 Hz LMM Baseline II - 30 Hz
Average rate 9.8e+00 ± 9.3e-01 (63) 1.1e+01 ± 1.0e+00 (56) 1.0e+01 ± 1.0e+00 (46) 8.5e+00 ± 1.1e+00 (35) 6.9e-01, -6.0e-01 8.2e-01, 3.1e-01 8.4e-01, 3.3e-01 5.1e-01, 5.9e-01 6.2e-01, -1.2e+00 6.8e-01, -5.1e-01
Gridness 3.7e-01 ± 5.6e-02 (63) 4.0e-01 ± 5.1e-02 (56) 5.3e-01 ± 3.7e-02 (46) 5.7e-01 ± 4.9e-02 (35) 8.5e-01, -2.8e-02 NaN 4.1e-01, -1.5e-01 2.2e-01, 1.5e-01 3.2e-01, 1.2e-01 8.6e-01, -2.7e-02
Sparsity 6.6e-01 ± 2.4e-02 (63) 7.1e-01 ± 3.1e-02 (56) 6.6e-01 ± 3.2e-02 (46) 6.4e-01 ± 3.7e-02 (35) 2.5e-01, -2.6e-02 9.3e-01, -5.2e-03 5.0e-01, -4.1e-02 7.5e-02, -2.5e-02 NaN 5.8e-01, -1.9e-02
Selectivity 5.3e+00 ± 4.3e-01 (63) 5.4e+00 ± 5.9e-01 (56) 6.2e+00 ± 6.7e-01 (46) 6.8e+00 ± 6.7e-01 (35) 4.1e-01, 1.9e-01 6.6e-01, 6.6e-01 5.5e-01, -9.3e-01 3.8e-01, 3.7e-01 1.6e-01, 1.1e+00 9.3e-01, 4.1e-02
Information rate 1.4e+00 ± 8.6e-02 (63) 9.3e-01 ± 6.9e-02 (56) 1.3e+00 ± 1.0e-01 (46) 1.1e+00 ± 1.1e-01 (35) 1.7e-02, 4.0e-01 4.3e-02, -9.7e-02 4.3e-06, 3.0e-01 1.0e-01, 3.3e-01 1.6e-01, 2.5e-01 9.5e-01, 2.7e-02
Information specificity 2.5e-01 ± 3.4e-02 (63) 2.2e-01 ± 4.5e-02 (56) 2.3e-01 ± 3.4e-02 (46) 2.4e-01 ± 3.8e-02 (35) NaN 2.3e-01, -3.0e-02 6.4e-01, 2.0e-02 3.2e-01, 5.6e-02 2.2e-01, 5.6e-02 3.3e-01, 2.0e-02
Max rate 3.6e+01 ± 2.1e+00 (63) 3.6e+01 ± 2.2e+00 (56) 4.2e+01 ± 2.4e+00 (46) 3.7e+01 ± 2.5e+00 (35) 7.6e-01, 1.3e+00 3.4e-01, 3.3e+00 7.8e-01, 1.9e+00 1.7e-01, 5.5e+00 9.6e-02, 6.5e+00 7.2e-01, 1.7e+00
Interspike interval cv 2.2e+00 ± 8.9e-02 (63) 2.0e+00 ± 1.4e-01 (56) 2.2e+00 ± 8.0e-02 (46) 2.3e+00 ± 1.1e-01 (35) 8.3e-02, 1.7e-01 NaN 8.3e-01, 5.5e-02 NaN 9.4e-02, 4.1e-01 9.6e-01, 1.4e-02
Burst event ratio 2.3e-01 ± 1.1e-02 (63) 2.3e-01 ± 1.2e-02 (56) 2.3e-01 ± 1.1e-02 (46) 1.8e-01 ± 1.2e-02 (35) 7.7e-01, -4.8e-03 7.4e-01, -1.1e-02 4.7e-03, 5.7e-02 7.7e-01, 3.5e-03 NaN 1.3e-02, 3.9e-02
In-field mean rate 1.5e+01 ± 1.2e+00 (63) 1.6e+01 ± 1.3e+00 (56) 1.7e+01 ± 1.2e+00 (46) 1.3e+01 ± 1.3e+00 (35) 9.7e-01, -6.9e-02 7.5e-01, 4.7e-01 3.6e-01, 1.8e+00 4.6e-01, 1.2e+00 6.9e-01, -9.9e-01 6.7e-01, 5.9e-01
Out-field mean rate 7.3e+00 ± 7.9e-01 (63) 8.8e+00 ± 9.0e-01 (56) 7.8e+00 ± 8.6e-01 (46) 6.0e+00 ± 8.7e-01 (35) 5.1e-01, -7.0e-01 7.3e-01, 3.3e-01 6.5e-01, 6.8e-01 7.3e-01, 2.2e-01 3.1e-01, -1.9e+00 9.1e-01, 9.4e-02
Max-field mean rate 1.2e+01 ± 1.3e+00 (35) 1.1e+01 ± 1.6e+00 (31) 1.7e+01 ± 2.6e+00 (33) 7.8e+00 ± 1.2e+00 (19) 2.6e-01, 4.1e+00 8.9e-01, 1.3e+00 1.0e+00, -1.6e-02 3.8e-01, 5.3e+00 NaN 9.8e-01, -2.2e-01
Specificity 4.2e-01 ± 2.9e-02 (63) 3.9e-01 ± 3.5e-02 (56) 4.3e-01 ± 3.1e-02 (46) 4.7e-01 ± 4.0e-02 (35) 2.0e-01, 2.6e-02 9.3e-01, -1.0e-02 5.5e-01, 4.1e-02 5.9e-01, 2.4e-02 4.2e-01, 4.5e-02 7.6e-01, -1.5e-02
Speed score 1.1e-01 ± 1.1e-02 (63) 9.4e-02 ± 1.2e-02 (56) 8.7e-02 ± 8.0e-03 (46) 7.4e-02 ± 9.9e-03 (35) 5.4e-01, 1.3e-02 6.8e-01, 9.4e-03 5.6e-01, 3.3e-02 1.5e-01, 3.6e-02 8.4e-01, 1.7e-02 NaN
Spacing 5.6e-01 ± 2.0e-02 (63) 5.4e-01 ± 2.0e-02 (56) 5.2e-01 ± 1.6e-02 (46) 4.9e-01 ± 1.5e-02 (35) 3.0e-01, 3.5e-02 4.4e-01, -2.9e-02 2.2e-01, 6.5e-02 7.1e-01, 8.8e-03 1.7e-01, -4.1e-02 1.5e-01, 2.5e-02
Field area 4.5e-01 ± 7.6e-03 (63) 4.3e-01 ± 7.8e-03 (56) 4.4e-01 ± 8.6e-03 (46) 4.4e-01 ± 9.3e-03 (35) 2.3e-01, 1.5e-02 6.6e-01, -7.3e-03 7.8e-01, -6.6e-03 4.5e-01, 2.2e-03 8.6e-01, -2.6e-03 9.0e-01, 1.2e-03
Head mean vec len 8.7e-02 ± 8.5e-03 (63) 7.7e-02 ± 8.6e-03 (56) 6.7e-02 ± 7.1e-03 (46) 8.8e-02 ± 7.9e-03 (35) 7.5e-01, 6.8e-03 8.2e-04, -1.3e-02 6.7e-01, -6.8e-03 2.4e-01, -2.8e-02 8.7e-01, 2.3e-03 5.6e-01, -9.6e-03
Border score 2.1e-01 ± 1.3e-02 (63) 2.1e-01 ± 1.2e-02 (56) 1.8e-01 ± 1.3e-02 (46) 1.9e-01 ± 1.5e-02 (35) 9.5e-01, -2.4e-03 4.9e-01, -2.9e-02 5.8e-01, 2.0e-02 5.2e-08, -3.7e-02 9.5e-01, -2.1e-03 5.8e-01, -3.6e-02
In [12]:
for cell_type, sta in stat.items():
    sta.to_latex(output_path / "statistics" / f"statistics_{cell_type}.tex")
    sta.to_csv(output_path / "statistics" / f"statistics_{cell_type}.csv")
In [13]:
for cell_type, cell_results in results.items():
    for key, result in cell_results.items():
        result.to_latex(output_path / "statistics" / f"values_{cell_type}_{key}.tex")
        result.to_csv(output_path / "statistics" / f"values_{cell_type}_{key}.csv")

Cumulative density plots

In [14]:
title_xlabel = {
    'information_rate': ('Spatial information','bits/s'),
    'information_specificity': ("Spatial information specificity","bits/spike"),
    'specificity': ("Spatial specificity", ""),
    'average_rate': ("Average rate", "spikes/s"),
#     'spatial_average_rate': ("Spatial average rate", "spikes/s"),
    'max_rate': ("Max rate", "spikes/s"),
    'interspike_interval_cv': ("ISI CV", "Coefficient of variation"),
    'in_field_mean_rate': ("In-field rate", "spikes/s"),
    'out_field_mean_rate': ("Out-of-field rate", "spikes/s"),
    'burst_event_ratio': ("Bursting ratio", ""),
    'gridness': ("Gridness", "Gridness"),
    'speed_score': ("Speed score", "Speed score"),
    'spacing': ('Field spacing', 'cm'),
    'field_area': ('Field area', 'cm$^2$'),
    'border_score': ('Border Score', ''),
    'head_mean_vec_len': ('Head direction score', ''),
    'max_field_mean_rate': ('Max field mean rate', 'spike/s'),
    'selectivity': ('Selectivity', ''),
    'sparsity': ('Sparsity', '')
}
In [15]:
set(columns) - set(title_xlabel.keys()).intersection(set(columns))
Out[15]:
set()
In [ ]:
plt.rc('axes', titlesize=12)
plt.rcParams.update({
    'font.size': 12, 
    'figure.figsize': (3.7, 2.2), 
    'figure.dpi': 150
})
In [ ]:
for cell_type, cell_results in results.items():
    for key, result in cell_results.items():
        if key not in title_xlabel:
            continue
        fig = plt.figure()
        plt.suptitle(cell_type + ' ' + title_xlabel[key][0])
        legend_lines = []
        for color, label in zip(colors, labels):
            legend_lines.append(matplotlib.lines.Line2D([0], [0], color=color, label=label))
        sns.kdeplot(data=result.loc[:,labels], cumulative=True, legend=False, palette=colors, common_norm=False)
        plt.xlabel(title_xlabel[key][1])
        plt.legend(
            handles=legend_lines,
            bbox_to_anchor=(1.04,1), borderaxespad=0, frameon=False)
        plt.tight_layout()
        plt.grid(False)
        despine()
        savefig(output_path / "figures" / f'{cell_type}_{key}')

Violinplot

In [ ]:
%matplotlib inline
plt.rc('axes', titlesize=12)
plt.rcParams.update({
    'font.size': 12, 
    'figure.figsize': (1.7*3, 3), 
    'figure.dpi': 150
})

Information rate

In [ ]:
def violinplot(df, labels, colors, statistics=None):
    data = np.array([df.loc[:,label].dropna().to_numpy() for label in labels])
    pos = np.array([i * 0.6 for i in range(len(data))])
    labels = np.array(labels)
#     print(pos)
    violins = plt.violinplot(data, pos, showmedians=True, showextrema=False)
    
    for i, b in enumerate(violins['bodies']):
        b.set_color(colors[i])
        b.set_alpha (0.8)

    # for i, body in enumerate(violins['cbars']):
    #     body.set_color('C{}'.format(i))

    for category in ['cbars', 'cmins', 'cmaxes', 'cmedians']:
        if category in violins:
            violins[category].set_color(['k', 'k'])
            violins[category].set_linewidth(2.0)
    plt.xticks(pos, labels, rotation=45)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)

           
    if statistics is not None:
        tests = [[0,1], [2,3], [0,2]]
        ds = [0,0,1]
        for test, d in zip(tests, ds):
            pvalue = statistics.loc[' - '.join(labels[test])]
            # significance
            if pvalue < 0.0001:
                significance = "****"
            elif pvalue < 0.001:
                significance = "***"
            elif pvalue < 0.01:
                significance = "**"
            elif pvalue < 0.05:
                significance = "*"
            else:
                significance = "ns"

            x1, x2 = pos[test]
            data_max = np.max([a.max() for a in data[test]])
            data_min = np.min([a.min() for a in data[test]])
            y = (data_max * 1.05)
            h = 0.025 * (data_max - data_min)
            d_ =  d * 0.15 * (data_max - data_min)
            plt.plot([x1, x1, x2, x2], np.array([y - h, y, y, y - h]) + d_, c='k')
            plt.text((x1 + x2) / 2, y + h + d_, significance, ha='center', va='bottom')
           
            
        
#     plt.gca().spines['top'].set_visible(False)
#     plt.gca().spines['right'].set_visible(False)
In [ ]:
for cell_type, cell_results in results.items():
    for key, result in cell_results.items():
        if key not in title_xlabel:
            continue
        fig = plt.figure()
        plt.suptitle(cell_type + ' ' + title_xlabel[key][0])
        violinplot(result, labels=labels, colors=colors, statistics=stat_values[cell_type][key])
        plt.ylabel(title_xlabel[key][1])
#         plt.tight_layout()
        plt.grid(False)
        despine()
        savefig(output_path / "figures" / f'{cell_type}_violin_{key}')

Register in Expipe

In [ ]:
action = project.require_action("comparisons-allcells")
In [ ]:
copy_tree(output_path, str(action.data_path()))
In [ ]:
septum_mec.analysis.registration.store_notebook(action, "20_comparisons_allcells.ipynb")
In [ ]: