diff --git a/actions/comparisons-power/attributes.yaml b/actions/comparisons-power/attributes.yaml new file mode 100644 index 000000000..d0ee5dfce --- /dev/null +++ b/actions/comparisons-power/attributes.yaml @@ -0,0 +1,4 @@ +registered: '2021-02-20T12:20:09' +data: + notebook: 20_comparisons_power.ipynb + html: 20_comparisons_power.html diff --git a/actions/comparisons-power/data/20_comparisons_power.html b/actions/comparisons-power/data/20_comparisons_power.html new file mode 100644 index 000000000..a59d06553 --- /dev/null +++ b/actions/comparisons-power/data/20_comparisons_power.html @@ -0,0 +1,16869 @@ + + +
+ +%load_ext autoreload
+%autoreload 2
+
import os
+import pathlib
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import re
+import shutil
+import pandas as pd
+import scipy.stats
+from functools import reduce
+import statsmodels
+import seaborn as sns
+import exdir
+import expipe
+from distutils.dir_util import copy_tree
+import septum_mec
+import spatial_maps as sp
+import head_direction.head as head
+import septum_mec.analysis.data_processing as dp
+import septum_mec.analysis.registration
+from septum_mec.analysis.plotting import violinplot, savefig, despine
+from tqdm.notebook import tqdm
+tqdm.pandas()
+
+from septum_mec.analysis.statistics import (
+ load_data_frames,
+ make_paired_tables,
+ make_statistics_table,
+ estimate_power_lmm,
+ estimate_power_lmm_paralell,
+ LMM,
+ estimate_sample_size_lmm,
+ estimate_sample_size_lmm_paralell
+)
+
project_path = dp.project_path()
+project = expipe.get_project(project_path)
+actions = project.actions
+
+output_path = pathlib.Path("output") / "comparisons-power"
+(output_path / "statistics").mkdir(exist_ok=True, parents=True)
+(output_path / "figures").mkdir(exist_ok=True, parents=True)
+(output_path / "data").mkdir(exist_ok=True, parents=True)
+
+
data, labels, colors, queries = load_data_frames()
+
columns = [
+ 'average_rate',
+ 'gridness',
+ 'information_specificity',
+ 'max_rate',
+ 'information_rate',
+ 'in_field_mean_rate',
+ 'out_field_mean_rate',
+ 'speed_score',
+ 'spacing',
+ 'field_area'
+
+]
+
results, labels = make_paired_tables(data, columns)
+
plt.rc('axes', titlesize=12)
+plt.rcParams.update({
+ 'font.size': 12,
+ 'figure.figsize': (6, 4),
+ 'figure.dpi': 150
+})
+
+results['gridcell']['gridness'][labels].plot.density()
+
summary = pd.DataFrame()
+for key, df in results['gridcell'].items():
+ Key = key.replace('_', ' ').capitalize()
+ for label in labels:
+ summary.loc[label, Key] = "{:.3f} ± {:.3f}".format(df[label].mean(), df[label].std())
+summary.T
+
mdf.summary()
+
vss = [
+ ('Baseline I', '11 Hz'),
+ ('Baseline I', 'Baseline II'),
+ ('Baseline II', '30 Hz'),
+]
+
ps = pd.DataFrame()
+ci = pd.DataFrame()
+ef = pd.DataFrame()
+mf = pd.DataFrame()
+for key, df in results['gridcell'].items():
+ Key = key.replace('_', ' ').capitalize()
+ for vs in vss:
+ pval, low, high, mdf = LMM(df, *vs, key)
+ ps.loc[f'LMM {vs[0]} - {vs[1]}', key] = pval
+ ci.loc[f'LMM {vs[0]} - {vs[1]}', key] = f'{low}, {high}'
+ ef.loc[f'LMM {vs[0]} - {vs[1]}', key] = mdf.params[1]
+ mf.loc[f'LMM {vs[0]} - {vs[1]}', key] = abs(df[vs[0]].mean() - df[vs[1]].mean())
+
ps
+
ci
+
ef
+
mf
+
effect_ranges = {
+ 'gridness': (.01, .3, .01),
+ 'average_rate': (.5, 6, .2),
+ 'information_specificity': (.01,.2,.01),
+ 'max_rate': (1,15,.5),
+ 'information_rate': (.1,.4,.02),
+ 'in_field_mean_rate': (.5, 6, .2),
+ 'out_field_mean_rate': (.5, 6, .2),
+ 'speed_score': (.01,.1,.005), # if run again, change this to go to 0.1
+ 'spacing': (.01,.1,.005), # if run again, change this to go to 0.1
+ 'field_area': (.01,.1,.005),# if run again, change this to go to 0.1
+}
+
powers = {}
+for vs in vss:
+ powers[vs] = {}
+ for key, df in tqdm(results['gridcell'].items(), desc=' - '.join(vs)):
+ power, effect_size = estimate_power_lmm_paralell(results['gridcell'][key], *vs, effect_range=effect_ranges[key])
+ powers[vs][key] = {'p': power, 'e': effect_size}
+
plt.rc('axes', titlesize=12)
+plt.rcParams.update({
+ 'font.size': 12,
+ 'figure.figsize': (3.7, 2.2),
+ 'figure.dpi': 150
+})
+
for vs, vals in powers.items():
+ for key, val in vals.items():
+ plt.figure()
+ plt.plot(val['e'], val['p'])
+ plt.title(' - '.join(vs) + ', ' + key.replace('_', ' ').capitalize())
+ savefig(output_path / 'figures' / f"{'-'.join(vs)}_{key}")
+
np.savez(output_path / 'data' / 'powers.npz', powers)
+
from scipy.interpolate import interp1d
+
effect_size_power_80p = pd.DataFrame()
+for vs, vals in powers.items():
+ for key, val in vals.items():
+ e = interp1d(val['p'], val['e'])(0.8)
+ effect_size_power_80p.loc[key.replace('_', ' ').capitalize(), ' - '.join(vs)] = e
+
effect_size_power_80p
+
effect_size_power_80p.to_latex(output_path / "statistics" / "effect_size_power_80p.tex")
+effect_size_power_80p.to_csv(output_path / "statistics" / "effect_size_power_80p.csv")
+
effect_sizes = {
+ 'gridness': .1,
+ 'average_rate': 1,
+ 'information_specificity': 0.1,
+ 'max_rate': 5,
+ 'information_rate': 0.2,
+ 'in_field_mean_rate': 3,
+ 'out_field_mean_rate': 2,
+ 'speed_score': 0.1,
+ 'spacing': 0.05,
+ 'field_area': 0.05
+}
+
+n_samples_ranges = {
+ 'gridness': (10, 300, 10),
+ 'average_rate': (10, 300, 10),
+ 'information_specificity': (10, 120, 10),
+ 'max_rate': (10, 120, 10),
+ 'information_rate': (10, 120, 10),
+ 'in_field_mean_rate': (10, 120, 10),
+ 'out_field_mean_rate': (10, 120, 10),
+ 'speed_score': (5, 60, 5),
+ 'spacing': (5, 60, 5),
+ 'field_area': (5, 60, 5)
+}
+
powers_sample_size = {}
+for vs in vss:
+ powers_sample_size[vs] = {}
+ for key, df in tqdm(results['gridcell'].items(), desc=' - '.join(vs)):
+ power, sample_size, effective_sample_size = estimate_sample_size_lmm(
+ results['gridcell'][key], *vs, effect_size=effect_sizes[key], n_samples_range=n_samples_ranges[key], key=key)
+ powers_sample_size[vs][key] = {'p': power, 's': sample_size, 'es': effective_sample_size}
+
for vs, vals in powers_sample_size.items():
+ for key, val in vals.items():
+ plt.figure()
+ plt.plot(val['es'], val['p'])
+ try:
+ e = interp1d(val['p'], val['es'])(0.8)
+ plt.axvline(e, label=f'N={e:.0f}', color='k')
+ plt.axhline(0.8, color='k', alpha=0.1)
+ plt.legend()
+ except:
+ pass
+ n1, n2 = results['gridcell'][key][list(vs)].count().values
+ plt.title(f"{vs[0]} ({n1}), {vs[1]} ({n2}), {key.replace('_', ' ').capitalize()} ({effect_sizes[key]})")
+ plt.ylabel('Power p(~H0|H1)')
+ plt.xlabel('N neurons')
+ savefig(output_path / 'figures' / f"sample_size_{'-'.join(vs)}_{key}")
+
sample_size_power_80p = pd.DataFrame()
+for vs, vals in powers_sample_size.items():
+ for key, val in vals.items():
+ e = interp1d(val['p'], val['es'])(0.8)
+ name = f"{key.replace('_', ' ').capitalize()}"
+ sample_size_power_80p.loc[name, ' - '.join(vs)] = e
+
sample_size_power_80p
+
sample_size_power_80p.to_latex(output_path / "statistics" / f"sample_size_power.tex")
+sample_size_power_80p.to_csv(output_path / "statistics" / f"sample_size_power.csv")
+
np.savez(output_path / 'data' / 'powers_sample_size.npz', powers_sample_size)
+
action = project.require_action("comparisons-power")
+
copy_tree(output_path, str(action.data_path()))
+
septum_mec.analysis.registration.store_notebook(action, "20_comparisons_power.ipynb")
+
vs, key = vss[0], 'gridness'
+
power, sample_size, effective_sample_size = estimate_sample_size_lmm(
+ results['gridcell'][key], *vs, effect_size=effect_sizes[key], n_samples_range=(10, 300, 10), key=key)
+
power, sample_size, effective_sample_size =estimate_sample_size_lmm(
+ results['gridcell'][key], *vs, effect_size=effect_sizes[key], n_samples_range=(10, 200, 10), n_repeats=100, key=key)
+
%%time
+power, sample_size, effective_sample_size = estimate_sample_size_lmm_paralell(
+ results['gridcell'][key], *vs, effect_size=effect_sizes[key], n_samples_range=(10, 130, 10), n_repeats=100, key=key)
+
+
plt.plot(effective_sample_size, power)
+estimated_sample_size = interp1d(power, effective_sample_size)(0.8)
+plt.axvline(estimated_sample_size, label=f'N={estimated_sample_size:.0f}', color='k')
+plt.axhline(0.8, color='k', alpha=0.1)
+n1, n2 = results['gridcell'][key][list(vs)].count().values
+plt.title(f"{vs[0]} ({n1}), {vs[1]} ({n2}), {key.replace('_', ' ').capitalize()} ({effect_sizes[key]})")
+plt.legend()
+plt.ylabel('Power p(~H0|H1)')
+plt.xlabel('N neurons')
+# plt.xlim(0,200)
+
\n", + " | Baseline I | \n", + "11 Hz | \n", + "Baseline II | \n", + "30 Hz | \n", + "
---|---|---|---|---|
Average rate | \n", + "9.040 ± 6.369 | \n", + "8.934 ± 6.545 | \n", + "8.368 ± 6.154 | \n", + "7.553 ± 5.359 | \n", + "
Gridness | \n", + "0.527 ± 0.353 | \n", + "0.393 ± 0.369 | \n", + "0.579 ± 0.287 | \n", + "0.479 ± 0.355 | \n", + "
Information specificity | \n", + "0.245 ± 0.229 | \n", + "0.208 ± 0.264 | \n", + "0.218 ± 0.177 | \n", + "0.226 ± 0.209 | \n", + "
Max rate | \n", + "37.533 ± 15.081 | \n", + "32.799 ± 14.292 | \n", + "37.684 ± 16.860 | \n", + "34.584 ± 12.544 | \n", + "
Information rate | \n", + "1.324 ± 0.617 | \n", + "0.906 ± 0.543 | \n", + "1.177 ± 0.640 | \n", + "0.983 ± 0.555 | \n", + "
In field mean rate | \n", + "14.973 ± 8.381 | \n", + "13.344 ± 8.073 | \n", + "14.126 ± 7.835 | \n", + "12.109 ± 6.060 | \n", + "
Out field mean rate | \n", + "6.389 ± 5.358 | \n", + "6.712 ± 5.898 | \n", + "5.787 ± 5.225 | \n", + "5.265 ± 4.488 | \n", + "
Speed score | \n", + "0.142 ± 0.081 | \n", + "0.105 ± 0.090 | \n", + "0.120 ± 0.060 | \n", + "0.104 ± 0.073 | \n", + "
Spacing | \n", + "0.439 ± 0.121 | \n", + "0.456 ± 0.123 | \n", + "0.416 ± 0.093 | \n", + "0.424 ± 0.080 | \n", + "
Field area | \n", + "0.431 ± 0.053 | \n", + "0.417 ± 0.051 | \n", + "0.423 ± 0.051 | \n", + "0.431 ± 0.052 | \n", + "
Model: | MixedLM | Dependent Variable: | val | \n", + "
No. Observations: | 119 | Method: | REML | \n", + "
No. Groups: | 4 | Scale: | 0.0932 | \n", + "
Min. group size: | 7 | Log-Likelihood: | -49.4026 | \n", + "
Max. group size: | 81 | Converged: | Yes | \n", + "
Mean group size: | 29.8 | \n", + " |
Coef. | Std.Err. | z | P>|z| | [0.025 | 0.975] | \n", + "|
---|---|---|---|---|---|---|
Intercept | 0.409 | 0.048 | 8.488 | 0.000 | 0.315 | 0.504 | \n", + "
label[T.Baseline I] | 0.124 | 0.062 | 1.997 | 0.046 | 0.002 | 0.246 | \n", + "
unit_idnum Var | 0.036 | 0.090 | \n", + " |
\n", + " | average_rate | \n", + "gridness | \n", + "information_specificity | \n", + "max_rate | \n", + "information_rate | \n", + "in_field_mean_rate | \n", + "out_field_mean_rate | \n", + "speed_score | \n", + "spacing | \n", + "field_area | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
LMM Baseline I - 11 Hz | \n", + "0.928209 | \n", + "0.054459 | \n", + "0.427792 | \n", + "0.088252 | \n", + "0.000080 | \n", + "0.285853 | \n", + "0.759764 | \n", + "0.016639 | \n", + "0.441497 | \n", + "0.112510 | \n", + "
LMM Baseline I - Baseline II | \n", + "0.626481 | \n", + "0.587566 | \n", + "0.618199 | \n", + "0.955198 | \n", + "0.265741 | \n", + "0.595705 | \n", + "0.642428 | \n", + "0.040733 | \n", + "0.985474 | \n", + "0.535198 | \n", + "
LMM Baseline II - 30 Hz | \n", + "0.556933 | \n", + "0.107195 | \n", + "0.839598 | \n", + "0.315473 | \n", + "0.081515 | \n", + "0.195477 | \n", + "0.665571 | \n", + "0.382314 | \n", + "0.603904 | \n", + "0.729193 | \n", + "
\n", + " | average_rate | \n", + "gridness | \n", + "information_specificity | \n", + "max_rate | \n", + "information_rate | \n", + "in_field_mean_rate | \n", + "out_field_mean_rate | \n", + "speed_score | \n", + "spacing | \n", + "field_area | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
LMM Baseline I - 11 Hz | \n", + "-2.180247281054466, 2.390356722874524 | \n", + "-0.0023867695467641864, 0.25185586146463945 | \n", + "-0.051885367585011555, 0.12239826383625366 | \n", + "-0.6829644446775598, 9.804487199879677 | \n", + "0.19919191566537162, 0.5924315270050661 | \n", + "-1.331724110761481, 4.516001132500179 | \n", + "-2.2810214616259765, 1.6653183103649472 | \n", + "0.006403510267647752, 0.0641586683092662 | \n", + "-0.03396312071531312, 0.014810117866574444 | \n", + "-0.0034073546240015826, 0.03240571021475902 | \n", + "
LMM Baseline I - Baseline II | \n", + "-2.9916621645393118, 1.8014641972655332 | \n", + "-0.08520533103529787, 0.15040363600350248 | \n", + "-0.09750576854071875, 0.05796956577487157 | \n", + "-5.89536982035741, 6.243311205778393 | \n", + "-0.33674475640623824, 0.09282359337229243 | \n", + "-3.975512425442468, 2.2816253105581294 | \n", + "-2.481648525103001, 1.531057021714961 | \n", + "-0.05295935481734079, -0.0011404936657067148 | \n", + "-0.01536441943997291, 0.015081590471976884 | \n", + "-0.025402212987792654, 0.013191871767799448 | \n", + "
LMM Baseline II - 30 Hz | \n", + "-1.6613041417405623, 3.083251607547346 | \n", + "-0.02440548847993497, 0.2496890970945263 | \n", + "-0.06538802363578307, 0.08044872106578226 | \n", + "-3.155261296686419, 9.780244204335176 | \n", + "-0.02805504733315664, 0.47641353819177146 | \n", + "-1.0103474044666294, 4.941540621504646 | \n", + "-1.5577059549335808, 2.439136826914514 | \n", + "-0.015556136704263908, 0.04057715367485169 | \n", + "-0.01107815629347443, 0.019054030954896606 | \n", + "-0.023819756067117653, 0.016668142405318446 | \n", + "
\n", + " | average_rate | \n", + "gridness | \n", + "information_specificity | \n", + "max_rate | \n", + "information_rate | \n", + "in_field_mean_rate | \n", + "out_field_mean_rate | \n", + "speed_score | \n", + "spacing | \n", + "field_area | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
LMM Baseline I - 11 Hz | \n", + "0.105055 | \n", + "0.124735 | \n", + "0.035256 | \n", + "4.560761 | \n", + "0.395812 | \n", + "1.592139 | \n", + "-0.307852 | \n", + "0.035281 | \n", + "-0.009577 | \n", + "0.014499 | \n", + "
LMM Baseline I - Baseline II | \n", + "-0.595099 | \n", + "0.032599 | \n", + "-0.019768 | \n", + "0.173971 | \n", + "-0.121961 | \n", + "-0.846944 | \n", + "-0.475296 | \n", + "-0.027050 | \n", + "-0.000141 | \n", + "-0.006105 | \n", + "
LMM Baseline II - 30 Hz | \n", + "0.710974 | \n", + "0.112642 | \n", + "0.007530 | \n", + "3.312491 | \n", + "0.224179 | \n", + "1.965597 | \n", + "0.440715 | \n", + "0.012511 | \n", + "0.003988 | \n", + "-0.003576 | \n", + "
\n", + " | average_rate | \n", + "gridness | \n", + "information_specificity | \n", + "max_rate | \n", + "information_rate | \n", + "in_field_mean_rate | \n", + "out_field_mean_rate | \n", + "speed_score | \n", + "spacing | \n", + "field_area | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
LMM Baseline I - 11 Hz | \n", + "0.105449 | \n", + "0.133233 | \n", + "0.036166 | \n", + "4.734064 | \n", + "0.417936 | \n", + "1.629185 | \n", + "0.322805 | \n", + "0.037336 | \n", + "0.016343 | \n", + "0.014129 | \n", + "
LMM Baseline I - Baseline II | \n", + "0.671719 | \n", + "0.052668 | \n", + "0.026500 | \n", + "0.151797 | \n", + "0.147302 | \n", + "0.847032 | \n", + "0.602720 | \n", + "0.021480 | \n", + "0.023525 | \n", + "0.008499 | \n", + "
LMM Baseline II - 30 Hz | \n", + "0.814797 | \n", + "0.100659 | \n", + "0.007509 | \n", + "3.100584 | \n", + "0.193361 | \n", + "2.016844 | \n", + "0.521574 | \n", + "0.016176 | \n", + "0.007847 | \n", + "0.008441 | \n", + "
\n", + " | Baseline I - 11 Hz | \n", + "Baseline I - Baseline II | \n", + "Baseline II - 30 Hz | \n", + "
---|---|---|---|
Average rate | \n", + "2.669697 | \n", + "2.775000 | \n", + "2.814286 | \n", + "
Gridness | \n", + "0.144211 | \n", + "0.136500 | \n", + "0.154615 | \n", + "
Information specificity | \n", + "0.105789 | \n", + "0.091765 | \n", + "0.087297 | \n", + "
Max rate | \n", + "6.108696 | \n", + "6.951613 | \n", + "7.593750 | \n", + "
Information rate | \n", + "0.231034 | \n", + "0.253158 | \n", + "0.285000 | \n", + "
In field mean rate | \n", + "3.410345 | \n", + "3.646667 | \n", + "3.468000 | \n", + "
Out field mean rate | \n", + "2.285000 | \n", + "2.340000 | \n", + "2.404762 | \n", + "
Speed score | \n", + "0.365714 | \n", + "0.029091 | \n", + "0.036545 | \n", + "
Spacing | \n", + "0.029259 | \n", + "0.018495 | \n", + "0.018404 | \n", + "
Field area | \n", + "0.260000 | \n", + "0.358824 | \n", + "0.378481 | \n", + "