%load_ext autoreload
%autoreload 2
import os
import pathlib
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import re
import shutil
import pandas as pd
import scipy.stats
from functools import reduce
import statsmodels
import seaborn as sns
import exdir
import expipe
from distutils.dir_util import copy_tree
import septum_mec
import spatial_maps as sp
import head_direction.head as head
import septum_mec.analysis.data_processing as dp
import septum_mec.analysis.registration
from septum_mec.analysis.plotting import violinplot, savefig, despine
from tqdm.notebook import tqdm
tqdm.pandas()
from septum_mec.analysis.statistics import (
load_data_frames,
make_paired_tables,
make_statistics_table,
estimate_power_lmm,
estimate_power_lmm_paralell,
LMM,
estimate_sample_size_lmm,
estimate_sample_size_lmm_paralell
)
project_path = dp.project_path()
project = expipe.get_project(project_path)
actions = project.actions
output_path = pathlib.Path("output") / "comparisons-power"
(output_path / "statistics").mkdir(exist_ok=True, parents=True)
(output_path / "figures").mkdir(exist_ok=True, parents=True)
(output_path / "data").mkdir(exist_ok=True, parents=True)
data, labels, colors, queries = load_data_frames()
columns = [
'average_rate',
'gridness',
'information_specificity',
'max_rate',
'information_rate',
'in_field_mean_rate',
'out_field_mean_rate',
'speed_score',
'spacing',
'field_area'
]
results, labels = make_paired_tables(data, columns)
plt.rc('axes', titlesize=12)
plt.rcParams.update({
'font.size': 12,
'figure.figsize': (6, 4),
'figure.dpi': 150
})
results['gridcell']['gridness'][labels].plot.density()
summary = pd.DataFrame()
for key, df in results['gridcell'].items():
Key = key.replace('_', ' ').capitalize()
for label in labels:
summary.loc[label, Key] = "{:.3f} ± {:.3f}".format(df[label].mean(), df[label].std())
summary.T
mdf.summary()
vss = [
('Baseline I', '11 Hz'),
('Baseline I', 'Baseline II'),
('Baseline II', '30 Hz'),
]
ps = pd.DataFrame()
ci = pd.DataFrame()
ef = pd.DataFrame()
mf = pd.DataFrame()
for key, df in results['gridcell'].items():
Key = key.replace('_', ' ').capitalize()
for vs in vss:
pval, low, high, mdf = LMM(df, *vs, key)
ps.loc[f'LMM {vs[0]} - {vs[1]}', key] = pval
ci.loc[f'LMM {vs[0]} - {vs[1]}', key] = f'{low}, {high}'
ef.loc[f'LMM {vs[0]} - {vs[1]}', key] = mdf.params[1]
mf.loc[f'LMM {vs[0]} - {vs[1]}', key] = abs(df[vs[0]].mean() - df[vs[1]].mean())
ps
ci
ef
mf
effect_ranges = {
'gridness': (.01, .3, .01),
'average_rate': (.5, 6, .2),
'information_specificity': (.01,.2,.01),
'max_rate': (1,15,.5),
'information_rate': (.1,.4,.02),
'in_field_mean_rate': (.5, 6, .2),
'out_field_mean_rate': (.5, 6, .2),
'speed_score': (.01,.1,.005), # if run again, change this to go to 0.1
'spacing': (.01,.1,.005), # if run again, change this to go to 0.1
'field_area': (.01,.1,.005),# if run again, change this to go to 0.1
}
powers = {}
for vs in vss:
powers[vs] = {}
for key, df in tqdm(results['gridcell'].items(), desc=' - '.join(vs)):
power, effect_size = estimate_power_lmm_paralell(results['gridcell'][key], *vs, effect_range=effect_ranges[key])
powers[vs][key] = {'p': power, 'e': effect_size}
plt.rc('axes', titlesize=12)
plt.rcParams.update({
'font.size': 12,
'figure.figsize': (3.7, 2.2),
'figure.dpi': 150
})
for vs, vals in powers.items():
for key, val in vals.items():
plt.figure()
plt.plot(val['e'], val['p'])
plt.title(' - '.join(vs) + ', ' + key.replace('_', ' ').capitalize())
savefig(output_path / 'figures' / f"{'-'.join(vs)}_{key}")
np.savez(output_path / 'data' / 'powers.npz', powers)
from scipy.interpolate import interp1d
effect_size_power_80p = pd.DataFrame()
for vs, vals in powers.items():
for key, val in vals.items():
e = interp1d(val['p'], val['e'])(0.8)
effect_size_power_80p.loc[key.replace('_', ' ').capitalize(), ' - '.join(vs)] = e
effect_size_power_80p
effect_size_power_80p.to_latex(output_path / "statistics" / "effect_size_power_80p.tex")
effect_size_power_80p.to_csv(output_path / "statistics" / "effect_size_power_80p.csv")
effect_sizes = {
'gridness': .1,
'average_rate': 1,
'information_specificity': 0.1,
'max_rate': 5,
'information_rate': 0.2,
'in_field_mean_rate': 3,
'out_field_mean_rate': 2,
'speed_score': 0.1,
'spacing': 0.05,
'field_area': 0.05
}
n_samples_ranges = {
'gridness': (10, 300, 10),
'average_rate': (10, 300, 10),
'information_specificity': (10, 120, 10),
'max_rate': (10, 120, 10),
'information_rate': (10, 120, 10),
'in_field_mean_rate': (10, 120, 10),
'out_field_mean_rate': (10, 120, 10),
'speed_score': (5, 60, 5),
'spacing': (5, 60, 5),
'field_area': (5, 60, 5)
}
powers_sample_size = {}
for vs in vss:
powers_sample_size[vs] = {}
for key, df in tqdm(results['gridcell'].items(), desc=' - '.join(vs)):
power, sample_size, effective_sample_size = estimate_sample_size_lmm(
results['gridcell'][key], *vs, effect_size=effect_sizes[key], n_samples_range=n_samples_ranges[key], key=key)
powers_sample_size[vs][key] = {'p': power, 's': sample_size, 'es': effective_sample_size}
for vs, vals in powers_sample_size.items():
for key, val in vals.items():
plt.figure()
plt.plot(val['es'], val['p'])
try:
e = interp1d(val['p'], val['es'])(0.8)
plt.axvline(e, label=f'N={e:.0f}', color='k')
plt.axhline(0.8, color='k', alpha=0.1)
plt.legend()
except:
pass
n1, n2 = results['gridcell'][key][list(vs)].count().values
plt.title(f"{vs[0]} ({n1}), {vs[1]} ({n2}), {key.replace('_', ' ').capitalize()} ({effect_sizes[key]})")
plt.ylabel('Power p(~H0|H1)')
plt.xlabel('N neurons')
savefig(output_path / 'figures' / f"sample_size_{'-'.join(vs)}_{key}")