# This script is part of pymaid (http://www.github.com/navis-org/pymaid).
# Copyright (C) 2017 Philipp Schlegel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
""" This module contains functions to analyse connectivity.
"""
import math
from itertools import combinations
import pandas as pd
import numpy as np
import scipy.spatial
import scipy.stats
from navis import graph_utils, intersect
from . import fetch, core, utils, config
# Set up logging
logger = config.get_logger(__name__)
__all__ = sorted(['filter_connectivity', 'cable_overlap',
'predict_connectivity', 'adjacency_matrix', 'group_matrix',
'adjacency_from_connectors', 'cn_table_from_connectors',
'connection_density', 'sparseness', 'shared_partners'])
[docs]def filter_connectivity(x, restrict_to, remote_instance=None):
"""Filter connectivity data by volume or skeleton data.
Use this e.g. to restrict connectivity to edges within a given volume or
to certain compartments of neurons.
Important
---------
Duplicate skeleton IDs (e.g. two fragments from the same neuron) will be
collapsed back into a single neuron! Use only a single fragment per neuron.
For multiple fragments/neuron see :func:`~pymaid.adjacency_from_connectors`.
Order of columns/rows may change during filtering.
Parameters
----------
x : Connectivity object
Currently accepts either:
(1) Connectivity table from :func:`~pymaid.get_partners`
(2) Adjacency matrix from :func:`~pymaid.adjacency_matrix`
restrict_to : str | pymaid.Volume | CatmaidNeuronList
Volume or neurons to restrict connectivity to. Strings
will be interpreted as volumes.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
Restricted connectivity data
See Also
--------
:func:`~pymaid.adjacency_from_connectors`
Use this function if you have multiple fragments per neuron.
"""
if not isinstance(restrict_to, (str, core.Volume,
core.CatmaidNeuron,
core.CatmaidNeuronList)):
raise TypeError('Unable to restrict connectivity '
'to type {}'.format(type(restrict_to)))
if isinstance(restrict_to, str):
restrict_to = fetch.get_volume(
restrict_to, remote_instance=remote_instance)
if not isinstance(x, pd.DataFrame):
raise TypeError('Input must be pandas DataFrame, got '
' "{}"'.format(type(x)))
datatype = getattr(x, 'datatype', None)
# If no datatype attribute, try guessing
if isinstance(datatype, type(None)):
if 'relation' in x.columns:
datatype = 'connectivity_table'
else:
datatype = 'adjacency_matrix'
if datatype == 'connectivity_table':
neurons = [c for c in x.columns if str(c).isnumeric()]
"""
# Keep track of existing edges
old_upstream = np.array([ [ (source, n) for n in neurons ] for source in x[x.relation=='upstream'].skeleton_id ])
old_upstream = old_upstream[ x[x.relation=='upstream'][neurons].values > 0 ]
old_downstream = np.array([ [ (n, target) for n in neurons ] for target in x[x.relation=='downstream'].skeleton_id ])
old_downstream = old_downstream[ x[x.relation=='downstream'][neurons].values > 0 ]
old_edges = np.concatenate([old_upstream, old_downstream], axis=0).astype(int)
"""
# First get connector between neurons on the table
if not x[x.relation == 'upstream'].empty:
upstream = fetch.get_connectors_between(x[x.relation == 'upstream'].skeleton_id,
neurons,
directional=True,
remote_instance=remote_instance)
# Now filter connectors
if isinstance(restrict_to, (core.CatmaidNeuron, core.CatmaidNeuronList)):
upstream = upstream[upstream.connector_id.isin(
restrict_to.connectors.connector_id.values)]
elif isinstance(restrict_to, core.Volume):
cn_locs = np.vstack(upstream.connector_loc.values)
upstream = upstream[intersect.in_volume(cn_locs, restrict_to)]
else:
upstream = None
if not x[x.relation == 'downstream'].empty:
downstream = fetch.get_connectors_between(neurons,
x[x.relation == 'downstream'].skeleton_id,
directional=True,
remote_instance=remote_instance)
# Now filter connectors
if isinstance(restrict_to, (core.CatmaidNeuron, core.CatmaidNeuronList)):
downstream = downstream[downstream.connector_id.isin(
restrict_to.connectors.connector_id.values)]
elif isinstance(restrict_to, core.Volume):
cn_locs = np.vstack(downstream.connector_loc.values)
downstream = downstream[intersect.in_volume(
cn_locs, restrict_to)]
else:
downstream = None
if not isinstance(downstream, type(None)) and not isinstance(upstream, type(None)):
cn_data = pd.concat([upstream, downstream], axis=0)
elif isinstance(downstream, type(None)):
cn_data = upstream
else:
cn_data = downstream
elif datatype == 'adjacency_matrix':
if getattr(x, 'is_grouped', False):
raise TypeError('Adjacency matrix appears to be grouped. Unable '
'to process that.')
cn_data = fetch.get_connectors_between(x.index.values,
x.columns.values,
directional=True,
remote_instance=remote_instance)
# Now filter connectors
if isinstance(restrict_to, (core.CatmaidNeuron, core.CatmaidNeuronList)):
cn_data = cn_data[cn_data.connector_id.isin(
restrict_to.connectors.connector_id.values)]
elif isinstance(restrict_to, core.Volume):
cn_locs = np.vstack(cn_data.connector_loc.values)
cn_data = cn_data[intersect.in_volume(cn_locs, restrict_to)]
else:
raise TypeError('Unknown connectivity data type "{}".'.format(datatype)
+ ' See help(filter_connectivity) for details.')
if cn_data.empty:
logger.warning('No connectivity left after filtering')
# return
# Reconstruct connectivity data:
# Collect edges
edges = cn_data[['source_neuron', 'target_neuron']].values
if edges.shape[0] > 0:
# Turn individual edges into synaptic connections
unique_edges, counts = np.unique(edges, return_counts=True, axis=0)
unique_skids = np.unique(edges).astype(str)
unique_edges = unique_edges.astype(str)
else:
unique_edges = []
counts = []
unique_skids = []
# Create empty adj_mat
adj_mat = pd.DataFrame(np.zeros((len(unique_skids), len(unique_skids))),
columns=unique_skids, index=unique_skids)
# Fill in values
for i, e in enumerate(config.tqdm(unique_edges,
disable=config.pbar_hide,
desc='Regenerating',
leave=config.pbar_leave)):
# using df.at here speeds things up tremendously!
adj_mat.at[str(e[0]), str(e[1])] = counts[i]
if datatype == 'adjacency_matrix':
return adj_mat.reindex(index=x.index.astype(str),
columns=x.columns.astype(str)).fillna(0)
# Generate connectivity table by subsetting adjacency matrix to our
# neurons of interest
us_neurons = [n for n in neurons if n in adj_mat.columns]
all_upstream = adj_mat[adj_mat[us_neurons].sum(axis=1) > 0][us_neurons]
all_upstream['skeleton_id'] = all_upstream.index
all_upstream['relation'] = 'upstream'
ds_neurons = [n for n in neurons if n in adj_mat.T.columns]
all_downstream = adj_mat.T[adj_mat.T[ds_neurons].sum(
axis=1) > 0][ds_neurons]
all_downstream['skeleton_id'] = all_downstream.index
all_downstream['relation'] = 'downstream'
# Merge tables
df = pd.concat([all_upstream, all_downstream], axis=0, ignore_index=True)
remaining_neurons = [n for n in neurons if n in df.columns]
# Remove neurons that were not in the original data - under certain
# circumstances, neurons can sneak back in
df = df[df.skeleton_id.isin(x.skeleton_id)]
# Use original connectivity table to populate data
aux = x.set_index('skeleton_id')[['neuron_name', 'num_nodes']].to_dict()
df['num_nodes'] = [aux['num_nodes'][s] for s in df.skeleton_id.values]
df['neuron_name'] = [aux['neuron_name'][s] for s in df.skeleton_id.values]
df['total'] = df[remaining_neurons].sum(axis=1)
# Reorder columns
df = df[['neuron_name', 'skeleton_id', 'num_nodes',
'relation', 'total'] + remaining_neurons]
df.sort_values(['relation', 'total'], inplace=True, ascending=False)
df.type = 'connectivity_table'
df.reset_index(drop=True, inplace=True)
return df
def cable_overlap(a, b, dist=2, method='min'):
"""Calculate the amount of cable of neuron A within distance of neuron B.
DEPCRECATED! PLEASE USE `navis.cable_overlap` INSTEAD!
Parameters
----------
a,b : CatmaidNeuron | CatmaidNeuronList
Neuron(s) for which to compute cable within distance.
dist : int, optional
Maximum distance in microns [um].
method : 'min' | 'max' | 'avg'
Method by which to calculate the overlapping cable between
two cables. Assuming that neurons A and B have 300 and 150
um of cable within given distances, respectively:
1. 'min' returns 150
2. 'max' returns 300
3. 'avg' returns 225
Returns
-------
pandas.DataFrame
Matrix in which neurons A are rows, neurons B are columns. Cable
within distance is given in microns::
skidB1 skidB2 skidB3 ...
skidA1 5 1 0
skidA2 10 20 5
skidA3 4 3 15
...
"""
raise DeprecationWarning('This function has been moved to `navis`. Please '
'use `navis.cable_overlap` as drop-in replacement.'
' Note that the navis function will return '
'in the same units as the neuron - i.e. typically '
'nanometers for CatmaidNeurons.')
[docs]def predict_connectivity(source, target, method='possible_contacts',
remote_instance=None, **kwargs):
"""Calculate potential synapses from source onto target neurons.
Based on a concept by `Alexander Bates <https://github.com/alexanderbates/catnat>`_.
Parameters
----------
source,target : CatmaidNeuron | CatmaidNeuronList
Neuron(s) for which to compute potential connectivity.
This is unidirectional: source -> target.
method : 'possible_contacts'
Method to use for calculations. See Notes.
**kwargs
1. For method 'possible_contacts':
- ``dist`` to set distance between connectors and
nodes manually.
- ``n_irq`` to set number of interquartile ranges of
harmonic mean. Default = 2.
Notes
-----
Method ``possible_contacts``:
1. Calculating harmonic mean of distances ``d`` (connector->node)
at which onnections between neurons A and neurons B occur.
2. For all presynapses of neurons A, check if they are within
``n_irq`` (default=2) interquartile range of ``d`` of a
neuron B node.
Neurons without cable or presynapses will be assigned a predicted
connectivity of 0.
Returns
-------
pandas.DataFrame
Matrix holding possible synaptic contacts. Sources are rows,
targets are columns::
target1 target2 target3 ...
source1 5 1 0
source2 10 20 5
source3 4 3 15
...
"""
remote_instance = utils._eval_remote_instance(remote_instance)
if not remote_instance:
try:
remote_instance = source._remote_instance
except BaseException:
pass
for _ in [source, target]:
if not isinstance(_, (core.CatmaidNeuron, core.CatmaidNeuronList)):
raise TypeError('Need CatmaidNeuron/List, got '
'"{}"'.format(type(_)))
if isinstance(source, core.CatmaidNeuron):
source = core.CatmaidNeuronList(source)
if isinstance(target, core.CatmaidNeuron):
target = core.CatmaidNeuronList(target)
allowed_methods = ['possible_contacts']
if method not in allowed_methods:
raise ValueError('Unknown method "{0}". Allowed methods: "{0}"'.format(
method, ','.join(allowed_methods)))
matrix = pd.DataFrame(np.zeros((source.shape[0], target.shape[0])),
index=source.skeleton_id,
columns=target.skeleton_id)
if kwargs.get('dist', None):
dist_threshold = kwargs.get('dist')
else:
# First let's calculate at what distance synapses are being made
cn_between = fetch.get_connectors_between(source, target,
remote_instance=remote_instance)
if cn_between.shape[0] > 0:
cn_locs = np.vstack(cn_between.connector_loc.values)
tn_locs = np.vstack(cn_between.node2_loc.values)
distances = np.sqrt(np.sum((cn_locs - tn_locs) ** 2, axis=1))
logger.info('Average connector->node distances: '
'{:.2f} +/- {:.2f} nm'.format(distances.mean(),
distances.std()))
else:
logger.warning('No existing connectors to calculate average'
'connector->node distance found. Falling'
'back to default of 1um. Use <dist> argument'
'to set manually.')
distances = [1000]
# Calculate distances threshold
n_irq = kwargs.get('n_irq', 2)
# We use the median because some very large connector->treenode
# distances can massively skew the average
dist_threshold = scipy.stats.hmean(distances) + n_irq * scipy.stats.iqr(distances)
with config.tqdm(total=len(target), desc='Predicting',
disable=config.pbar_hide,
leave=config.pbar_leave) as pbar:
for t in target:
# If no nodes, predict 0 connectivity and skip
if t.nodes.empty:
matrix.loc[t.skeleton_id, source.skeleton_id] = 0
continue
# Create cKDTree for target
tree = scipy.spatial.cKDTree(
t.nodes[['x', 'y', 'z']].values, leafsize=10)
for s in source:
# If not synapses, predict 0 connectivity and skip
if s.presynapses.empty:
matrix.at[s.skeleton_id, t.skeleton_id] = 0
continue
# Query against presynapses
dist, ix = tree.query(s.presynapses[['x', 'y', 'z']].values,
k=1,
distance_upper_bound=dist_threshold,
workers=-1
)
# Calculate possible contacts
possible_contacts = sum(dist != float('inf'))
matrix.at[s.skeleton_id, t.skeleton_id] = possible_contacts
pbar.update(1)
return matrix.astype(int)
[docs]def cn_table_from_connectors(x, remote_instance=None):
"""Generate connectivity table from neurons' connectors.
This function creates the connectivity table from scratch using just the
neurons' connectors. This function is able to deal with non-unique
skeleton IDs (most other functions won't). Use it e.g. when you
split neurons into multiple fragments. *The order of the input
CatmaidNeuronList is preserved!*
Parameters
----------
x : CatmaidNeuron | CatmaidNeuronList
Neuron(s) for which to generate connectivity table.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
pandas.DataFrame
DataFrame in which each row represents a neuron and the number of
synapses with the query neurons::
neuron_name skeleton_id relation total skid1 skid2 ...
0 name1 skid1 upstream n_syn n_syn ...
1 name2 skid2 downstream n_syn n_syn ..
2 name3 skid3 usptream n_syn n_syn .
... ...
``relation`` can be ``'upstream'`` (incoming), ``'downstream'``
(outgoing), ``'attachment'`` or ``'gapjunction'`` (gap junction).
See Also
--------
:func:`~pymaid.get_partners`
If you are working with "intact" neurons. Much faster!
:func:`~pymaid.filter_connectivity`
Use this function if you have only a single fragment per neuron
(e.g. just the axon). Also way faster.
Examples
--------
>>> # Fetch some neurons
>>> x = pymaid.get_neuron('annotation:PD2a1/b1')
>>> # Split into axon / dendrites
>>> x.reroot(x.soma)
>>> split = pymaid.split_axon_dendrite(x)
>>> # Regenerate cn_table
>>> cn_table = pymaid.cn_table_from_connectors(split)
>>> # Skeleton IDs are non-unique but column order = input order:
>>> # in this example the first occurrence is axon, the second dendrites
>>> cn_table.head()
"""
remote_instance = utils._eval_remote_instance(remote_instance)
if not isinstance(x, (core.CatmaidNeuron, core.CatmaidNeuronList)):
raise TypeError('Need CatmaidNeuron/List, got "{}"'.format(type(x)))
if isinstance(x, core.CatmaidNeuron):
x = core.CatmaidNeuronList(x)
# Get connector details for all neurons
all_cn = x.connectors.connector_id.values
cn_details = fetch.get_connector_details(all_cn, remote_instance=remote_instance)
# Remove connectors for which there are either no pre- or no postsynaptic
# neurons
cn_details = cn_details[cn_details.postsynaptic_to.apply(len) != 0]
cn_details = cn_details[~cn_details.presynaptic_to.isnull()]
# We need to map treenode ID to skeleton ID in cases where there are more
# links (postsynaptic_to_node) than targets (postsynaptic_to)
multi_links = cn_details[cn_details.postsynaptic_to.apply(len) < cn_details.postsynaptic_to_node.apply(len)]
if not multi_links.empty:
tn_to_fetch = [tn for l in multi_links.postsynaptic_to_node for tn in l]
tn_to_skid = fetch.get_skid_from_node(tn_to_fetch, remote_instance=remote_instance)
else:
tn_to_skid = {}
# Collect all pre and postsynaptic neurons
all_pre = cn_details[~cn_details.presynaptic_to.isin(x.skeleton_id.astype(int))]
all_post = cn_details[cn_details.presynaptic_to.isin(x.skeleton_id.astype(int))]
all_partners = np.append(all_pre.presynaptic_to.values,
[n for l in all_post.postsynaptic_to.values for n in l])
us_dict = {}
ds_dict = {}
# Go over over all neurons and process connectivity
for i, n in enumerate(config.tqdm(x, desc='Processing',
disable=config.pbar_hide, leave=config.pbar_leave)):
# First prepare upstream partners:
# Get all treenodes
this_tn = set(n.nodes.node_id.values)
# Prepare upstream partners
this_us = all_pre[all_pre.connector_id.isin(n.connectors.connector_id.values)].copy()
# Get the number of all links per connector
this_us['n_links'] = [len(this_tn & set(r.postsynaptic_to_node))
for r in this_us.itertuples()]
# Group by input and store as dict. Attention: we NEED to index by
# neuron as skeleton IDs might not be unique!
us_dict[n] = this_us.groupby('presynaptic_to').n_links.sum().to_dict()
this_us = this_us.groupby('presynaptic_to').n_links.sum()
# Now prepare downstream partners:
# Get all downstream connectors
this_ds = all_post[all_post.presynaptic_to == int(n.skeleton_id)]
# Prepare dict
ds_dict[n] = {p: 0 for p in all_partners}
# Easy cases first (single link to target per connector)
is_single = this_ds.postsynaptic_to.apply(len) >= this_ds.postsynaptic_to_node.apply(len)
for r in this_ds[is_single].itertuples():
for s in r.postsynaptic_to:
ds_dict[n][s] += 1
# Now hard cases - will have to look up skeleton ID via treenode ID
for r in this_ds[~is_single].itertuples():
for s in r.postsynaptic_to_node:
ds_dict[n][tn_to_skid[s]] += 1
# Now that we have all data, let's generate the table
us_table = pd.DataFrame.from_dict(us_dict)
ds_table = pd.DataFrame.from_dict(ds_dict)
# Make sure we keep the order of the original neuronlist
us_table = us_table[[n for n in x]]
us_table.columns=[n.skeleton_id for n in us_table.columns]
ds_table = ds_table[[n for n in x]]
ds_table.columns=[n.skeleton_id for n in ds_table.columns]
ds_table['relation'] = 'downstream'
us_table['relation'] = 'upstream'
# Generate table
cn_table = pd.concat([us_table, ds_table], axis=0)
# Replace NaN with 0
cn_table = cn_table.fillna(0)
# Make skeleton ID a column
cn_table = cn_table.reset_index(drop=False)
cn_table.columns = ['skeleton_id'] + list(cn_table.columns[1:])
# Add names
names = fetch.get_names(cn_table.skeleton_id.values,
remote_instance=remote_instance)
cn_table['neuron_name'] = [names[str(s)] for s in cn_table.skeleton_id.values]
cn_table['total'] = cn_table[x.skeleton_id].sum(axis=1)
# Drop rows with 0 synapses (e.g. if neuron is only up- but not downstream)
cn_table = cn_table[cn_table.total > 0]
# Sort by number of synapses
cn_table = cn_table.sort_values(['relation', 'total'], ascending=False).reset_index()
# Sort columnes
cn_table = cn_table[['neuron_name', 'skeleton_id', 'relation', 'total'] + list(set(x.skeleton_id))]
return cn_table
[docs]def adjacency_from_connectors(source, target=None, remote_instance=None):
"""Regenerate adjacency matrices from neurons' connectors.
Notes
-----
This function creates an adjacency matrix from scratch using just the
neurons' connectors. This function is able to deal with non-unique
skeleton IDs (most other functions are not). Use it e.g. when you
split neurons into multiple fragments.
Parameters
----------
source,target : skeleton IDs | CatmaidNeuron | CatmaidNeuronList
Neuron(s) for which to generate adjacency matrix.
If ``target==None``, will use ``target=source``.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
pandas.DataFrame
Matrix holding possible synaptic contacts. Sources are rows,
targets are columns. Labels are skeleton IDs. Order is preserved::
target1 target2 target3 ...
source1 5 1 0
source2 10 20 5
source3 4 3 15
...
See Also
--------
:func:`~pymaid.adjacency_matrix`
If you are working with "intact" neurons. Much faster!
:func:`~pymaid.filter_connectivity`
Use this function if you have only a single fragment per neuron
(e.g. just the axon). Also way faster.
Examples
--------
>>> # Fetch some neurons
>>> x = pymaid.get_neuron('annotation:PD2a1/b1')
>>> # Split into axon / dendrites
>>> x.reroot(x.soma)
>>> split = pymaid.split_axon_dendrite(x)
>>> # Regenerate all-by-all adjacency matrix
>>> adj = pymaid.adjacency_from_connectors(split)
>>> # Skeleton IDs are non-unique but column/row order = input order:
>>> # in this example, the first occurrence is axon, the second dendrites
>>> adj.head()
"""
remote_instance = utils._eval_remote_instance(remote_instance)
if not isinstance(source, (core.CatmaidNeuron, core.CatmaidNeuronList)):
skids = utils.eval_skids(source, remote_instance=remote_instance)
source = fetch.get_neuron(skids, remote_instance=remote_instance)
if isinstance(target, type(None)):
target = source
elif not isinstance(target, (core.CatmaidNeuron, core.CatmaidNeuronList)):
skids = utils.eval_skids(target, remote_instance=remote_instance)
target = fetch.get_neuron(skids, remote_instance=remote_instance)
if isinstance(source, core.CatmaidNeuron):
source = core.CatmaidNeuronList(source)
if isinstance(target, core.CatmaidNeuron):
target = core.CatmaidNeuronList(target)
# Generate empty adjacency matrix
adj = np.zeros((len(source), len(target)))
# Get connector details for all neurons
all_cn = list(set(np.append(source.connectors.connector_id.values,
target.connectors.connector_id.values)))
cn_details = fetch.get_connector_details(all_cn,
remote_instance=remote_instance)
# Now go over all source neurons and process connections
for i, s in enumerate(config.tqdm(source, desc='Processing',
disable=config.pbar_hide, leave=config.pbar_leave)):
# Get all connectors presynaptic for this source
this_cn = cn_details[(cn_details.presynaptic_to == int(s.skeleton_id)) &
(cn_details.connector_id.isin(s.connectors.connector_id))
]
# Go over all target neurons
for k, t in enumerate(target):
t_tn = set(t.nodes.node_id.values)
t_post = t.postsynapses.connector_id.values
# Extract number of connections from source to this target
this_t = this_cn[this_cn.connector_id.isin(t_post)]
# Now figure out how many links are between this connector and
# the target
n_links = sum([len(t_tn & set(r.postsynaptic_to_node))
for r in this_t.itertuples()])
adj[i][k] = n_links
return pd.DataFrame(adj,
index=source.skeleton_id,
columns=target.skeleton_id)
def _edges_from_connectors(a, b=None, remote_instance=None):
"""Generate list of edges between two sets of neurons from their
connector data.
Attention: this is UNIDIRECTIONAL (a->b)!
Parameters
----------
a,b : CatmaidNeuron | CatmaidNeuronList | skeleton IDs
Either a or b HAS to be a neuron object.
If ``b=None``, will use ``b=a``.
"""
if not isinstance(a, (core.CatmaidNeuronList, core.CatmaidNeuron)) and \
not isinstance(b, (core.CatmaidNeuronList, core.CatmaidNeuron)):
raise ValueError('Either neuron a or b has to be a neuron object.')
if isinstance(b, type(None)):
b = a
cn_between = fetch.get_connectors_between(
a, b, remote_instance=remote_instance)
if isinstance(a, (core.CatmaidNeuron, core.CatmaidNeuronList)):
cn_a = a.connectors.connector_id.values
cn_between = cn_between[cn_between.connector_id.isin(cn_a)]
if isinstance(b, (core.CatmaidNeuron, core.CatmaidNeuronList)):
cn_b = b.connectors.connector_id.values
cn_between = cn_between[cn_between.connector_id.isin(cn_b)]
# Count source -> target connectors
edges = cn_between.groupby(['source_neuron', 'target_neuron']).count()
# Melt into edge list
edges = edges.reset_index().iloc[:, :3]
edges.columns = ['source_skid', 'target_skid', 'weight']
return edges
[docs]def adjacency_matrix(sources, targets=None, source_grp={}, target_grp={},
fractions=False, syn_threshold=None, syn_cutoff=None,
use_connectors=False, volume_filter=None, remote_instance=None):
"""Generate adjacency matrix between source and target neurons.
Directional: sources = rows, targets = columns.
Parameters
----------
sources
Source neurons as single or list of either:
1. skeleton IDs (int or str)
2. neuron name (str, exact match)
3. annotation: e.g. 'annotation:PN right'
4. CatmaidNeuron or CatmaidNeuronList object
targets
Optional. Target neurons as single or list of either:
1. skeleton IDs (int or str)
2. neuron name (str, exact match)
3. annotation: e.g. ``'annotation:PN right'`
4. CatmaidNeuron or CatmaidNeuronList object
If not provided, ``source neurons = target neurons``.
fractions : bool, optional
If True, will return connectivity as fraction of total
number of postsynaptic links to target neuron.
syn_cutoff : int, optional
If set, will cut off connections ABOVE given value.
syn_threshold : int, optional
If set, will ignore connections with LESS synapses.
source_grp : dict, optional
Use to collapse sources into groups. Can be either:
1. ``{group1: [neuron1, neuron2, ... ], ..}``
2. ``{neuron1: group1, neuron2 : group2, ..}``
``syn_cutoff`` and ``syn_threshold`` are applied
BEFORE grouping!
target_grp : dict, optional
See ``source_grp`` for possible formats.
use_connectors : bool, optional
If True AND ``s`` or ``t`` are ``CatmaidNeuron/List``,
restrict adjacency matrix to their connectors. Use
if e.g. you are using pruned neurons.
volume_filter : Volume | list of Volumes, optional
Volume(s) to restrict connections to. Can be a
pymaid.Volume, the name of a CATMAID volume or a
list thereof.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
matrix : pandas.Dataframe
See Also
--------
:func:`~pymaid.group_matrix`
More fine-grained control over matrix grouping.
:func:`~pymaid.adjacency_from_connectors`
Use this function if you are working with multiple fragments
per neuron.
Examples
--------
Generate and plot a adjacency matrix:
>>> import seaborn as sns
>>> import matplotlib.pyplot as plt
>>> neurons = pymaid.get_neurons('annotation:test')
>>> mat = pymaid.adjacency_matrix(neurons)
>>> g = sns.heatmap(adj_mat, square=True)
>>> g.set_yticklabels(g.get_yticklabels(), rotation=0, fontsize=7)
>>> g.set_xticklabels(g.get_xticklabels(), rotation=90, fontsize=7)
>>> plt.show()
Cut neurons into axon dendrites and compare their connectivity:
>>> # Get a set of neurons
>>> nl = pymaid.get_neurons('annnotation:type_16_candidates')
>>> # Split into axon dendrite by using a tag
>>> nl.reroot(nl.soma)
>>> nl_axon = nl.prune_proximal_to('axon', inplace=False)
>>> nl_dend = nl.prune_distal_to('axon', inplace=False)
>>> # Get a list of the downstream partners
>>> cn_table = pymaid.get_partners(nl)
>>> ds_partners = cn_table[cn_table.relation == 'downstream']
>>> # Take the top 10 downstream partners
>>> top_ds = ds_partners.iloc[:10].skeleton_id.values
>>> # Generate separate adjacency matrices for axon and dendrites
>>> adj_axon = pymaid.adjacency_matrix(nl_axon, top_ds,
... use_connectors=True)
>>> adj_dend = pymaid.adjacency_matrix(nl_dend, top_ds,
... use_connectors=True)
>>> # Rename rows and merge dataframes
>>> adj_axon.index += '_axon'
>>> adj_dend.index += '_dendrite'
>>> adj_merged = pd.concat([adj_axon, adj_dend], axis=0)
>>> # Plot heatmap using seaborn
>>> ax = sns.heatmap(adj_merged)
>>> plt.show()
Restrict adjacency matrix to a given volume:
>>> neurons = pymaid.get_neurons('annotation:glomerulus DA1')
>>> lh = pymaid.get_volume('LH_R')
>>> adj = pymaid.adjacency_matrix(neurons, volume_filter=lh)
Get adjacency matrix with fraction of inputs instead of total
synapse count:
>>> neurons = pymaid.get_neurons('annotation:glomerulus DA1')
>>> adj = pymaid.adjacency_matrix(neurons, fractions=True)
"""
remote_instance = utils._eval_remote_instance(remote_instance)
source_skids = utils.eval_skids(sources, remote_instance=remote_instance)
if isinstance(targets, type(None)):
targets = sources
target_skids = source_skids
else:
target_skids = utils.eval_skids(targets, remote_instance=remote_instance)
# Make sure neurons are integers
source_skids = [int(n) for n in source_skids]
target_skids = [int(n) for n in target_skids]
# Make sure skeleton IDs are unique
source_skids = sorted(set(source_skids), key=source_skids.index)
target_skids = sorted(set(target_skids), key=target_skids.index)
# Get the adjacency matrix
url = remote_instance._get_connectivity_matrix_url()
post = {}
post.update({'rows[{}]'.format(i): s for i, s in enumerate(source_skids)})
post.update({'columns[{}]'.format(i): s for i, s in enumerate(target_skids)})
post['with_locations'] = bool(volume_filter) or use_connectors
# Data will be in format::
# {'source_skid': {'target_skid': {'count': int,
# 'locations': {connector_id: {'pos': [x, y, z],
# 'count: int'}}}}}
data = remote_instance.fetch(url, post=post)
# Check which connectors to keep
if use_connectors or bool(volume_filter):
# Extract connector IDs and their locations
cn_loc = {cn: t['locations'][cn]['pos'] for s in data.values() for t in s.values() for cn in t['locations']}
to_keep = set(cn_loc.keys())
# Remove connectors that aren't part of the neurons anymore
if use_connectors:
if isinstance(sources, (core.CatmaidNeuron, core.CatmaidNeuronList)):
to_keep = to_keep & set(sources.connectors.connector_id.values.astype(str))
if isinstance(targets, (core.CatmaidNeuron, core.CatmaidNeuronList)):
to_keep = to_keep & set(targets.connectors.connector_id.values.astype(str))
# Turn into list so we can check for in_volume
to_keep = np.array(list(to_keep))
if bool(volume_filter):
volume_filter = utils._make_iterable(volume_filter)
for vol in volume_filter:
if not isinstance(vol, core.Volume):
vol = fetch.get_volume(vol, remote_instance=remote_instance)
# Get positions of remaining connectors
pos = np.array([cn_loc[cn] for cn in to_keep])
in_vol = intersect.in_volume(pos, vol)
to_keep = to_keep[in_vol]
# Now recount depending on left-over connectors
to_keep = set(to_keep) # this speeds up the "in" query by ALOT
for s in data:
for t in data[s]:
# Sum up count for connectors that we have kept
new_count = sum([val['count'] for cn, val in data[s][t]['locations'].items() if cn in to_keep])
data[s][t]['count'] = new_count
# Change to records
records = {s: {t: val['count'] for t, val in data[s].items()} for s in data}
else:
records = data
# Parse data into adjacency matrix
matrix = pd.DataFrame.from_records(records).fillna(0).T
# Make sure Skids are integers
matrix.index = matrix.index.astype(int)
matrix.columns = matrix.columns.astype(int)
# Filter and sort to actual sources and targets
matrix = matrix.reindex(index=source_skids, columns=target_skids, fill_value=0)
# Apply cutoff and threshold
matrix = matrix.clip(upper=syn_cutoff)
if syn_threshold:
matrix[matrix < syn_threshold] = 0
# Convert to fractions
if fractions:
cn_counts = fetch.get_connectivity_counts(target_skids,
source_relations=['postsynaptic_to'],
target_relations=['presynaptic_to'],
remote_instance=remote_instance)
cn_counts = cn_counts['connectivity']
div = [list(cn_counts.get(s).values())[0] for s in matrix.columns.astype(str)]
matrix = matrix / div
matrix.datatype = 'adjacency_matrix'
if source_grp or target_grp:
matrix = group_matrix(matrix,
source_grp,
target_grp,
drop_ungrouped=False)
# Make pretty
matrix.columns.name = 'targets'
matrix.index.name = 'sources'
return matrix
[docs]def group_matrix(mat, row_groups={}, col_groups={}, drop_ungrouped=False,
method='SUM', remote_instance=None):
"""Groups adjacency matrix into neuron groups.
Parameters
----------
mat : pandas.DataFrame | numpy.array
Matrix to group.
row_groups : dict, optional
Row groups to be formed. Can be either:
1. ``{group1: [neuron1, neuron2, ...], ...}``
2. ``{neuron1: group1, neuron2:group2, ...}``
If grouping numpy arrays, use indices!
col_groups : dict, optional
Col groups. See ``row_groups`` for details.
drop_ungrouped : bool, optional
If ungrouped, neurons that are not part of a
row/col_group are dropped from the matrix.
method : 'AVERAGE' | 'MAX' | 'MIN' | 'SUM', optional
Method by which values are collapsed into groups.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
pandas.DataFrame
"""
remote_instance = utils._eval_remote_instance(remote_instance,
raise_error=False)
PERMISSIBLE_METHODS = ['AVERAGE', 'MIN', 'MAX', 'SUM']
if method not in PERMISSIBLE_METHODS:
raise ValueError('Unknown method "{0}". Please use either {1}'.format(
method, ','.join(PERMISSIBLE_METHODS)))
if not row_groups and not col_groups:
logger.warning('No column/row groups provided - skipping.')
return mat
# Convert numpy array to DataFrame
if isinstance(mat, np.ndarray):
mat = pd.DataFrame(mat)
# Make copy of original DataFrame
elif isinstance(mat, pd.DataFrame):
mat = mat.copy()
else:
raise TypeError('Can only work with numpy arrays or pandas '
'DataFrames, got "{}"'.format(type(mat)))
# Convert to neuron->group format if necessary
if col_groups and utils._is_iterable(list(col_groups.values())[0]):
col_groups = {n: g for g in col_groups for n in utils.eval_skids(col_groups[g], remote_instance=remote_instance)}
if row_groups and utils._is_iterable(list(row_groups.values())[0]):
row_groups = {n: g for g in row_groups for n in utils.eval_skids(row_groups[g], remote_instance=remote_instance)}
# Make sure everything is string
mat.index = mat.index.astype(str)
mat.columns = mat.columns.astype(str)
col_groups = {str(k): str(v) for k, v in col_groups.items()}
row_groups = {str(k): str(v) for k, v in row_groups.items()}
if row_groups:
# Drop non-grouped values if applicable
if drop_ungrouped:
mat = mat.loc[mat.index.isin(row_groups.keys())]
# Add temporary grouping column
mat['row_groups'] = [row_groups.get(s, s) for s in mat.index]
if method == 'AVERAGE':
mat = mat.groupby('row_groups').mean()
elif method == 'MAX':
mat = mat.groupby('row_groups').max()
elif method == 'MIN':
mat = mat.groupby('row_groups').min()
elif method == 'SUM':
mat = mat.groupby('row_groups').sum()
if col_groups:
# Transpose for grouping
mat = mat.T
# Drop non-grouped values if applicable
if drop_ungrouped:
mat = mat.loc[mat.index.isin(col_groups.keys())]
# Add temporary grouping column
mat['col_groups'] = [col_groups.get(s, s) for s in mat.index]
if method == 'AVERAGE':
mat = mat.groupby('col_groups').mean()
elif method == 'MAX':
mat = mat.groupby('col_groups').max()
elif method == 'MIN':
mat = mat.groupby('col_groups').min()
elif method == 'SUM':
mat = mat.groupby('col_groups').sum()
# Transpose back
mat = mat.T
# Preserve datatype
mat.datatype = 'adjacency_matrix'
# Add flag that this matrix has been grouped
mat.is_grouped = True
return mat
[docs]def connection_density(s, t, method='MEDIAN', normalize='DENSITY',
remote_instance=None):
"""Calculate connection density.
Given source neuron(s) ``s`` and a target neuron ``t``, calculate the
local density of connections as function of the geodesic distance between
the postsynaptic contacts on target neuron ``t``.
The general idea here is that spread out contacts might be more
effective in depolarizing dendrites of the postsynaptic neuron than highly
localized ones. See `Gouwens and Wilson, Journal of Neuroscience (2009)
<https://www.ncbi.nlm.nih.gov/pubmed/19439602>`_ for example.
Parameters
----------
s,t : skeleton ID | CatmaidNeuron | CatmaidNeuronList
Source and target neuron, respectively. Multiple
sources are allowed. Target must be single neuron.
If ``t`` is a CatmaidNeuron, will use connectors and
total cable to this neuron. Use to subset density
calculations to e.g. the dendrites.
method : 'SUM' | 'AVERAGE' | 'MEDIAN', optional
Arithmetic method used to collapse pairwise geodesic
distances over all synaptic contacts on ``t`` from
``s`` into connection density ``D``.
normalize : 'DENSITY' | 'CABLE' | False
Normalization method:
- ``DENSITY``: normalize by synapse density
over all postsynapses of the target
- ``CABLE``: normalize by total cable length
of the target
- ``False``: no normalization
remote_instance : CatmaidInstance, optional
Returns
-------
connection density : float
Will return ``None`` if no connections or if only a
single connection between source and target.
"""
remote_instance = utils._eval_remote_instance(remote_instance)
source_skid = utils.eval_skids(s, remote_instance=remote_instance)
target_skid = utils.eval_skids(t, remote_instance=remote_instance)
if len(target_skid) != 1:
raise ValueError('Must provide a single target neuron.')
if normalize not in [False, 'DENSITY', 'CABLE']:
raise ValueError('Unknown normalization method "{}"'.format(normalize))
# Get connectors between source and target and extract postsynaptic
# treenode IDs
cn_between = fetch.get_connectors_between(source_skid, target_skid,
directional=True,
remote_instance=remote_instance)
post_tn = cn_between.node2_id.values
# Make sure we have neuron to work with
if not isinstance(t, (core.CatmaidNeuron, core.CatmaidNeuronList)):
t = fetch.get_neuron(t, remote_instance=remote_instance)
# If t already is a neuron, subset connectors to those that actually exists
else:
post_tn = np.intersect1d(post_tn, t.nodes.node_id.values)
if isinstance(t, core.CatmaidNeuronList):
t = t[0]
# If no connections, return 0
if not any(post_tn):
return None
# If there is only one connection, we won't be able to calculate a density
elif post_tn.shape[0] == 1:
return None
# If we are normalizing to density, include all synapses in geodesic
# calculations
if normalize == 'DENSITY':
to_calc = t.postsynapses.node_id.values
else:
to_calc = post_tn
# Get geodesic distances
m = graph_utils.geodesic_matrix(t,
to_calc,
directed=False,
weight='weight')
# Get distances from matrix and convert to microns
dist = np.array([m.loc[i, j] for i, j in combinations(post_tn, 2)]) / 1000
# Prepare normalization
if normalize is None:
norm = [1]
elif normalize == 'DENSITY':
all_post = t.postsynapses.node_id.values
norm = np.array([m.loc[i, j] for i, j in combinations(all_post, 2)]) / 1000
elif normalize == 'CABLE':
norm = [t.cable_length]
# Combine distances and turn distance into density (1/dist)
if method == 'SUM':
dist = np.sum(norm) / np.sum(dist)
elif method == 'AVERAGE':
dist = np.average(norm) / np.average(dist)
elif method == 'MEDIAN':
dist = np.median(norm) / np.median(dist)
else:
raise ValueError('Unknown method "{}"'.format(method))
# It's possible that all connections are onto the same treenode in which
# case average/sum/median distance would be 0 -> we will return this as
# None
if dist == 0:
return None
return dist
[docs]def sparseness(x, which='LTS'):
"""Calculate sparseness.
Sparseness comes in two flavors:
**Lifetime kurtosis (LTK)** quantifies the widths of tuning curves
(according to Muench & Galizia, 2016):
.. math::
S = \\Bigg\\{ \\frac{1}{N} \\sum^N_{i=1} \\Big[ \\frac{r_i - \\overline{r}}{\\sigma_r} \\Big] ^4 \\Bigg\\} - 3
where :math:`N` is the number of observations, :math:`r_i` the value of
observation :math:`i`, and :math:`\\overline{r}` and
:math:`\\sigma_r` the mean and the standard deviation of the observations'
values, respectively. LTK is assuming a normal, or at least symmetric
distribution.
**Lifetime sparseness (LTS)** quantifies selectivity
(Bhandawat et al., 2007):
.. math::
S = \\frac{1}{1-1/N} \\Bigg[1- \\frac{\\big(\\sum^N_{j=1} r_j / N\\big)^2}{\\sum^N_{j=1} r_j^2 / N} \\Bigg]
where :math:`N` is the number of observations, and :math:`r_j` is the
value of an observation.
Notes
-----
``NaN`` values will be ignored. You can use that to e.g. ignore zero
values in a large connectivity matrix by changing these values to ``NaN``
before passing it to ``pymaid.sparseness``.
Parameters
----------
x : DataFrame | array-like
(N, M) dataset with N (rows) observations for M (columns)
neurons. One-dimensional data will be converted to two
dimensions (N rows, 1 column).
which : "LTS" | "LTK"
Determines whether lifetime sparseness (LTS) or lifetime
kurtosis (LTK) is returned.
Returns
-------
sparseness
``pandas.Series`` if input was pandas DataFrame, else
``numpy.array``.
Examples
--------
Calculate sparseness of olfactory inputs to group of neurons:
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> # Generate adjacency matrix
>>> adj = pymaid.adjacency_matrix(s='annotation:WTPN2017_excitatory_uPN_right',
... t='annotation:ASB LHN')
>>> # Calculate lifetime sparseness
>>> S = pymaid.sparseness(adj, which='LTS')
>>> # Plot distribution
>>> ax = S.plot.hist(bins=np.arange(0, 1, .1))
>>> ax.set_xlabel('LTS')
>>> plt.show()
"""
if not isinstance(x, (pd.DataFrame, np.ndarray)):
x = np.array(x)
# Make sure we are working with 2 dimensional data
if isinstance(x, np.ndarray) and x.ndim == 1:
x = x.reshape(x.shape[0], 1)
N = np.sum(~np.isnan(x), axis=0)
if which == 'LTK':
return np.nansum(((x - np.nanmean(x, axis=0)) / np.nanstd(x, axis=0)) ** 4, axis=0) / N - 3
elif which == 'LTS':
return 1 / (1 - (1 / N)) * (1 - np.nansum(x / N, axis=0) ** 2 / np.nansum(x ** 2 / N, axis=0))
else:
raise ValueError('Parameter "which" must be either "LTS" or "LTK"')
[docs]def shared_partners(a, b, upstream=True, downstream=True, syn_threshold=None,
restrict_to=None, remote_instance=None):
"""Return shared partners between neuron(s) A and B.
Parameters
----------
a,b : CatmaidNeuron/List
Neurons to search shared partners for.
upstream, downstream: bool, int, optional
Set to True/False to restrict direction.
syn_threshold : int, optional
Synapse threshold. Edges to both neurons A and B
must be above threshold!
restrict_to : str | pymaid.Volume, optional
Volume to restrict connectivity to.
remote_instance : CatmaidInstance, optional
If not passed, will try using globally defined.
Returns
-------
pandas.DataFrame
Pandas DataFrame with shared partners and edges::
neuron_name skeleton_id relation edges_a edges_b
0
1
2
"""
if isinstance(a, core.CatmaidNeuron):
a = core.CatmaidNeuronList(a)
elif not isinstance(a, core.CatmaidNeuronList):
raise TypeError('Expected CatmaidNeuron/List, got {}'.format(type(a)))
if isinstance(b, core.CatmaidNeuron):
b = core.CatmaidNeuronList(b)
elif not isinstance(b, core.CatmaidNeuronList):
raise TypeError('Expected CatmaidNeuron/List, got {}'.format(type(b)))
if not isinstance(syn_threshold, (float, int)):
syn_threshold = 1
elif syn_threshold <= 0:
raise ValueError('Synapse threshold must not be <= 0.')
remote_instance = utils._eval_remote_instance(remote_instance)
cn = fetch.get_partners(a + b, remote_instance=remote_instance)
if not upstream:
cn = cn[cn.relation != 'upstream']
if not downstream:
cn = cn[cn.relation != 'downstream']
if restrict_to:
cn = filter_connectivity(cn, restrict_to=restrict_to,
remote_instance=remote_instance)
# Collapse into A + B by direction
cn['edges_a'] = 0
cn['edges_b'] = 0
for rel in cn.relation.unique():
cn.loc[cn.relation == rel, 'edges_a'] = cn.loc[cn.relation == rel, a.skeleton_id].sum(axis=1)
cn.loc[cn.relation == rel, 'edges_b'] = cn.loc[cn.relation == rel, b.skeleton_id].sum(axis=1)
# Remove connections where either a or b are sub-threshold
cn = cn[(cn['edges_a'] >= syn_threshold) & (cn['edges_b'] >= syn_threshold)]
return cn[['neuron_name', 'skeleton_id', 'relation', 'edges_a', 'edges_b']].reset_index()