Kidney
[1]:
import os
import phlower
import numpy as np
import pandas as pd
import networkx as nx
from itertools import chain
import scipy
import pickle
import sklearn
import pydot
import scanpy as sc
import seaborn as sns
import colorcet as cc
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from datetime import datetime
from collections import Counter
import networkx as nx
[2]:
import phlower
phlower.__version__
[2]:
'0.1.3'
[3]:
knn_k = 6
ndc_n = 40
s_n = 4
[4]:
import logging
if not os.path.exists("logs"):
os.mkdir("logs")
logger = logging.getLogger()
fhandler = logging.FileHandler(filename=f'logs/i_{knn_k}_{datetime.now()}.log'.replace(" ", "_"), mode='a')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.DEBUG)
logger.info(f'begining: {datetime.now()}')
load kidney anndata with MOJITOO reduction and clustering.
[5]:
adata = phlower.dataset.kidney()
Downloading data to data/kidney.h5ad
[6]:
dt1 = datetime.now()
print('ddhodge Datetime Start:', dt1)
logger.info(f'ddhodge Datetime Start: {dt1}')
phlower.ext.ddhodge(adata, basis='X_pca', npc=30, roots=adata.obs['group']==6, k=knn_k, ndc=ndc_n,s=s_n)
dt2 = datetime.now()
print('ddhodge Datetime End:', dt2)
logger.info(f'ddhodge Datetime End: {dt2}')
delta = dt2 - dt1
print('ddhodge Difference is:', delta)
logger.info(f'ddhodge Difference is: {delta}')
ddhodge Datetime Start: 2024-09-29 18:28:09.553560
2024-09-29 18:28:09.557446 distance_matrix
2024-09-29 18:28:50.421441 Diffusionmaps:
done.
2024-09-29 18:31:14.670074 diffusion distance:
2024-09-29 18:31:38.995999 transition matrix:
2024-09-29 18:32:41.495777 graph from A
2024-09-29 18:32:47.660425 Rewiring:
2024-09-29 18:32:47.660522 div(g_o)...
2024-09-29 18:37:17.269293 edge weight...
2024-09-29 18:37:18.744480 cholesky solve ax=b...
+ 1e-06 I is positive-definite
2024-09-29 18:37:23.942825 grad...
2024-09-29 18:43:41.807040 potential.
2024-09-29 18:50:44.198517 ddhodge done.
done.
2024-09-29 18:50:45.650726 calculate layouts
2024-09-29 19:08:55.358329 done
ddhodge Datetime End: 2024-09-29 19:08:55.358683
ddhodge Difference is: 0:40:45.805123
show the pseudotime on the neato layout
[7]:
phlower.pl.nxdraw_score(adata, color='u',node_size=10)
show the clusters
[8]:
_, ax = plt.subplots(1,1, )
phlower.pl.nxdraw_group(adata,node_size=10, show_edges=False, ax=ax, legend_col=2)
ax.legend(ncol=2, markerscale=1, loc="center left", bbox_to_anchor=(1, 0.5))
[8]:
<matplotlib.legend.Legend at 0x154e09b01ea0>
show the days
[9]:
dic = {"S1":"Day7_18", "S2":"Day7_12", "S3":"Day7_5", "S4":"Day7_0"}
adata.obs['time'] = [dic[i] for i in adata.obs['sample']]
phlower.pl.nxdraw_group(adata,node_size=10, group_name='time',show_edges=False, show_legend=True, legend_col=1, markerscale=3, label=False)
connect start state cells and terminal state cells
[10]:
phlower.tl.construct_delaunay(adata, trunc_quantile = 0.9, trunc_times = 3, start_n=10, end_n=10, separate_ends_triangle=True, circle_quant=0.1, calc_layout=False)
start clusters [0, 1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 15, 16, 19, 21, 23, 24, 25, 28]
end clusters [2, 9, 10, 12, 13, 14, 18, 20, 21, 27, 29]
Perform hodge laplacian decomposition
[11]:
from datetime import datetime
dt1 = datetime.now()
print('L1 Datetime Start:', dt1)
logging.info(f'L1 Datetime Start: {dt1}')
phlower.tl.L1Norm_decomp(adata, L1_mode='sym', isnorm = True)
dt2 = datetime.now()
print('L1 Datetime End:', dt2)
logger.info(f'L1 Datetime End: {dt2}')
delta = dt2 - dt1
print('L1 Difference is:', delta)
logger.info(f'L1 Difference: {delta}')
L1 Datetime Start: 2024-09-29 19:09:01.098998
29.895473957061768 sec
L1 Datetime End: 2024-09-29 19:09:31.975115
L1 Difference is: 0:00:30.876117
check the number of harmonics
[12]:
_, ax = plt.subplots(1,2, figsize=(9,3))
phlower.pl.plot_eigen_line(adata, n_eig=10,ax=ax[0])
phlower.pl.plot_eigen_line(adata, n_eig=100, step_size=10, ax=ax[1])
[13]:
phlower.tl.knee_eigen(adata)
knee eigen value is 28
preference random walk to sample trajectories
[14]:
phlower.tl.random_climb_knn(adata, n=10000)
100%|██████████| 10000/10000 [00:33<00:00, 300.71it/s]
display 2 sampled trjaectoris
[15]:
from scipy import constants
fig_width = 9
fig, axs = plt.subplots(1, 4,figsize=(fig_width*1.5, fig_width*0.5 / constants.golden))
phlower.pl.plot_traj(adata, trajectory=adata.uns['knn_trajs'][0], colorid=0, node_size=1, ax=axs[0])
phlower.pl.plot_traj(adata, trajectory=adata.uns['knn_trajs'][0], colorid=0, node_size=0, ax=axs[1])
phlower.pl.plot_traj(adata, trajectory=adata.uns['knn_trajs'][1], colorid=0, node_size=1, ax=axs[2])
phlower.pl.plot_traj(adata, trajectory=adata.uns['knn_trajs'][1], colorid=0, node_size=0, ax=axs[3])
project trajectories to harmonic space
[16]:
phlower.tl.trajs_matrix(adata)
2024-09-29 19:12:53.166895 projecting trajectories to simplics...
2024-09-29 19:13:14.491851 Embedding trajectory harmonics...
eigen_n < 1, use knee_eigen to find the number of eigen vectors to use: 28
/home/sz753404/miniconda3/envs/R422/lib/python3.10/site-packages/umap/umap_.py:1943: UserWarning: n_jobs value -1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
2024-09-29 19:13:54.220696 done.
clustering trajectory paths in t-map space
[17]:
phlower.tl.trajs_clustering(adata, eps=0.5, oname_basis='')
clean bad clusters
[18]:
phlower.tl.select_trajectory_clusters(adata, manual_rm_clusters=[2], verbose=False)
phlower.tl.unique_trajectory_clusters(adata, group_name='group', verbose=False)
show trajectory clusters in umap (derived from t-map space)
[19]:
_, ax = plt.subplots(1,1, figsize=(7, 4))
phlower.pl.plot_trajs_embedding(adata, clusters="trajs_clusters", ax=ax)
show sampled trajectory paths in neato layout space
[20]:
fig, ax = phlower.pl.plot_density_grid(adata, figsize=(10, 7), cluster_name='trajs_clusters', bg_alpha=0.1, return_fig=True, sample_n=1000)
z_min, z_max 1.7272154769335022e-08 3.0619701760704366e-06
[21]:
fig, ax = plt.subplots(1,2, figsize=(10,4))
phlower.pl.nxdraw_group(adata, node_size=20, show_edges=True, ax=ax[0], show_legend=False, labelstyle='text', labelsize=8)
phlower.pl.nxdraw_group(adata, node_size=20, show_edges=True, ax=ax[1], show_legend=True, labelstyle='box', labelsize=8, legend_col=2)
annotate the branches
[22]:
anno_dic = {
0: "Neuron-2",
1: "Tubular",
3: "Stromal-3",
5: "Stromal-4",
6: "Podocytes",
7: "Stromal-2",
9: "Muscle",
11: "Stromal-1",
10: "Neuron-1",
}
adata.uns['annotation']= [anno_dic.get(int(i), i) for i in adata.uns['trajs_clusters']]
create stream tree
[23]:
phlower.tl.harmonic_stream_tree(adata,
trajs_clusters='annotation',
retain_clusters=list(set(anno_dic.values())),
min_bin_number=20,
cut_threshold=1.5,
verbose=True)
2024-09-29 19:14:04.776740 trajectory groups ranking...
eigen_n < 1, use knee_eigen to find the number of eigen vectors to use
eigen_n: 28
['Neuron-1', 'Tubular', 'Podocytes', 'Muscle', 'Stromal-2', 'Neuron-2', 'Stromal-4', 'Stromal-3', 'Stromal-1']
traj bins: 100%|██████████| 9/9 [00:30<00:00, 3.41s/it]
2024-09-29 19:14:51.156594 sync bins to by pseudo time...
2024-09-29 19:14:53.315156 calculating distances between bin pairs...
2024-09-29 19:14:58.197484 fate tree creating...
[('Podocytes', 'Tubular'), ('Neuron-1', 'Muscle', 'Neuron-2'), ('Stromal-3', 'Stromal-2', 'Stromal-1', 'Stromal-4')]
2024-09-29 19:14:58.199676 tree attributes adding...
/home/sz753404/miniconda3/envs/R422/lib/python3.10/site-packages/phlower/tools/harmonic_pseudo_tree.py:697: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
roots = np.array(htree_roots)
2024-09-29 19:15:00.960068 plugin to STREAM...
2024-09-29 19:15:00.964338 projecting trajs to harmonic...
2024-09-29 19:15:29.490440 cumsum...
2024-09-29 19:15:30.186386 edge cumsum dict...
edges projection: 100%|██████████| 5105/5105 [00:09<00:00, 553.55it/s]
2024-09-29 19:15:42.025383 done...
there are 967 nodes not visited in the tree
will approximate the node cumsum coordinate by 5 nearest accessible neighbors
2024-09-29 19:17:45.660848 done...
[24]:
adata.uns['workdir'] = ''
adata.obs['group_str'] = [str(i) for i in adata.obs['group']]
plot stream tree
[25]:
phlower.ext.plot_stream_sc(adata, fig_size=(8,5), color=['group_str'],show_legend=False, dist_scale=1,s=10)
show trajectory paths in umap and in ct-map space
[26]:
figs = []
fig, ax = plt.subplots(1,2, figsize=(6,3))
phlower.pl.plot_trajs_embedding(adata, clusters="annotation",show_legend=False, labelsize=5, node_size=10, ax=ax[0])
phlower.pl.plot_trajectory_harmonic_lines(adata, clusters="annotation", sample_ratio=0.05, show_legend=True, ax=ax[1])
show trajctory paths in t-map space
[27]:
phlower.pl.plot_trajectory_harmonic_points_3d(adata, clusters="annotation", sample_ratio=.05, show_legend=True, fig_path=None)
3
show trajctory paths in ct-map space
[28]:
phlower.pl.plot_trajectory_harmonic_lines_3d(adata, clusters="annotation", sample_ratio=.05, show_legend=True, fig_path=None)
save adata with networkx object using pickle
[29]:
import pickle
import os
from datetime import date
if not os.path.exists("save"):
os.mkdir('save')
today_ = date.today().strftime("%Y-%m-%d")
pickle.dump(adata, open(f"save/kidney_{today_}.pickle", 'wb'))
show session info
[30]:
import session_info
session_info.show()
[30]:
Click to view session information
----- anndata 0.9.2 colorcet 3.0.1 matplotlib 3.9.1.post1 networkx 2.8.8 numpy 1.23.5 pandas 2.2.3 phlower 0.1.3 pydot 1.4.2 scanpy 1.9.3 scipy 1.14.0 seaborn 0.13.2 session_info 1.0.0 sklearn 1.5.1 -----
Click to view modules imported as dependencies
Cython 0.29.33 PIL 9.4.0 anyio NA argcomplete NA arrow 1.2.3 asciitree NA asttokens NA attr 22.2.0 attrs 22.2.0 babel 2.12.1 backcall 0.2.0 brotli NA certifi 2024.07.04 cffi 1.15.1 charset_normalizer 3.1.0 cloudpickle 2.2.1 colorama 0.4.6 comm 0.2.2 cycler 0.12.1 cython 0.29.33 cython_runtime NA dask 2024.6.2 dateutil 2.8.2 debugpy 1.6.6 decorator 5.1.1 defusedxml 0.7.1 dill 0.3.6 dot_parser NA exceptiongroup 1.1.3 executing 1.2.0 fastjsonschema NA fqdn NA google NA h5py 3.10.0 idna 3.4 igraph 0.10.5 importlib_metadata NA ipykernel 6.21.3 ipython_genutils 0.2.0 ipywidgets 8.1.3 isoduration NA jedi 0.19.0 jinja2 3.1.2 joblib 1.3.2 json5 NA jsonpointer 2.3 jsonschema 4.17.3 jsonschema_specifications NA jupyter_events 0.6.3 jupyter_server 2.4.0 jupyterlab_server 2.20.0 kaleido 0.2.1 kiwisolver 1.4.5 leidenalg 0.9.1 llvmlite 0.39.1 louvain 0.8.0 markupsafe 2.1.3 matplotlib_inline 0.1.6 mpl_toolkits NA mpmath 1.3.0 msgpack 1.0.5 natsort 8.4.0 nbformat 5.9.2 numba 0.56.4 numcodecs 0.12.1 numexpr 2.8.4 nvfuser NA opt_einsum v3.3.0 packaging 23.1 parso 0.8.3 patsy 0.5.3 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA platformdirs 3.8.1 plotly 5.23.0 prometheus_client NA prompt_toolkit 3.0.39 psutil 5.9.4 ptyprocess 0.7.0 pure_eval 0.2.2 pyarrow 16.1.0 pycparser 2.21 pydev_ipython NA pydevconsole NA pydevd 2.9.5 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygments 2.15.1 pynndescent 0.5.11 pyparsing 3.1.2 pythonjsonlogger NA pytz 2023.3 referencing NA requests 2.29.0 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rpds NA send2trash NA setuptools_scm NA six 1.16.0 sksparse 0.4.8 sniffio 1.3.0 socks 1.7.1 sparse 0.14.0 sphinxcontrib NA stack_data 0.6.2 statsmodels 0.14.0 sympy 1.11.1 tblib 3.0.0 tenacity NA texttable 1.6.7 threadpoolctl 3.4.0 tlz 0.12.0 toolz 0.12.0 torch 2.0.1+cu117 tornado 6.3 tqdm 4.65.0 traitlets 5.9.0 typing_extensions NA umap 0.5.5 uri_template NA urllib3 1.26.15 wcwidth 0.2.6 webcolors 1.11.1 websocket 1.5.1 yaml 6.0 zarr 2.18.2 zipp NA zmq 25.0.2
----- IPython 8.15.0 jupyter_client 8.2.0 jupyter_core 5.3.0 jupyterlab 3.6.1 notebook 6.5.3 ----- Python 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:23:14) [GCC 10.4.0] Linux-4.18.0-553.5.1.el8_10.x86_64-x86_64-with-glibc2.28 ----- Session information updated at 2024-09-29 19:18