Pancreas

[1]:
import phlower
import matplotlib.pyplot as plt
import scanpy as sc
import numpy as np
import pickle
from collections import Counter
[2]:
adata = phlower.dataset.pancreas()
adata
[2]:
AnnData object with n_obs × n_vars = 3696 × 27998
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score'
    var: 'highly_variable_genes'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_umap'
    layers: 'spliced', 'unspliced'
    obsp: 'connectivities', 'distances'

Preprocessing and dimensonality reduction

[3]:
adata.var_names_make_unique()

# annotate the group of mitochondrial genes as "mt"
adata.var["mt"] = adata.var_names.str.startswith("mt-")
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
)
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata = adata[:, adata.var.highly_variable]
sc.pp.regress_out(adata, ["total_counts", "pct_counts_mt"])
sc.tl.pca(adata, svd_solver="arpack")
adata.obs['group'] = adata.obs['clusters']

ddhodge

[4]:
phlower.ext.ddhodge(adata, basis='X_pca', roots=list(adata.obs['group']=='Ductal'), k=4,npc=40, ndc=30,s=1)
2025-02-14 12:37:55.570603 distance_matrix
2025-02-14 12:37:56.428301 Diffusionmaps:
done.
2025-02-14 12:38:00.917971 diffusion distance:
2025-02-14 12:38:02.066982 transition matrix:
2025-02-14 12:38:02.754823 graph from A
2025-02-14 12:38:02.797369 Rewiring:
2025-02-14 12:38:02.797426 div(g_o)...
2025-02-14 12:38:08.124798 edge weight...
2025-02-14 12:38:08.363934 cholesky solve ax=b...
+ 1e-06 I is positive-definite
2025-02-14 12:38:09.316448 potential.
2025-02-14 12:38:23.174327 ddhodge done.
done.
2025-02-14 12:38:23.297023 calculate layouts
2025-02-14 12:39:48.192348 done

Delaunay triangulation

[5]:
phlower.tl.construct_delaunay(adata, trunc_quantile = 0.9,  start_n=10, end_n=10, separate_ends_triangle=True, circle_quant=0.7, calc_layout=False)
start clusters  ['Ngn3 high EP', 'Ductal', 'Beta', 'Epsilon', 'Alpha', 'Delta', 'Pre-endocrine', 'Ngn3 low EP']
end clusters  ['Ngn3 high EP', 'Ductal', 'Beta', 'Epsilon', 'Alpha', 'Delta', 'Pre-endocrine', 'Ngn3 low EP']
[6]:
fig, ax = plt.subplots(1,2, figsize=(20,8))
phlower.pl.nxdraw_group(adata, node_size=20, show_edges=True, ax=ax[0], show_legend=False, labelstyle='text', labelsize=8)
phlower.pl.nxdraw_group(adata, graph_name='X_pca_ddhodge_g_triangulation_circle', node_size=20, show_edges=True, ax=ax[1], show_legend=True, labelstyle='text', labelsize=8)
../_images/notebooks_pancreas_9_0.png
[7]:
phlower.pl.nxdraw_score(adata, node_size=5)
../_images/notebooks_pancreas_10_0.png

Hodge decomposition

[8]:
from datetime import datetime

dt1 = datetime.now()
print('L1 Datetime Start:', dt1)

phlower.tl.L1Norm_decomp(adata, L1_mode='sym', check_symmetric=False, isnorm = True)
dt2 = datetime.now()
print('L1 Datetime End:', dt2)
delta = dt2 - dt1
print('L1 Difference is:', delta)
L1 Datetime Start: 2025-02-14 12:39:51.185159
9.136894941329956  sec
L1 Datetime End: 2025-02-14 12:40:00.567911
L1 Difference is: 0:00:09.382752

determine harmonic

[9]:
_, ax = plt.subplots(1,2, figsize=(9,3))
phlower.pl.plot_eigen_line(adata, n_eig=10,ax=ax[0])
phlower.pl.plot_eigen_line(adata, n_eig=100, step_size=10, ax=ax[1])
../_images/notebooks_pancreas_14_0.png
[10]:
phlower.tl.knee_eigen(adata)
knee eigen value is  26

preference random walk

[11]:
phlower.tl.random_climb_knn(adata, n=10000, roots_ratio=0.001, knn_edges_k=5)
100%|██████████| 10000/10000 [00:17<00:00, 573.46it/s]

Project paths to harmonic space

[12]:
phlower.tl.trajs_matrix(adata)
2025-02-14 12:40:33.820577 projecting trajectories to simplics...
2025-02-14 12:40:48.576892 Embedding trajectory harmonics...
eigen_n < 1, use knee_eigen to find the number of eigen vectors to use: 26
/home/sz753404/miniconda3/envs/R422/lib/python3.10/site-packages/umap/umap_.py:1943: UserWarning: n_jobs value -1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
2025-02-14 12:41:23.889403 done.

Paths clustering

[13]:
phlower.tl.trajs_clustering(adata, clustering_method='leiden', resolution=1.5, oname_basis='')
calcaulating distances...
leiden clustering...
[14]:
phlower.tl.select_trajectory_clusters(adata, verbose=False)
/home/sz753404/miniconda3/envs/R422/lib/python3.10/site-packages/phlower/tools/trajectory.py:436: RuntimeWarning: Precision loss occurred in moment calculation due to catastrophic cancellation. This occurs when the data are nearly identical. Results may be unreliable.
  sk = scipy.stats.skew(l)
[15]:
phlower.tl.unique_trajectory_clusters(adata, verbose=False)
[16]:
phlower.pl.plot_trajs_embedding(adata, clusters="trajs_clusters")
../_images/notebooks_pancreas_24_0.png
[17]:
fig, ax = phlower.pl.plot_density_grid(adata,  figsize=(10, 7), cluster_name='trajs_clusters', bg_alpha=0.2, return_fig=True)
z_min, z_max 2.551998446841963e-09 2.342906917185627e-06
../_images/notebooks_pancreas_25_1.png
[18]:
phlower.pl.nxdraw_group(adata, node_size=20, show_edges=True, show_legend=False, labelstyle='text', labelsize=8)
../_images/notebooks_pancreas_26_0.png

Tree creating

[19]:
phlower.tl.harmonic_stream_tree(adata, min_bin_number=30, retain_clusters=[0,1,3,10],trim_end=True,verbose=True)
2025-02-14 12:44:18.608951 trajectory groups ranking...
eigen_n < 1, use knee_eigen to find the number of eigen vectors to use
eigen_n: 26
[0, 1, 3, 10]
traj bins: 100%|██████████| 4/4 [00:14<00:00,  3.56s/it]
2025-02-14 12:44:34.646005 sync bins to by pseudo time...
2025-02-14 12:44:35.068413 calculating distances between bin pairs...
2025-02-14 12:44:35.481963 fate tree creating...
[(0, 1, 10, 3)]
2025-02-14 12:44:35.482928 tree attributes adding...
del 6_31
del 6_30
del 6_28
del 5_34
2025-02-14 12:44:36.197103 plugin to STREAM...
2025-02-14 12:44:36.206856 projecting trajs to harmonic...
2025-02-14 12:44:37.467089 cumsum...
2025-02-14 12:44:38.009291 edge cumsum dict...
edges projection: 100%|██████████| 5186/5186 [00:06<00:00, 784.35it/s]
2025-02-14 12:44:45.807129 done...
there are 56 nodes not visited in the tree
will approximate the node cumsum coordinate by 5 nearest accessible neighbors
2025-02-14 12:44:50.694360 done...
[20]:
phlower.ext.plot_stream(adata, fig_size=(8,5), dist_scale=1, log_scale=True, factor_min_win=1, factor_num_win=5, show_legend=True)
../_images/notebooks_pancreas_29_0.png
[21]:
pickle.dump(adata, open("pancreas_phlower.pickle", 'wb'))
[22]:
import session_info
session_info.show()
[22]:
Click to view session information
-----
anndata             0.9.2
matplotlib          3.9.1.post1
numpy               1.23.5
phlower             0.1.3
scanpy              1.9.3
session_info        1.0.0
-----
Click to view modules imported as dependencies
PIL                         9.4.0
anyio                       NA
argcomplete                 NA
arrow                       1.2.3
asciitree                   NA
asttokens                   NA
attr                        22.2.0
attrs                       22.2.0
babel                       2.12.1
backcall                    0.2.0
brotli                      NA
certifi                     2024.08.30
cffi                        1.15.1
charset_normalizer          3.1.0
cloudpickle                 2.2.1
colorama                    0.4.6
colorcet                    3.0.1
comm                        0.2.2
cycler                      0.12.1
cython_runtime              NA
dask                        2024.6.2
dateutil                    2.8.2
debugpy                     1.6.6
decorator                   5.1.1
defusedxml                  0.7.1
dill                        0.3.6
dot_parser                  NA
exceptiongroup              1.1.3
executing                   1.2.0
fastjsonschema              NA
fqdn                        NA
gudhi                       3.10.1
h5py                        3.10.0
idna                        3.4
igraph                      0.10.5
importlib_metadata          NA
ipykernel                   6.21.3
ipython_genutils            0.2.0
ipywidgets                  8.1.3
isoduration                 NA
jedi                        0.19.0
jinja2                      3.1.2
joblib                      1.3.2
json5                       NA
jsonpointer                 2.3
jsonschema                  4.17.3
jsonschema_specifications   NA
jupyter_events              0.6.3
jupyter_server              2.4.0
jupyterlab_server           2.20.0
kiwisolver                  1.4.5
leidenalg                   0.9.1
llvmlite                    0.39.1
louvain                     0.8.0
markupsafe                  2.1.3
matplotlib_inline           0.1.6
mpl_toolkits                NA
mpmath                      1.3.0
msgpack                     1.0.5
natsort                     8.4.0
nbformat                    5.9.2
networkx                    2.8.8
numba                       0.56.4
numcodecs                   0.12.1
numexpr                     2.8.4
nvfuser                     NA
opt_einsum                  v3.3.0
packaging                   23.1
pandas                      2.2.3
parso                       0.8.3
patsy                       0.5.3
pexpect                     4.8.0
pickleshare                 0.7.5
pkg_resources               NA
platformdirs                3.8.1
plotly                      5.23.0
prometheus_client           NA
prompt_toolkit              3.0.39
psutil                      5.9.4
ptyprocess                  0.7.0
pure_eval                   0.2.2
pyarrow                     16.1.0
pycparser                   2.21
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.9.5
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pydot                       1.4.2
pygments                    2.15.1
pynndescent                 0.5.11
pyparsing                   3.1.2
pythonjsonlogger            NA
pytz                        2023.3
referencing                 NA
requests                    2.29.0
rfc3339_validator           0.1.4
rfc3986_validator           0.1.1
rpds                        NA
scipy                       1.11.4
seaborn                     0.13.2
send2trash                  NA
setuptools_scm              NA
six                         1.16.0
sklearn                     1.5.1
sksparse                    0.4.8
sniffio                     1.3.0
socks                       1.7.1
sparse                      0.14.0
sphinxcontrib               NA
stack_data                  0.6.2
statsmodels                 0.14.0
sympy                       1.11.1
tblib                       3.0.0
texttable                   1.6.7
threadpoolctl               3.4.0
tlz                         0.12.0
toolz                       0.12.0
torch                       2.0.1+cu117
tornado                     6.3
tqdm                        4.65.0
traitlets                   5.9.0
typing_extensions           NA
umap                        0.5.5
uri_template                NA
urllib3                     1.26.15
wcwidth                     0.2.6
webcolors                   1.11.1
websocket                   1.5.1
yaml                        6.0
zarr                        2.18.2
zipp                        NA
zmq                         25.0.2
-----
IPython             8.15.0
jupyter_client      8.2.0
jupyter_core        5.3.0
jupyterlab          3.6.1
notebook            6.5.3
-----
Python 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:23:14) [GCC 10.4.0]
Linux-4.18.0-477.21.1.el8_8.x86_64-x86_64-with-glibc2.28
-----
Session information updated at 2025-02-14 12:44