Visium HD Human Colorectal Cancer#

data: https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-human-crc-v4

[ ]:

import scanpy as sc
import trackcell as tcl
import numpy as np
import squidpy as sq
import matplotlib.pyplot as plt

print(tcl.__version__)

[2]:

def subset_data(Adata, xlim=(54500, 56000), ylim=(15000, 16000)):
    # Define region of interest
    x_min, x_max = xlim[0], xlim[1]
    y_min, y_max = ylim[0], ylim[1]

    # Create mask for spatial coordinates
    spatial_coords = adata.obsm['spatial']
    mask = ((spatial_coords[:, 0] >= x_min) & (spatial_coords[:, 0] <= x_max) &
            (spatial_coords[:, 1] >= y_min) & (spatial_coords[:, 1] <= y_max))

    # Create subset
    bdata = adata[mask].copy()
    bdata.obs_names_make_unique()
    bdata.var_names_make_unique()
    # QC
    # mitochondrial genes, "MT-" for human, "Mt-" for mouse
    bdata.var["mt"] = bdata.var_names.str.startswith("MT-")
    # ribosomal genes
    bdata.var["ribo"] = bdata.var_names.str.startswith(("RPS", "RPL"))
    # hemoglobin genes
    bdata.var["hb"] = bdata.var_names.str.contains("^HB[^(P)]")
    sc.pp.calculate_qc_metrics(bdata, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=True)
    bdata.layers["counts"] = bdata.X.copy()

    # Normalizing to median total counts
    sc.pp.normalize_total(bdata)
    # Logarithmize the data
    sc.pp.log1p(bdata)

    return bdata

[ ]:

datapath = 'outs/segmented_outputs'
adata = tcl.io.read_hd_cellseg(
    datapath=datapath,
    sample="CC"
)

scanpy 可视化#

[4]:

sc.pl.spatial(adata, color="classification",
              size=3, linewidth=0,
              legend_fontsize=12, frameon=True)

/tmp/ipykernel_3659314/3682908051.py:1: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
  sc.pl.spatial(adata, color="classification",

../_images/notebooks_Colon_Cancer_demo_5_1.png

squidpy method to vis#

[5]:

fig, ax = plt.subplots(1, 1, figsize=(7, 6))

sq.pl.spatial_scatter(
            adata, shape=None, color=['classification'],
            #na_color='#F5F5F5',
            size=3, linewidth=0,
            library_id="CC",
            legend_na=True,
            ax=ax
        )

/annogene/data2/share/software/SCV/SC_tools/Miniforge3/envs/st/lib/python3.12/site-packages/squidpy/pl/_spatial_utils.py:976: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  _cax = scatter(

../_images/notebooks_Colon_Cancer_demo_7_1.png

subset data select ROI to vis#

[ ]:

bdata = subset_data(adata)

[7]:

paletes= {'Cluster-2': '#1F77B4',
          'Cluster-4': '#07CCD2',
          'Cluster-7': '#F3CCD2',
          'Cluster-9': '#F340D2',
          'Cluster-11': '#E0CC71'}

sc.pl.spatial(bdata, color="classification",
              palette=paletes, frameon=True,
              size=3, spot_size=10,
              legend_fontsize=12)

/tmp/ipykernel_3659314/3159057110.py:7: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
  sc.pl.spatial(bdata, color="classification",

../_images/notebooks_Colon_Cancer_demo_10_1.png

[8]:

fig, ax = plt.subplots(1, 1, figsize=(7, 4))

sc.pl.spatial(bdata, color=['COL3A1'], size=3, ax=ax,
              legend_fontsize=12, spot_size=10, frameon=True)

/tmp/ipykernel_3659314/462138400.py:3: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
  sc.pl.spatial(bdata, color=['COL3A1'], size=3, ax=ax,

../_images/notebooks_Colon_Cancer_demo_11_1.png

Visualization spatial cell#

[14]:

paletes2 = ['#1F77B4','#07CCD2','#F3CCD2','#F340D2','#E0CC71', 'y']

_ = tcl.pl.spatial_cell(
    bdata,
    color="classification",
    palette=paletes2,
    edges_color='white',
    edges_width=0.4,
    figsize=(7, 4)
)

../_images/notebooks_Colon_Cancer_demo_13_0.png

[17]:

import trackcell as tcl

_ = tcl.pl.spatial_cell(
    bdata,
    color="COL3A1",
    palete=paletes,
    alpha=0.5,
    edges_color='white',
    edges_width=0.4,
    cmap='magma',
    figsize=(7, 4)
)

../_images/notebooks_Colon_Cancer_demo_14_0.png

custom cmap#

[33]:

import matplotlib.colors as mcolors
pink_colors = ["#FFFFFF", "#FFE4E1", "#FFB6C1", "#FF69B4", "#FF1493", "#C71585"]
pink_cmap = mcolors.LinearSegmentedColormap.from_list('custom_pink', pink_colors, N=256)

_ = tcl.pl.spatial_cell(
    bdata,
    color="total_counts",
    #palete=paletes,
    alpha=1,
    groupby="classification",
    groups=['Cluster-7'],  # Only plot cells of interest
    edges_color='white',
    edges_width=0.4,
    cmap=pink_cmap,
    figsize=(7, 4)
)

../_images/notebooks_Colon_Cancer_demo_16_0.png

[ ]:

label distance#

[21]:

tcl.tl.hd_labeldist(
    adata,
    groupby="classification",
    label="Cluster-7",
    inplace=True
)

[ ]:

sc.pl.spatial(adata, color="Cluster-7_dist",
              size=3, linewidth=0,
              legend_fontsize=12, frameon=True)

[ ]:

import numpy as np
adata.obs['log_Cluster-7_dist'] = np.log(adata.obs['Cluster-7_dist'])

sc.pl.spatial(adata, color="log_Cluster-7_dist",
              size=3, linewidth=0,
              legend_fontsize=12, frameon=True)

保存和读取带有 geometries 的 h5ad 文件#

问题说明#

当使用 Space Ranger 4.0 数据并保存包含 geometries 的 h5ad 文件时，可能会遇到以下错误：

raised while writing key 'geometries' of <class 'h5py._hl.group.Group'>

这是因为 GeoDataFrame 中的 Shapely Polygon 对象无法直接序列化到 HDF5 格式。

解决方案#

trackcell 提供了自动处理这个问题的函数：

保存时自动转换：使用 tcio.add_geometries_to_annohdcell_output() 或 tcio.convert_annohdcell_to_trackcell() 保存时，会自动将 GeoDataFrame 转换为 WKT 字符串
读取后恢复：使用 tcio.restore_geometries() 将 WKT 字符串恢复为 GeoDataFrame

[ ]:

# 示例：正确保存和读取带有 geometries 的 h5ad 文件

# 保存前需要将 GeoDataFrame 转换为 WKT 字符串
import pandas as pd

# 备份 GeoDataFrame
gdf_backup = adata.uns["spatial"]["CC"]["geometries"]

# 转换为 DataFrame with WKT strings
adata.uns["spatial"]["CC"]["geometries"] = pd.DataFrame(
    {"geometry": gdf_backup.geometry.apply(lambda g: g.wkt)},
    index=gdf_backup.index
)

# 保存
adata.write_h5ad("output.h5ad")

# 恢复内存中的 GeoDataFrame（如果继续使用）
adata.uns["spatial"]["CC"]["geometries"] = gdf_backup

# ===== 读取后恢复 GeoDataFrame =====
# adata = sc.read_h5ad("output.h5ad")
# adata = tcio.restore_geometries(adata)
# tcl.pl.spatial_cell(adata, sample="CC")