Visium HD Human Colorectal Cancer#
data: https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-human-crc-v4
[ ]:
import scanpy as sc
import trackcell as tcl
import numpy as np
import squidpy as sq
import matplotlib.pyplot as plt
print(tcl.__version__)
[2]:
def subset_data(Adata, xlim=(54500, 56000), ylim=(15000, 16000)):
# Define region of interest
x_min, x_max = xlim[0], xlim[1]
y_min, y_max = ylim[0], ylim[1]
# Create mask for spatial coordinates
spatial_coords = adata.obsm['spatial']
mask = ((spatial_coords[:, 0] >= x_min) & (spatial_coords[:, 0] <= x_max) &
(spatial_coords[:, 1] >= y_min) & (spatial_coords[:, 1] <= y_max))
# Create subset
bdata = adata[mask].copy()
bdata.obs_names_make_unique()
bdata.var_names_make_unique()
# QC
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
bdata.var["mt"] = bdata.var_names.str.startswith("MT-")
# ribosomal genes
bdata.var["ribo"] = bdata.var_names.str.startswith(("RPS", "RPL"))
# hemoglobin genes
bdata.var["hb"] = bdata.var_names.str.contains("^HB[^(P)]")
sc.pp.calculate_qc_metrics(bdata, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=True)
bdata.layers["counts"] = bdata.X.copy()
# Normalizing to median total counts
sc.pp.normalize_total(bdata)
# Logarithmize the data
sc.pp.log1p(bdata)
return bdata
[ ]:
datapath = 'outs/segmented_outputs'
adata = tcl.io.read_hd_cellseg(
datapath=datapath,
sample="CC"
)
scanpy 可视化#
[4]:
sc.pl.spatial(adata, color="classification",
size=3, linewidth=0,
legend_fontsize=12, frameon=True)
/tmp/ipykernel_3659314/3682908051.py:1: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
sc.pl.spatial(adata, color="classification",
squidpy method to vis#
[5]:
fig, ax = plt.subplots(1, 1, figsize=(7, 6))
sq.pl.spatial_scatter(
adata, shape=None, color=['classification'],
#na_color='#F5F5F5',
size=3, linewidth=0,
library_id="CC",
legend_na=True,
ax=ax
)
/annogene/data2/share/software/SCV/SC_tools/Miniforge3/envs/st/lib/python3.12/site-packages/squidpy/pl/_spatial_utils.py:976: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
_cax = scatter(
subset data select ROI to vis#
[ ]:
bdata = subset_data(adata)
[7]:
paletes= {'Cluster-2': '#1F77B4',
'Cluster-4': '#07CCD2',
'Cluster-7': '#F3CCD2',
'Cluster-9': '#F340D2',
'Cluster-11': '#E0CC71'}
sc.pl.spatial(bdata, color="classification",
palette=paletes, frameon=True,
size=3, spot_size=10,
legend_fontsize=12)
/tmp/ipykernel_3659314/3159057110.py:7: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
sc.pl.spatial(bdata, color="classification",
[8]:
fig, ax = plt.subplots(1, 1, figsize=(7, 4))
sc.pl.spatial(bdata, color=['COL3A1'], size=3, ax=ax,
legend_fontsize=12, spot_size=10, frameon=True)
/tmp/ipykernel_3659314/462138400.py:3: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
sc.pl.spatial(bdata, color=['COL3A1'], size=3, ax=ax,
Visualization spatial cell#
[14]:
paletes2 = ['#1F77B4','#07CCD2','#F3CCD2','#F340D2','#E0CC71', 'y']
_ = tcl.pl.spatial_cell(
bdata,
color="classification",
palette=paletes2,
edges_color='white',
edges_width=0.4,
figsize=(7, 4)
)
[17]:
import trackcell as tcl
_ = tcl.pl.spatial_cell(
bdata,
color="COL3A1",
palete=paletes,
alpha=0.5,
edges_color='white',
edges_width=0.4,
cmap='magma',
figsize=(7, 4)
)
custom cmap#
[33]:
import matplotlib.colors as mcolors
pink_colors = ["#FFFFFF", "#FFE4E1", "#FFB6C1", "#FF69B4", "#FF1493", "#C71585"]
pink_cmap = mcolors.LinearSegmentedColormap.from_list('custom_pink', pink_colors, N=256)
_ = tcl.pl.spatial_cell(
bdata,
color="total_counts",
#palete=paletes,
alpha=1,
groupby="classification",
groups=['Cluster-7'], # Only plot cells of interest
edges_color='white',
edges_width=0.4,
cmap=pink_cmap,
figsize=(7, 4)
)
[ ]:
label distance#
[21]:
tcl.tl.hd_labeldist(
adata,
groupby="classification",
label="Cluster-7",
inplace=True
)
[ ]:
sc.pl.spatial(adata, color="Cluster-7_dist",
size=3, linewidth=0,
legend_fontsize=12, frameon=True)
[ ]:
import numpy as np
adata.obs['log_Cluster-7_dist'] = np.log(adata.obs['Cluster-7_dist'])
sc.pl.spatial(adata, color="log_Cluster-7_dist",
size=3, linewidth=0,
legend_fontsize=12, frameon=True)
保存和读取带有 geometries 的 h5ad 文件#
问题说明#
当使用 Space Ranger 4.0 数据并保存包含 geometries 的 h5ad 文件时,可能会遇到以下错误:
raised while writing key 'geometries' of <class 'h5py._hl.group.Group'>
这是因为 GeoDataFrame 中的 Shapely Polygon 对象无法直接序列化到 HDF5 格式。
解决方案#
trackcell 提供了自动处理这个问题的函数:
保存时自动转换:使用
tcio.add_geometries_to_annohdcell_output()或tcio.convert_annohdcell_to_trackcell()保存时,会自动将 GeoDataFrame 转换为 WKT 字符串读取后恢复:使用
tcio.restore_geometries()将 WKT 字符串恢复为 GeoDataFrame
[ ]:
# 示例:正确保存和读取带有 geometries 的 h5ad 文件
# 保存前需要将 GeoDataFrame 转换为 WKT 字符串
import pandas as pd
# 备份 GeoDataFrame
gdf_backup = adata.uns["spatial"]["CC"]["geometries"]
# 转换为 DataFrame with WKT strings
adata.uns["spatial"]["CC"]["geometries"] = pd.DataFrame(
{"geometry": gdf_backup.geometry.apply(lambda g: g.wkt)},
index=gdf_backup.index
)
# 保存
adata.write_h5ad("output.h5ad")
# 恢复内存中的 GeoDataFrame(如果继续使用)
adata.uns["spatial"]["CC"]["geometries"] = gdf_backup
# ===== 读取后恢复 GeoDataFrame =====
# adata = sc.read_h5ad("output.h5ad")
# adata = tcio.restore_geometries(adata)
# tcl.pl.spatial_cell(adata, sample="CC")