{ "cells": [ { "cell_type": "markdown", "id": "d50ee813", "metadata": {}, "source": [ "# Protein Panel Expansion\n", "\n", "This notebook shows how to transform custom protein data into the correct shape for scLinguist by expanding it to the full target protein panel." ] }, { "cell_type": "markdown", "id": "3235ec81", "metadata": {}, "source": [ "## 0. Imports" ] }, { "cell_type": "code", "id": "8615057b", "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:02:23.997313Z", "start_time": "2025-08-21T15:02:23.989899Z" } }, "source": [ "import sys\n", "sys.path.append('../../')\n", "import numpy as np\n", "import pandas as pd\n", "import anndata as anndata\n", "import scanpy as sc\n", "from pathlib import Path\n", "from scipy.sparse import csr_matrix\n", "from scLinguist.data_loaders.data_loader import expand_protein_to_panel" ], "outputs": [], "execution_count": 17 }, { "cell_type": "markdown", "id": "81afc49b", "metadata": {}, "source": "## 1. Parameters" }, { "cell_type": "code", "id": "9970e90d", "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:02:25.517797Z", "start_time": "2025-08-21T15:02:25.510183Z" } }, "source": [ "PANEL_PATH = Path('../../docs/tutorials/protein_index_map.csv')\n", "PROTEIN_PATH = Path('../../data/test_sample_adt.h5ad')\n", "OUTPUT_DIR = Path('../../docs/tutorials/expanded_output')\n", "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n", "OUT_H5AD = OUTPUT_DIR / 'pro_expanded_6427.h5ad'\n", "COMPRESSION = 'gzip'" ], "outputs": [], "execution_count": 18 }, { "cell_type": "markdown", "id": "e148f9a5", "metadata": {}, "source": "## 2. 
Load data & target panel" }, { "cell_type": "code", "id": "6e4021f5", "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:02:51.928887Z", "start_time": "2025-08-21T15:02:51.466995Z" } }, "source": [ "# Take the first 10 proteins as a small example. Slicing an AnnData yields a view;\n", "# .copy() makes it a standalone object so that later writes to `.var` do not\n", "# trigger ImplicitModificationWarning.\n", "pro = sc.read_h5ad(PROTEIN_PATH)[:, :10].copy()\n", "# Keep the DataFrame and the name list under separate names; select the column\n", "# with [] rather than attribute access.\n", "panel_df = pd.read_csv(PANEL_PATH, index_col=None)\n", "panel = panel_df['name'].tolist()\n", "print('Protein AnnData:', pro.shape)\n", "print('Panel length:', len(panel))" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Protein AnnData: (10546, 10)\n", "Panel length: 6427\n" ] } ], "execution_count": 21 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:02:53.080075Z", "start_time": "2025-08-21T15:02:53.053636Z" } }, "cell_type": "code", "source": "panel[:10]", "id": "6f28e03de7ae7c6a", "outputs": [ { "data": { "text/plain": [ "['SP110',\n", " 'GTPBA',\n", " 'SNX2',\n", " 'FRG1',\n", " 'TT21A',\n", " 'RHG18',\n", " 'AR',\n", " 'DOCK1',\n", " 'RAB1A',\n", " 'MUC1.HMFG2']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 22 }, { "cell_type": "markdown", "id": "9c155a72", "metadata": {}, "source": "## 3. 
Expand and save" }, { "cell_type": "code", "id": "68a9e520", "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:03:01.177924Z", "start_time": "2025-08-21T15:03:00.936083Z" } }, "source": [ "pro_expanded = expand_protein_to_panel(pro, panel, id_col=None)\n", "# Sanity check before writing: same cells, one column per panel entry.\n", "assert pro_expanded.shape == (pro.n_obs, len(panel))\n", "print('Expanded shape:', pro_expanded.shape)\n", "pro_expanded.write_h5ad(OUT_H5AD, compression=COMPRESSION)\n", "print('Saved to:', OUT_H5AD)" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Expanded shape: (10546, 6427)\n", "Saved to: ../../docs/tutorials/expanded_output/pro_expanded_6427.h5ad\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "../../scLinguist/data_loaders/data_loader.py:894: ImplicitModificationWarning: Trying to modify attribute `.var` of view, initializing view as actual.\n", " pro.var[\"feature_id\"] = pro.var_names\n" ] } ], "execution_count": 23 }, { "cell_type": "markdown", "id": "27a80539", "metadata": {}, "source": "## 4. Quick check" }, { "cell_type": "code", "id": "3cb5d9f6", "metadata": { "ExecuteTime": { "end_time": "2025-08-21T15:03:08.515088Z", "start_time": "2025-08-21T15:03:08.482450Z" } }, "source": [ "print('First 10 names:', list(pro_expanded.var_names[:10]))\n", "print('Total vars:', pro_expanded.n_vars)\n", "# Build the lookup set once instead of once per panel entry.\n", "source_names = set(pro.var_names)\n", "missing = [p for p in panel if p not in source_names]\n", "print('Missing count from source (filled with zeros):', len(missing))" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First 10 names: ['SP110', 'GTPBA', 'SNX2', 'FRG1', 'TT21A', 'RHG18', 'AR', 'DOCK1', 'RAB1A', 'MUC1.HMFG2']\n", "Total vars: 6427\n", "Missing count from source (filled with zeros): 6417\n" ] } ], "execution_count": 24 } ], "metadata": {}, "nbformat": 4, "nbformat_minor": 5 }