LARRY 1 Lineage

import sys
import os
import matplotlib.pyplot as plt
import anndata as ad
import numpy as np
import scanpy as sc
import seaborn as sns
import pandas as pd
from matplotlib.patches import Patch
def plot_clustermap(pdf_matrix, title="KDE Heatmap", xlabel="Gene Index", ylabel="Grid Bin",col_colors=None,row_colors=None,lut=None):
        """
        Plot a clustermap of the pdf_matrix. (automatic hierarchical clus and ordering of row/col)
        pdf_matrix: (N_grid, N_genes or cells)
        """
        #fig, ax = plt.subplots(figsize=(8, 6))
        if row_colors is not None:
            g= sns.clustermap(pdf_matrix, cmap="viridis",row_cluster=True, col_cluster=False, row_colors=row_colors)
        elif col_colors is not None:
            g= sns.clustermap(pdf_matrix, cmap="viridis",row_cluster=False, col_cluster=True, col_colors=col_colors)
        else:
            g= sns.clustermap(pdf_matrix, cmap="viridis",row_cluster=False, col_cluster=True, col_colors=None)
        if lut is not None:
            handles = [Patch(facecolor=lut[name]) for name in lut]
            g.ax_heatmap.legend(handles, lut, title='Annot',bbox_to_anchor=(1, 1), bbox_transform=plt.gcf().transFigure, loc='lower left')
        g.ax_heatmap.set_title(title)
        g.ax_heatmap.set_xlabel(xlabel)
        g.ax_heatmap.set_ylabel(ylabel)
        #plt.tight_layout()
        g.fig.savefig(os.path.join('./', f'{title}.png'), dpi=300,bbox_inches="tight")
        plt.close()
data_path = '/home/tchari/CellDynamicsData/LARRY'
adata_rna = ad.read_h5ad(data_path+'/1lin_postprocessed.h5ad') #from Cameron --> figure out what processing was done
lut = dict(zip(adata_rna.obs['time_info'].unique(),"rbg"))
col_colors = np.array(adata_rna.obs['time_info'].map(lut))

pdf_matrix = np.random.rand(100,len(adata_rna))
plot_clustermap(pdf_matrix, title="test", xlabel="Cell Index", ylabel="Grid Bin",col_colors=col_colors,row_colors=None,lut=lut)
data_path = '/home/tchari/CellDynamicsData/LARRY'
larry = ad.read_h5ad(data_path+'/1lin_postprocessed.h5ad') #from Cameron --> figure out what processing was done
larry
#Well 0 (day 2) and Well 1 (day 4 and day 6), Well 2 (day 4 and day 6)
larry.obs['Well'].value_counts()
#Want Day 2,4,6, well1, Undiff+monocyte

monolin_allwells = larry[larry.obs['state_info'].isin(['Monocyte','Undifferentiated'])]
monolin_allwells
monolin_well1 = larry[larry.obs['state_info'].isin(['Monocyte','Undifferentiated']) & (larry.obs['Well'].isin([0,1]))]
monolin_well1
monolin_well2 = larry[larry.obs['state_info'].isin(['Monocyte','Undifferentiated']) & (larry.obs['Well'].isin([2]))]
monolin_well2
#larry.obs['time_info'].value_counts()
monolin_well1.layers['spliced'].todense()
#Save counts as raw U/S counts
monolin_well1.layers['spliced'] = monolin_well1.layers['raw_spliced'].copy()
monolin_well1.layers['unspliced'] = monolin_well1.layers['raw_unspliced'].copy()
monolin_well1.layers['spliced'].todense()
monolin_well2.layers['spliced'] = monolin_well2.layers['raw_spliced'].copy()
monolin_well2.layers['unspliced'] = monolin_well2.layers['raw_unspliced'].copy()
monolin_well2.var_names
#Save counts as raw U/S counts
monolin_allwells.layers['spliced'] = monolin_allwells.layers['raw_spliced'].copy()
monolin_allwells.layers['unspliced'] = monolin_allwells.layers['raw_unspliced'].copy()
monolin_allwells.layers['spliced'].todense()
#Save to h5ad files
monolin_well1.obs['n_cells'] = [1]*len(monolin_well1)
monolin_well2.obs['n_cells'] = [1]*len(monolin_well2)
monolin_allwells.obs['n_cells'] = [1]*len(monolin_allwells)

monolin_well1.write_h5ad(data_path+'/mono_lin_well1.h5ad') #Has day2, beginning cells
monolin_well2.write_h5ad(data_path+'/mono_lin_well2.h5ad')
monolin_allwells.write_h5ad(data_path+'/mono_lin_allwells.h5ad')
#Check how many TFs in this dataset...
test = pd.read_csv('/home/tchari/CellDynamics/Mouse_TFs.txt',header=None)
test
np.sum([(i in list(monolin_well1.var_names)) for i in list(test[0])])