Run cellranger count on multiple samples interactively with python
And collect summary pages/outs in a single folder
This notebook includes several simple functions that help generate and run cellranger count commands, and gather the summary pages and output folders from the separate per-sample run directories.
import os,shutil,re
import subprocess
%config ZMQInteractiveShell.ast_node_interactivity = "all"
Check current work path:
# Working directory of the notebook; it is later passed to the gather_*
# helpers as the folder whose sub-folders are searched for count results.
cfolder = os.getcwd()
# Bare expression: echoes the path as cell output when run in a notebook.
cfolder
def get_count_result_folder_list(cfolder):
    """Return the sub-folders of *cfolder* that contain a count summary page.

    A direct child folder qualifies when the file
    ``<child>/outs/web_summary.html`` exists.

    Args:
        cfolder: Directory whose immediate sub-folders are inspected.

    Returns:
        Sorted list of qualifying folder names (names only, not full paths).
        Empty when *cfolder* does not exist, cannot be read, or has no
        qualifying child.
    """
    # os.walk yields nothing for a missing/unreadable directory, so give
    # next() a default instead of letting StopIteration escape.
    _, subfolders, _ = next(
        os.walk(cfolder, followlinks=False), (cfolder, [], [])
    )
    # Sorting makes the processing order reproducible between runs.
    return sorted(
        folder
        for folder in subfolders
        if os.path.isfile(
            os.path.join(cfolder, folder, "outs", "web_summary.html")
        )
    )
def get_sample_names(fastq_path):
    """Collect the unique sample names found in a FASTQ directory.

    File names that follow the Illumina pattern
    ``<sample>_S<n>[_L<lane>]_<R|I><k>_<chunk>.fastq[.gz]`` contribute
    everything before the ``_S<n>`` field, so sample names that themselves
    contain underscores are kept whole. Any other entry falls back to the
    text before its first underscore.

    Args:
        fastq_path: Directory whose entries are scanned (not recursive).

    Returns:
        Sorted list of distinct sample names.

    Raises:
        OSError: If *fastq_path* cannot be listed.
    """
    illumina_name = re.compile(
        r"^(?P<sample>.+)_S\d+(?:_L\d+)?_[RI]\d_\d+\.(?:fastq|fq)(?:\.gz)?$"
    )
    names = set()
    for fastq in os.listdir(fastq_path):
        match = illumina_name.match(fastq)
        if match:
            names.add(match.group("sample"))
        else:
            # Non-Illumina name: keep the historical "first field" rule.
            names.add(fastq.split("_")[0])
    return sorted(names)
### cellranger count command construction:
def get_cellranger_count_cmd(sample, fastq_path, localcores=24, localmem=100):
    """Build the shell command line for one ``cellranger count`` run.

    Reads the module-level names ``cellranger`` (executable path) and
    ``transcriptome`` (reference path) at call time.

    Args:
        sample: Sample name; used for both ``--id`` and ``--sample``.
        fastq_path: Directory passed to ``--fastqs``.
        localcores: Value for ``--localcores`` (default 24).
        localmem: Value for ``--localmem`` (default 100).

    Returns:
        The command as a single string intended for a shell. Every
        user-supplied piece is shell-quoted, so paths or sample names with
        spaces or shell metacharacters stay one argument; plain values are
        emitted unchanged.
    """
    import shlex  # local import: the file's import cell does not provide it

    parts = [
        shlex.quote(cellranger),
        "count",
        "--transcriptome", shlex.quote(transcriptome),
        "--fastqs", shlex.quote(fastq_path),
        "--localcores={}".format(localcores),
        "--localmem={}".format(localmem),
        "--no-bam",
        "--id", shlex.quote(sample),
        "--sample", shlex.quote(sample),
    ]
    return " ".join(parts)
# " --expect-cells=5000"
def run_cellranger_for_sample_list(sample_names, fastq_path):
    """Run ``cellranger count`` for each sample, one after another.

    For every sample the command is built with get_cellranger_count_cmd(),
    printed, executed through the shell, and its captured output is printed
    once the process has finished. A failing sample is reported and the loop
    moves on to the next one.

    Args:
        sample_names: Iterable of sample names to process.
        fastq_path: FASTQ directory forwarded to the command builder.

    Returns:
        None.
    """
    for sample in sample_names:
        cmd = get_cellranger_count_cmd(sample, fastq_path)
        print("-------------------------------\n--------- Runing count---------\n-------------------------------\n"
              + sample + "-------------------------------\n")
        print(cmd)
        print("------------------------------------------")
        proc = subprocess.Popen(cmd, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()
        # errors="replace": a stray non-UTF-8 byte must not abort the batch.
        print(out.decode("utf-8", errors="replace"))
        if err:
            # stderr used to be captured and dropped, hiding the reason
            # for a failed run.
            print(err.decode("utf-8", errors="replace"))
        if proc.returncode != 0:
            print(sample + " FAILED: command exited with code "
                  + str(proc.returncode))
def gather_summaries(cfolder):
    """Copy each run's ``web_summary.html`` into one shared folder.

    The destination is ``<cfolder>/cellranger_web_summaries`` (created when
    absent). Each summary is stored as ``<run folder>_web_summary.html``;
    the run folders come from get_count_result_folder_list().

    Args:
        cfolder: Directory holding the per-sample run folders.

    Returns:
        None.
    """
    summary_dir = "/".join([cfolder, "cellranger_web_summaries"])
    if os.path.exists(summary_dir):
        print("Summary folder exists, Ready.")
    else:
        print("Creating folder...")
        os.makedirs(summary_dir)
        print("Folder created, Ready.")

    for run_name in get_count_result_folder_list(cfolder):
        source = "/".join([cfolder, run_name, "outs/web_summary.html"])
        target = summary_dir + "/" + run_name + "_web_summary.html"
        shutil.copyfile(source, target)
        print(run_name + " Coppied.")
    print("Finished.")
Add the path to the formatted FASTQ files:
# FASTQ directory (relative to the working directory); it is passed to
# get_sample_names() and ends up as the `--fastqs` argument of the command.
fastq_path = "./fastq_checking/"
Setup ref genome and software version:
# Reference passed to `cellranger count --transcriptome`. The commented-out
# assignments below are alternative references; enable exactly one.
transcriptome = "/data_1T/ref/hm/refdata-gex-GRCh38-2020-A"
# transcriptome = "/data_1T/ref/mm/refdata-cellranger-mm10-3.0.0"
# transcriptome = "/data_1T/ref/rn/Rnor_6.0"
# Path of the cellranger executable used to build the command. The
# commented-out assignments are other installed versions; enable exactly one.
# cellranger = '/home/xiaofan/Biotools/cellranger-6.0.2/cellranger'
# cellranger = '/home/xiaofan/Biotools/cellranger-6.1.1/cellranger'
cellranger = '/home/xiaofan/Biotools/cellranger-6.1.2/cellranger'
Check sample names:
# Derive the sample names from the file names found in fastq_path.
sample_names = get_sample_names(fastq_path)
# Bare expression: echoes the list so it can be checked before running.
sample_names
Start cellranger count:
# Run the count command for every sample in turn (blocks until each finishes).
run_cellranger_for_sample_list(sample_names, fastq_path)
# Then copy every run's web_summary.html into <cfolder>/cellranger_web_summaries.
gather_summaries(cfolder)
def gather_count_output(cfolder):
    """Copy each run's whole ``outs`` folder into one shared folder.

    The destination is ``<cfolder>/cellranger_count_output`` (created when
    absent). Each run is stored as ``<run folder>_out``; the run folders come
    from get_count_result_folder_list(). A copy left over from an earlier
    call is replaced, so the function can be re-run safely.

    Args:
        cfolder: Directory holding the per-sample run folders.

    Returns:
        None.
    """
    output_dir = os.path.join(cfolder, "cellranger_count_output")
    if not os.path.exists(output_dir):
        print("Creating folder...")
        os.makedirs(output_dir)
        print("Done.")
    else:
        print("Output folder exists, Ready.")
    for folder in get_count_result_folder_list(cfolder):
        source = os.path.join(cfolder, folder, "outs")
        target = os.path.join(output_dir, folder + "_out")
        if os.path.isdir(target):
            # copytree raises FileExistsError when the destination already
            # exists, so drop the stale copy from a previous call first.
            shutil.rmtree(target)
        shutil.copytree(source, target)
        print(folder + " Coppied.")
    print("Finished.")
If needed, collect the entire count output folder (this can take several minutes if BAM files are included):
# Copy every run's outs/ folder into <cfolder>/cellranger_count_output.
gather_count_output(cfolder)