This notebook includes several simple functions that help generate and run cellranger count commands, and gather the summary pages and output folders from separate sample run directories.

import os,shutil,re
import subprocess
%config ZMQInteractiveShell.ast_node_interactivity = "all"

Check the current working directory:

# Working directory of the notebook; stored so it can be handed to the
# gather_* helpers, which look for run folders directly underneath it.
cfolder = os.getcwd()
cfolder
'/mnt/data_processing/10xRNA_working/20220202_Polverino_sc5prime_GEX_miseq'
def get_count_result_folder_list(cfolder):
    """Return the sub-folders of ``cfolder`` that contain a web summary.

    A sub-folder is treated as a count result folder when the file
    ``outs/web_summary.html`` exists inside it. Only the immediate
    children of ``cfolder`` are inspected.

    Args:
        cfolder: Path of the directory that holds one folder per run.

    Returns:
        list[str]: Names (not full paths) of the matching sub-folders, in
        the order ``os.walk`` reports them. The list is empty when
        ``cfolder`` does not exist or cannot be listed.
    """
    # os.walk yields nothing for a missing or unreadable directory, so give
    # next() a default instead of letting StopIteration escape to the caller.
    _, subfolders, _ = next(
        os.walk(cfolder, followlinks=False), (cfolder, [], [])
    )
    return [
        name
        for name in subfolders
        if os.path.isfile(
            os.path.join(cfolder, name, "outs", "web_summary.html")
        )
    ]

def get_sample_names(fastq_path,
                     fastq_suffixes=(".fastq", ".fastq.gz", ".fq", ".fq.gz")):
    """Collect the unique sample names found in a FASTQ directory.

    The sample name is the part of a file name before its first
    underscore. Entries that do not end in one of ``fastq_suffixes``
    (reports, QC folders, ...) are ignored so they are not mistaken for
    samples.

    Args:
        fastq_path: Directory that contains the FASTQ files.
        fastq_suffixes: File-name endings (compared case-insensitively)
            that mark an entry as a FASTQ file.

    Returns:
        list[str]: Sorted, de-duplicated sample names.

    Raises:
        OSError: If ``fastq_path`` cannot be listed.
    """
    suffixes = tuple(suffix.lower() for suffix in fastq_suffixes)
    names = {
        entry.split("_", 1)[0]
        for entry in os.listdir(fastq_path)
        if entry.lower().endswith(suffixes)
    }
    # Sort so the run order is reproducible between notebook executions.
    return sorted(names)


### cellranger count command construction:
def get_cellranger_count_cmd(sample, fastq_path):
    """Build the shell command line for one ``cellranger count`` run.

    Reads the module-level variables ``cellranger`` (placed first on the
    command line) and ``transcriptome`` (value of ``--transcriptome``).
    The sample name is used both as the run id (``--id``) and as the
    ``--sample`` value.

    Args:
        sample: Sample name.
        fastq_path: Value for ``--fastqs``.

    Returns:
        str: The command as a single string intended for a POSIX shell.
    """
    # Local import keeps this function self-contained in the notebook.
    import shlex

    # The string is later executed through a shell, so quote every value
    # taken from variables; plain paths and names come out unchanged.
    return " ".join([
        shlex.quote(cellranger), "count",
        "--transcriptome", shlex.quote(transcriptome),
        "--fastqs", shlex.quote(fastq_path),
        "--localcores=24", "--localmem=100", "--no-bam",
        "--id", shlex.quote(sample),
        "--sample", shlex.quote(sample),
    ])

# Optional extra argument that can be added to the command above: " --expect-cells=5000"

def run_cellranger_for_sample_list(sample_names, fastq_path):
    """Run ``cellranger count`` for each sample, one after another.

    For every sample the command is built with
    ``get_cellranger_count_cmd``, printed, and executed through the shell.
    The call waits for each run to finish before starting the next one.
    Captured stdout is printed, followed by stderr (if any) and a notice
    when the process exits with a non-zero code, so failed runs are
    visible. A failure does not stop the remaining samples.

    Args:
        sample_names: Iterable of sample names.
        fastq_path: Directory with the FASTQ files, passed on to the
            command builder.

    Returns:
        None
    """
    for sample in sample_names:
        cmd = get_cellranger_count_cmd(sample, fastq_path)
        print("-------------------------------\n--------- Runing count---------\n-------------------------------\n"
              + sample + "-------------------------------\n")
        print(cmd)
        print("------------------------------------------")
        result = subprocess.run(cmd, shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # errors="replace" keeps undecodable bytes from aborting the loop.
        print(result.stdout.decode("utf-8", errors="replace"))
        if result.stderr:
            print(result.stderr.decode("utf-8", errors="replace"))
        if result.returncode != 0:
            print("cellranger count exited with code "
                  + str(result.returncode) + " for sample " + sample)

def gather_summaries(cfolder):
    """Copy each run's web summary into ``cellranger_web_summaries``.

    The target folder ``<cfolder>/cellranger_web_summaries`` is created
    when it is missing. For every folder returned by
    ``get_count_result_folder_list`` the file ``outs/web_summary.html`` is
    copied there as ``<folder>_web_summary.html``. Progress messages are
    printed along the way.

    Args:
        cfolder: Directory that contains the run folders (a string path).

    Returns:
        None
    """
    summary_dir = cfolder + "/cellranger_web_summaries"
    if os.path.exists(summary_dir):
        print("Summary folder exists, Ready.")
    else:
        print("Creating folder...")
        os.makedirs(summary_dir)
        print("Folder created, Ready.")

    for run_name in get_count_result_folder_list(cfolder):
        source = cfolder + "/" + run_name + "/outs/web_summary.html"
        target = summary_dir + "/" + run_name + "_web_summary.html"
        shutil.copyfile(source, target)
        print(run_name + " Coppied.")
    print("Finished.")

Set the path to the formatted FASTQ files:

# Directory with the FASTQ files, relative to the working directory; it is
# scanned for sample names and passed to cellranger as --fastqs.
fastq_path = "./fastq_checking/"

Set up the reference genome and the cellranger version:

# Reference directory passed to `cellranger count --transcriptome`.
# Exactly one assignment should be active; GRCh38 is selected here and the
# mm10 / Rnor_6.0 alternatives are kept commented out for quick switching.
transcriptome = "/data_1T/ref/hm/refdata-gex-GRCh38-2020-A"
# transcriptome = "/data_1T/ref/mm/refdata-cellranger-mm10-3.0.0"
# transcriptome = "/data_1T/ref/rn/Rnor_6.0"

# Path of the cellranger executable placed at the start of the count command.
# Older versions are kept commented out; 6.1.2 is the active one.
# cellranger = '/home/xiaofan/Biotools/cellranger-6.0.2/cellranger'
# cellranger = '/home/xiaofan/Biotools/cellranger-6.1.1/cellranger'
cellranger = '/home/xiaofan/Biotools/cellranger-6.1.2/cellranger'

Check sample names:

# Derive the sample names from the entries in fastq_path (the text before
# the first "_" of each name) and display them for a visual check.
sample_names = get_sample_names(fastq_path)
sample_names
['multiqc', '69-20200902-cryo-lung-ssp-GEX']

Start cellranger count:

# Run cellranger count for every detected sample, one sample at a time.
run_cellranger_for_sample_list(sample_names, fastq_path)

Collect and rename summary pages

# Copy each run's outs/web_summary.html into cellranger_web_summaries/,
# renamed to <run folder>_web_summary.html.
gather_summaries(cfolder)
Creating folder...
Folder created, Ready.
69-20200902-cryo-lung-ssp-GEX-manual Coppied.
90-20210121-cryo-lung-susp-GEX Coppied.
73-20200924-cryo-lung-susp-GEX Coppied.
70-20200910-cryo-lung-susp-GEX Coppied.
74-20201001-cryo-lung-susp-GEX Coppied.
69-20200902-cryo-lung-ssp-GEX Coppied.
78-20201112-cryo-lung-susp-GEX Coppied.
UA94-RUL-cryo-lung-susp-20210311-GEX Coppied.
72-20200917-cryo-lung-ssp-GEX Coppied.
UA96-20210325-lung-cryo-GEX Coppied.
Finished.
def gather_count_output(cfolder):
    """Copy each run's ``outs`` directory into ``cellranger_count_output``.

    The target folder ``<cfolder>/cellranger_count_output`` is created
    when it is missing. For every folder returned by
    ``get_count_result_folder_list`` the whole ``outs`` tree is copied to
    ``<folder>_out`` inside the target folder. A run whose destination
    already exists is skipped, so the function can be re-run after new
    samples finish without ``shutil.copytree`` failing on earlier copies.

    Args:
        cfolder: Directory that contains the run folders (a string path).

    Returns:
        None
    """
    output_dir = cfolder + "/cellranger_count_output"
    if not os.path.exists(output_dir):
        print("Creating folder...")
        os.makedirs(output_dir)
        print("Done.")
    else:
        print("Output folder exists, Ready.")

    for folder in get_count_result_folder_list(cfolder):
        destination = output_dir + "/" + folder + "_out"
        # copytree refuses to write into an existing directory; skip runs
        # that were collected by an earlier call instead of crashing.
        if os.path.exists(destination):
            print(folder + " already collected, skipped.")
            continue
        shutil.copytree(cfolder + "/" + folder + "/outs", destination)
        print(folder + " Coppied.")
    print("Finished.")

If needed, collect the entire count output folder (this can take several minutes if BAM files are included):

# Copy each run's complete outs/ directory into cellranger_count_output/
# as <run folder>_out.
gather_count_output(cfolder)
Creating folder...
Done.
S3 Coppied.
S2 Coppied.
M5 Coppied.
P3 Coppied.
M3 Coppied.
M6 Coppied.
S1 Coppied.
P1 Coppied.
M4 Coppied.
M1 Coppied.
PA Coppied.
P5 Coppied.
P2 Coppied.
M2 Coppied.
Finished.