import os def find_bam_file(wildcards): # Construct the direct path first: direct_path = os.path.join(INPUT_DIR, f"{wildcards.sample}.bam") if os.path.exists(direct_path): return direct_path else: # If not found directly in INPUT_DIR, search recursively in subdirectories search_pattern = os.path.join(INPUT_DIR, '**', f"{wildcards.sample}.bam") bam_matches = glob.glob(search_pattern, recursive=True) if bam_matches: # If multiple matches, choose the first or define another selection criterion return bam_matches[0] else: # Raise an error if no bam file found for this sample raise FileNotFoundError(f"No BAM file found for sample {wildcards.sample}") if config["SICKLE"]=="no": rule popins2_crop_unmapped: input: find_bam_file # INPUT_DIR + "/{sample}/{sample}.bam" output: temp(WORK_DIR + "/{sample}/single.fastq"), temp(WORK_DIR + "/{sample}/paired.1.fastq"), temp(WORK_DIR + "/{sample}/paired.2.fastq"), temp(WORK_DIR + "/{sample}/mates.bam"), WORK_DIR + "/{sample}/POPINS_SAMPLE_INFO" params: q = config["min-qual"], l = config["min-read-len"] resources: mem_mb = lambda wildcards, attempt: resources["dynamic_schedule"]["mem_init"]* (2** (attempt - 1)), # mem_mb = resources["standard_4G"]["mem"], runtime = resources["standard_4G"]["time"] threads: threads["single"] log: out="logs/crop_remapped/{sample}_crop_remapped.out", err="logs/crop_remapped/{sample}_crop_remapped.err" benchmark: "benchmarks/crop_remapped/{sample}_crop_remapped.txt" shell: "mkdir -p {WORK_DIR}/{wildcards.sample}; " "{POPINS2_BIN} crop-unmapped {input}" " --prefix {WORK_DIR} " " --sample {wildcards.sample}" " --min-qual {params.q}" " --min-read-len {params.l}" " > {log.out} 2> {log.err}" rule popins2_sort: input: os.path.join(RESULTS_DIR, "read_numbers.png") if config["ANALYSIS"]=="yes" else [], mates = WORK_DIR + "/{sample}/mates.bam" output: temp(WORK_DIR + "/{sample}/non_ref.bam") conda: os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml") resources: mem_per_thread = resources["samtools_multithread"]["mem_per_thread"], mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads, runtime = resources["samtools_multithread"]["time"] threads: threads["multi"]["samtools"] log: err="logs/crop_remapped/{sample}_sort.err" benchmark: "benchmarks/crop_remapped/{sample}_sort.txt" shell: "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}" elif config["SICKLE"]=="yes": rule popins2_crop_unmapped: input: INPUT_DIR + "/{sample}/{sample}.bam" output: temp(WORK_DIR + "/{sample}/single.fastq"), temp(WORK_DIR + "/{sample}/paired.1.fastq"), temp(WORK_DIR + "/{sample}/paired.2.fastq"), temp(WORK_DIR + "/{sample}/mates.bam"), WORK_DIR + "/{sample}/POPINS_SAMPLE_INFO" params: q = config["min-qual"], l = config["min-read-len"] resources: mem_mb = lambda wildcards, attempt: resources["dynamic_schedule"]["mem_init"]* (2** (attempt - 1)), # mem_mb = resources["standard_4G"]["mem"], runtime = resources["standard_4G"]["time"] threads: threads["single"] log: out="logs/crop_remapped/{sample}_crop_remapped_sickle.out", err="logs/crop_remapped/{sample}_crop_remapped_sickle.err" benchmark: "benchmarks/crop_remapped/{sample}_crop_remapped_sickle.txt" shell: "mkdir -p {WORK_DIR}/{wildcards.sample}; " "{POPINS2_BIN} crop-unmapped {input}" " --prefix {WORK_DIR} " " --sample {wildcards.sample}" " > {log.out} 2> {log.err}" rule popins2_sort: input: # os.path.join(RESULTS_DIR, "read_numbers.png") if config["ANALYSIS"]=="yes" else [], mates = WORK_DIR + "/{sample}/mates.bam" output: temp(WORK_DIR + "/{sample}/non_ref.bam") conda: os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml") resources: mem_per_thread = resources["samtools_multithread"]["mem_per_thread"], mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads, runtime = resources["samtools_multithread"]["time"] threads: threads["multi"]["samtools"] log: err="logs/crop_remapped/{sample}_sort_sickle.err" benchmark: "benchmarks/crop_remapped/{sample}_sort_sickle.txt" shell: "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}" rule popins2_sickle: input: # os.path.join(RESULTS_DIR, "read_numbers.png") if config["ANALYSIS"]=="yes" else [], single = WORK_DIR + "/{sample}/single_clean.fastq", pair_1 = WORK_DIR + "/{sample}/paired_1_clean.fastq", pair_2 = WORK_DIR + "/{sample}/paired_2_clean.fastq" output: single = temp(WORK_DIR + "/{sample}/sickle.single_clean.fastq"), pair_1 = temp(WORK_DIR + "/{sample}/sickle.paired_1_clean.fastq"), pair_2 = temp(WORK_DIR + "/{sample}/sickle.paired_2_clean.fastq"), temp_single = temp(WORK_DIR + "/{sample}/sickle.single2_clean.fastq") params: q = config["min-qual"], l = config["min-read-len"] resources: mem_mb = resources["standard_2G"]["mem"], runtime = resources["standard_2G"]["time"] threads: threads["single"] log: out="logs/crop_remapped/{sample}_sickle.out", err="logs/crop_remapped/{sample}_sickle.err" benchmark: "benchmarks/crop_remapped/{sample}_sickle.txt" shell: "{SICKLE}" " pe -q {params.q} -l {params.l} -x -n -t sanger" " -f {input.pair_1} -r {input.pair_2}" " -o {output.pair_1} -p {output.pair_2} -s {output.single}" " > {log.out} 2> {log.err}; " "{SICKLE}" " se -q {params.q} -l {params.l} -x -n -t sanger" " -f {input.single} -o {output.temp_single}" " >> {log.out} 2>> {log.err}; " "cat {output.temp_single} >> {output.single}"