From f30daff1439ffbed16ac31e815ce8e9ad251cdaf Mon Sep 17 00:00:00 2001
From: kdc715 <kedic@LIT-PF3VB193.localdomain>
Date: Thu, 23 Jan 2025 13:24:06 +0100
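Subject: [PATCH] samtools multithreading scheduling fix

`samtools sort -m` takes the maximum memory *per thread*, but the sort
rules passed {resources.mem_mb}, which was already the per-thread value
multiplied by the number of threads.

Add a `samtools_multithread` entry to the cluster config with a
`mem_per_thread` value, expose it as a `mem_per_thread` resource on every
multithreaded `samtools sort` rule, and pass that to `-m`. `mem_mb` is
still computed as `mem_per_thread * threads` for the job request.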
---
 config/cluster_config.yaml     |  3 +++
 snakemodules/contigmap.smk     | 20 ++++++++++++--------
 snakemodules/crop_remapped.smk | 16 ++++++++--------
 snakemodules/crop_unmapped.smk | 16 ++++++++--------
 snakemodules/kraken.smk        | 16 ++++++++--------
 5 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/config/cluster_config.yaml b/config/cluster_config.yaml
index 0cb5ef9..3964707 100644
--- a/config/cluster_config.yaml
+++ b/config/cluster_config.yaml
@@ -27,6 +27,9 @@ resources:
     samtools:
         mem: 4096
         time: "12h"
+    samtools_multithread:
+        mem_per_thread: 4096
+        time: "12h"
     samtools_25G:
         mem: 25600
         time: "12h" 
diff --git a/snakemodules/contigmap.smk b/snakemodules/contigmap.smk
index 9b88b06..84d8bfb 100644
--- a/snakemodules/contigmap.smk
+++ b/snakemodules/contigmap.smk
@@ -96,9 +96,9 @@ rule name_sort_unsorted:
     conda:
         os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
     resources:
-        # mem_mb = resources["samtools"]["mem"],
-        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-        runtime = resources["samtools"]["time"]
+        mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+        runtime = resources["samtools_multithread"]["time"]
     threads: 
         threads["multi"]["samtools"]
     log:
@@ -106,7 +106,9 @@ rule name_sort_unsorted:
     benchmark:
         "benchmarks/contigmap/{sample}_name_sort_unsorted.txt"
     shell:
-        "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input} 2> {log.err}"
+        """
+        samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input} 2> {log.err}
+        """
 
 ################################################################################
 
@@ -171,9 +173,9 @@ rule coordinate_sort_unsorted:
     conda:
         os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
     resources:
-        # mem_mb = resources["samtools_25G"]["mem"],
-        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-        runtime = resources["samtools_25G"]["time"]
+        mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+        runtime = resources["samtools_multithread"]["time"]
     threads: 
         threads["multi"]["samtools"]
     log:
@@ -181,7 +183,9 @@ rule coordinate_sort_unsorted:
     benchmark:
         "benchmarks/contigmap/{sample}_coordinate_sort_unsorted.txt"
     shell:
-        "samtools sort -@ {threads} -m {resources.mem_mb}M -o {output} {input} 2> {log.err}"
+        """
+        samtools sort -@ {threads} -m {resources.mem_per_thread}M -o {output} {input} 2> {log.err}
+        """
 
 
 rule index_sorted:
diff --git a/snakemodules/crop_remapped.smk b/snakemodules/crop_remapped.smk
index 2a6864b..d91fafc 100644
--- a/snakemodules/crop_remapped.smk
+++ b/snakemodules/crop_remapped.smk
@@ -60,9 +60,9 @@ if config["SICKLE"]=="no":
         conda:
             os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
         resources:
-            # mem_mb = resources["samtools"]["mem"],
-            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-            runtime = resources["samtools"]["time"]
+            mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+            runtime = resources["samtools_multithread"]["time"]
         threads: 
             threads["multi"]["samtools"]
         log:
@@ -70,7 +70,7 @@ if config["SICKLE"]=="no":
         benchmark:
             "benchmarks/crop_remapped/{sample}_sort.txt"
         shell:
-            "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input.mates} 2> {log.err}" 
+            "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}" 
     
 
 elif config["SICKLE"]=="yes":
@@ -113,9 +113,9 @@ elif config["SICKLE"]=="yes":
         conda:
             os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
         resources:
-            # mem_mb = resources["samtools"]["mem"],
-            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-            runtime = resources["samtools"]["time"]
+            mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+            runtime = resources["samtools_multithread"]["time"]
         threads: 
             threads["multi"]["samtools"]
         log:
@@ -123,7 +123,7 @@ elif config["SICKLE"]=="yes":
         benchmark:
             "benchmarks/crop_remapped/{sample}_sort_sickle.txt"
         shell:
-            "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input.mates} 2> {log.err}" 
+            "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}" 
 
     rule popins2_sickle:
         input:
diff --git a/snakemodules/crop_unmapped.smk b/snakemodules/crop_unmapped.smk
index 9b04065..1f0e9a3 100644
--- a/snakemodules/crop_unmapped.smk
+++ b/snakemodules/crop_unmapped.smk
@@ -58,9 +58,9 @@ if config["SICKLE"]=="no":
         conda:
             os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
         resources:
-            # mem_mb = resources["samtools"]["mem"],
-            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-            runtime = resources["samtools"]["time"]
+            mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+            runtime = resources["samtools_multithread"]["time"]
         threads: 
             threads["multi"]["samtools"]
         log:
@@ -68,7 +68,7 @@ if config["SICKLE"]=="no":
         benchmark:
             "benchmarks/crop_unmapped/{sample}_sort.txt"
         shell:
-            "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input.mates} 2> {log.err}" 
+            "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}" 
 
 
 elif config["SICKLE"]=="yes":
@@ -109,9 +109,9 @@ elif config["SICKLE"]=="yes":
         output:
             WORK_DIR + "/{sample}/non_ref.bam"
         resources:
-            # mem_mb = resources["samtools"]["mem"],
-            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-            runtime = resources["samtools"]["time"]
+            mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+            mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+            runtime = resources["samtools_multithread"]["time"]
         threads: 
             threads["multi"]["samtools"]
         conda:
@@ -121,7 +121,7 @@ elif config["SICKLE"]=="yes":
         benchmark:
             "benchmarks/crop_unmapped/{sample}_sort_sickle.txt"
         shell:
-            "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input.mates} 2> {log.err}"     
+            "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input.mates} 2> {log.err}"     
      
     rule popins2_sickle:
         input:
diff --git a/snakemodules/kraken.smk b/snakemodules/kraken.smk
index 220eb5e..ea2abfe 100644
--- a/snakemodules/kraken.smk
+++ b/snakemodules/kraken.smk
@@ -150,9 +150,9 @@ rule samtools_remap_classified_human:
     conda:
         os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
     resources:
-        # mem_mb = resources["samtools"]["mem"],
-        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-        runtime = resources["samtools"]["time"]
+        mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+        runtime = resources["samtools_multithread"]["time"]
     threads: 
         threads["multi"]["samtools"]
     log:
@@ -162,7 +162,7 @@ rule samtools_remap_classified_human:
     shell:
         """
         samtools view -Sb {input.sam} > {output.remapped_unsorted} 2> {log.err};
-        samtools sort -@ {threads} -m {resources.mem_mb}M -o {output.remapped_bam} {output.remapped_unsorted} 2>> {log.err}
+        samtools sort -@ {threads} -m {resources.mem_per_thread}M -o {output.remapped_bam} {output.remapped_unsorted} 2>> {log.err}
         """
        
 rule index_reads:
@@ -224,9 +224,9 @@ rule remapping_samsort_mates:
     conda:
         os.path.join(WORKFLOW_PATH,"snakemodules/envs/samtools.yml")
     resources:
-        # mem_mb = resources["samtools"]["mem"],
-        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools"]["mem"] * threads,
-        runtime = resources["samtools"]["time"]
+        mem_per_thread = resources["samtools_multithread"]["mem_per_thread"],
+        mem_mb = lambda wildcards, input, threads, attempt: resources["samtools_multithread"]["mem_per_thread"] * threads,
+        runtime = resources["samtools_multithread"]["time"]
     threads: 
         threads["multi"]["samtools"]
     log:
@@ -234,7 +234,7 @@ rule remapping_samsort_mates:
     benchmark:
         "benchmarks/kraken/{sample}_remapping_samsort_mates.txt"    
     shell:
-        "samtools sort -n -@ {threads} -m {resources.mem_mb}M -o {output} {input} 2> {log.err}"
+        "samtools sort -n -@ {threads} -m {resources.mem_per_thread}M -o {output} {input} 2> {log.err}"
 
 
 rule merge_set_mate:
-- 
GitLab