Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
popinSnake
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
FONDA_A6
popinSnake
Commits
4dcf79b4
Commit
4dcf79b4
authored
2 months ago
by
kdc715
Browse files
Options
Downloads
Patches
Plain Diff
remove chromosome info from coverage analysis, only keep contigs
parent
f30daff1
Branches
Branches containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
snakemodules/notebooks/contig_coverage_heatmap.py.ipynb
+14
-4
14 additions, 4 deletions
snakemodules/notebooks/contig_coverage_heatmap.py.ipynb
with
14 additions
and
4 deletions
snakemodules/notebooks/contig_coverage_heatmap.py.ipynb
+
14
−
4
View file @
4dcf79b4
...
@@ -160,13 +160,23 @@
...
@@ -160,13 +160,23 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
91
,
"execution_count":
null
,
"id": "36f31bc9-8eaa-434a-9a81-6a97282fe469",
"id": "36f31bc9-8eaa-434a-9a81-6a97282fe469",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"coverage_df.rename(columns={'#rname': 'contigs'}, inplace=True)\n",
"coverage_df.rename(columns={'#rname': 'contigs'}, inplace=True)\n",
"coverage_df.set_index('contigs', inplace=True)\n",
"coverage_df.set_index('contigs', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "345e2ab4",
"metadata": {},
"outputs": [],
"source": [
"coverage_df = coverage_df[coverage_df.index.str.startswith(\"contig_\")]\n",
"coverage_df = coverage_df.reindex(order_by_length)"
"coverage_df = coverage_df.reindex(order_by_length)"
]
]
},
},
...
@@ -217,7 +227,7 @@
...
@@ -217,7 +227,7 @@
],
],
"metadata": {
"metadata": {
"kernelspec": {
"kernelspec": {
"display_name": "
Python 3 (ipykernel)
",
"display_name": "
base
",
"language": "python",
"language": "python",
"name": "python3"
"name": "python3"
},
},
...
@@ -231,7 +241,7 @@
...
@@ -231,7 +241,7 @@
"name": "python",
"name": "python",
"nbconvert_exporter": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"pygments_lexer": "ipython3",
"version": "3.
10
.12"
"version": "3.
9
.12"
}
}
},
},
"nbformat": 4,
"nbformat": 4,
...
...
%% Cell type:code id:99f09a73-ce8c-46b0-87c1-0f2c801a63ca tags:
%% Cell type:code id:99f09a73-ce8c-46b0-87c1-0f2c801a63ca tags:
```
python
```
python
%
load_ext
autoreload
%
load_ext
autoreload
%
autoreload
2
%
autoreload
2
```
```
%% Cell type:code id:0bcadc62-0e3e-41e7-ba9a-1623c666520f tags:
%% Cell type:code id:0bcadc62-0e3e-41e7-ba9a-1623c666520f tags:
```
python
```
python
from
Bio
import
SeqIO
from
Bio
import
SeqIO
import
glob
import
glob
import
os
import
os
import
sys
import
sys
import
pandas
as
pd
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
import
matplotlib.pyplot
as
plt
import
seaborn
as
sns
import
seaborn
as
sns
```
```
%% Cell type:code id:96c89b03-b6e0-4d20-a5ee-9df6607f79fb tags:
%% Cell type:code id:96c89b03-b6e0-4d20-a5ee-9df6607f79fb tags:
```
python
```
python
# If executing with snakemake, use the path below
# If executing with snakemake, use the path below
sys
.
path
.
append
(
snakemake
.
config
[
"
WORKFLOW_PATH
"
]
+
'
/snakemodules/notebooks/src/
'
)
sys
.
path
.
append
(
snakemake
.
config
[
"
WORKFLOW_PATH
"
]
+
'
/snakemodules/notebooks/src/
'
)
# If executing directly with jupyter notebook from the commandline, use the path below
# If executing directly with jupyter notebook from the commandline, use the path below
# sys.path.append(os.getcwd()+'/src/')
# sys.path.append(os.getcwd()+'/src/')
from
stat_func
import
calculator
from
stat_func
import
calculator
from
stat_func
import
visualization
from
stat_func
import
visualization
%
matplotlib
inline
%
matplotlib
inline
```
```
%% Cell type:code id:b61116c6-48c1-40e5-8e7a-42f31b05bfe2 tags:
%% Cell type:code id:b61116c6-48c1-40e5-8e7a-42f31b05bfe2 tags:
```
python
```
python
# # Get the current working directory
# # Get the current working directory
# print("Current Path:", current_path)
# print("Current Path:", current_path)
# # Navigate up two directories to reach '/home/usr/program'
# # Navigate up two directories to reach '/home/usr/program'
# print("Program Path:", program_path)
# print("Program Path:", program_path)
# Join 'results' to get the desired path
# Join 'results' to get the desired path
# current_path = os.getcwd()
# current_path = os.getcwd()
# program_path = os.path.dirname(os.path.dirname(current_path))
# program_path = os.path.dirname(os.path.dirname(current_path))
# results_path = os.path.join(program_path, 'results')
# results_path = os.path.join(program_path, 'results')
# If executing with snakemake, use the path below
# If executing with snakemake, use the path below
results_path
=
os
.
path
.
join
(
os
.
getcwd
(),
'
results
'
)
results_path
=
os
.
path
.
join
(
os
.
getcwd
(),
'
results
'
)
print
(
"
Results Path:
"
,
results_path
)
print
(
"
Results Path:
"
,
results_path
)
```
```
%% Cell type:code id:496682dd-a457-4510-b611-34ebd28fbd4f tags:
%% Cell type:code id:496682dd-a457-4510-b611-34ebd28fbd4f tags:
```
python
```
python
# Use paths below for snakemake execution
# Use paths below for snakemake execution
supercontigs
=
snakemake
.
input
.
fasta
supercontigs
=
snakemake
.
input
.
fasta
coverage_files
=
snakemake
.
input
.
cov
coverage_files
=
snakemake
.
input
.
cov
# Use paths below for jupyter notebook execution
# Use paths below for jupyter notebook execution
# For jupyter notebook execution, supercontigs is a list after glob.glob
# For jupyter notebook execution, supercontigs is a list after glob.glob
# supercontig_fa = glob.glob('/home/kedic/popinSnake/results/supercontigs.fa')
# supercontig_fa = glob.glob('/home/kedic/popinSnake/results/supercontigs.fa')
# supercontigs = supercontig_fa[0]
# supercontigs = supercontig_fa[0]
# coverage_files = glob.glob('/home/kedic/popinSnake/workdir/**/coverage.txt')
# coverage_files = glob.glob('/home/kedic/popinSnake/workdir/**/coverage.txt')
```
```
%% Cell type:code id:e2c85a70-b05a-4ee3-b964-638639f9343a tags:
%% Cell type:code id:e2c85a70-b05a-4ee3-b964-638639f9343a tags:
```
python
```
python
# For jupyter notebook execution, change supercontigs to supercontigs[0]
# For jupyter notebook execution, change supercontigs to supercontigs[0]
seq_objects
=
SeqIO
.
parse
(
supercontigs
,
'
fasta
'
)
seq_objects
=
SeqIO
.
parse
(
supercontigs
,
'
fasta
'
)
sequences
=
[]
sequences
=
[]
for
seq
in
seq_objects
:
for
seq
in
seq_objects
:
sequences
.
append
(
seq
)
sequences
.
append
(
seq
)
num_of_contigs
=
len
(
sequences
)
num_of_contigs
=
len
(
sequences
)
```
```
%% Cell type:code id:e88ab748-87cb-493d-b2c2-15134073d87d tags:
%% Cell type:code id:e88ab748-87cb-493d-b2c2-15134073d87d tags:
```
python
```
python
contig_id
=
[]
contig_id
=
[]
contig_length
=
[]
contig_length
=
[]
for
record
in
sequences
:
for
record
in
sequences
:
record_id
=
record
.
id
record_id
=
record
.
id
sequence
=
record
.
seq
sequence
=
record
.
seq
length
=
len
(
sequence
)
length
=
len
(
sequence
)
contig_id
.
append
(
record_id
)
contig_id
.
append
(
record_id
)
contig_length
.
append
(
length
)
contig_length
.
append
(
length
)
```
```
%% Cell type:code id:95441eb2-f5dd-42b0-bae1-c43f5b4b0cb6 tags:
%% Cell type:code id:95441eb2-f5dd-42b0-bae1-c43f5b4b0cb6 tags:
```
python
```
python
df_sort_contig
=
pd
.
DataFrame
({
df_sort_contig
=
pd
.
DataFrame
({
'
contig_id
'
:
contig_id
,
'
contig_id
'
:
contig_id
,
'
contig_length
'
:
contig_length
'
contig_length
'
:
contig_length
})
})
df_sort_contig
=
df_sort_contig
.
sort_values
(
by
=
[
'
contig_length
'
],
ascending
=
True
)
df_sort_contig
=
df_sort_contig
.
sort_values
(
by
=
[
'
contig_length
'
],
ascending
=
True
)
order_by_length
=
df_sort_contig
[
'
contig_id
'
].
tolist
()
order_by_length
=
df_sort_contig
[
'
contig_id
'
].
tolist
()
```
```
%% Cell type:code id:495f2814-9fcb-4843-a2b6-fc288301101a tags:
%% Cell type:code id:495f2814-9fcb-4843-a2b6-fc288301101a tags:
```
python
```
python
# Initialize an empty DataFrame to hold all coverage data
# Initialize an empty DataFrame to hold all coverage data
coverage_df
=
pd
.
DataFrame
()
coverage_df
=
pd
.
DataFrame
()
for
file
in
coverage_files
:
for
file
in
coverage_files
:
# Extract sample name from the filename
# Extract sample name from the filename
folder_path
=
os
.
path
.
dirname
(
file
)
folder_path
=
os
.
path
.
dirname
(
file
)
sample_name
=
os
.
path
.
basename
(
folder_path
)
sample_name
=
os
.
path
.
basename
(
folder_path
)
# Read the coverage data, skipping comment lines starting with '#'
# Read the coverage data, skipping comment lines starting with '#'
df
=
pd
.
read_csv
(
file
,
sep
=
'
\t
'
)
df
=
pd
.
read_csv
(
file
,
sep
=
'
\t
'
)
# Select the contig name and mean depth columns
# Select the contig name and mean depth columns
df
=
df
[[
'
#rname
'
,
'
meandepth
'
]]
df
=
df
[[
'
#rname
'
,
'
meandepth
'
]]
# Rename the mean depth column to the sample name
# Rename the mean depth column to the sample name
df
.
rename
(
columns
=
{
'
meandepth
'
:
sample_name
},
inplace
=
True
)
df
.
rename
(
columns
=
{
'
meandepth
'
:
sample_name
},
inplace
=
True
)
if
coverage_df
.
empty
:
if
coverage_df
.
empty
:
# Initialize the main DataFrame with the first sample
# Initialize the main DataFrame with the first sample
coverage_df
=
df
coverage_df
=
df
else
:
else
:
# Merge with the main DataFrame on the contig name
# Merge with the main DataFrame on the contig name
coverage_df
=
pd
.
merge
(
coverage_df
,
df
,
on
=
'
#rname
'
,
how
=
'
outer
'
)
coverage_df
=
pd
.
merge
(
coverage_df
,
df
,
on
=
'
#rname
'
,
how
=
'
outer
'
)
```
```
%% Cell type:code id:36f31bc9-8eaa-434a-9a81-6a97282fe469 tags:
%% Cell type:code id:36f31bc9-8eaa-434a-9a81-6a97282fe469 tags:
```
python
```
python
coverage_df
.
rename
(
columns
=
{
'
#rname
'
:
'
contigs
'
},
inplace
=
True
)
coverage_df
.
rename
(
columns
=
{
'
#rname
'
:
'
contigs
'
},
inplace
=
True
)
coverage_df
.
set_index
(
'
contigs
'
,
inplace
=
True
)
coverage_df
.
set_index
(
'
contigs
'
,
inplace
=
True
)
```
%% Cell type:code id:345e2ab4 tags:
```
python
coverage_df
=
coverage_df
[
coverage_df
.
index
.
str
.
startswith
(
"
contig_
"
)]
coverage_df
=
coverage_df
.
reindex
(
order_by_length
)
coverage_df
=
coverage_df
.
reindex
(
order_by_length
)
```
```
%% Cell type:code id:ab7e6633 tags:
%% Cell type:code id:ab7e6633 tags:
```
python
```
python
coverage_df_cleaned
=
calculator
.
remove_outliers_replace_with_bounds
(
coverage_df
)
coverage_df_cleaned
=
calculator
.
remove_outliers_replace_with_bounds
(
coverage_df
)
```
```
%% Cell type:code id:3f61507d-b554-4dfd-830c-fa3555cad215 tags:
%% Cell type:code id:3f61507d-b554-4dfd-830c-fa3555cad215 tags:
```
python
```
python
# Plot the heatmap
# Plot the heatmap
sns
.
set
(
font_scale
=
0.8
)
sns
.
set
(
font_scale
=
0.8
)
sns
.
heatmap
(
coverage_df_cleaned
,
annot
=
False
,
cmap
=
'
YlOrBr
'
)
sns
.
heatmap
(
coverage_df_cleaned
,
annot
=
False
,
cmap
=
'
YlOrBr
'
)
plt
.
title
(
'
Coverage of Contigs
'
)
plt
.
title
(
'
Coverage of Contigs
'
)
plt
.
xlabel
(
'
Samples
'
)
plt
.
xlabel
(
'
Samples
'
)
plt
.
ylabel
(
'
Contigs
'
)
plt
.
ylabel
(
'
Contigs
'
)
plt
.
savefig
(
snakemake
.
output
[
0
],
dpi
=
400
,
bbox_inches
=
'
tight
'
)
plt
.
savefig
(
snakemake
.
output
[
0
],
dpi
=
400
,
bbox_inches
=
'
tight
'
)
plt
.
show
()
plt
.
show
()
```
```
%% Cell type:code id:b61cdb41-3f9e-48fa-86ca-3249642fd7b4 tags:
%% Cell type:code id:b61cdb41-3f9e-48fa-86ca-3249642fd7b4 tags:
```
python
```
python
``
`
``
`
%%
Cell
type
:
code
id
:
de6a634e
-
ab13
-
4297
-
b7c9
-
349
cc69bad03
tags
:
%%
Cell
type
:
code
id
:
de6a634e
-
ab13
-
4297
-
b7c9
-
349
cc69bad03
tags
:
```
python
```
python
```
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment