This repository was archived by the owner on Jan 13, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathplot_sequence_properties.py
More file actions
executable file
·63 lines (50 loc) · 2.33 KB
/
plot_sequence_properties.py
File metadata and controls
executable file
·63 lines (50 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import sys
import numpy as np
from wub.util import seq as seq_util
from wub.vis import report
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import seaborn as sns
warnings.resetwarnings()
_ = sns
# Parse command line arguments:
parser = argparse.ArgumentParser(
description='Plot histograms of lengths and quality values.')
parser.add_argument(
'-f', metavar='format', type=str, help="Input format (fastq).", default='fastq')
parser.add_argument(
'-b', metavar='bins', type=int, help="Number of bins on histograms (50).", default=50)
parser.add_argument(
'-r', metavar='report_pdf', type=str, help="Report pdf (plot_sequence_properties.pdf).", default='plot_sequence_properties.pdf')
parser.add_argument(
'-j', help="Produce joint plot of lengths and mean quality values (False).", default=False, action="store_true")
parser.add_argument('input_fastx', nargs='?', help='Input (default: stdin).',
type=argparse.FileType('r'), default=sys.stdin)
if __name__ == '__main__':
args = parser.parse_args()
in_format = args.f
input_iterator = seq_util.read_seq_records(
args.input_fastx, format=in_format)
# Could be more efficient with dictionaries if we did not have to
# deal with the joint plot.
lengths = []
mean_qualities = []
for record in input_iterator:
lengths.append(len(record))
if in_format == 'fastq':
mean_quality = seq_util.mean_qscore(record.letter_annotations["phred_quality"])
mean_qualities.append(mean_quality)
plotter = report.Report(args.r)
plotter.plot_histograms(
{'lengths': lengths}, title="Distribution of sequence lengths (mean={0:.3f})".format(np.mean(lengths)), xlab="Length", ylab="Count", legend=False)
if in_format == 'fastq':
plotter.plot_histograms(
{'qualities': mean_qualities}, title="Distribution of mean base qualities (mean={0:.3f})".format(np.mean(mean_qualities)), xlab="Mean base quality", ylab="Count", legend=False)
if args.j:
plotter.plot_arrays({'scatter': (lengths, mean_qualities)}, title="Sequence length vs. mean base quality",
xlab="Sequence length", ylab="Mean base quality", legend=False)
plotter.close()