,我需要用OQ作为附加字段,使用pysam向bam文件添加读取质量。
其他使用samtools等的传统方法会消耗更多的时间并创建多个文件。
我试了下给出的脚本,但最终没有签名的字符,而不是质量分数字符串!任何帮助都很感激。
提前谢谢。
eg.输入bam:
E00577:205:HVF37CCXY:3:2224:32461:53258 99 chr1 10419997 60 151M = 10420034 188 GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0 PG:Z:MarkDuplicates RG:Z:HVF37CCXY.3 NM:i:4 AS:i:144 XS:i:107预期输出bam:
E00577:205:HVF37CCXY:3:2224:32461:53258 99 chr1 10419997 60 151M = 10420034 188 GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0 PG:Z:MarkDuplicates RG:Z:HVF37CCXY.3 NM:i:4 AS:i:144 XS:i:107 OQ:Z:AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF#比萨姆
import os
import argparse
import pysam
parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')
args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)
infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
read.set_tag("OQ", read.query_qualities, replace=False)
outfile.write(read)
infile.close()
outfile.close()python GenerateBamWithOQTag.py -i subset.bam -o subset_OQ.bam
E00577:205:HVF37CCXY:3:2224:32461:53258 99 chr1 10419997 60 151M = 10420034 188 GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0 PG:Z:MarkDuplicates RG:Z:HVF37CCXY.3 NM:i:4 AS:i:144 XS:i:107 OQ:B:C,32,32,37,37,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,27,37,27,41,12,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,37,27,41,41,41,37,41,27,41,41,41,41,41,41,27,41,41,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,41,41,2,41,41,41,41,41,32,41,41,37,41,41,41,41,41,41,41,41,41,41,41,32,37,41,2,37,41,41,41,2,41,41,41,41,41,41,41,41,37,41,41,32,41,37,32,37,27,41,41,41,32,41,37,2发布于 2020-07-19 12:18:01
"""
USAGE:-
python Generate_bam_with_OQtag.py -i <input_file> -o <output_file>
"""
import os
import argparse
import pysam
parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')
args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)
infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
read.set_tag('OQ', pysam.qualities_to_qualitystring(read.query_qualities), replace=False)
outfile.write(read)
infile.close()
outfile.close()https://stackoverflow.com/questions/62968011
复制相似问题