import sys
import re
from typing import NamedTuple
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
fn='data/fasta_example_data/small_ls_orchid.fasta'
seq_index=SeqIO.index(fn,'fasta')
chr_len={k:len(v.seq) for k,v in seq_index.items()}
n=len(chr_len)
total_len=sum(chr_len.values())
mean_len=total_len/n
(longest_id,longest_len)=max(chr_len.items(),key=lambda x:x[1])
def Nx0(l,x):
#该函数用于计算nx0,接受两个参数:
#l长度列表
#x:n50或n60等
#返回两个值:
#idx:nx0的编号
#nx0:nx0的长度
l=sorted(l,reverse=True)
p=int(x[1:])/100
total_len=sum(l)
nx0=0
idx=0
cumsum=0#累加
for i,v in enumerate(l,start=1): #从1开始排序
cumsum+=v
if cumsum>=total_len * p:
idx=i
nx0=v
break
return idx,nx0
#n count 和 gaps:
n_count=0
gaps=0
for k,v in seq_index.items():
s=v.seq#正则表达式只能用于字符串
n_count+=s.cout('N')
gaps+=len(re.split('N+',str(s))-1,
fi_name = fn.split('/')[-1]
print('stats for {}'.format(fi_name))
print('sum = {}, n = {}, ave = {}, largest = {}'\
.format(total_len, n, mean_len, longest_id))
for n in ('N50', 'N60', 'N70', 'N80', 'N90', 'N100'):
idx, nx0 = Nx0(chr_len.values(), n)
print('{} = {}, n = {}'.format(n, nx0, idx))
print('count = {}'.format(n_count))
print('Gaps = {}'.format(gaps))
shenweiyan已获得悬赏 5 硬币最佳答案
[md]你的第 46 行代码少了一个括号:
原 46 行代码:
`gaps+=len(re.split('N+',str(s))-1,`
正确的应该是:
`gaps+=len(re.split('N+',str(s)))-1,`
[/md]
|