import subprocess
import pandas as pd
from pandas import DataFrame as df
#gallus reference
galref="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"
#input files
norm_bam="/nagyvinyok/adat83/sotejedlik/ngs_data/enzimologia/R3_bam/DS41.bam"
tum_bam="/nagyvinyok/adat83/sotejedlik/ngs_data/enzimologia/R3_bam/DS50.bam"
#temp files
norm_pup="DS41_tmp.pup"
tum_pup="DS50_tmp.pup"
#position
pos=' 1:40,000,000-50,000,000 '
#pileup flags
pup_flags=' -A -Q 0 -B '
#create pileup
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + norm_bam + ' > '+norm_pup
print cmd
print subprocess.call(cmd,shell=True)
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + tum_bam + ' > '+tum_pup
print cmd
print subprocess.call(cmd,shell=True)
#varscan params
mincov=' 5 '
pval=' 0.05 '
#output file
output="testout_41_51.vsc"
#run varscan
cmd='java -jar VarScan.v2.3.7.jar somatic '+ norm_pup +' ' +tum_pup + ' ' + output
cmd+=' --min-coverage '+mincov+' --somatic-p-value '+ pval
print cmd
print subprocess.call(cmd,shell=True)
snp_result=pd.read_csv(output+'.snp',sep='\t')
snp_result=snp_result.convert_objects(convert_numeric=True)
snp_result[['ref','var','normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']].iloc[:10]
print 'Small preview'
snp_result[snp_result['somatic_status']=='Somatic'][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']].iloc[:10]
somatic_bug=snp_result[(snp_result['somatic_status']=='Somatic') &
(snp_result['somatic_p_value'] > 0.05 )][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']]
print 'There are numerous position involved: N=',len(somatic_bug.index)
print 'Small preview:'
somatic_bug.iloc[:10]
print subprocess.call(['rm',norm_pup,tum_pup,output+'.snp',output+'.indel'])
#create pileup
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + norm_bam + ' > '+norm_pup
print cmd
print subprocess.call(cmd,shell=True)
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + tum_bam + ' > '+tum_pup
print cmd
print subprocess.call(cmd,shell=True)
#change somatic p-value filter to a very high value
pval=' 0.9 '
#run varscan
cmd='java -jar VarScan.v2.3.7.jar somatic '+ norm_pup +' ' +tum_pup + ' ' + output
cmd+=' --min-coverage '+mincov+' --somatic-p-value '+ pval
print cmd
print subprocess.call(cmd,shell=True)
snp_result=pd.read_csv(output+'.snp',sep='\t')
snp_result=snp_result.convert_objects(convert_numeric=True)
snp_s_called=snp_result[(snp_result['somatic_status']=='Somatic') &
(snp_result['somatic_p_value'] < 0.05 )][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']]
print 'Number of somatic snps found N=',len(snp_s_called.index)
print 'Small preview:'
snp_s_called.iloc[:10]
snp_s_called=snp_result[(snp_result['somatic_status']=='Somatic') &
(snp_result['somatic_p_value'] < 0.005 )][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']]
print 'Number of somatic snps found N=',len(snp_s_called.index)
print 'Small preview:'
snp_s_called.iloc[:10]
print subprocess.call(['rm',norm_pup,tum_pup,output+'.snp',output+'.indel'])
1, the sum of the bases doesnt add up the coverage
- filtering based on base quality
- and other stuff like * : deleted base
counts in the coverage why??
2, somatic filter and somatic p value discrepancy???
- output file p values way higher than threshold
- but filter does something because the numbber
of hits decreases with more strict filter
- filter wokrs nice at pileup2snp but that has
no utility to be used with somatic filtering...
3, coverage filtering is based on pileup coverage value
- very low qualitz bases also contribute to coverage
- even * the deleted base counts in coverage...
- this causes false positives...
- cannot be solved with var-freq-for-hom filter