Varscan has a bug in somatic filtering (what is quite unbelievable for me) : If you specify a somatic filter, there are numerous entries in the output which dont pass the somatic filter threshold.
The Fischer-exact test values seem to be correct (not shown here, only checked by hand). I tested numerous Varscan version, all of them failed.
I'm running Varscan on a 10Mbp long sample section to show the bug
import subprocess
import pandas as pd
from pandas import DataFrame as df
#gallus reference
galref="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"
#input files
norm_bam="/nagyvinyok/adat83/sotejedlik/dt40/rmdup/DS50_RMdup_picard_realign.bam"
tum_bam="/nagyvinyok/adat83/sotejedlik/dt40/rmdup/DS41_RMdup_picard_realign.bam"
#temp files
norm_pup="DS41_tmp.pup"
tum_pup="DS50_tmp.pup"
#position
pos=' 1:40,000,000-50,000,000 '
#pileup flags
pup_flags=' -Q 30 -B '
#create pileup
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + norm_bam + ' > '+norm_pup
subprocess.call(cmd,shell=True)
print cmd
cmd='samtools mpileup '+ pup_flags+ ' -f ' + galref + ' -r '+pos + tum_bam + ' > '+tum_pup
subprocess.call(cmd,shell=True)
print cmd
#varscan params
mincov=' 5 '
pval=' 0.05 '
#output file
output="testout_41_51_som_filt.vsc"
#run varscan
cmd='java -jar VarScan.v2.3.7.jar somatic '+ norm_pup +' ' +tum_pup + ' ' + output
cmd+=' --min-coverage '+mincov+' --somatic-p-value '+ pval
print cmd
print subprocess.call(cmd,shell=True)
snp_result=pd.read_csv(output+'.snp',sep='\t')
snp_result=snp_result.convert_objects(convert_numeric=True)
snp_result[['ref','var','normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']].iloc[:10]
print 'Small preview'
snp_result[snp_result['somatic_status']=='Somatic'][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']].iloc[:10]
somatic_bug=snp_result[(snp_result['somatic_status']=='Somatic') &
(snp_result['somatic_p_value'] > 0.05 )][['ref','var',
'normal_reads1','normal_reads2',
'tumor_reads1','tumor_reads2',
'somatic_status','variant_p_value','somatic_p_value']]
print 'There are numerous position involved: N=',len(somatic_bug.index)
print 'Small preview:'
somatic_bug.iloc[:10]
print subprocess.call(['rm',norm_pup,tum_pup,output+'.snp',output+'.indel'])