Filter Varscan hits for indel proximity


Using sqlite

In [125]:
#Toggle code

from IPython.display  import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[125]:
In [1]:
#load modules

import os
import subprocess
import fnmatch
from IPython.core.display import Image 
In [2]:
#go to working directory, where the data is

os.chdir("/nagyvinyok/adat83/sotejedlik/ribli/dt40/snp/varscan")
In [18]:
#create sql util function

import subprocess
def run_sqlilte3(command,db,remove_db=False,output=None):
    tempf=open('tempf.sql','w')
    tempf.write(command)
    tempf.close()
    
    if (remove_db):
        subprocess.check_output('rm '+db, shell=True)
        
    if (output == None):
        try:
            print subprocess.check_output('sqlite3 '+ db + ' < tempf.sql ', shell=True,
                                          stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError, e:
            print e.output
    else:
        try:
            print subprocess.check_output('sqlite3 '+ db + ' < tempf.sql > '+ output, shell=True,
                                         stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError, e:
            print e.output
    
    subprocess.check_output(['rm','tempf.sql'])
In [88]:
#load the vcf files into sqlite
#headers??

run_sqlilte3('''
.separator "\t"

CREATE TABLE indels(
  chrom TEXT,
  position INTEGER,  --!!!
  ref TEXT,
  var TEXT,
  normal_reads1 TEXT,
  normal_reads2 TEXT,
  normal_var_freq TEXT,
  normal_gt TEXT,
  tumor_reads1 TEXT,
  tumor_reads2 TEXT,
  tumor_var_freq TEXT,
  tumor_gt TEXT,
  somatic_status TEXT,
  variant_p_value TEXT,
  somatic_p_value TEXT,
  tumor_reads1_plus TEXT,
  tumor_reads1_minus TEXT,
  tumor_reads2_plus TEXT,
  tumor_reads2_minus TEXT,
  normal_reads1_plus TEXT,
  normal_reads1_minus TEXT,
  normal_reads2_plus TEXT,
  normal_reads2_minus TEXT
);

CREATE TABLE snps(
  chrom TEXT,
  position INTEGER, --!!!
  ref TEXT,
  var TEXT,
  normal_reads1 TEXT,
  normal_reads2 TEXT,
  normal_var_freq TEXT,
  normal_gt TEXT,
  tumor_reads1 TEXT,
  tumor_reads2 TEXT,
  tumor_var_freq TEXT,
  tumor_gt TEXT,
  somatic_status TEXT,
  variant_p_value TEXT,
  somatic_p_value NUMERIC, --!!!
  tumor_reads1_plus TEXT,
  tumor_reads1_minus TEXT,
  tumor_reads2_plus TEXT,
  tumor_reads2_minus TEXT,
  normal_reads1_plus TEXT,
  normal_reads1_minus TEXT,
  normal_reads2_plus TEXT,
  normal_reads2_minus TEXT
);

.import DS14_DS15.vsc.snp snps
.import DS14_DS15.vsc.indel indels

--delete headers
DELETE FROM indels WHERE chrom='chrom';
DELETE FROM snps WHERE chrom='chrom';

--create index to make next join faster?
CREATE INDEX indels_chrom_pos_index ON indels (chrom,position);
CREATE INDEX snps_chrom_pos_index ON snps (chrom,position);

''',db='variantdb',remove_db=True)

The number of somatic mutations

  • using p_value < 0.01
  • normal is clean
In [129]:
run_sqlilte3('''
.separator "\t"

SELECT COUNT(*)
FROM snps AS s
WHERE 
    s.somatic_status='Somatic' AND
    s.somatic_p_value < 0.01 AND
    s.normal_reads2 = '0';

''',db='variantdb',remove_db=False)
1519

The number of somatic mutations after indel proximity filtering

  • using p_value < 0.01
  • normal is clean
  • indel proximity filtering
In [128]:
run_sqlilte3('''
.separator "\t"


--EXPLAIN QUERY PLAN SELECT COUNT(*) --i.chrom,i.position,s.chrom,s.position
SELECT COUNT(*) --i.chrom,i.position,s.chrom,s.position
FROM snps AS s
LEFT JOIN indels AS i ON (
    i.chrom = s.chrom  AND  i.position BETWEEN s.position-150 AND s.position+150  )
WHERE
    i.chrom IS NULL AND
    s.somatic_status='Somatic' AND
    s.somatic_p_value < 0.01 AND
    s.normal_reads2 = '0';
    


''',db='variantdb',remove_db=False)
916

List some of them in the varscan output format

  • headers can be peeked from the code
In [130]:
run_sqlilte3('''
.separator "\t"


--EXPLAIN QUERY PLAN SELECT COUNT(*) --i.chrom,i.position,s.chrom,s.position
--SELECT COUNT(*) --i.chrom,i.position,s.chrom,s.position
SELECT s.*
FROM snps AS s
LEFT JOIN indels AS i ON (
    i.chrom = s.chrom  AND  i.position BETWEEN s.position-150 AND s.position+150  )
WHERE
    i.chrom IS NULL AND
    s.somatic_status='Somatic' AND
    s.somatic_p_value < 0.01 AND
    s.normal_reads2 = '0'
LIMIT 20;


''',db='variantdb',remove_db=False)
1	634212	C	T	6	0	0%	C	9	15	62.5%	Y	Somatic	1.0	0.00842911877394645	6	3	9	6	4	2	0	0
1	670994	C	T	25	0	0%	C	45	13	22.41%	Y	Somatic	1.0	0.00596568167261313	29	16	6	7	17	8	0	0
1	708151	C	T	7	0	0%	C	8	12	60%	Y	Somatic	1.0	0.00724637681159416	7	1	6	6	6	1	0	0
1	3349616	G	A	10	0	0%	G	10	13	56.52%	R	Somatic	1.0	0.00199604498825854	4	6	8	5	3	7	0	0
1	5754555	G	T	9	0	0%	G	8	11	57.89%	K	Somatic	1.0	0.00351966873706009	3	5	2	9	6	3	0	0
1	11247626	C	T	11	0	0%	C	9	9	50%	Y	Somatic	1.0	0.00485471549939312	4	5	5	4	1	10	0	0
1	11247631	G	A	10	0	0%	G	9	10	52.63%	R	Somatic	1.0	0.00461197972442354	4	5	5	5	1	9	0	0
1	17064922	T	C	16	0	0%	T	8	11	57.89%	Y	Somatic	1.0	0.000181153662799939	6	2	5	6	7	9	0	0
1	19932481	G	A	13	0	0%	G	14	9	39.13%	R	Somatic	1.0	0.00868027967583002	6	8	7	2	6	7	0	0
1	22530394	C	T	9	0	0%	C	11	11	50%	Y	Somatic	1.0	0.00833131821186174	8	3	6	5	4	5	0	0
1	24717379	A	T	5	0	0%	A	0	6	100%	T	Somatic	1.0	0.00216450216450216	0	0	1	5	0	5	0	0
1	24725078	T	C	13	0	0%	T	7	6	46.15%	Y	Somatic	1.0	0.00745341614906839	0	7	1	5	5	8	0	0
1	26053954	C	T	10	0	0%	C	3	5	62.5%	Y	Somatic	1.0	0.00653594771241836	1	2	5	0	7	3	0	0
1	26569385	A	G	7	0	0%	A	1	5	83.33%	G	Somatic	1.0	0.00466200466200466	0	1	0	5	2	5	0	0
1	26569387	A	G	6	0	0%	A	0	9	100%	G	Somatic	1.0	0.0001998001998002	0	0	0	9	1	5	0	0
1	46320037	T	C	10	0	0%	T	11	10	47.62%	Y	Somatic	1.0	0.00795262192950436	6	5	4	6	9	1	0	0
1	47369411	C	T	7	0	0%	C	6	21	77.78%	T	Somatic	1.0	0.000318981875286259	3	3	11	10	5	2	0	0
1	47369437	A	G	8	0	0%	A	6	17	73.91%	R	Somatic	1.0	0.000380669880113706	4	2	7	10	4	4	0	0
1	47369441	A	C	8	0	0%	A	5	17	77.27%	C	Somatic	1.0	0.000219890054972513	4	1	8	9	4	4	0	0
1	54482794	C	T	11	0	0%	C	2	6	75%	T	Somatic	1.0	0.00103199174406605	2	0	3	3	4	7	0	0


Conclusion 1:

  • still lot of mutations, indel proximity filtering is not the answer!

In [116]:
#collect filenames for samplenames

bam_link_dir='/nagyvinyok/adat83/sotejedlik/ribli/dt40/bam_links/'
#collect filenames
fnames=[]
for fname in os.listdir(bam_link_dir):
    if (fnmatch.fnmatch(fname, '*.bam') and 
        not fnmatch.fnmatch(fname,"*.bai")):
        fnames.append(fname)
fnames=sorted(fnames)
In [117]:
#define function to run pileup command

def run_pileup(chrom,pos):
    
    samtools="/nagyvinyok/adat87/home/ribli/tools/samtools-0.1.19/samtools"
    ref_fa="/nagyvinyok/adat87/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"

    cmd_mpileup = samtools + ' mpileup -Q 30 -B  -f ' + ref_fa
    cmd_mpileup += ' -r ' + str(chrom) +':'+ str(pos) +'-' +  str(pos) + ' ' 
    cmd_mpileup += ' '.join([bam_link_dir+x for x in fnames]) 
    
    try:
        pup_line=subprocess.check_output(cmd_mpileup,executable='/bin/bash',
                                         stderr=subprocess.STDOUT,shell=True)
    except subprocess.CalledProcessError,e:
        print e 
        
    pup_list=pup_line.split('\t')
    
    print pup_list[0],pup_list[1],pup_list[2]
    for i in xrange((len(pup_list)-3)/3):
        print fnames[i].split('.')[0][:-21],pup_list[3+3*i],pup_list[4+3*i]
        

Check a pileup

In [137]:
chrom,pos=1,634212
run_pileup(chrom,pos)
[mpileup] 118 samples in 118 input files
1 634212 C
DS001 19 ,Tt,ttt,.,,..,T,TT.
DS002 22 ,,,.,,t,t,.t.,tTT,...^].
DS003 28 ,,tt.,t.,,ttt,..TtTt.,.t.,TT
DS004 17 TT,,ttT...t.TTTT^]T
DS005 27 t$Tt,Ttt,T.,tT,.tT.Tt,..t,.T
DS006 29 t.,T,,.t.,,,t,tt,T,tT,.,tT.tT
DS007 10 ..,ttTt.tt
DS009 10 t.tTTT.T,.
DS010 7 t.,T,.,
DS011 32 tttTt,..tt,t,,..ttTt.,.,TTTTt,tt
DS014 24 ,..t.TtT.t,TTTt.T.TT,tTt
DS015 6 ..,,.^].
DS016 16 tT,T..ttTt,,T,,a
DS018 24 ,t,tt,,TT.t..TTtt,TT..TT
DS026 19 TT..t.T,..,.T,,tTtT
DS027 18 ttT,t.tTTTt,t.t,T,
DS033 14 ,.T.t..,T.t..^].
DS034 8 ,.t,T,.t
DS035 22 ttT.T,.T.tT.T..Tt,TtT,
DS036 7 T,.,t..
DS037 17 .Tt.,.ttTT.,.TT.t
DS041 13 .,,.,,TT.Tt..
DS042 17 ,t.tT,,TT,T,t.T,.
DS043 19 ,..,t..ttT,T,TttTT.
DS044 11 ,,T.t..TTT.
DS045 10 .,t.T.T.T,
DS046 12 ,TT.TT.,TTt.
DS047 35 ..,tt...,.tT,T,tt.,..t.t,tT,TT,T,Tt
DS048 8 ,.tt,t,t
DS049 21 tt,,.t.t..,.TT....T.T
DS050 19 ,,,,Ttt........,..T
DS051 11 ,,,,T.,t,T.
DS052 12 tTt.tt.t...T
DS053 13 TTt,tT...T.,,
DS054 13 .$Tt,,t,,t.,,^]T
DS055 14 T,.TTt,.,.TT..
DS056 13 .t,Ttt,TT.,.T
DS057 34 tT.ttt..,.t.T.t...TTtT,.T.t,TTt.t,
DS058 44 tTtt,.,T.tt,t,t,.,...t.T.tttt,.T.T,Ttt.T.,..
DS059 8 T,,,T.T,
DS060 9 ,Tt,..TTt
DS061 34 ttt,.,t,,Tt,.T,.T,TTtT,T.tTt,T...T
DS062 12 t.Tt.tttTT,t
DS063 14 ,,t.tt,.,T.TTT
DS064 11 tt.,ttt.T.,
DS065 57 ,t,..,t..T,.tt.tt,t.t,tt.,TTtT,t......,tTtT.Ttt,,ttTT.tT^]T
DS066 24 .t,,,T.T,TT,..t,T,T..TT.
DS067 15 ,...,Ttt,Tt,,t,
DS068 13 .tT.t.t,Tt...
DS069 11 ,ttttt.tT,.
DS070 11 .tt.TT.TTT^].
DS071 21 T,..,,t,..tT...TTtTtT
DS072 17 T.Tt,,T......tT.t
DS073 21 tT..,tTTt,t,.,,,.,,T.
DS074 20 ,$t.T,,,Tt,.ttTttt.T,
DS081 12 ,,T,t,,T..T,
DS082 18 ,,,,ttt,t,,.t,T.T.
DS083 25 ,t,,,,,,t.TT.t..T.T.T,,,T
DS084 39 ,t,.t.,,tt,,...TTTt..TT.t.,tTtt.,.T.T.T
DS085 19 ,T,tt,tt,t,,t.,T,T.
DS086 8 .,,.t,T^],
DS087 16 ,,,,ttT,TT,TTT..
DS088 10 ,T.ttTt,t.
DS089 27 t$,tT....t,t.,,,tT,t,,,T.TTT
DS090 13 t.tt,,t,tT,.^]t
DS091 26 ,tt,T..t,.,,t,.tt,TT,,.t,t
DS092 19 t.,t,..t.,.Tttt..,T
DS093 21 ,t,,,t,.t,,,tT,.,.TtT
DS094 26 ,$t,,,,.,t,.,,tt,,,,,ttTTT.
DS095 21 ,$TTt,.t,,,tt,.TT,t,.t
DS096 23 .,.t,t,Tt,,,,,,,t,tt,.^],
DS097 18 Tt,,T,,t,,,t,tt,tt
DS098 19 .,t,tt.,t,t..,t.tt,
DS099 16 tt,,tt,tt.TTTTT.
DS100 14 ,ttt,,,t,,,..,
DS101 14 ,$t$,t,t,t,,,t,t
DS102 13 .,t,Tt,.,tTtt
DS103 26 ttt,,ttt,tT,.tt,Tttt,.T.t.
DS104 14 t,.,.,.t.,,..,
DS105 15 t,,tt.tt,Tt,..t
DS106 12 ,,,,,.tTt.,,
DS107 7 t,,.,,t
DS108 20 ,$T.,t,t,,,tt.,,TTT,t
DS109 23 ,$.,tt,,ttt,T,T,.,.,t.,.
DS110 9 ,Ttt,,t,,
DS111 29 t$ttt,,t,tt,,ttttttt,..t.,,ttT
DS112 13 ,t.,,ttttT.tT
DS113 22 tT.ttt.,t,.tttt,T,t,.,
DS114 22 .$t,,,,,,T,t,TT,,tTt,.T
DS115 27 tt.,,,tt,,Tt,Tt,T...,tTtt,t
DS116 18 ,Tt,,T..tt.,..,T.t
DS117 17 ,,,.t,.T,,,t.T,.t
DS118 18 ....,,,T.tt,..,.,t
DS119 16 ,,.tT,TtTttT,t,T
DS120 23 .,tTT,,,,,.Tt.tT,t..ttT
DS121 6 t,,,T.
DS122 35 ,tT,.tt.t,.tttt,,.tTtT..,,Tt,,t.,,t
DS123 47 .$tTt,tt.,.t,,ttt.t,,t,,t.tTTT.ttttTt.,.tTTT.,.^].
DS124 29 ,.t,,tt,t,t,,T,,tttt..T..TtTt
DS125 36 ,T,Tt.,,,t,T.,TttT,,t,T,.T,t.t..,t,T
DS126 22 ,,tt.,.t..,tTt,T,t.t.T
DS127 29 .T,,T.,Tt,tT.t.tt,t,.T.,T..Tt
DS128 44 ,$,,,,tt,,,tt.,.,.,t.tt,TtT,.ttt..T,.,TT..,TT
DS129 30 ,t,ttt,,t.T,tT.tT.tTT,T,T,,Tt.
DS130 39 t$,,.t,,t,,t.,TTttTt.Ttttt...,.,TtttT.T.
DS131 29 T$t$,$t.tT,.,ttt,..t...T,tttT.T.
DS132 22 Tt.t,,,t,t,,tT,,T,t.,T
DS133 29 t$.T,,t,ttttt,T,.tt,t,,,t,ttTt
DS134 29 tt,T,,t,.tTtTT.t,t.,t.tT.t,.t
DS135 19 .tt.,t.t.TT,T,,.TT.
DS136 28 ,$t.,,,t.tTt,t,..,t,,Tt,tTTt,
DS137 23 ,.t.t.,,.t,t,TT.ttt,tt.
Sample1 36 ,$,..tt,.T.T,,,,T,.,,TtTT....t.,T,t.^].
Sample2 49 .,,..tttt,.,,,,.,,TT...,.tTtTT,ttTt.T,t..tTTtttt^]t
Sample3 46 tt.ttTtT,.t.Tt,TtttTTTT...tTt,t,,Tt..T,,.,.tt.
Sample4 38 ,$,$t,,..T,tT.t,TTt.ttttt,TtT,tt.,,,.T,t
Sample5 34 .$..T,.t,T.tTT,.,,,T,,TT.TttT.,TTT^].
Sample6 26 ,,.,Tt,,,t..T,,T,.Tt.TTt,t

Check it in IGV

In [153]:
#connect to IGV socket

import socket

SERVER_IP   = 'localhost'
PORT_NUMBER = 60151
SIZE = 1024

my_socket = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
my_socket.connect((SERVER_IP, PORT_NUMBER))

#Load the bams, and genome

def load_bams(my_socket,bams,genome):
    my_socket.sendall('new \n') #clear it
    print my_socket.recv(1024),
    
    my_socket.sendall('genome '  + genome + ' \n') #load genome
    print my_socket.recv(1024),
    
    for bam in bams:
        my_socket.sendall('load ' + bam + '\n' ) # load the bams
        print my_socket.recv(1024),
        
    return

def make_snapshot(my_socket, snapshor_dir,chromosome,posmin,posmax):
    my_socket.sendall('goto '+chromosome+':'+str(posmin)+'-'+str(posmax)+'\n')
    print my_socket.recv(1024),

    my_socket.sendall('snapshotDirectory ' + snapshor_dir + '\n')
    print my_socket.recv(1024),

    my_socket.sendall('snapshot \n')
    print my_socket.recv(1024),
    
    return


bam1='/nagyvinyok/adat83/sotejedlik/ribli/dt40/bam_links/DS014_RMdup_picard_realign.bam'
bam2='/nagyvinyok/adat83/sotejedlik/ribli/dt40/bam_links/DS015_RMdup_picard_realign.bam'
genome='/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa'

load_bams(my_socket,[bam1,bam2],genome)

snapshot_dir='/home/ribli/DT40/SNP/varscan/indel_prox/'
OK
OK
OK
OK
In [154]:
chromosome='1'
pos=634212
posmin,posmax=pos-100,pos+100
make_snapshot(my_socket,snapshot_dir,chromosome,posmin,posmax)
OK
OK
OK
In [155]:
Image('/home/ribli/DT40/SNP/varscan/indel_prox/1_634,112_634,312.png') 
Out[155]:

Conclusion:

Micoplasma affected region (?) , with random fluctuation -> no variant in DS15


Check another pos

In [121]:
chrom,pos=1,670994
run_pileup(chrom,pos)
[mpileup] 118 samples in 118 input files
1 670994 C
DS001 31 ...,.,...,..,...,tt,....T,...,.
DS002 72 ,.,,,,,.,,.,..,,,,,...,,..,,....,tTttttttttt..t....TTT.TTTT..,,,,.,,.,..
DS003 52 .,.....,,.,,.,,.,.,.........a,..TTTT.t..,,+5caact.T,T...,,^].
DS004 34 ,.,...,.,,..,.,,,......AT.Ttt.,,,.
DS005 53 ,...,....,.,..,..,,AATT,,ttt...,,......,,.,,,,.,...,.
DS006 48 ...,,..,.,,,..,,,...,...,At.ttttt.,,.,.T..,,,,.,
DS007 42 .,.,,....,..,a.....,tAtttttt,...,TT.TT,,,^].
DS009 36 ,.,...,..,..,.,..,,Atttt.....T,,...,
DS010 40 ....,.,.,.,.,,..,..,....TT.ttt....,T.,,.
DS011 61 .$.,.,,.,,.,..,.,...,..,,AA,tTttttt,.....,,..TTT.T.,....,....,
DS014 58 ,.,,.,..,,,..,.....,.T....,,.ttTTttttt.,,,.......TTT..,,..
DS015 25 ..,.,.,,........,,..,,...
DS016 39 ..,..,.,.,.,..,.......,.TTtt....A..T,.,
DS018 36 .,.,,...,...,.,,,........,t.,T,,,.,.
DS026 38 ,,,..,......,,.,,,AtTtt.At,....T,,....
DS027 46 .$..,,.,.,..,,,.,.A...TT.ttttt.....,.TT.,.,.,,.
DS033 57 ,....,.,,..,.,,...,.,..T..,a.,A.ttt,...,,...T.,,,t.......
DS034 44 ,,..,,...........AtTT,.,.....,..,......,.,,.
DS035 34 ,.,,,..,,..,,A.,...,,...t...T,..,,
DS036 37 ,$...,,..,,,...,,.,.ATt..t....T,,,..,,
DS037 42 ,,,.,..,.,.,.......A..t,ttt,....t.T.,,,,,.
DS041 36 .,.,............TTtt..,,..,.,...,,,,
DS042 42 ..,,,,,.,,.,,,..,,......,.....,,,g..,.,...
DS043 39 .,..,.,........,.,,,tTTt...t.T.T,t.T,.^],
DS044 55 ...,..,....,..,.,,..,,,...TTtt......,....T.,...,.,,....
DS045 56 ....,.,.,,,.,.,..,,...,.T.Ttt..,,.......T....,,,,,...,,.
DS046 49 .......,.,....,,...,.A.T.tt..,,.......TTT,,..,.,.
DS047 76 .,,...,,.....,,,...,..,,,.,.,...,,..,.AAttT.TTttttt..,.........t,T.T.TT,,t,.
DS048 39 ..,,,..,....,...T,,,t,t...,.t..TT,,,..,
DS049 46 ...,.,...,,,.,.,,.,.Ttttttt.,...T..T.,,.T..,..
DS050 47 ,$,..,,.,,,.,,.,,.,.,,,,....ttttt.,,.,....T.,,.,
DS051 44 ,,,,,,..,,,,..,,,,....,..T.,t....,,...T,,,,.
DS052 61 ......,,,,,.,,..,.,,.,.,.T.TttttA......,t..,t,,,T.TTT,,,.,...
DS053 43 ..,,,,,,.,,..,..,.A.TTtttt...,,..,TT.T..,,,
DS054 69 ...,.....,.,,.,.,..,,,.,.,,.,...,,,.TtT......,.....,..T..TT,,,,...,.^].
DS055 41 .,.,,,,,,,.,.,.,.,.,,,...,Ttt...,T...,,.,
DS056 36 .,,,.,,.,,,.,...A.Tttt,.,.TTT.,,,...
DS057 66 .$...,....,..,,..,.,,,.,.,*....,,...,..,TT.Ttt,,.....t.....,,,,,..,
DS058 114 ..,,,..,,,,....,,...,,..,..,..,..........,,.,,...,...a,,,,AAA.tttttt..,,,....t...,......T.TTT.T..,,,,,.,.,,...,.,,
DS059 27 .....,,,.,.,,,.,,.........,
DS060 34 .,.....,..,,,.ttttt.,....,T.,..,,.
DS061 70 .$.$.,.,.,..,.....,,.,,......,..a,,ATt,tttt..t,,.....t.,T.TT...,,,,,..,,
DS062 55 ......,,,.,A.,,.,,....,.T..t,t....,,....,TTTT.,..,....,
DS063 41 ,...,....,,.....,.TTt.....T...,..T..,..,,
DS064 52 .,.,,..,,.,.,...,,,...tatttt.,.......TTTT.,,,...,,..
DS065 94 .,,,,,.,..,.,.,,.....,,,,..,.,,..,..,,,..,.,..,.A,.AatTt,tttt..,..,tt.,T..TT,,,,TT..T,.,,....^].
DS066 53 ,.,.,..,,,..,,..,..,..,...TTttttt,,,...,TT.T...,,.,,^],
DS067 38 ,.,.,,.....T.T.tt.............T,,,,...
DS068 46 ,.......,,,,...,...Tttttttt.A.,.....TTg,...,,,
DS069 44 ,,,...,...,.,...,..Tt..........,,T.T.T,,,.,,
DS070 46 .,..,.,,,.,,.,....,...,.tttt.....t....t,,.,..^].
DS071 44 .,,,..,,.,,,.......A.tttttt.,,.......T,,..,^].
DS072 61 ,$.,,.,...,,*.,.,,,..,.,,A.aT.tt,tttt,t.........TT.T..,,,,..,,
DS073 40 ,.,.,..,.,,,.,,,...,..,....TTT..........
DS074 34 ,........,,..,...,....A.T.,....,,,
DS081 34 ...,.,.,,,...,....,,.,,..........,
DS082 30 .,,.,...,,....T.......,.,.....
DS083 32 ,..,,....,.,.,,.,.,.....,...,,..
DS084 35 .,,,..,,.,,,..,.,,.,.,,..........,.
DS085 41 .,,..,,...,,,..,......,,,.,...,...,...,.^].
DS086 17 .,,.,,........,..
DS087 49 .,,,..,,.,..,..,,.,.,..,.,.,,..,,,...,,........,.
DS088 35 ,..,,..,..........,.....T..........
DS089 31 .....,..,....,,,,.........,.,.^],
DS090 26 .,.,,,.,.,.,..,,,,.,.,...,
DS091 30 ,..,,....,,.,,,..,T....,.,...^].
DS092 33 ...,,.,,..,.,.....,.,......,....,
DS093 37 ...,..,...,.,,...,.....,......,..,...
DS094 45 ...,..,.,,,...,.,.......,,.,.T.T...,..,,...,^].
DS095 23 .$...,,.,...,...,.,.,..,
DS096 23 .,..,,....,,...,....,..
DS097 30 ..,.,.,..,......,,..,,.,,..,,,
DS098 32 .$,,.,,,.,,,,.,.,....,,,.,..,,.,,
DS099 31 ..,,..,.,.,,..,..,.....,.......
DS100 32 ,.,,.....,,,.,...,,....,,...,...
DS101 35 ,,.,.,..,.,,..,,..,...,.,.,,..,,,,.
DS102 20 ,,,........,......,^].
DS103 43 ,$,$,,,.,..,.,,,...,,,,.,,,,,.,....,...,.....
DS104 44 ..,,,.,..,.......,.......,...,.............^].
DS105 27 ..,.,...........,,,........
DS106 34 ,..,..,,,..,...,............,...,.
DS107 33 ......,.,..,,,..,.............,.,
DS108 30 .,,,..,,,..,,,,....,,,.,.,,...
DS109 29 ,$.,..,...,,.,,.,,..,..,,....,
DS110 54 ,$..,,..,,.,....,....,.,,,,,,.......,..T.......,....,.,
DS111 33 .$,...,..,,......,..,,..,,,,......
DS112 34 ,,,,.,.,...,,..,,,,,,...,.,,,...,.
DS113 31 ,,,,..,..,,,..,.,..,..,,.,.....
DS114 36 ,.,,...,.,,.,..,........,,,,,,...,,,
DS115 45 ...,.,....,.,..,,,....,....,.,,,,.......,.,,^],
DS116 34 ,..,,,.,.....,.,..,.......,..,..,^],
DS117 27 ,,,......,.,.,.......,..,,.
DS118 38 ..,,....,.,,...,,,.,.,..T......,.,....
DS119 33 ,....,,.,,.,,.,,...,T....,,......
DS120 25 ....,...,$,.,,.........,..
DS121 29 ,,..,,.,...,....,.....,.,,,.,
DS122 30 ,,.,..,..,.,,..,,.,,,.....,..^].
DS123 27 ,$..........,.,,...,,,...,..
DS124 44 .,,.,.,..,...,.,.,.,.....,..,.,.......,,..,.
DS125 40 .,,.....,..,,.,...,..,.,.,,...,.......,.
DS126 31 ...,.,..,,,.,.,.,,,..........,.
DS127 37 ......,,....,..,,,.A...............,,
DS128 47 .,..,,.,.,...,.,....,.,,...,..........,..,.....
DS129 43 .$.,..,.,,...,.,.,.,.,.,,....,.......,,.....
DS130 44 ,,....,...,,,.,....,.,,..,,,,..........,.,.,
DS131 27 ,.,,.,......,....,,..,,.,.,
DS132 41 ..,....,,.,..,,...,,...,........,,.,,.,.,
DS133 27 .,,....,,,..,..,..T......,.
DS134 36 .,....,.,..,.......................,
DS135 34 .,.,....,,.,....,.,.,.,,,,,.,,....
DS136 41 .,.....,.,,.,..,,..,...,...,.,.,,.,.,.,..
DS137 24 ,,......,..,.,....,.,,,^].
Sample1 134 ,,..,.,,..,.,.,,,....,,....,,..,ATT,.T.T...T...tt,................,,,.....,..........A..,,.,,..TTT.T..TTTTT..,t,,,,.,...T,,t...,.,.,,^],
Sample2 159 .,,...,,,...,..,,.,,...,..,....,.,,.,..,,,,.,AAAAt.TTTT..T..TTTttt,ttt........,,,,,,,....................t....t..A.a,.TT.T.T...A..TTT.,,,,...,,,T..,,.,...,....
Sample3 168 ..,...,.,.....,,,..,.,,.,,..,,,.,.,,.,,.A..,,,..,T.,a.T.....T.T...t,t,tt..................,,,,,,,,,,,,............,........,,......,T........TT.T.T,,,,..,.......,.,.,.^].
Sample4 123 ....,,..,....,,,,,..,,.,,,....,...AA,,,.Att......TT.,tt...............,,,,,......,..........,,..,...T.T.,.,.,.,t.,,.,,..,,,
Sample5 139 ,,,,...,,,..,,.,..,,,,,.,,,,...,,A..,..At..T......t,ttt.................,,,,,,,,t.......,,,......t,.,,,.T......TTTT.,,,......,,,.,...,...,^].
Sample6 108 .,,.,.......,,..,.,..,..,.,,,....A$..,AtT..T.TT.tttt,t.........,..........,,t.,.,T.TT..TT.TTT,,T,....,.,..,,.
In [164]:
chromosome='1'
pos=670994
posmin,posmax=pos-100,pos+100
make_snapshot(my_socket,snapshor_dir,chromosome,posmin,posmax)
OK
OK
OK

Weird region

In [166]:
Image('/home/ribli/DT40/SNP/varscan/indel_prox/1_670,894_671,094.png')
Out[166]:

Conclusion

  • weird position + random fluctuation?

Check another one

In [132]:
chrom,pos=1,17064922
run_pileup(chrom,pos)
[mpileup] 118 samples in 118 input files
1 17064922 T
DS001 12 ,cC,Cc.c,C.,
DS002 15 cC.,c..C,C,C.CC
DS003 23 .$.,.CccC,,,c,,.c..CC..^],
DS004 12 ,.,cC,Cc.cc.
DS005 21 .a..cc..c...C.,.c.c,.
DS006 24 c,C,C,,c,c,cccCc,,C,.CC,
DS007 20 c,CC.cc.,,c.c,.cC,.C
DS009 15 C,ccC,C.CC,CC,c
DS010 20 ,$C,....c,,..CCc,.c.c
DS011 22 CC.,Cc,c..c..,cC.cC.,c
DS014 19 CC..CC,ccCccc..,..^]c
DS015 16 ...,,,.,.,,.,,,.
DS016 21 ,CCCC,,,cc.,c..c,.,..
DS018 19 CC,,..c.Cc..Cc,,cc,
DS026 22 ,c.,CcC,C,.C.,C.CCcc,,
DS027 18 C$CC,Cccc,,,,Cc,C.C
DS033 23 CC.C.cCC..CcC,ccC.cccCc
DS034 15 ...C.C,c,CCc,,c
DS035 19 ,,c.,cCC,.,.C.cCC,c
DS036 9 ...Cc,,..
DS037 16 .Cc,,,ccc,,,,CC^],
DS041 15 c,.,cC.cc...CC^].
DS042 23 Cc,cc,.CCCcc.c....,,,.c
DS043 10 c..,...,..
DS044 18 C$..,.cC,,CC..,.,CC
DS045 18 ,,C.Cc.,Cccc.C.,C,
DS046 22 CCc.,.CC.ccC,ccCcC,CCC
DS047 40 ,.C.,cCccC.c.c.,C,CCc,C.,cC,cc.,,,Cc.,,.
DS048 25 ,c,.,CCCC.Cccc..c,CC,C..C
DS049 9 .,cCc.,cC
DS050 20 ,cccCCCC.ccCcC.,.C,.
DS051 22 c$C,c.,c..c.C.CCcc,CC,,
DS052 17 ..c..cCc.cCcC..cC
DS053 23 .$.$CCCCcc.....,,..C,.C..
DS054 21 C,C..,c,C,.C.cc.CCcCC
DS055 21 C,Cc,,Cc,...C..C,,C.C
DS056 23 ..Ccc,,..,c.CCC,Ccc..c^],
DS057 40 .$,.cccC,C.C,cccCCcccCCc,C.,,C.C...CC,,.c
DS058 41 .C,Cc.cCcCCC...cCCc.C,.cCcCCC.cCcC,..,Ccc
DS059 24 cCcc.c,C...,.C,C,,C..,..
DS060 17 CC.c,.,,ccc...CCc
DS061 38 CCcc..ccCccC.c.C,.,,C.CC.ccC.c..c..,,^]c
DS062 19 .,c.C,,,,cCcc,,c,.C
DS063 16 ,C,..C,.CC,cc,cc
DS064 20 ...c.cc.c,,,.,cc.c,^].
DS065 47 ,c,cC,,.CcC,C,.C,,CCC,,.,.,c.CCccC,C,,CCCCC.ccC
DS066 18 .CCC..,.,cccc,..c,
DS067 19 ..cc,.,CC,,.C.cC,CC
DS068 15 C..,cC.C,,C,,.c
DS069 25 ,$.cc..CCcc.c.c,CCC.CC.c,^]C
DS070 28 C.,C,C.C,ccc,.,.c.C.cC,C,cc^],
DS071 24 .$Cc.....,cC,c..C,C,..,..
DS072 30 .C..c,.,.,,CCcC,,C,,,CCcC.c,c^].
DS073 24 .C.c,,cC,c...Cc.C.c,CccC
DS074 21 c.c,ccc,,C,,,cCC.,,CC
DS081 28 CC,c.c,.ccC,c,,Ccc.C.cc.CC,.
DS082 24 ,.C.CCC.C.CC..c.C,c,C..^]c
DS083 43 ,.,.Ccc.CcC,c.cc,CcC.,CCCCC,CC.,cCc,C,CC..c
DS084 34 ,$,,...,C.c.,CcC.c,Ccc..,...c.,c.C^].
DS085 32 .CC,CC,c,C,ccC.cc..c..C,.C,C..c,
DS086 18 ccC.CcC,,,Ccc.,CC,
DS087 33 .c,C,.ccc.,cc,.c.Cc.,.Cc,cCC,.Cc.
DS088 34 C$.C,.c.C,,.,C.c.Cc,,cC,c,..c..,,cC
DS089 32 CC,.ccc,..,cc.,.c.C,..C,,.cC,,CC
DS090 28 .$CC.cCc.CCCC,c.C.,ccC..,cC,^]c
DS091 18 ,.cccc,.CCcC.cCc,.
DS092 20 .cccc,ccC,,..,Ccc,,.
DS093 19 cc,,,.Cc.c.C...cc,c
DS094 25 .,....,CCCcc,c..,,.c.,cCC
DS095 22 ..C.,CC..C,ccC.,.,Ccc,
DS096 24 cCcCC,cCc.,cC.cccCc..,C.
DS097 24 cc.,CcC,C.,,.,c,,c..,c.C
DS098 29 C.C,CC.cC..C,CC,,.cc,C,.,C,C,
DS099 34 C$,$c,CCCc,cC.,CC,,,.ccCC.,,,,CC,CCC
DS100 26 C.,,.cc,,C..c,C,C,.Ccc,,,c
DS101 22 ....c,cc.Cc,.,..,CC,,.
DS102 19 .$c$.,...,,..,CC,,,.,
DS103 37 ..,.C.c,,cCCc,.,.c,.,Cc,c.CC,ccC,,,,.
DS104 18 cC,c,,c,,C.Cc.,.cc
DS105 25 CCc.CC.,,ccC..,.C.C,.,cc^]C
DS106 20 C$c.c...C,C.CcC..,C.,
DS107 19 C,ccCC...,..,.C..,c
DS108 25 cC.,,CcCcC.,.CCC.cCC.CC,c
DS109 18 ,..c.,C.,c.CCc,C,c
DS110 27 CCcCc.CC.C,c.,cCC..,C,c.cc,
DS111 19 cC,,CCc,.,C,.Cc.,C^]c
DS112 25 ,.,c,.,,cC,,CcCC,,c.cCC,c
DS113 12 ,,c..CCcC.CC
DS114 32 ,.c,,.,c.C,C......c.,cCCc,.cc,.,
DS115 22 CC,c.c,cCcc.c,,CC.c.c^]c
DS116 28 ,$,cc,CC,.,.,CC,,C.C,CCc,,cC.
DS117 27 ,cc.Cc,C..CC,.,Cccc.,c,.C.^]c
DS118 17 .$ccCcc,C.,cCc.,.,
DS119 28 ccCc,C,ccC,Cc..C.cC..c,,.c.^]c
DS120 21 C.c.C,cCc,,,C,,..cCC,
DS121 30 C$.,,.c,C.C,,...C,c.C,cCc.C,cCC
DS122 22 C.CC..CCcCcc...cccccc^]c
DS123 24 .$cCC..cc..c.C..c.,,cccC^],
DS124 20 .c.C.Cccc,.,,cc.C,.,
DS125 23 ,,CC,C,,.cC.C..C.,cc.C^]c
DS126 28 C$C.CccccCc.,...cC.,C,,C...C.
DS127 23 C..,,c.,,c,.C,,,..CC.,,
DS128 17 ,c..ccc.CCCcccC,C
DS129 22 .,.,,,,.C,,.,cCC,cc,,,
DS130 19 .cc,C,C.CC.C,,cCc,c
DS131 19 c$c$C...Cc,c..,.,ccc^],
DS132 28 ,ccCcc.C.,....Cc,,C.,,.,cC,,
DS133 18 .cccC,C.,.Cc,CCC..
DS134 25 CCcCcCCCcCcC..,,CcC.cC,.^],
DS135 31 .,cCCCCc,c,,C,..,CCC..c..CC,.c,
DS136 30 CC.,..,c.,C,cC,....,,.cCC,.,,c
DS137 18 C,...cc,,,C.,,cCcC
Sample1 48 ,C.,.C..cc.C.c.,..,c.c.c,c,,c,,c,c,C,Cc,cCcc..,c
Sample2 44 CC,,CCc.cc.c,,cCC.,Cc,c,.,ccccCCc,,CCC,,.,Cc
Sample3 37 ccCCcCcCcC..CC.C.c..C.,,.C,CC,C.C.cC.
Sample4 34 c.,,,ccC.cc,,c..c.c,C,C.CC,c.cC,c.
Sample5 42 ,,,.,,.C.c.CC,,,,C,c.Ccc,c.c.C,C.,,Cc,cccC
Sample6 28 C.cCCcc.,ccc,...ccC..CccC.Cc
In [161]:
chromosome='1'
pos=17064922
posmin,posmax=pos-100,pos+100
make_snapshot(my_socket,snapshor_dir,chromosome,posmin,posmax)
OK
OK
OK
In [162]:
Image('/home/ribli/DT40/SNP/varscan/indel_prox/1_17,064,822_17,065,022.png') 
Out[162]:

Conclusion

  • random fluctuation


Conclusion

  • indel proximity filtering helps a bit but it doesnt solve the question completely

TODO:

  • What other reasonable improvements can be made?
  • General question, how much tuning should we try to do?

Looks good for out method