#load modules
import os
import subprocess
import fnmatch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import gc
from IPython.core.display import Image
#go to working directory, where the data is
os.chdir("/nagyvinyok/adat83/sotejedlik/ribli/dt40/snp/varscan")
%%bash
#Check out the schema of the vsc file
head DS14_DS15.vsc.snp -n 1 | awk '{for (i=1; i<=NF;i++) {print i" " $i}}'
p < 0.0001
It looks like there is some cluestering: e.g.: 4.9M, 10.9M
#select some very unlikely fals positives ( p< 0.0001)
best_pos_cmd='''
head DS14_DS15.vsc.snp -n 600000 | awk ' $13 == "Somatic" && $15 < 0.0001 { print }'
'''
best_pos=subprocess.check_output(best_pos_cmd,executable='/bin/bash',shell=True).strip()
best_pos_list=best_pos.split('\n')
print '\n'.join(best_pos_list)
#collect filenames for samplenames
rm_dup_dir='/nagyvinyok/adat83/sotejedlik/dt40/rmdup/'
#collect filenames
fnames=[]
for fname in os.listdir(rm_dup_dir):
if (fnmatch.fnmatch(fname, '*.bam') and
not fnmatch.fnmatch(fname,"*.bam.bai")):
fnames.append(fname)
fnames=sorted(fnames)
#define function to run pileup command
def run_pileup(chrom,pos):
samtools="/nagyvinyok/adat87/home/ribli/tools/samtools-0.1.19/samtools"
ref_fa="/nagyvinyok/adat87/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"
cmd_mpileup = samtools + ' mpileup -Q 30 -B -f ' + ref_fa
cmd_mpileup += ' -r ' + str(chrom) +':'+ str(pos) +'-' + str(pos) + ' '
cmd_mpileup += ' '.join([rm_dup_dir+x for x in fnames])
pup_line=subprocess.check_output(cmd_mpileup,executable='/bin/bash',shell=True)
pup_list=pup_line.split('\t')
print pup_list[0],pup_list[1],pup_list[2]
for i in xrange((len(pup_list)-3)/3):
print fnames[i].split('.')[0][:-21],pup_list[3+3*i],pup_list[4+3*i]
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-3089329.png')
Mpileup line:
i=0
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-7971336.png')
i=1
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-15680078.png')
i=2
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-23852983.png')
i=3
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-35282831.png')
i=5
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-37277423.png')
i=7
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-52021699.png')
i=8
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-60425138.png')
i=9
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])
Image('/home/ribli/DT40/SNP/varscan/igv_snapshots/ds14__hits/1-77729845.png')
i=10
print best_pos_list[i]
print
run_pileup(1,best_pos_list[i].split('\t')[1])