#import modules
import os
import subprocess
import time
#define sample pairs
pairs=dict()
######################
#R1
pairs['Sample1']='Sample4'
pairs['Sample4']='Sample1'
pairs['Sample2']='Sample5'
pairs['Sample5']='Sample2'
pairs['Sample3']='Sample6'
pairs['Sample6']='Sample3'
######################
#R2
#WT
pairs['DS001']='DS002'
for i in range(1,8)+[9,10,11]:
pairs['DS'+str(i).zfill(3)]='DS001'
#BRCA -/-
pairs['DS014']='DS015'
for i in [15,16,17,18]:
pairs['DS'+str(i).zfill(3)]='DS014'
#BRCA +/-
pairs['DS026']='DS027'
pairs['DS027']='DS026'
#KU70
pairs['DS033']='DS034'
for i in range(34,38):
pairs['DS'+str(i).zfill(3)]='DS033'
#####################
#R3
#WT
pairs['DS050']='DS041'
for i in range(41,50):
pairs['DS'+str(i).zfill(3)]='DS050'
#BRCA -/-
pairs['DS051']='DS052'
for i in range(52,59):
pairs['DS'+str(i).zfill(3)]='DS051'
#BRCA +/-
pairs['DS059']='DS060'
for i in range(60,62):
pairs['DS'+str(i).zfill(3)]='DS059'
#KU70
pairs['DS062']='DS033'
#???
pairs['DS063']='DS064'
for i in range(64,66):
pairs['DS'+str(i).zfill(3)]='DS063'
#???
pairs['DS066']='DS067'
for i in range(67,69):
pairs['DS'+str(i).zfill(3)]='DS066'
#
pairs['DS069']='DS070'
pairs['DS070']='DS069'
#
pairs['DS071']='DS072'
pairs['DS072']='DS071'
#
pairs['DS073']='DS074'
pairs['DS074']='DS073'
#############################
#R4
#WT
for i in [73,74]+range(81,98):
pairs['DS0'+str(i)]='DS041'
#PCNA164R spartan
pairs['DS098']='DS099'
for i in ['099','100']:
pairs['DS'+i]='DS098'
#BRCA1 homo del
for i in ['101','102']:
pairs['DS'+i]='DS014'
#BRCA1 hetero del
for i in xrange(104,108):
pairs['DS'+str(i)]='DS026'
#BRCA2 homo del
pairs['DS108']='DS109'
for i in xrange(109,118):
pairs['DS'+str(i)]='DS108'
#BRCA2 hetero del?
pairs['DS118']='DS119'
for i in xrange(119,127):
pairs['DS'+str(i)]='DS118'
#PCNA164R
pairs['DS127']='DS066'
#Ku70
pairs['DS128']='DS129'
for i in xrange(129,135):
pairs['DS'+str(i)]='DS128'
#spartan
pairs['DS135']='DS136'
for i in xrange(136,138):
pairs['DS'+str(i)]='DS135'
############################
#R5
#WT
pairs['DS141']='DS142'
for i in xrange(142,145):
pairs['DS'+str(i)]='DS141'
#DB1
pairs['DS145']='DS146'
for i in xrange(146,149):
pairs['DS'+str(i)]='DS145'
%%writefile startblat.py
#!/usr/bin/python
import subprocess
#gfserver
gfserver_hostname,gfserver_port=" localhost "," 50000 "
gfServer="gfServer"
#reference genome
ref_fa="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"
ref_2bit="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.2bit"
#starting gfserver for blasting
cmd0=gfServer + " start "+gfserver_hostname+gfserver_port
cmd0+=" -stepSize=5 -canStop -log=blatServer.log " + ref_2bit + " &"
subprocess.check_output(cmd0,shell=True)
try:
subprocess.check_output(['sbatch','-C','jimgray85','--mem','1500','startblat.py' ])
except subprocess.CalledProcessError,e:
print e
%%writefile dt40_chr_CREST.py
#!/usr/bin/python
import os
import subprocess
import sys
#tmpdir for CREST, because it leaves garbage
os.environ['TMPDIR']='/nagyvinyok/adat83/sotejedlik/ribli/temp/'
tumbam=sys.argv[1]
germbam=sys.argv[2]
chromos=sys.argv[3]
#gfServer
gfserver_hostname,gfserver_port=" localhost "," 50000 "
#exec path
CREST="/nagyvinyok/adat88/kozos/sotejedlik/usr/CREST/CREST.pl"
bam2html="/nagyvinyok/adat88/kozos/sotejedlik/usr/CREST/bam2html.pl"
extractSClip="/nagyvinyok/adat88/kozos/sotejedlik/usr/CREST/extractSClip.pl"
cap3="/nagyvinyok/adat88/kozos/sotejedlik/usr/CAP3/cap3"
#reference
ref_fa="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.fa"
ref_2bit="/home/ribli/input/index/gallus/Gallus_gallus.Galgal4.74.dna.toplevel.2bit"
#directory
output_dir="/home/ribli/DT40/SV/CREST/R_1-5/results/raw/R_1235"
#run extractsclip, it finds soft clipping positions in bams
cmd_extr=extractSClip + " -i "+tumbam+" --ref_genome "+ref_fa
cmd_extr+=" -o "+output_dir + " -r "+chromos
print
print cmd_extr
subprocess.check_output(cmd_extr,shell=True)
#run CREST
cmd_CREST= CREST + " -f "+output_dir+"/"+os.path.basename(tumbam)+"."+chromos+".cover "
cmd_CREST+=" -d "+tumbam+" -g " + germbam + " --ref_genome "+ref_fa+" -t "+ref_2bit
cmd_CREST+=" -o "+output_dir+" --blatserver "+gfserver_hostname
cmd_CREST+=" --blatport "+gfserver_port+" --cap3 " + cap3 + " -r "+chromos
cmd_CREST+=" -p " + os.path.basename(tumbam)+"."+chromos
print
print cmd_CREST
subprocess.check_output(cmd_CREST,shell=True)
#submit jobs
#rename the .bam.bais
bampos='/nagyvinyok/adat83/sotejedlik/ribli/dt40/bam_links_2/'
machine='jimgray85'
chromosomes=map(str,range(1,29)+[32,'W','Z'])
for tum_sample,germ_sample in pairs.iteritems():
tum_bam= bampos + tum_sample + '_RMdup_picard_realign.bam'
germ_bam= bampos + germ_sample + '_RMdup_picard_realign.bam'
for chromosome in chromosomes:
cmd_CREST= ['sbatch','-C',machine,'--mem','2000',
'dt40_chr_CREST.py',tum_bam,germ_bam,chromosome]
try:
print subprocess.check_output(cmd_CREST,stderr=subprocess.STDOUT),
except subprocess.CalledProcessError,e:
print e