#!/usr/bin/perl

#exome analysis pipeline from the raw data. 

use warnings;
use Getopt::Long;
use Time::Local;
use Cwd;
use constant DEFAULT_TEMP_DIRECTORY => ".";				# Default directory
my $pwd = cwd();
# Build the run identifier used to name the working directory and temporary
# files: the current local time formatted as "YYYYMMDD.hhmmss".
# Takes no arguments and returns the identifier string.
sub generateID {
    my @now = localtime(time);
    my ($second, $minute, $hour, $day, $month, $year) = @now[0 .. 5];

    # localtime() counts years from 1900 and months from 0.
    return sprintf(
        "%d%02d%02d.%02d%02d%02d",
        $year + 1900, $month + 1, $day, $hour, $minute, $second
    );
} # End of sub generateID

# Convert a run time given in whole seconds into an "hh:mm:ss" string.
# The hour field is not wrapped at 24, so 97509 seconds prints as "27:05:09".
#   $_[0]  elapsed time in seconds
# Returns the formatted string.
sub formatTime {
    my ($elapsed) = @_;

    # Seconds left over once the whole hours have been taken out.
    my $within_hour = $elapsed % 3600;

    return sprintf(
        "%02d:%02d:%02d",
        $elapsed / 3600,        # total hours
        $within_hour / 60,      # total minutes
        $within_hour % 60       # total seconds
    );
} # End of sub formatTime

# Count CNS records in sliding windows (100 kb wide, 20 kb step) along every
# chromosome named in a comma-separated position file.
#
# Parameter:
#   $_[0]  path of the input file. Column 1 is the chromosome and column 2 the
#          position; columns 5-8 are the four values used to classify a record
#          as "positive" or "negative" (see the classification block below).
#
# Side effects:
#   - prints the input path to STDOUT;
#   - writes <input>.posdens, <input>.negdens and <input>.alldens, each with
#     one "chromosome,window_start,count" line per window;
#   - prints a message and exits when the input cannot be read, and dies when
#     an output file cannot be created.
sub densitymoving {
    my $chposfile = $_[0];
    my %record_at;     # "chr_position" => full input line
    my %max_pos_of;    # chromosome     => largest position seen

    print "$chposfile\n";

    # The outputs are created before the input is checked, as before.
    my %out_fh;
    for my $suffix (qw(posdens negdens alldens)) {
        open($out_fh{$suffix}, '>', "$chposfile.$suffix")
            or die "Cannot write \"$chposfile.$suffix\": $!\n";
    }

    my $in_fh;
    unless (open($in_fh, '<', $chposfile)) {
        print "Cannot open file \"$chposfile\"\n";
        exit;
    }
    while (my $line = <$in_fh>) {
        chomp $line;
        my @ind = split(",", $line);
        next unless defined $ind[1];    # blank or single-column line

        # Track the real maximum. Seeding it with 1 (as the code used to do)
        # lost every window beyond the first when the largest position of a
        # chromosome was the first one read.
        if (!defined $max_pos_of{$ind[0]} || $max_pos_of{$ind[0]} < $ind[1]) {
            $max_pos_of{$ind[0]} = $ind[1];
        }
        $record_at{$ind[0] . "_" . $ind[1]} = $line;
    }
    close($in_fh);

    my $win  = 100000;
    my $step = 20000;
    for my $chposkey (sort keys %max_pos_of) {
        my $position = 0;
        my $istart   = 0;
        until ($position > $max_pos_of{$chposkey}) {
            $position = $istart + $win;
            my ($positivenum, $negativenum, $iallnum) = (0, 0, 0);

            for my $e ($istart .. $position) {
                my $record = $record_at{$chposkey . "_" . $e};
                next unless $record;    # no CNS at this position

                my @fields = split(',', $record);
                # A record is "positive" only when column 7 or column 8 is
                # zero and one of the four patterns below holds; everything
                # else is "negative".
                if ($fields[6] == 0 || $fields[7] == 0) {
                    if    ($fields[6] > $fields[7] && $fields[5] > $fields[4]) { $positivenum++; }
                    elsif ($fields[6] < $fields[7] && $fields[5] < $fields[4]) { $positivenum++; }
                    elsif ($fields[6] == 0 && $fields[7] == 0)                 { $positivenum++; }
                    elsif ($fields[4] == 0 && $fields[5] == 0)                 { $positivenum++; }
                    else                                                       { $negativenum++; }
                }
                else {
                    $negativenum++;
                }
                $iallnum++;
            }

            print { $out_fh{posdens} } "$chposkey,$istart,$positivenum\n";
            print { $out_fh{negdens} } "$chposkey,$istart,$negativenum\n";
            print { $out_fh{alldens} } "$chposkey,$istart,$iallnum\n";

            $istart += $step;
        }
    }

    for my $suffix (qw(alldens negdens posdens)) {
        close($out_fh{$suffix})
            or die "Cannot finish writing \"$chposfile.$suffix\": $!\n";
    }
}

# Write an R script that draws the plot tables on chromosome ideograms with
# ggbio, then run it with Rscript.
#
# Parameters:
#   $bamdir        directory holding the plot table(s); the script and the PDF
#                  are written there
#   $firstch       name of the first chromosome, including the "chr" prefix
#   $mychname      comma-separated chromosome names (without "chr")
#   $mychle        comma-separated chromosome lengths, same order as $mychname
#   $plotfilename  plot table file name; several names may be joined with ">"
#
# Each table is read as CSV. Rows whose 4th column equals 0.2 are drawn as
# blue rectangles, all other rows as red points.
# Dies when the script cannot be written; warns when Rscript fails.
sub chromplotggbio {
    my ($bamdir, $firstch, $mychname, $mychle, $plotfilename) = @_;
    my $script = "$bamdir/$plotfilename.ggbio.R";

    open(my $script_fh, '>', $script) or die "Cannot write \"$script\": $!\n";
    print {$script_fh} "
	library(ggbio)
	library(GenomicRanges)

	gr <- GRanges(seqnames =\"$firstch\", IRanges(start = 1, end = 2))
	seqinfo(gr) <- Seqinfo(paste0(\"chr\", c($mychname)), c($mychle), NA, \"mock1\")
	xq <- seqinfo(gr)
	p_0<-autoplot(xq[paste0(\"chr\", c($mychname))])

	setwd(\"$bamdir/\")
	";

    my @files = split('>', $plotfilename);
    # Index of the last plot object defined in the R script. Starting at 0
    # keeps the final "p_<n>" valid (the bare ideogram) when no file is usable.
    my $fplot = 0;
    for my $i (0 .. $#files) {
        next if !$files[$i];
        my $iadd = $i + 1;
        $fplot = $iadd;
        # Each file gets its own vertical band on the karyogram.
        my $endpos  = 10 * $i + 4;
        my $uppos   = 10 * $i + 8;
        my $endpos2 = 10 * $i + 2;
        my $uppos2  = 10 * $i + 4;
        print {$script_fh} "
		d=read.csv(\"$files[$i]\", header=FALSE)
		d=na.omit(d)
		d2 = subset(d, d[,4]==0.2)
		gstart=d2[,2]
		gend=d2[,3]
		gchr=d2[,1]
		gden=as.numeric(d2[,4])
		
		d3= subset(d, d[,4]!=0.2)
		start=d3[,2]
		end=d3[,3]
		chr=d3[,1]
		den=as.numeric(d3[,4])		
		mut_$i <- GRanges(seqnames =paste0(\"chr\", data.matrix(as.numeric(as.character(chr)))), IRanges(start = data.matrix(as.numeric(as.character(start))), end = data.matrix(as.numeric(as.character(end)))), den=den)
		x_$i<-p_$i+layout_karyogram(mut_$i, aes(x = start, y = den), geom = \"point\", ylim = c($endpos, $uppos), color = \"red\", fill =\"red\")
		mut_$i <- GRanges(seqnames =paste0(\"chr\", data.matrix(as.numeric(as.character(gchr)))), IRanges(start = data.matrix(as.numeric(as.character(gstart))), end = data.matrix(as.numeric(as.character(gend)))), den=gden)
		p_$iadd<-x_$i+layout_karyogram(mut_$i, aes(x = start, y = den), geom = \"rect\", ylim = c($endpos2, $uppos2), color = \"blue\", fill =\"blue\")
		";
    }
    print {$script_fh} "
	pdf(\"$plotfilename.ggbio.pdf\")
	p_$fplot
	dev.off()
	";
    close($script_fh) or die "Cannot finish writing \"$script\": $!\n";

    # List form bypasses the shell, so a ">" in the file list is not taken as
    # a redirection.
    system("Rscript", $script) == 0
        or warn "Rscript failed for \"$script\" (status $?)\n";
}

# Write a base-graphics R script with one panel per chromosome and run it
# with Rscript.
#
# Parameters:
#   $bamdir        directory holding the plot table; script and PDF go there
#   $mychname      comma-separated chromosome names, one panel each
#   $plotfilename  CSV plot table with columns chromosome, start, end, value
#
# Rows whose value is 0.2 are drawn as thick blue segments, the other rows as
# red points.
# NOTE(review): the generated script compares the Chr column to the bare
# chromosome name, which presumably only works for numeric names; confirm.
# Dies when the script cannot be written; warns when Rscript fails.
sub chromplot {
    my ($bamdir, $mychname, $plotfilename) = @_;
    my @chran      = split(',', $mychname);
    my $subplotnum = scalar(@chran);
    my $script     = "$bamdir/$plotfilename.chplot.R";

    open(my $script_fh, '>', $script) or die "Cannot write \"$script\": $!\n";
    print {$script_fh} "
	setwd(\"$bamdir/\")
	d=read.csv(\"$plotfilename\", header=FALSE)
			d=na.omit(d)
	colnames(d) = c(\"Chr\",\"Pos\",\"End\",\"Counts\")
	pdf(\"$plotfilename.chplot.pdf\")
	par( mfrow = c($subplotnum,1),mar=c(1,4,1,1))
	m = max(as.numeric(d\$Counts))
	d2 = subset(d, d\$Counts!=0.2)
	";

    for my $w (0 .. $#chran) {
        print {$script_fh} "
		l = subset(d2,d2\$Chr==$chran[$w])
		plot(l\$Pos,l\$Counts,type = \"p\", col = \"red\",xlab=\"\",ylab=l[1,1],xaxt='n',ylim=c(0, m))
		l2 = subset(d,d\$Counts==0.2)
		for(i in 1:nrow(l2)){
			p = l2[i,2:3]
			lines(p,c(0.2,0.2), type=\"l\", lwd = 20, col=\"blue\")
		}
		";
    }
    print {$script_fh} "dev.off()\n";
    close($script_fh) or die "Cannot finish writing \"$script\": $!\n";

    # List form: the script path is handed to Rscript without shell parsing.
    system("Rscript", $script) == 0
        or warn "Rscript failed for \"$script\" (status $?)\n";
}

# Assign the per-base depth in one "samtools depth" file to exons and write
# one line per exon that received any depth.
#
# Parameters:
#   $depthfile     depth table to read (chromosome, position, depth; tab-separated)
#   $outfile       per-exon table to write
#   $exonspos      hashref "chr_start" => exon end
#   $exonstart_of  hashref "chr_start" => exon label
#   $chle          hashref chromosome => largest position visited (updated in place)
#
# Returns a hashref "chr_start" => "label,chr,start,end,length,sum_depth,mean_depth".
# Prints a message and exits when the depth file cannot be read.
sub _exon_depth_pass {
    my ($depthfile, $outfile, $exonspos, $exonstart_of, $chle) = @_;
    my %exon_depth;

    open(my $out_fh, '>', $outfile) or die "Cannot write \"$outfile\": $!\n";
    my $in_fh;
    unless (open($in_fh, '<', $depthfile)) {
        print "Cannot open file \"$depthfile\"\n";
        exit;
    }

    # The first line is discarded, as in the original code.
    # NOTE(review): confirm the depth file really starts with a header line;
    # otherwise its first record is lost here.
    my $discarded = <$in_fh>;

    my $currchr  = "xx";
    my $currsite = 1;
    my ($currexonpos, $exonaccudepth, $currdep, $exonstart);
    while (my $line = <$in_fh>) {
        chomp $line;
        my @fields = split(/\t/, $line);
        my $temv   = $fields[2];    # depth of this record; used at most once

        if ($fields[0] eq $currchr) {
            ##### same chromosome: walk every position since the last record
            for (my $i = $currsite; $i <= $fields[1]; $i++) {
                my $pos = $fields[0] . "_" . $i;

                # remember the longest position for each chromosome.
                if    (!$chle->{$fields[0]})     { $chle->{$fields[0]} = 1; }
                elsif ($chle->{$fields[0]} < $i) { $chle->{$fields[0]} = $i; }

                # an exon starts here: restart the accumulation.
                if ($exonstart_of->{$pos}) {
                    undef $exonaccudepth;
                    $currexonpos = $pos;
                    $exonstart   = $pos;
                    $currdep     = $exonstart_of->{$pos} . "," . $fields[0] . "," . $i;
                }

                next unless $currexonpos && $exonspos->{$currexonpos};
                if ($temv) { $exonaccudepth += $temv; undef $temv; }
                next if $i < $exonspos->{$currexonpos};

                # the exon end has been reached: report it and reset the state.
                my @forlg = split(',', $currdep);
                my $lg    = $exonspos->{$currexonpos} - $forlg[2];
                if ($exonaccudepth) {
                    # A lexical mean for every exon. The old code only set a
                    # block-scoped copy to 0 when the length was not positive,
                    # so the value printed was left over from an earlier exon.
                    my $avgv = $lg <= 0 ? 0 : $exonaccudepth / $lg;
                    my $row  = join(",", $currdep, $exonspos->{$currexonpos}, $lg, $exonaccudepth, $avgv);
                    print {$out_fh} "$row\n";
                    $exon_depth{$exonstart} = $row;
                }
                undef $exonaccudepth; undef $currexonpos; undef $currdep; undef $exonstart;
            }
            $currsite = $fields[1] + 1;
        }
        else {
            ##### changed chromosome: restart the walk from position 1
            $currsite = 1;
            my $pos = $fields[0] . "_" . $fields[1];
            if ($exonstart_of->{$pos}) {
                undef $exonaccudepth;
                $currexonpos = $pos;
                $currdep     = $exonstart_of->{$pos} . "," . $fields[0] . "," . $fields[1];
            }
        }
        $currchr = $fields[0];
    }
    close($in_fh);
    close($out_fh) or die "Cannot finish writing \"$outfile\": $!\n";
    return \%exon_depth;
}

# Compare exon coverage between a wild-type and a mutant BAM file.
#
# Parameters:
#   $bamdir  directory holding both BAM files; all outputs are written there
#   $wtbam   wild-type BAM file name
#   $mubam   mutant BAM file name
#   $output  prefix of the result files
#   $gtf     annotation file; only rows whose 3rd column is "exon" are used
#
# Steps: run "samtools depth" on both BAMs, sum the depth per exon
# (<bam>.depth.exondepth), then write the exons whose summed depth is above
# 10 in the wild type and below 4 in the mutant to
# <output>.depth.wt_mu.diff, plus a plot table in <output>.depth.wt_mu.diffplot.
#
# Returns a hash chromosome => largest position visited in the depth files.
# Prints a message and exits when the GTF or a depth file cannot be read;
# dies when samtools fails or an output file cannot be written.
sub bam2depth2exons {
    my ($bamdir, $wtbam, $mubam, $output, $gtf) = @_;

    #depth by samtools.
    for my $bam ($wtbam, $mubam) {
        system("samtools depth $bamdir/$bam > $bamdir/$bam.depth") == 0
            or die "samtools depth failed for \"$bamdir/$bam\" (status $?)\n";
    }

    ###########reading gtf file.
    my $gtf_fh;
    unless (open($gtf_fh, '<', $gtf)) {
        print "Cannot open file \"$gtf\"\n\n";
        exit;
    }
    my (%exonspos, %exonstart);
    while (my $line = <$gtf_fh>) {
        my @fs = split("\t", $line);
        next unless defined $fs[4] && $fs[2] eq "exon";

        my $attributes = defined $fs[8] ? $fs[8] : '';
        my ($id) = $attributes =~ /gene_id "([^"]*)/;
        my ($en) = $attributes =~ /exon_number "([^"]*)/;
        $id = '' unless defined $id;
        $en = '' unless defined $en;

        my $postart = $fs[0] . "_" . $fs[3];
        $exonspos{$postart}  = $fs[4];
        $exonstart{$postart} = $id . "_E" . $en;
    }
    close($gtf_fh);

    #depth assigned to exons, wild type first and then mutant.
    my %chle;
    my $wtexon = _exon_depth_pass("$bamdir/$wtbam.depth", "$bamdir/$wtbam.depth.exondepth",
                                  \%exonspos, \%exonstart, \%chle);
    my $muexon = _exon_depth_pass("$bamdir/$mubam.depth", "$bamdir/$mubam.depth.exondepth",
                                  \%exonspos, \%exonstart, \%chle);

    #synchronize wt and mu: an exon seen in only one sample gets zero depth in the other.
    for my $wtkey (keys %$wtexon) {
        next if $muexon->{$wtkey};
        my @wtspex = split(",", $wtexon->{$wtkey});
        $muexon->{$wtkey} = join(",", @wtspex[0 .. 3], "0", "0", "0");
    }
    for my $mukey (keys %$muexon) {
        next if $wtexon->{$mukey};
        my @muspex = split(",", $muexon->{$mukey});
        $wtexon->{$mukey} = join(",", @muspex[0 .. 3], "0", "0", "0");
    }

    #save results on the chromosomes.
    my $diff_file = "$bamdir/$output.depth.wt_mu.diff";
    my $plot_file = "$bamdir/$output.depth.wt_mu.diffplot";
    open(my $diff_fh, '>', $diff_file) or die "Cannot write \"$diff_file\": $!\n";
    open(my $plot_fh, '>', $plot_file) or die "Cannot write \"$plot_file\": $!\n";
    for my $chrkey (keys %chle) {
        for (my $e = 1; $e <= $chle{$chrkey}; $e++) {
            my $chrpos = $chrkey . "_" . $e;
            ####if exon exists.
            next unless $wtexon->{$chrpos};
            my @wtdep = split(',', $wtexon->{$chrpos});
            my @mudep = split(',', $muexon->{$chrpos});
            if ($wtdep[5] > 10 && $mudep[5] < 4) {
                print {$diff_fh} "$wtexon->{$chrpos},$muexon->{$chrpos}\n";
                print {$plot_fh} "$wtdep[1],$wtdep[2],$wtdep[3],1\n";
            }
        }
    }
    close($diff_fh) or die "Cannot finish writing \"$diff_file\": $!\n";
    close($plot_fh) or die "Cannot finish writing \"$plot_file\": $!\n";
    return %chle;
}

# Build a joint mpileup of the wild-type and mutant BAM files with samtools,
# call consensus genotypes with VarScan (mpileup2cns), and sort the filtered
# calls into four result files.
#
# Parameters:
#   $bamdir     directory holding both BAM files; all outputs are written there
#   $wtbam      wild-type BAM file name
#   $mubam      mutant BAM file name
#   $output     prefix of the result files
#   $reference  reference FASTA passed to "samtools mpileup -f"
#
# Result files (opened in append mode, so an earlier run with the same prefix
# is extended rather than replaced):
#   <output>.ins    no reads in the first sample, coverage > 5 in the second
#   <output>.snps   SNP ratio between 0.2 and 0.8 with enough coverage
#   <output>.del    no reads in the second sample, coverage > 5 in the first
#   <output>.diff   opposite major allele in the two samples
# Every line is "chr,pos,pos+100000,ref,var,reads1,reads2,reads1,reads2".
#
# Returns a hash chromosome => largest position seen in the VarScan output.
# Prints a message and exits when the VarScan output cannot be read; dies
# when samtools or VarScan fails or a result file cannot be written.
sub mileup2varscan2cns {
    my ($bamdir, $wtbam, $mubam, $output, $reference) = @_;
    my $pileup    = "$bamdir/wt_mu.merged.bam.mileup";
    my $wt_mu_cns = "$bamdir/wt_mu.merged.bam.mileup.cns";

    #mileup by samtools.
    system("samtools mpileup -f $reference $bamdir/$wtbam $bamdir/$mubam > $pileup") == 0
        or die "samtools mpileup failed (status $?)\n";

    #SNP/InDel calling by VarScan.
    print STDERR "\n\n";
    print STDERR "Progressing: VarScan beggins\n";
    print STDERR "\n";
    system("java -jar VarScan.v2.3.7.jar mpileup2cns $pileup --min-coverage 2 --min-reads2 2 --min-avg-qual 15 --min-var-freq 0.01 --min-freq-for-hom 0.75 > $wt_mu_cns") == 0
        or die "VarScan mpileup2cns failed (status $?)\n";

    # Result files are opened once, on first use, instead of on every record.
    my %out_fh;
    my $emit = sub {
        my ($suffix, $text) = @_;
        unless ($out_fh{$suffix}) {
            open($out_fh{$suffix}, '>>', "$bamdir/$output.$suffix")
                or die "Cannot write \"$bamdir/$output.$suffix\": $!\n";
        }
        print { $out_fh{$suffix} } $text;
    };

    #chle is for collecting chromosome end position.
    my %chle;
    my $cns_fh;
    unless (open($cns_fh, '<', $wt_mu_cns)) {
        print "Cannot open file \"$wt_mu_cns\"\n";
        exit;
    }
    while (my $line = <$cns_fh>) {
        chomp $line;
        my @gspli = split("\t", $line);
        next if !$gspli[1] || $gspli[1] !~ /\d/;    #filtering out the header and the tail.

        # keep the longest position for each chromosome (the real position;
        # the old code stored 1 for the first record of a chromosome).
        if (!defined $chle{$gspli[0]} || $chle{$gspli[0]} < $gspli[1]) {
            $chle{$gspli[0]} = $gspli[1];
        }

        my $posend = $gspli[1] + 100000;
        my $snppo  = join(",", $gspli[0], $gspli[1], $posend);    #cns position with chr and position.
        next if $gspli[2] =~ m/,/;    #discard the alternative type for reference.
        next if $gspli[3] =~ m/,/;    #discard the alternative type for variation.
        my $refvar = $gspli[2] . "," . $gspli[3];    #ref and var.

        #all frequency data in 11th column, ":"-separated.
        next unless defined $gspli[10];
        my @fre = split(":", $gspli[10]);
        next if @fre < 9;    # malformed record: the fields used below are missing
        my $fre = join(",", @fre[2, 3, 7, 8]);    #frequency data for wt and mu.

        #set unfound position's fre to 0.
        my ($ff1, $ff2, $ff3, $ff4) = map { $_ eq "-" ? 0 : $_ } @fre[2, 3, 7, 8];

        #only the two allels in one SNP are accepted.
        next if $ff1 + $ff2 != $fre[1];
        next if $ff3 + $ff4 != $fre[6];
        #filtering out the same SNP pattern of wt and mu.
        next if $ff1 == 0 && $ff3 == 0 && $ff2 != 0 && $ff4 != 0;
        next if $ff2 == 0 && $ff4 == 0 && $ff1 != 0 && $ff3 != 0;

        #SNP ratio > 0.2 is accepted. Reset per record so no stale value is reused.
        my $snpfre = 0;
        if ($ff1 || $ff2 || $ff3 || $ff4) {
            $snpfre = ($ff2 + $ff4) / ($ff1 + $ff2 + $ff3 + $ff4);
        }

        if ($ff1 == 0 && $ff2 == 0 && $fre[6] > 5) {
            $fre = join(",", "0", "0", $fre[7], $fre[8]);
            $emit->("ins", "$snppo,$refvar,$fre\n");
        }
        if (($ff1 > 5 || $ff2 > 5) && $snpfre > 0.2 && $snpfre < 0.8 && $fre[6] > 5) {
            $emit->("snps", "$snppo,$refvar,$fre\n");
        }
        if ($ff3 == 0 && $ff4 == 0 && $fre[1] > 5) {
            $fre = join(",", $fre[2], $fre[3], "0", "0");
            $emit->("del", "$snppo,$refvar,$fre\n");
        }
        if (   ($ff1 > $ff2 && $ff3 < $ff4 && $ff1 > 5 && $ff4 > 5)
            || ($ff1 < $ff2 && $ff3 > $ff4 && $ff2 > 5 && $ff3 > 5)) {
            $emit->("diff", "$snppo,$refvar,$fre\n");
        }
    }
    close($cns_fh);
    for my $suffix (keys %out_fh) {
        close($out_fh{$suffix})
            or die "Cannot finish writing \"$bamdir/$output.$suffix\": $!\n";
    }
    return %chle;
}

# Design one primer pair for a template sequence with primer3_core.
#
# Parameters:
#   $exonseq      template sequence
#   $primer3para  extra Primer3 input lines ("TAG=value", newline-separated)
#   $resultdir    working directory; myprimer.codes (Primer3 input) and
#                 myprimer.codes.out (Primer3 output) are rewritten there
#
# Returns the list ($left, $right). Each element is
# "sequence,position,tm,gc_percent,product_size", where position is the
# Primer3 "start,length" value and therefore holds a comma itself. Both
# elements are "NA,NA,NA,NA,NA" when Primer3 reports no left or right primer.
# Dies when the input file cannot be written; prints a message and exits when
# the Primer3 output cannot be read.
sub primerdesign {
    my ($exonseq,$primer3para,$resultdir) = @_;
    my $no_primer = "NA,NA,NA,NA,NA";
    my $codes     = "$resultdir/myprimer.codes";

    open(my $codes_fh, '>', $codes) or die "Cannot write \"$codes\": $!\n";
    print {$codes_fh} "SEQUENCE_ID=example\nSEQUENCE_TEMPLATE=$exonseq\nPRIMER_PICK_LEFT_PRIMER=1\nPRIMER_PICK_INTERNAL_OLIGO=0\nPRIMER_PICK_RIGHT_PRIMER=1\n$primer3para\n=\n";
    # Close before running primer3_core so the whole record is on disk.
    close($codes_fh) or die "Cannot finish writing \"$codes\": $!\n";

    # A failed run is not fatal: the output then holds no primer and the
    # "NA" result below is returned.
    system("primer3_core < $codes > $codes.out") == 0
        or warn "primer3_core did not finish cleanly (status $?)\n";

    my $filenamex = "$codes.out";
    my $primer_fh;
    unless (open($primer_fh, '<', $filenamex)) {print "Cannot open file \"$filenamex\"\n\n";exit;}
    my $allre = join('', <$primer_fh>);
    close($primer_fh);

    # Value of the first "TAG=value" line in the Primer3 output, or undef.
    my $value_of = sub {
        my ($tag) = @_;
        my ($value) = $allre =~ /^\Q$tag\E=(.*)\n/m;
        return $value;
    };

    my $left_primer = $value_of->("PRIMER_LEFT_0_SEQUENCE");
    return ($no_primer, $no_primer) if !$left_primer;
    my $right_primer = $value_of->("PRIMER_RIGHT_0_SEQUENCE");
    return ($no_primer, $no_primer) if !$right_primer;

    my $size = $value_of->("PRIMER_PAIR_0_PRODUCT_SIZE");
    my $left = join(",", $left_primer,
                    $value_of->("PRIMER_LEFT_0"),
                    $value_of->("PRIMER_LEFT_0_TM"),
                    $value_of->("PRIMER_LEFT_0_GC_PERCENT"),
                    $size);
    my $right = join(",", $right_primer,
                     $value_of->("PRIMER_RIGHT_0"),
                     $value_of->("PRIMER_RIGHT_0_TM"),
                     $value_of->("PRIMER_RIGHT_0_GC_PERCENT"),
                     $size);
    return ($left, $right);
}
#subroutine primerdesign.

# Check how specific a primer is by aligning it to the reference with blastn.
#
# Parameters:
#   $primerseq  primer sequence
#   $reference  BLAST database name (as created by makeblastdb)
#   $cpus       number of blastn threads
#   $resultdir  working directory; temprimer.fas and temprimer.fas.blast are
#               rewritten there
#
# Returns "NA" when blastn reports "No hits found"; otherwise
# "<number of Sbjct lines>-<first subject name>-<start>-<end>", where start
# and end come from the first Sbjct line of the report.
# NOTE(review): callers split this string on "-", so a subject name that
# contains "-" would shift the fields; confirm the reference names.
# Dies when the query cannot be written; prints a message and exits when the
# blastn report cannot be read.
sub specificity {
    my ($primerseq, $reference, $cpus, $resultdir) = @_;
    my $query = "$resultdir/temprimer.fas";

    open(my $query_fh, '>', $query) or die "Cannot write \"$query\": $!\n";
    print {$query_fh} ">primer\n$primerseq\n";
    # Close before blastn reads the query.
    close($query_fh) or die "Cannot finish writing \"$query\": $!\n";

    # List form: every argument reaches blastn as is, without shell parsing.
    system("blastn", "-query", $query, "-db", $reference,
           "-num_threads", $cpus, "-word_size", "20", "-evalue", "0.05",
           "-out", "$query.blast") == 0
        or warn "blastn did not finish cleanly (status $?)\n";

    my $filenamex = "$query.blast";
    my $blast_fh;
    unless (open($blast_fh, '<', $filenamex)) {
        print "Cannot open file \"$filenamex\"\n\n";
        exit;
    }
    my $allre = join('', <$blast_fh>);
    close($blast_fh);

    return "NA" if $allre =~ /No hits found/;

    # Name of the first subject sequence in the report.
    my ($schr) = $allre =~ />\s*(\S+)\s*/m;
    $schr = '' unless defined $schr;

    # Start and end of every Sbjct line, flattened: (s1, e1, s2, e2, ...).
    my @sbn = ($allre =~ /Sbjct\s*(\d+)\D+(\d+)\s*$/mg);

    my $re = (scalar(@sbn) / 2) . "-" . $schr;
    # Only the coordinates of the first Sbjct line are reported.
    my $last = $#sbn >= 2 ? 1 : $#sbn;
    for my $g (0 .. $last) {
        $re .= "-" . $sbn[$g];
    }
    return $re;
}
#subroutine specificity.


# define command line arguments
# NOTE(review): $primer3 is declared here but never filled; the parser below
# stores the --primer3 values in the package array @primer3, which is not
# declared with "my". This only works because "use strict" is not enabled.
my ($design, $input, $winstep, $smrange, $primer3, $reference, $output, $cpus, $log);

# Parse the command line. $result is false when GetOptions reports an error.
my $result = &GetOptions("design=s{1}" => \$design,
						 "winstep=s{1}" => \$winstep,
						 "smrange=s{1}" => \$smrange,
						 "input=s{1}" => \$input,
						 "primer3:s{0,}" => \@primer3,
						 "reference=s{1}" => \$reference,
						 "output|o=s{1}" => \$output,
						 "threads|t=i{0,}" => \$cpus,
						 "log!" => \$log,);
## Print help info.
# The usage text is printed to STDERR, followed by exit(), when parsing failed
# or when any of --reference, --design, --input or --output is missing.
unless ($result && defined($reference) && defined($design) && defined($input) && defined($output)) {
	print STDERR sprintf("\n");
	print STDERR sprintf("primer design pipeline\n");
	print STDERR sprintf("=============================================================\n");
	print STDERR sprintf("USAGE:\n");
	print STDERR sprintf("  perl %s --design <three design: individual, region, and SM> --input <input files with chromosome positions> [--winstep <Window and step for region design>]", $0);
	print STDERR sprintf("[--smrange <DNA length for SM design strategy>] --reference <genome reference fasta file> [--primer3 <primer3 parameters>] [--threads|-t <CPU cores or threads>] --output|-o <output.prefix> [OPTIONS]\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("WHERE:\n");
	print STDERR sprintf("  --design <three design: individual, region, and SM>: Primer design strategy with three options. individual, for individual exon per line; region, for the region with start and end positions; and SM, small mutations, SNP/InDel.\n");
	print STDERR sprintf("  --winstep <Window and step for region design>  : Window and step for primer design. Default (500,200): window, 500bp; step, 200bp. \n");
	print STDERR sprintf("  --smrange <DNA length for SM design strategy>: DNA length for SM design strategy used to design forward and reverse primers before and after the small mutation\n");
	print STDERR sprintf("  --input <input files with chromosome positions>: Path to the input files with genomic coordinates, one gene or exon in one line.\n");	
	print STDERR sprintf("  --reference <genome reference fasta file>    : Path to the fasta reference files.\n");
	print STDERR sprintf("  --primer3 <primer3 parameters>               : Optional. Set up your own Primer3 parameters to design a pair of primers. See Primer3 manual for more details\n");
	print STDERR sprintf("  PRIMER_MIN_SIZE=22 PRIMER_OPT_SIZE=25 PRIMER_MAX_SIZE=27 PRIMER_MIN_TM=55 PRIMER_OPT_TM=60 PRIMER_MAX_TM=62 PRIMER_PRODUCT_SIZE_RANGE=50-1000 PRIMER_PRODUCT_OPT_SIZE=200 PRIMER_NUM_RETURN=1\n");	
	print STDERR sprintf("  --threads|-t <CPU cores or threads>          : Multiple threads for blastn running.\n");
	print STDERR sprintf("  --output|-o <output.prefix>                  : Path to output files' prefix\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("OPTIONS:\n");
	print STDERR sprintf("  --log|--nolog                     : Enable/Disable the cleaning of temporary files [DEFAULT: --log]\n");
	print STDERR sprintf("Example: perl primer_design_pipeline.pl --design=region --input=example.txt --reference=maize.3_4.ref.fa --threads=20 --output=test");
	print STDERR sprintf("\n");
	print STDERR sprintf("\n");
	exit();
} # End of unless statement

# Start of the run in epoch seconds; used for the total time printed at the end.
my $starttime;
$starttime = timelocal(localtime(time));	#local current time.

# Only the three documented strategies are handled. Any other value used to
# reach the region loop with an undefined step, which never terminates.
unless ($design eq "individual" || $design eq "region" || $design eq "SM") {
    die "Error: --design must be one of individual, region or SM (got \"$design\")\n";
}

#assigning default values to win-step index.
my ($win, $step);
if ($design eq "region") {
    if (!defined($winstep)) {
        print "Warning: no winstep index specified, using default 500,200. ";
        $win  = 500;
        $step = 200;
    }
    else {
        ($win, $step) = split(',', $winstep);
        # Both numbers are required: the region loop advances by $step, so a
        # missing or zero step would loop forever.
        unless (   defined $win  && $win  =~ /^\d+$/ && $win  > 0
                && defined $step && $step =~ /^\d+$/ && $step > 0) {
            die "Error: --winstep expects two positive integers as <window>,<step> (e.g. 500,200), got \"$winstep\"\n";
        }
    }
}

#assigning default values to smrange index.
if ($design eq "SM") {
    if (!defined($smrange)) {
        print "Warning: no smrange index specified, using default 500. ";
        $smrange = 500;
    }
    elsif ($smrange !~ /^\d+$/ || $smrange <= 0) {
        die "Error: --smrange expects a positive integer, got \"$smrange\"\n";
    }
}

# assigning default values
$cpus = 1 if (!defined($cpus) || $cpus !~ m/^\d+$/);
$log = "true" if (!defined($log));

# Build the Primer3 parameter block ($primer3para, newline-separated
# "TAG=value" lines) from the --primer3 values and the defaults below.
#
# A user value replaces the default of the first tag (in the order listed)
# whose name occurs in it; when a tag is given several times the last one
# wins. Values that match none of the tags are appended unchanged.
#
# Fixes over the previous version:
#   - a user-supplied PRIMER_PRODUCT_OPT_SIZE was replaced by the bare number
#     200, which is not a "TAG=value" line;
#   - the default PRIMER_PRODUCT_OPT_SIZE was 22, outside the default product
#     size range (50-1000) and different from the 200 shown in the usage text;
#   - extra parameters were joined with a leading newline, which put an empty
#     line into the Primer3 input.
my $primer3para;
{
    my @defaults = (
        [ PRIMER_MIN_SIZE           => "22" ],
        [ PRIMER_OPT_SIZE           => "25" ],
        [ PRIMER_MAX_SIZE           => "27" ],
        [ PRIMER_MIN_TM             => "55" ],
        [ PRIMER_OPT_TM             => "60" ],
        [ PRIMER_MAX_TM             => "62" ],
        [ PRIMER_PRODUCT_SIZE_RANGE => "50-1000" ],
        [ PRIMER_PRODUCT_OPT_SIZE   => "200" ],
        [ PRIMER_NUM_RETURN         => "1" ],
    );

    my %chosen;    # tag => user-supplied "TAG=value"
    my @others;    # user parameters outside the list above
    PARAM: for my $param (@primer3) {
        # --primer3 without a value yields an empty string.
        next PARAM unless defined $param && length $param;
        for my $default (@defaults) {
            my $tag = $default->[0];
            if (index($param, $tag) >= 0) {
                $chosen{$tag} = $param;
                next PARAM;
            }
        }
        push @others, $param;
    }

    $primer3para = join(
        "\n",
        (map { $chosen{ $_->[0] } || "$_->[0]=$_->[1]" } @defaults),
        @others
    );
}
# generate running ID and running folders.
my $run_id = generateID();
my $resultdir = "$pwd/$run_id";
unless (-d $resultdir) {
    mkdir($resultdir) or die "Cannot create run directory \"$resultdir\": $!\n";
}

#open reference file.
# The whole reference is loaded into %chrseq, keyed by the text between ">"
# and the first whitespace of each header line.
my $ref_fh;
unless (open($ref_fh, '<', $reference)) {
    print "Cannot open file \"$reference\"\n\n";
    exit;
}
my %chrseq;my $chrseqs;my $chrname;
while (my $line = <$ref_fh>) {
    chomp $line;
    $line =~ s/\r$//;    # tolerate Windows line endings
    if ($line =~ />/) {
        my @fs  = split(" ", $line);
        my @fs2 = split(">", $fs[0]);
        print "chr: $fs2[1]\n";
        # Store the sequence collected for the previous header. defined() is
        # used instead of a truth test so a sequence named "0" is kept.
        $chrseq{$chrname} = $chrseqs if defined $chrname;
        undef $chrseqs;
        $chrname = $fs2[1];
    }
    elsif (defined $chrname) {
        $chrseqs .= $line;
    }
}
$chrseq{$chrname} = $chrseqs if defined $chrname;
undef $chrseqs;
close($ref_fh);

#format reference.
print "Progess: making new blast database. \n";
# List form: the reference path reaches makeblastdb without shell parsing.
# Every later specificity check needs this database, so a failure is fatal.
system("makeblastdb", "-in", $reference, "-dbtype", "nucl", "-out", "$resultdir/reference") == 0
    or die "makeblastdb failed for \"$reference\" (status $?)\n";

#open input file.
# Each line is tab-separated: name, chromosome, start, end. The text before
# the first "_" of the line is used as the region key.
my $input_fh;
unless (open($input_fh, '<', $input)) {print "Cannot open file \"$input\"\n\n";exit;}
my @rawexons = <$input_fh>;
close($input_fh);

#cycle for exons.
# For every input line: track the smallest start and largest end per region,
# cut the template out of the reference, design a primer pair and check both
# primers with BLAST. Sequences are appended to <output>.exons.fas and primer
# rows to <output>.exons.primers.
my (%small, %big, %regionchr);
my $exons_fasta   = "$resultdir/$output.exons.fas";
my $exons_primers = "$resultdir/$output.exons.primers";
open(my $fasta_fh, '>>', $exons_fasta) or die "Cannot write \"$exons_fasta\": $!\n";
open(my $primers_fh, '>>', $exons_primers) or die "Cannot write \"$exons_primers\": $!\n";

for my $we (0 .. $#rawexons) {
    chomp $rawexons[$we];
    next unless $rawexons[$we] =~ /\S/;    # a blank line used to abort the run

    my @mypos = split(/\t/, $rawexons[$we]);
    my ($chr, $start, $end) = @mypos[1 .. 3];
    my @exon = split('_', $rawexons[$we]);

    if (!$small{$exon[0]}) { $small{$exon[0]} = 100000000000000000000000; }
    if (!$big{$exon[0]})   { $big{$exon[0]} = 0; }
    if ($start < $small{$exon[0]}) { $small{$exon[0]} = $start; }
    if ($end > $big{$exon[0]})     { $big{$exon[0]} = $end; }
    $regionchr{$exon[0]} = $chr;

    if (!$chrseq{$chr}) { print "Wrong: no chromosome $chr seqs in the reference"; exit; }

    # Per-record copy of the parameters. SEQUENCE_TARGET used to be appended
    # to the shared $primer3para, so every later record carried the targets
    # of all earlier ones.
    my $exonpara = $primer3para;
    my ($istart, $exonseq);
    if ($design eq "SM") {
        # Template = $smrange bases before the mutation + $smrange bases after it.
        $istart = $start - $smrange;
        # A negative offset would make substr() count from the sequence end.
        $istart = 0 if $istart < 0;
        my $flank_length = $start - $istart;
        my $exonseq1 = substr($chrseq{$chr}, $istart, $flank_length);
        my $exonseq2 = substr($chrseq{$chr}, $end, $smrange);
        $exonseq = $exonseq1 . $exonseq2;
        $exonpara .= "\n" . "SEQUENCE_TARGET=$flank_length,2";
    }
    else {
        #searching for start and end of the region; short exons get 80 bases
        #of flank on each side.
        my $exonlg = $end - $start + 1;
        if ($exonlg < 150) { $istart = $start - 80; $exonlg += 160; }
        else               { $istart = $start; }
        if ($istart < 0) { $exonlg += $istart; $istart = 0; }
        $exonseq = substr($chrseq{$chr}, $istart, $exonlg);
    }

    #deposite the exon seq.
    print {$fasta_fh} ">$chr\t$start\t$end\n$exonseq\n";

    #running primer design.
    my ($leftprimer, $rightprimer) = primerdesign($exonseq, $exonpara, $resultdir);
    next unless $leftprimer && $rightprimer;

    # "NA" primers are not sent to BLAST; their specificity is "NA" as well.
    my ($spnl, $spnr) = ("NA", "NA");
    unless ($leftprimer =~ /^NA,/ || $rightprimer =~ /^NA,/) {
        my @forl = split(',', $leftprimer);
        my @forr = split(',', $rightprimer);
        $spnl = specificity($forl[0], "$resultdir/reference", $cpus, $resultdir);
        $spnr = specificity($forr[0], "$resultdir/reference", $cpus, $resultdir);
    }

    for my $row ([$leftprimer, "F", $spnl], [$rightprimer, "R", $spnr]) {
        my ($primer, $strand, $spn) = @$row;
        # Fields 2-4 of the specificity string: subject name, start, end.
        my @parts = split('-', $spn);
        my @hit   = map { defined $parts[$_] ? $parts[$_] : '' } 1 .. 3;
        print {$primers_fh} join(",", @hit, 1, $mypos[0], $chr, "${start}_$istart",
                                 $end, $primer, $strand, $spn), "\n";
    }
}
close($fasta_fh) or die "Cannot finish writing \"$exons_fasta\": $!\n";
close($primers_fh) or die "Cannot finish writing \"$exons_primers\": $!\n";

####################################################search for large gaps among primers.
# For every region: record the smallest start and largest end per chromosome
# (%chrsmall / %chrbig, used later to scale the plots) and, for the "region"
# design, tile the region with windows of $win bases every $step bases and
# design a primer pair in each window. Rows are appended to
# <output>.exons.primers.

# Flush rows that the exon loop may still hold on the shared OUTPUT handle,
# so they reach the file before the rows written here.
close(OUTPUT) if defined fileno(OUTPUT);

my $gap_primers = "$resultdir/$output.exons.primers";
open(my $gap_fh, '>>', $gap_primers) or die "Cannot write \"$gap_primers\": $!\n";
print {$gap_fh} "\n";

my $tiling = ($design ne "SM" && $design ne "individual");
if ($tiling && !($win && $step && $step > 0)) {
    # The window loop advances by $step; without it the loop never ends.
    die "Error: region design needs a positive window and step (--winstep <window>,<step>)\n";
}

my (%chrsmall,%chrbig);
for my $skey (keys %small) {
    my $chr   = $regionchr{$skey};
    my $small = $small{$skey};
    my $big   = $big{$skey};
    if (!$chrseq{$chr}) { print "Wrong: no chromosome $skey seqs. "; exit; }

    if    (!$chrsmall{$chr})         { $chrsmall{$chr} = $small; }
    elsif ($chrsmall{$chr} > $small) { $chrsmall{$chr} = $small; }
    if    (!$chrbig{$chr})           { $chrbig{$chr} = $big; }
    elsif ($chrbig{$chr} < $big)     { $chrbig{$chr} = $big; }

    next unless $tiling;

    for (my $e = $small; $e <= $big; $e += $step) {
        my $rend = $e + $win;
        my $rseq = substr($chrseq{$chr}, $e, $win);
        my ($rleftprimer, $rrightprimer) = primerdesign($rseq, $primer3para, $resultdir);

        # Windows without a primer pair get empty hit and specificity fields.
        my ($rspnl, $rspnr) = ('', '');
        unless ($rleftprimer =~ /NA/ || $rrightprimer =~ /NA/) {
            my @rforl = split(',', $rleftprimer);
            my @rforr = split(',', $rrightprimer);
            $rspnl = specificity($rforl[0], "$resultdir/reference", $cpus, $resultdir);
            $rspnr = specificity($rforr[0], "$resultdir/reference", $cpus, $resultdir);
        }

        for my $row ([$rleftprimer, "F", $rspnl], [$rrightprimer, "R", $rspnr]) {
            my ($primer, $strand, $spn) = @$row;
            my @parts = split('-', $spn);
            my @hit   = map { defined $parts[$_] ? $parts[$_] : '' } 1 .. 3;
            print {$gap_fh} join(",", @hit, "0.5", "${win}_$step", $skey, $chr,
                                 $e, $rend, $primer, $strand, $spn), "\n";
        }
    }
    print {$gap_fh} "\n";
}

# SM design: widen every chromosome range by the flank length, once per
# chromosome. This used to happen inside the loop above, once per region, so
# the margin grew with the number of regions on a chromosome and depended on
# the hash order.
if ($design eq "SM") {
    for my $chr (keys %chrsmall) {
        $chrbig{$chr}   += $smrange;
        $chrsmall{$chr} -= $smrange;
    }
}
close($gap_fh) or die "Cannot finish writing \"$gap_primers\": $!\n";

# Read back every primer row written so far.
my $plotfilename = "$resultdir/$output.exons.primers";
my $primer_fh;
unless (open($primer_fh, '<', $plotfilename)) {print "Cannot open file \"$plotfilename\"\n\n";exit;}
my @myprimers = <$primer_fh>;
close($primer_fh);

#generate primer file for plotting
# Each row is "chromosome,start,end,value" with coordinates shifted so the
# smallest region start of the chromosome becomes 0. Primer rows keep the
# value from their 4th column; input regions are added with the value 0.2.
my $primersplot = "$resultdir/$output.exons.primersplot";
open(my $plot_fh, '>', $primersplot) or die "Cannot write \"$primersplot\": $!\n";

for my $p (0 .. $#myprimers) {
    chomp $myprimers[$p];
    my @spprimer = split(',', $myprimers[$p]);
    # Logical "or" (the code used bitwise "|"): rows without a hit, or on a
    # chromosome without a region, are skipped without touching an undefined key.
    next if !$spprimer[0] || !$chrsmall{$spprimer[0]};

    my $pristart = $spprimer[1] - $chrsmall{$spprimer[0]};
    my $priend   = $spprimer[2] - $chrsmall{$spprimer[0]};
    # Always write the smaller coordinate first.
    ($pristart, $priend) = ($priend, $pristart) if $priend < $pristart;
    print {$plot_fh} "$spprimer[0],$pristart,$priend,$spprimer[3]\n";
}

for my $we (0 .. $#rawexons) {
    chomp $rawexons[$we];
    my @expos = split(/\t/, $rawexons[$we]);
    next if !$expos[1] || !$chrsmall{$expos[1]};

    my $exstart = $expos[2] - $chrsmall{$expos[1]};
    my $exend   = $expos[3] - $chrsmall{$expos[1]};
    ($exstart, $exend) = ($exend, $exstart) if $exend < $exstart;
    print {$plot_fh} "$expos[1],$exstart,$exend,0.2\n";
}
close($plot_fh) or die "Cannot finish writing \"$primersplot\": $!\n";

#plot the primers and regions in the genome coordinates.
# Chromosome names and lengths are passed to the plotting subs as
# comma-separated lists in the same (sorted) order. The lists are built with
# join() so a chromosome named "0" no longer resets them.
my @plotchrs = sort keys %chrsmall;
my $firstch  = @plotchrs ? "chr" . $plotchrs[0] : '';
my $mychname = join(",", @plotchrs);
my $mychle   = join(",", map { $chrbig{$_} - $chrsmall{$_} + 1 } @plotchrs);

$plotfilename = "$output.exons.primersplot";
print "$resultdir, $output, $firstch, $mychname, $mychle, $plotfilename";
chromplotggbio($resultdir, $firstch, $mychname, $mychle, $plotfilename);
chromplot($resultdir, $mychname, $plotfilename);

# Report the total run time.
my $endtime = timelocal(localtime(time));
my $diff    = $endtime - $starttime;
print STDERR "\n\n";
print STDERR sprintf("All finished, total time: %s\n", formatTime($diff));
print STDERR "\n";