#!/usr/bin/perl

# Copyright (c) 2015
# University of Nebraska - Lincoln
# All Rights Reserved
#
# Authors: Shangang Jia, David Holding and Chi Zhang
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 
# following conditions are met:
#   o Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#   o Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 
#     disclaimer in the documentation and/or other materials provided with the distribution.
#   o Neither the name of the University of Nebraska - Lincoln nor the names of its contributors may be used to endorse or promote 
#     products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SHANGANG JIA, DAVID HOLDING and CHI ZHANG 
# (OR UNIVERSITY OF NEBRASKA - LINCOLN) BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
# STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
# SOFTWARE, EVEN IF ADVISED OF THE  POSSIBILITY OF SUCH DAMAGE.
#
# DESCRIPTION: BSRE enables the complete analysis of BSR-seq and exome-seq data by calling third-party software, including Samtools, 
# Bowtie2 and VarScan. It is an integrated pipeline covering input of raw fastq data, alignment, SNP/indel calling, identification of deleted exons, 
# and plotting mapping linkage peaks and deleted exons. It supports inputs of fastq or bam files in a comparison of normal type and mutant type 
# in BSR-seq for F2 generation, and normal type and mutant type in exome-seq for Mxx generation. This script has a flexibility to process 
# BSR-seq or exome-seq data, and generates the results in one command line.
#
# CITATION: it can be cited as Shangang Jia, David Holding and Chi Zhang, unpublished.
#


use warnings;
use Getopt::Long;
use Time::Local;
use Cwd;
use constant DEFAULT_TEMP_DIRECTORY => ".";				# Default directory
my $pwd = cwd();
# Generate a run ID to be used for temporary files
sub generateID {
    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time);
    return sprintf("%d%02d%02d.%02d%02d%02d", 1900+$year, $mon+1, $mday, $hour, $min, $sec);
} # End of sub generateID

# Given total seconds signifying the module run-time, format it in hh:mm:ss format
sub formatTime {
    my $totaltime = $_[0];
    my $str;

    $str = sprintf("%02d:", $totaltime / 3600);    # total hours
    $totaltime = $totaltime % 3600;
    $str .= sprintf("%02d:", $totaltime / 60);      # total minutes
    $totaltime = $totaltime % 60; 
    $str .= sprintf("%02d", $totaltime);            # total sconds

    return $str;
} # End of sub formatTime

# CNS windows moving on the chromosomes, with a windows of 100kb, step of 20kb.
sub densitymoving {
my $chposfile = $_[0];
my $chpos;
my $currentchr = 0;
my %chpos;my %subchr;
print "$chposfile\n";
open (OUTPUT, ">$chposfile.posdens");
print OUTPUT "";

unless ( open ("chpos", $chposfile) ) {
print "Cannot open file \"$chposfile\"\n";
exit;
}
while (<chpos>) {

my @ind = split(",",$_);
if (!$subchr{$ind[0]}){$subchr{$ind[0]}=1;}
else{if ($subchr{$ind[0]}<$ind[1]){$subchr{$ind[0]}=$ind[1];}}

$chpos=$ind[0]."_".$ind[1];
$chpos{$chpos}=$_;
}
close ("chpos");

my	$win = 100000;
my	$step = 20000;	
  for my $chposkey (sort keys %subchr) {
		my $position = 0;
		my $istart=0;
    until ($position>$subchr{$chposkey}){
			$position = $istart+$win;
			my $positivenum=0;
			my $negativenum=0;
			my $iallnum=0;
			for (my $e=$istart; $e<= $position; $e++) {
				my $cnsposkey = $chposkey."_".$e;
				####if cns exists.
				if ($chpos{$cnsposkey}){
					    my @fields = split(',',$chpos{$cnsposkey});
						if ($fields[8]==0){
							if ($fields[7]>$fields[8] && $fields[6]>$fields[5]){$positivenum ++;}
							else {$negativenum ++;}
						}
						else {$negativenum ++;}
					$iallnum ++;	
			}
				####if cns exists.
		
		   }
		   if (!$positivenum){$positivenum=0;}
		   if (!$negativenum){$negativenum=0;}
		   if (!$iallnum){$iallnum=0;}
		   print OUTPUT "$chposkey,$istart,$positivenum,$iallnum\n";
	   
		   $istart += $step; 
    }




  }

	close OUTPUT;
}

# chromosome plot by ggbio.
sub chromplotggbio {
	my ($bamdir, $firstch, $mychname, $mychle, $plotfilename) = @_;
	open (OUTPUT, ">$bamdir/ggbio.R");
	print OUTPUT "";
	print OUTPUT "
	library(ggbio)
	library(GenomicRanges)

	gr <- GRanges(seqnames =\"$firstch\", IRanges(start = 1, end = 2))
	seqinfo(gr) <- Seqinfo(paste0(\"chr\", c($mychname)), c($mychle), NA, \"mock1\")
	xq <- seqinfo(gr)
	p1<-autoplot(xq[paste0(\"chr\", c($mychname))])

	setwd(\"$bamdir/\")
	d=read.csv(\"$plotfilename\", header=FALSE)
	start=d[,2]
	end=d[,2]+100000
	chr=d[,1]
	den=d[,3]
	snp <- GRanges(seqnames =paste0(\"chr\", data.matrix(as.numeric(as.character(chr)))), IRanges(start = data.matrix(as.numeric(as.character(start))), end = data.matrix(as.numeric(as.character(end)))), den=den)
	p2<-p1+layout_karyogram(snp, aes(x = start, y = den), geom = \"line\", ylim = c(0, 10), color = \"red\", fill =\"red\")
	pdf(\"$plotfilename.ggbio.pdf\")
	p2
	dev.off()
	
	";
	close OUTPUT;

	system "Rscript $bamdir/ggbio.R";	
}

# chromosome plot. 
sub chromplot {
	my ($bamdir, $mychname, $plotfilename) = @_;
	my @chran = split(',',$mychname);
	my $subplotnum = scalar(@chran);
	open (OUTPUT, ">$bamdir/plot.R");
	print OUTPUT "
	setwd(\"$bamdir/\")
	d=read.csv(\"$plotfilename\", header=FALSE)
	pdf(\"$plotfilename.chplot.pdf\")
	par( mfrow = c($subplotnum,1),mar=c(1,4,1,1))
	m = max(as.numeric(d[,3]))
	";

	for (my $w=0; $w<= $#chran; $w++) {
		print OUTPUT "
		l = subset(d,d[,1]==$chran[$w])
		plot(l[,2],l[,3],col = \"red\",xlab=\"\",ylab=l[1,1],xaxt='n',ylim=c(0, m))
		";
	}
	print OUTPUT "dev.off()\n";

	close OUTPUT;
	system "Rscript $bamdir/plot.R";	
}

sub bam2depth2exons {
	my ($bamdir, $wtbam, $mubam, $gtf) = @_;
	#depth by samtools.
	system "samtools depth $bamdir/$wtbam > $bamdir/$wtbam.depth";	
	system "samtools depth $bamdir/$mubam > $bamdir/$mubam.depth";	

	###########reading gtf file.
	unless ( open (PRO, $gtf) ) {
	print "Cannot open file \"$gtf\"\n\n";
	exit;
	}
	my (%exonspos, %exonstart);
	while (<PRO>) {
			my @fs = split ("\t", $_);
			my $chr=$fs[0];
			my $exon=$fs[2];
			my $start=$fs[3];
			my $end=$fs[4];
			my $incgene=$fs[8];
			my @forid = split('gene_id \"',$incgene);
			my @forid2 = split('\"',$forid[1]);
			my $id = $forid2[0];
			my @foren = split('exon_number \"',$incgene);
			my @foren2 = split('\"',$foren[1]);
			my $en = $foren2[0];
			if ($exon eq "exon"){
				my $postart = $chr."_".$start;
				$exonspos{$postart}=$end;
				$exonstart{$postart}=$id."_E".$en;
			}
	
	}
	close PRO;

	#depth assigned to exons.
	my (%wtexon, %muexon, %chle);
	my $filenamewt = "$bamdir/$wtbam.depth";
	open (OUTPUT, ">$bamdir\/$wtbam.depth.exondepth");
	print OUTPUT "";

	unless ( open (PRO, $filenamewt) ) {
	print "Cannot open file \"$filenamewt\"\n";
	exit;
	}
	<PRO>;
	my $currchr = "xx";
	my $currsite = 1;
	my ($currexonpos, $exonaccudepth, $currdep, $exonstart);
	while(<PRO>) {
	    chomp($_);
	    my @fields = split('\t',$_);
		my $temv = $fields[2];
	##### same chromosome
	        if ($fields[0] eq $currchr){
				for (my $i=$currsite; $i<= $fields[1]; $i++) {				
					my $pos = $fields[0]."_".$i;
					if (!$chle{$fields[0]}){$chle{$fields[0]}=1;}else{if ($chle{$fields[0]}<$i){$chle{$fields[0]}=$i;}}#deposit the longest position for each chromosome.
					if ($exonstart{$pos}){undef $exonaccudepth;undef $currexonpos;undef $currdep; $currexonpos = $pos; $exonstart=$pos; $currdep = $exonstart{$pos}.",".$fields[0].",".$i;}
					#####
					if ($currexonpos && $exonspos{$currexonpos} && $i<$exonspos{$currexonpos}){
						if ($temv){$exonaccudepth += $temv;undef $temv;}
					}
					elsif ($currexonpos && $exonspos{$currexonpos} && $i>=$exonspos{$currexonpos}) {
						if ($temv){$exonaccudepth += $temv;undef $temv;}
						my @forlg = split(',',$currdep);
						my $lg = $exonspos{$currexonpos}-$forlg[2];
						if (!$exonaccudepth){undef $exonaccudepth; undef $currexonpos;undef $currdep; undef $exonstart; next;}
						if ($lg<=0){my $avgv=0;}else{$avgv = $exonaccudepth/$lg;}
						print OUTPUT "$currdep,$exonspos{$currexonpos},$lg,$exonaccudepth,$avgv\n";
						$wtexon{$exonstart}="$currdep,$exonspos{$currexonpos},$lg,$exonaccudepth,$avgv";
						undef $exonaccudepth; undef $currexonpos;undef $currdep; undef $exonstart;
					}
					#####
				}
	$currsite = $fields[1]+1;
			}
	##### same chromosome

	##### changed chromosome
			elsif ($fields[0] ne $currchr){$currsite = 1;$pos = $fields[0]."_".$fields[1];
		        if ($exonstart{$pos}){undef $exonaccudepth;undef $currexonpos;undef $currdep;$currexonpos = $pos; $currdep = $exonstart{$pos}.",".$fields[0].",".$fields[1]; }
				goto mynext;
			}
	##### changed chromosome	
	mynext:
	$currchr = $fields[0];
	}
	close PRO;
	close OUTPUT;
	
	#mu depth
	my $filenamemu = "$bamdir/$mubam.depth";
	open (OUTPUT, ">$bamdir\/$mubam.depth.exondepth");
	print OUTPUT "";

	unless ( open (PRO, $filenamemu) ) {
	print "Cannot open file \"$filenamemu\"\n";
	exit;
	}
	<PRO>;
	$currchr = "xx";
	$currsite = 1;
	undef $exonaccudepth; undef $currexonpos;undef $currdep; undef $exonstart;
	while(<PRO>) {
	    chomp($_);
	    my @fields = split('\t',$_);
		my $temv = $fields[2];
	##### same chromosome
	        if ($fields[0] eq $currchr){
				for (my $i=$currsite; $i<= $fields[1]; $i++) {
					my $pos = $fields[0]."_".$i;
					if (!$chle{$fields[0]}){$chle{$fields[0]}=1;}else{if ($chle{$fields[0]}<$i){$chle{$fields[0]}=$i;}}#deposite the longest position for each chromosome.
					if ($exonstart{$pos}){undef $exonaccudepth;undef $currexonpos;undef $currdep; $currexonpos = $pos; $exonstart=$pos; $currdep = $exonstart{$pos}.",".$fields[0].",".$i;}
					#####
					if ($currexonpos && $exonspos{$currexonpos} && $i<$exonspos{$currexonpos}){
						if ($temv){$exonaccudepth += $temv;undef $temv;}
					}
					elsif ($currexonpos && $exonspos{$currexonpos} && $i>=$exonspos{$currexonpos}) {
						if ($temv){$exonaccudepth += $temv;undef $temv;}
						my @forlg = split(',',$currdep);
						my $lg = $exonspos{$currexonpos}-$forlg[2];
						if (!$exonaccudepth){undef $exonaccudepth; undef $currexonpos;undef $currdep; undef $exonstart; next;}
						if ($lg<=0){my $avgv=0;}else{
						$avgv = $exonaccudepth/$lg;}
						print OUTPUT "$currdep,$exonspos{$currexonpos},$lg,$exonaccudepth,$avgv\n";
						$muexon{$exonstart}="$currdep,$exonspos{$currexonpos},$lg,$exonaccudepth,$avgv";						
						undef $exonaccudepth; undef $currexonpos;undef $currdep;undef $exonstart;
					}
					#####
				}
	$currsite = $fields[1]+1;
			}
	##### same chromosome

	##### changed chromosome
			elsif ($fields[0] ne $currchr){$currsite = 1;$pos = $fields[0]."_".$fields[1];
		        if ($exonstart{$pos}){undef $exonaccudepth;undef $currexonpos;undef $currdep;$currexonpos = $pos; $currdep = $exonstart{$pos}.",".$fields[0].",".$fields[1]; }
				goto mynext;
			}
	##### changed chromosome	
	mynext:
	$currchr = $fields[0];
	}
	close PRO;
	close OUTPUT;
	
	#synchronize wt and mu.
	for my $wtkey (keys %wtexon)
	    {
			my @wtspex = split (",", $wtexon{$wtkey});
			if (!$muexon{$wtkey}){$muexon{$wtkey}=$wtspex[0].",".$wtspex[1].",".$wtspex[2].",".$wtspex[3].","."0".","."0".","."0";}
		}
	for my $mukey (keys %muexon)
		{
				my @muspex = split (",", $muexon{$mukey});
				if (!$wtexon{$mukey}){$wtexon{$mukey}=$muspex[0].",".$muspex[1].",".$muspex[2].",".$muspex[3].","."0".","."0".","."0";}
		}

	#save results on the chromosmes.
	open (OUTPUT, ">$bamdir\/$wtbam.$mubam.diff");
	print OUTPUT "";
	open (OUTPUT2, ">$bamdir\/$wtbam.$mubam.diffplot");
	print OUTPUT2 "";
	for my $chrkey (keys %chle) {
		for (my $e=1; $e<= $chle{$chrkey}; $e++) {
			my $chrpos = $chrkey."_".$e;
			####if exon exists.
			if ($wtexon{$chrpos}){
				my @wtdep = split(',',$wtexon{$chrpos});
				my @mudep = split(',',$muexon{$chrpos});
				if ($wtdep[5]>10 && $mudep[5]<4){
			      print OUTPUT "$wtexon{$chrpos},$muexon{$chrpos}\n";
				  print OUTPUT2 "$wtdep[1],$wtdep[2],$wtdep[3],1\n";
			    }
			}		
			####if exon exists.
		}
	}
	close OUTPUT;
	close OUTPUT2;
	return %chle;
}

# Samtool for mileup file, which is processed by VarScan for cns files with fitlering.
sub mileup2varscan2cns {
	my ($bamdir, $wtbam, $mubam, $reference) = @_;
	#mileup by samtools.
	system "samtools mpileup -f $reference $bamdir/$wtbam $bamdir/$mubam > $bamdir/$wtbam.$mubam.mileup";	

	#SNP/InDel calling by VarScan.	
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Progressing: VarScan beggins\n");
	print STDERR sprintf("\n");

	system "java -jar VarScan.v2.3.7.jar mpileup2cns $bamdir/$wtbam.$mubam.mileup --min-coverage 2 --min-reads2 2 --min-avg-qual 15 --min-var-freq 0.01 --min-freq-for-hom 0.75 > $bamdir/$wtbam.$mubam.mileup.cns";
	#chle is for collecting chromosome end position.
	my %chle; 
	my ($ff1, $ff2, $ff3, $ff4, $snppo, $refvar, $fre, $to1, $to2, $snpfre);
	my (@gspli, @fre);
	my $wt_mu_cns = "$bamdir/$wtbam.$mubam.mileup.cns";
	unless ( open (WTMUCNS, $wt_mu_cns) ) {
		print "Cannot open file \"$wt_mu_cns\"\n";
		exit;
	}
	while (<WTMUCNS>) {
		chomp;
		@gspli = split("\t",$_);
		if (!$gspli[1] || $gspli[1]!~ /\d/){next;}#filtering out the header and the tail.
		if (!$chle{$gspli[0]}){$chle{$gspli[0]}=1;}else{if ($chle{$gspli[0]}<$gspli[1]){$chle{$gspli[0]}=$gspli[1];}}#deposite the longest position for each chromosome.
		my $posend = $gspli[1]+100000;
		$snppo = $gspli[0].",".$gspli[1].",".$posend;#cns position with chr and position.
		if ($gspli[2]=~ m/,/){next;}#discard the alternative type for reference.
		if ($gspli[3]=~ m/,/){next;}#discard the alternative type for variation.
		$refvar = $gspli[2].",".$gspli[3];#ref and var.
		@fre = split(":",$gspli[10]);#all frequency data in 11th column.
		$fre = $fre[2].",".$fre[3].",".$fre[7].",".$fre[8];#frequency data for wt and mu.
		#set unfound position's fre to 0.
		if ($fre[2] eq "-"){$ff1=0;}else {$ff1=$fre[2]}
		if ($fre[3] eq "-"){$ff2=0;}else {$ff2=$fre[3]}
		if ($fre[7] eq "-"){$ff3=0;}else {$ff3=$fre[7]}
		if ($fre[8] eq "-"){$ff4=0;}else {$ff4=$fre[8]}
		#only the two allels in one SNP are accepted.
		$to1 = $ff1 + $ff2;
		$to2 = $ff3 + $ff4;
		if ($to1 != $fre[1]){next;}
		if ($to2 != $fre[6]){next;}
		#filtering out the same SNP pattern of wt and mu. 
		if ($ff1==0 && $ff3==0 && $ff2!=0 && $ff4!=0){next;}
		if ($ff2==0 && $ff4==0 && $ff1!=0 && $ff3!=0){next;}
		#SNP ratio > 0.2 is accepted.
		if ($ff1 || $ff2 || $ff3 || $ff4){$snpfre = ($ff2+$ff4)/($ff1+$ff2+$ff3+$ff4)};
		if ($ff1==0 && $ff2==0 && $fre[6]>5){
			$fre = "0".","."0".",".$fre[7].",".$fre[8];
			open (OUTPUT, ">>$bamdir/$wtbam.$mubam.mileup.cns.ins");
			print OUTPUT "$snppo,$refvar,$fre\n";	
		}
		if (($ff1>5 || $ff2>5) && $snpfre>0.2 && $snpfre<0.8 && $fre[6]>5){
			open (OUTPUT, ">>$bamdir/$wtbam.$mubam.mileup.cns.snps");
			print OUTPUT "$snppo,$refvar,$fre\n";
		}
		if ($ff3==0 && $ff4==0 && $fre[1]>5){
			$fre = $fre[2].",".$fre[3].","."0".","."0";
			open (OUTPUT, ">>$bamdir/$wtbam.$mubam.mileup.cns.del");
			print OUTPUT "$snppo,$refvar,$fre\n";
		}
		if (($ff1 > $ff2 && $ff3 < $ff4 && $ff1 >5 && $ff4 >5) || ($ff1 < $ff2 && $ff3 > $ff4 && $ff2 >5 && $ff3 >5)){
			open (OUTPUT, ">>$bamdir/$wtbam.$mubam.mileup.cns.diff");
			print OUTPUT "$snppo,$refvar,$fre\n";
		}

	}
	close OUTPUT;
	close WTMUCNS;
	return %chle;
}






# define command line arguments
my (@e_bam, @b_bam, @e_fqp_wt, @e_fqp_mu, @e_fqu_wt, @e_fqu_mu, @b_fqp_wt, @b_fqp_mu, @b_fqu_wt, @b_fqu_mu, $reference, $gtf, $output, $cpus);

my $result = &GetOptions("e_bam:s{0,}" => \@e_bam,
						 "b_bam:s{0,}" => \@b_bam,
						 "e_fqp_wt:s{0,}" => \@e_fqp_wt,
						 "e_fqp_mu:s{0,}" => \@e_fqp_mu,
						 "e_fqu_wt:s{0,1}" => \@e_fqu_wt,
						 "e_fqu_mu:s{0,1}" => \@e_fqu_mu,
						 "b_fqp_wt:s{0,}" => \@b_fqp_wt,
						 "b_fqp_mu:s{0,}" => \@b_fqp_mu,
						 "b_fqu_wt:s{0,1}" => \@b_fqu_wt,
						 "b_fqu_mu:s{0,1}" => \@b_fqu_mu,
						 "reference=s{1}" => \$reference,
						 "gtf=s{1}" => \$gtf,
						 "output|o=s{1}" => \$output,
						 "threads|t=i{0,}" => \$cpus,
						 "log!" => \$log,);
## Print help info.
unless ($result && ((scalar(@e_bam)+scalar(@e_fqp_wt)+scalar(@e_fqu_wt)+scalar(@e_fqp_mu)+scalar(@e_fqu_mu)>0) || (scalar(@b_bam)+scalar(@b_fqp_wt)+scalar(@b_fqu_wt)+scalar(@b_fqp_mu)+scalar(@b_fqu_mu)>0)) && defined($reference) && defined($gtf) && defined($output)) {
	print STDERR sprintf("\n");
	print STDERR sprintf("BSRE analysis pipeline\n");
	print STDERR sprintf("=============================================================\n");
	print STDERR sprintf("USAGE:\n");
	print STDERR sprintf("  perl %s [--e_bam <exome bam files>] [--b_bam <bsr bam files>] [--e_fqp_wt <exome fastq paired files for wt>] [--e_fqp_mu <exome fastq paired files for mu>]\n", $0);
	print STDERR sprintf("[--e_fqu_wt <exome fastq single_end files for wt>] [--e_fqu_mu <exome fastq single_end files for mu>] [--b_fqp_wt <bsr fastq paired files for wt>]\n");
	print STDERR sprintf("[--b_fqp_mu <bsr fastq paired files for mu>] [--b_fqu_wt <bsr fastq single_end files for wt>] [--b_fqu_mu <bsr fastq single_end files for mu>] \n");
	print STDERR sprintf("[--threads|-t <CPU cores or threads>] --reference <genome reference fasta file> --gtf <gtf annotation file> --output|-o <output.prefix> [OPTIONS]\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("WHERE:\n");
	print STDERR sprintf("  --e_bam <exome bam files>         : Path to exome alignment output files generated by bowtie2. The specified files should be of wild type and mutant.\n");
	print STDERR sprintf("  --b_bam <bsr bam files>           : Path to bsr alignment output files generated by bowtie2. The specified files should be of wild type and mutant.\n");
	print STDERR sprintf("                                          Refer to suggested bowtie2 program parameters below.\n");
	print STDERR sprintf("  --e_fqp_wt <exome fastq paired files for wt>  : Path to the two fastq files with pair-end reads for wild type for exome-seq analysis.\n");
	print STDERR sprintf("  --e_fqp_mu <exome fastq paired files for mu>  : Path to the two fastq files with pair-end reads for mutant for exome-seq analysis.\n");
	print STDERR sprintf("  --e_fqu_wt <exome fastq single_end files for wt>     : Path to the fastq files with single-end reads for wild type for exome-seq analysis.\n");
	print STDERR sprintf("  --e_fqu_mu <exome fastq single_end files for mu>     : Path to the fastq files with single-end reads for mutant for exome-seq analysis.\n");
	print STDERR sprintf("  --b_fqp_wt <bsr fastq paired files for wt>  : Path to the two fastq files with pair-end reads for wild type for bsr-seq analysis.\n");
	print STDERR sprintf("  --b_fqp_mu <bsr fastq paired files for mu>  : Path to the two fastq files with pair-end reads for mutant for bsr-seq analysis.\n");
	print STDERR sprintf("  --b_fqu_wt <bsr fastq single_end files for wt>     : Path to the fastq files with single-end reads for wild type for bsr-seq analysis.\n");
	print STDERR sprintf("  --b_fqu_mu <bsr fastq single_end files for mu>     : Path to the fastq files with single-end reads for mutant for bsr-seq analysis.\n");
	print STDERR sprintf("  The above fq options are only required if mapping by bowtie2 needed, unless e_bam and b_bam files are provided.\n");
	print STDERR sprintf("  --reference <genome reference fasta file>    : Path to the fasta reference files.\n");
	print STDERR sprintf("  --gtf <gtf annotation file>                  : Path to the GTF annotation files.\n");
	print STDERR sprintf("  --threads|-t <CPU cores or threads>   : Multiple threads\n");
	print STDERR sprintf("  --output|-o <output.prefix>           : Path to output files' prefix\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("OPTIONS:\n");
	print STDERR sprintf("  --log|--nolog                     : Enable/Disable the cleaning of temporary files [DEFAULT: --log]\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("Example: perl BSRE_analysis_pipeline_v0.pl --e_fqp_wt=B73.test_1.fastq B73.test_2.fastq --e_fqp_mu=937.test_1.fastq 937.test_2.fastq --b_fqp_wt=wt_1.fastq wt_2.fastq --b_fqp_mu=mu_1.fastq mu_2.fastq --reference=maize.3_4.ref.fa --gtf=GRMZM2G139797.gtf --threads=10 --output=test");
	print STDERR sprintf("BOWTIE2 commands:\n");
	print STDERR sprintf("  %% bowtie2 -x reference -1 fastq1 -2 fastq2 -I 0 -X 1000 -S output.sam\n");
	print STDERR sprintf("\n");
	print STDERR sprintf("\n");
	exit();
} # End of unless statement

# assigning default values
$cpus = 1 if (!defined($cpus) || $cpus !~ m/^\d+$/);
$log = "true" if (!defined($log));

my $e_mapping_input = scalar(@e_bam);
my $b_mapping_input = scalar(@b_bam);
my $e_fastq_input = scalar(@e_fqp_wt) + scalar(@e_fqp_mu) + scalar(@e_fqu_wt) + scalar(@e_fqu_mu);
my $b_fastq_input = scalar(@b_fqp_wt) + scalar(@b_fqp_mu) + scalar(@b_fqu_wt) + scalar(@b_fqu_mu);

#check if there are input files.
if ($e_mapping_input == 0 && $e_fastq_input == 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Warning: No exome input file specified, you must at least specify bam input files by bowtie2 and bwa, or specify fastq files\n");
	print STDERR sprintf("\n");
} # End of if statmenet
if ($b_mapping_input == 0 && $b_fastq_input == 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Warning: No bsr input file specified, you must at least specify bam input files by bowtie2 and bwa, or specify fastq files\n");
	print STDERR sprintf("\n");
} # End of if statmenet
if ($b_mapping_input == 0 && $b_fastq_input == 0 && $e_mapping_input == 0 && $e_fastq_input == 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Error: No bsr & exome input file specified, you must at least specify bam input files by bowtie2 and bwa, or fastq files for bsr or exome analysis\n");
	print STDERR sprintf("\n");
	exit();	
} # End of if statmenet

#check if input files are paired for wild type and mutant. 
if ( ( (scalar(@e_fqp_wt) +scalar(@e_fqu_wt))>0 && (scalar(@e_fqp_mu) +scalar(@e_fqu_mu))==0 ) || ( (scalar(@e_fqp_wt) +scalar(@e_fqu_wt))==0 && (scalar(@e_fqp_mu) +scalar(@e_fqu_mu))>0 ) ) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("ERROR: exome fastq input files are not paired for wild type and mutant.\n");
	print STDERR sprintf("\n");
	exit();
} # End of if statmenet
if ( ( (scalar(@b_fqp_wt) +scalar(@b_fqu_wt))>0 && (scalar(@b_fqp_mu) +scalar(@b_fqu_mu))==0 ) || ( (scalar(@b_fqp_wt) +scalar(@b_fqu_wt))==0 && (scalar(@b_fqp_mu) +scalar(@b_fqu_mu))>0 ) ) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("ERROR: bsr fastq input files are not paired for wild type and mutant.\n");
	print STDERR sprintf("\n");
	exit();
} # End of if statmenet

#check if bam files provided and mapping needed. 
if ($e_mapping_input > 0 && $e_fastq_input > 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("WARNING: ignore exome bam files, and processing exome fastq files\n");
	print STDERR sprintf("\n");
} # End of if statmenet
if ($b_mapping_input > 0 && $b_fastq_input > 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("WARNING: ignore bsr bam files, and processing exome fastq files\n");
	print STDERR sprintf("\n");
} # End of if statmenet

# generate running ID and running folders.
my $run_id = &generateID();
system "mkdir $run_id";
system "mkdir $run_id/bamsam";
my $resultdir = "$pwd/$run_id";
my $bamdir = "$pwd/$run_id/bamsam";
my ($e_ffile_wt, $e_ffile_mu, $b_ffile_wt, $b_ffile_mu);#bam files for merging use in samtools.
my $starttime;
$starttime = timelocal(localtime(time));	#local current time.

#Indexing bowtie2 database.
if ($e_fastq_input > 0 || $b_fastq_input > 0) {
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Processing: Indexing bowtie2 database\n");
	print STDERR sprintf("\n");
	system "bowtie2-build $reference $bamdir/$reference.bw2 1>$bamdir/log.txt";
}

#running mapping and merging bam files for PE and SE.
#running mapping for exome
if ($e_fastq_input > 0 && $e_mapping_input == 0) {
	#pair end alignment.
	#pair end alignment for wt.
	if (scalar(@e_fqp_wt)>0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -1 $e_fqp_wt[0] -2 $e_fqp_wt[1] -I 0 -X 1000 -S $bamdir/e_wt.PE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/e_wt.PE.bam $bamdir/e_wt.PE.sam 1>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/e_wt.PE.bam -o $bamdir/e_wt.PE.sorted.bam 1>$bamdir/log.txt";
		$e_ffile_wt .= "e_wt.PE.sorted.bam ";
	}
	#pair end alignment for mu.
	if (scalar(@e_fqp_mu) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -1 $e_fqp_mu[0] -2 $e_fqp_mu[1] -I 0 -X 1000 -S $bamdir/e_mu.PE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/e_mu.PE.bam $bamdir/e_mu.PE.sam 1>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/e_mu.PE.bam -o $bamdir/e_mu.PE.sorted.bam 1>$bamdir/log.txt";
		$e_ffile_mu .= "e_mu.PE.sorted.bam ";	
	}
	#single end alignment for wt.
	if (scalar(@e_fqu_wt) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -U $e_fqu_wt[0] -S $bamdir/e_wt.SE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/e_wt.SE.bam $bamdir/e_wt.SE.sam 1>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/e_wt.SE.bam -o $bamdir/e_wt.SE.sorted.bam 1>$bamdir/log.txt";
		$e_ffile_wt .= "e_wt.SE.sorted.bam ";
	}
	#single end alignment for mu.
	if (scalar(@e_fqu_mu) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -U $e_fqu_mu[0] -S $bamdir/e_mu.SE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/e_mu.SE.bam $bamdir/e_mu.SE.sam 1>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/e_mu.SE.bam -o $bamdir/e_mu.SE.sorted.bam 1>$bamdir/log.txt";	
		$e_ffile_mu .= "e_mu.SE.sorted.bam ";	
	}
	
	#check and merge all bam files.
	if ($e_ffile_wt =~ /PE/ && $e_ffile_wt =~ /SE/){
		system "samtools merge $bamdir/e_wt.merged.bam $e_ffile_wt";
		system "samtools index $bamdir/e_wt.merged.bam";
	}
	elsif ($e_ffile_wt =~ /PE/ && $e_ffile_wt !~ /SE/){system "cp $bamdir/e_wt.PE.sorted.bam $bamdir/e_wt.merged.bam";}
	elsif ($e_ffile_wt =~ /SE/ && $e_ffile_wt !~ /PE/){system "cp $bamdir/e_wt.SE.sorted.bam $bamdir/e_wt.merged.bam";}
	else {
		print STDERR sprintf("\n\n");
		print STDERR sprintf("ERROR: No wt bam files found. \n");
		print STDERR sprintf("\n");
		exit();
	}
	
	if ($e_ffile_mu =~ /PE/ && $e_ffile_mu =~ /SE/){
		system "samtools merge $bamdir/e_mu.merged.bam $e_ffile_mu";
		system "samtools index $bamdir/e_mu.merged.bam";
	}
	elsif ($e_ffile_mu =~ /PE/ && $e_ffile_mu !~ /SE/){system "cp $bamdir/e_mu.PE.sorted.bam $bamdir/e_mu.merged.bam";}
	elsif ($e_ffile_mu =~ /SE/ && $e_ffile_mu !~ /PE/){system "cp $bamdir/e_mu.SE.sorted.bam $bamdir/e_mu.merged.bam";}
	else {
		print STDERR sprintf("\n\n");
		print STDERR sprintf("ERROR: No mu bam files found. \n");
		print STDERR sprintf("\n");
		exit();
	}

}#end of exome mapping for wt and mu.
#running mapping for bsr
if ($b_fastq_input > 0 && $b_mapping_input == 0) {
	#pair end alignment.
	#pair end alignment for wt.
	if (scalar(@b_fqp_wt)>0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -1 $b_fqp_wt[0] -2 $b_fqp_wt[1] -I 0 -X 1000 -S $bamdir/b_wt.PE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/b_wt.PE.bam $bamdir/b_wt.PE.sam 2>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/b_wt.PE.bam -o $bamdir/b_wt.PE.sorted.bam 2>$bamdir/log.txt";
		$b_ffile_wt .= "b_wt.PE.sorted.bam ";
	}
	#pair end alignment for mu.
	if (scalar(@b_fqp_mu) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -1 $b_fqp_mu[0] -2 $b_fqp_mu[1] -I 0 -X 1000 -S $bamdir/b_mu.PE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/b_mu.PE.bam $bamdir/b_mu.PE.sam 2>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/b_mu.PE.bam -o $bamdir/b_mu.PE.sorted.bam 2>$bamdir/log.txt";
		$b_ffile_mu .= "b_mu.PE.sorted.bam ";	
	}
	#single end alignment for wt.
	if (scalar(@b_fqu_wt) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -U $b_fqu_wt[0] -S $bamdir/b_wt.SE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/b_wt.SE.bam $bamdir/b_wt.SE.sam 2>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/b_wt.SE.bam -o $bamdir/b_wt.SE.sorted.bam 2>$bamdir/log.txt";
		$b_ffile_wt .= "b_wt.SE.sorted.bam ";
	}
	#single end alignment for mu.
	if (scalar(@b_fqu_mu) >0){
		system "bowtie2 -p $cpus -x $bamdir/$reference.bw2 -U $b_fqu_mu[0] -S $bamdir/b_mu.SE.sam 1>$bamdir/log.txt";
		system "samtools view -bS -@ $cpus -o $bamdir/b_mu.SE.bam $bamdir/b_mu.SE.sam 2>$bamdir/log.txt";
		system "samtools sort -@ $cpus $bamdir/b_mu.SE.bam -o $bamdir/b_mu.SE.sorted.bam 2>$bamdir/log.txt";	
		$b_ffile_mu .= "b_mu.SE.sorted.bam ";	
	}
	
	#check and merge all bam files.
	if ($b_ffile_wt =~ /PE/ && $b_ffile_wt =~ /SE/){
		system "samtools merge $bamdir/b_wt.merged.bam $b_ffile_wt";
		system "samtools index $bamdir/b_wt.merged.bam";
	}
	elsif ($b_ffile_wt =~ /PE/ && $b_ffile_wt !~ /SE/){system "cp $bamdir/b_wt.PE.sorted.bam $bamdir/b_wt.merged.bam";}
	elsif ($b_ffile_wt =~ /SE/ && $b_ffile_wt !~ /PE/){system "cp $bamdir/b_wt.SE.sorted.bam $bamdir/b_wt.merged.bam";}
	else {
		print STDERR sprintf("\n\n");
		print STDERR sprintf("ERROR: No wt bam files found. \n");
		print STDERR sprintf("\n");
		exit();
	}
	
	if ($b_ffile_mu =~ /PE/ && $b_ffile_mu =~ /SE/){
		system "samtools merge $bamdir/b_mu.merged.bam $b_ffilb_mu";
		system "samtools index $bamdir/b_mu.merged.bam";
	}
	elsif ($b_ffile_mu =~ /PE/ && $b_ffile_mu !~ /SE/){system "cp $bamdir/b_mu.PE.sorted.bam $bamdir/b_mu.merged.bam";}
	elsif ($b_ffile_mu =~ /SE/ && $b_ffile_mu !~ /PE/){system "cp $bamdir/b_mu.SE.sorted.bam $bamdir/b_mu.merged.bam";}
	else {
		print STDERR sprintf("\n\n");
		print STDERR sprintf("ERROR: No mu bam files found. \n");
		print STDERR sprintf("\n");
		exit();
	}

}#end of bsr mapping for wt and mu.


#what if bam files are provided.
if ($e_fastq_input == 0 && $e_mapping_input > 0){
	system "cp $e_bam[0] $bamdir/e_wt.merged.bam";
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Processing: $e_bam[0] as exome wt\n");
	print STDERR sprintf("\n");
	system "cp $e_bam[1] $bamdir/e_mu.merged.bam";
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Processing: $e_bam[1] as exome mu\n");
	print STDERR sprintf("\n");
}

if ($b_fastq_input == 0 && $b_mapping_input > 0){
	system "cp $b_bam[0] $bamdir/b_wt.merged.bam";
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Processing: $b_bam[0] as bsr wt\n");
	print STDERR sprintf("\n");
	system "cp $b_bam[1] $bamdir/b_mu.merged.bam";
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Processing: $b_bam[1] as bsr mu\n");
	print STDERR sprintf("\n");
}

# check if there are wt and mu bam files ready for following process.
my ($e_wtflag, $e_muflag, $b_wtflag, $b_muflag);
opendir(DIR, $bamdir) or die $!;
while (my $file = readdir(DIR)) {
	if ($file =~ m/e_wt.merged.bam/){
		$e_wtflag=1;
		print STDERR sprintf("\n\n");
		print STDERR sprintf("e_wt.merged.bam is present for following use\n");
		print STDERR sprintf("\n");
	}
	if ($file =~ m/e_mu.merged.bam/){
		$e_muflag=1;
		print STDERR sprintf("\n\n");
		print STDERR sprintf("e_mu.merged.bam is present for following use\n");
		print STDERR sprintf("\n");
	}
	if ($file =~ m/b_wt.merged.bam/){
		$b_wtflag=1;
		print STDERR sprintf("\n\n");
		print STDERR sprintf("b_wt.merged.bam is present for following use\n");
		print STDERR sprintf("\n");
	}
	if ($file =~ m/b_mu.merged.bam/){
		$b_muflag=1;
		print STDERR sprintf("\n\n");
		print STDERR sprintf("b_mu.merged.bam is present for following use\n");
		print STDERR sprintf("\n");
	}
}
closedir(DIR);


my (%echle, %echle2, %bchle);

# exome-seq for plotting
if ((!$e_wtflag || !$e_muflag) && (!$b_wtflag || !$b_muflag)){print STDERR sprintf("Error: no wt.merged.bam or mu.merged.bam\n");exit();}
if ($e_wtflag && $e_muflag){
	%echle = &mileup2varscan2cns($bamdir, "e_wt.merged.bam", "e_mu.merged.bam", $reference);
	%echle2 = &bam2depth2exons($bamdir, "e_wt.merged.bam", "e_mu.merged.bam", $gtf);
	my $ecnsflag = "$bamdir/e_wt.merged.bam.e_mu.merged.bam.mileup.cns.diff";
	
	my $emychname;my $emychle;my $efirstch;
	for $echlekey (sort keys %echle) {
		if (!$efirstch){$efirstch = "chr".$echlekey;}
		if (!$emychname){$emychname =$echlekey;}else{$emychname .= ",".$echlekey;}
		if (!$emychle){$emychle =$echle{$echlekey};}else{$emychle .= ",".$echle{$echlekey};}
	}
	
	my $plotfilename_exome = "$ecnsflag.posdens";
	print "$bamdir, $output, $efirstch, $emychname, $emychle, $plotfilename_exome\n";
	&chromplotggbio($bamdir, $efirstch, $emychname, $emychle, $plotfilename_exome);
	&chromplot($bamdir, $emychname, $plotfilename_exome);
}

# bsr-seq for plotting
if ($b_wtflag && $b_muflag){
# call for SNP/InDel (CNS).
	%bchle = &mileup2varscan2cns($bamdir, "b_wt.merged.bam", "b_mu.merged.bam", $reference);
# cns density moving.
	$bcnsflag = "$bamdir/b_wt.merged.bam.b_mu.merged.bam.mileup.cns.snps";
	&densitymoving($bcnsflag);
	# plot the CNS in the genome coordinates.
	my $bmychname;my $bmychle;my $bfirstch;
	for $bchlekey (sort keys %bchle) {
		if (!$bfirstch){$bfirstch = "chr".$bchlekey;}
		if (!$bmychname){$bmychname =$bchlekey;}else{$bmychname .= ",".$bchlekey;}
		if (!$bmychle){$bmychle =$bchle{$bchlekey};}else{$bmychle .= ",".$bchle{$bchlekey};}
	}
	my $plotfilename_bsr = "$bcnsflag.posdens";
	print "$bamdir, $output, $bfirstch, $bmychname, $bmychle, $plotfilename_bsr\n";
	&chromplotggbio($bamdir, $bfirstch, $bmychname, $bmychle, $plotfilename_bsr);
	&chromplot($bamdir, $bmychname, $plotfilename_bsr);

}


my $total_seconds;
	my $endtime = timelocal(localtime(time));
	my $diff = $endtime - $starttime;
	$total_seconds += $diff;		# Accumulate total run-time
	print STDERR sprintf("\n\n");
	print STDERR sprintf("Parsing CNS finished, used time: %s\n", &formatTime($diff));	
	print STDERR sprintf("\n");
	
$endtime = timelocal(localtime(time));
$diff = $endtime - $starttime;
print STDERR sprintf("\n\n");
print STDERR sprintf("All finished, total time: %s\n", &formatTime($diff));	
print STDERR sprintf("\n");	
	