#!/usr/bin/perl
use Statistics::Basic qw(:all);
use Math::CDF qw(:all);
use Cwd;
use File::Basename;



# ---------------------------------------------------------------------------------------------------------
# Package-level state shared by the subroutines in this file.
# ---------------------------------------------------------------------------------------------------------
# Annotation file given with -g (assigned in check_commond_line).
our $gff_file=();
# Alignment files given with -p, exactly as typed on the command line.
our @positive_file=();
# Last path component of every entry of @positive_file; used to name the per-sample output files.
our @positive_name=();
# Alignment files given with -n, exactly as typed on the command line.
our @negative_file=();
# Last path component of every entry of @negative_file.
our @negative_name=();
# Gene names in file order; filled in globle_normal from the .local_normalized files.
our @all_gene_name=();
# Standard deviation of the transcript lengths; assigned in local_normalization.
our $transcript_length_sd=0;
# Mean transcript length; assigned in feature_extract.
our $average_transcript_length=0;
# Per-gene transcript lengths; filled in gene_position_information.
our @transcript_length=();
# Directory that receives every intermediate and result file; overridden by -o.
our $output_dir="results";
# One normalization factor per sample; filled in DESeq_normal_factor.
our @globle_normal_factor=();
# Number of positive / negative samples; assigned in check_commond_line.
our $positive_sample_size=0;
our $negative_sample_size=0;
# Read length; this default is overwritten in gene_position_information from the first alignment record.
our $reads_length=50;
# "YES" or "NO"; check_paired_end switches it to "YES" when it sees paired-end records.
our $paired_end_flag="NO";

# ---------------------------------------------------------------------------------------------------------
# Main pipeline: parse the command line, compute per-position depth for every sample, normalize the
# counts locally and globally, extract the per-gene features and finally hand them to the R classifier.
# ---------------------------------------------------------------------------------------------------------
$commond_line=join(" ",@ARGV);
check_commond_line($commond_line);
# mkdir() also returns false when the directory already exists, so only a directory that is still
# missing afterwards is treated as an error.
mkdir($output_dir);
if(!-d $output_dir){die "Can not create output directory: $output_dir\n";}

print "\n\n";
print "*************************************************************************\n";
print "*** Usually, it takes about 10 minutes for 1GB data. Please be patient **\n";
print "*************************************************************************\n";
check_paired_end();
foreach my $key (@positive_file){gene_position_information($key);}
foreach my $key (@negative_file){gene_position_information($key);}
foreach my $key (@positive_name){local_normalization($key);}
foreach my $key (@negative_name){local_normalization($key);}
globle_normal();
feature_extract();
#foreach $key (@positive_name){unlink("$output_dir/$key.GC");}
#foreach $key (@negative_name){unlink("$output_dir/$key.GC");}
#foreach $key (@positive_name){unlink("$output_dir/$key.position-depth");}
#foreach $key (@negative_name){unlink("$output_dir/$key.position-depth");}

# Directory that contains this script; the training data and the R predictor are looked up next to it.
if ($0 =~ m{^/})
{
  $current_dir = dirname($0);
}
else
{
  $current_dir = dirname(getcwd()."/$0");
}

print "$current_dir\n";
$file_model="$current_dir/training_dataset/training.txt";
$file_feature="$output_dir/gene_feature.txt";
$file_results="$output_dir/results.txt";

# List form of system() bypasses the shell, so paths containing spaces or shell
# metacharacters reach Rscript unchanged.
my @predict_command=("Rscript","$current_dir/NaiveBayes_R/predict.r",$file_model,$file_feature,$file_results);
if(system(@predict_command)!=0)
{
	die "Rscript prediction failed (status $?): @predict_command\n";
}

#########################################################################################################
## feature_extract: builds the per-gene feature table that is handed to the classifier.
##
## Reads   $output_dir/reads-globle-normalized.txt (gene name, one normalized count per sample,
##         transcript length in the last column) and $output_dir/<sample>.GC for every sample.
## Writes  $output_dir/gene_feature.txt    (gene, logFC feature, ARPK feature, GC-content feature, "DE")
##         $output_dir/log-fold-change.txt (gene, log2 ratio of the two group means)
## Sets    the global $average_transcript_length.
## Takes no arguments; dies when a required file cannot be opened or no reads were counted.
#########################################################################################################
sub feature_extract
{
	my %mean_reads_sample_1=();
	my %mean_reads_sample_2=();
	my %transcript_length=();

	print "Extracting features..............\n";

	###########################################################################
	## group means of the globally normalized counts
	###########################################################################
	my $reads_file="$output_dir/reads-globle-normalized.txt";
	open(my $reads_fh,"<",$reads_file)||die "Can not open file: $reads_file";
	while(my $raw=<$reads_fh>)
	{
		$raw=~s/\n|\r//g;
		my @inf=split(/\s+/,$raw);
		next if(!@inf);		# blank line
		# column 0 is the gene name, then the positive samples, then the negative samples
		my @sample_1=@inf[1..$positive_sample_size];
		my @sample_2=@inf[$positive_sample_size+1..$positive_sample_size+$negative_sample_size];
		$mean_reads_sample_1{$inf[0]}=mean(@sample_1);
		$mean_reads_sample_2{$inf[0]}=mean(@sample_2);
		$transcript_length{$inf[0]}=$inf[-1];
	}
	close($reads_fh);
	$average_transcript_length=mean(values(%transcript_length));
	print "Mean of transcript lengths: $average_transcript_length\n";

	###########################################################################
	## GC content per gene, accumulated over every sample
	###########################################################################
	my $total_reads=0;
	my $total_GC=0;
	my %gene_GC=();
	my %gene_reads=();

	foreach my $sample (@positive_name,@negative_name)
	{
		my $gc_file="$output_dir/$sample.GC";
		open(my $gc_fh,"<",$gc_file)||die "Can not open file: $gc_file\n";
		# every gene is a three-line record: annotation line, GC counts, "reads<TAB>length"
		while(my $header=<$gc_fh>)
		{
			my $gc_line=<$gc_fh>;
			my $total_line=<$gc_fh>;
			last if(!defined($gc_line)||!defined($total_line));	# truncated record
			$header=~s/\n|\r//g;
			$gc_line=~s/\n|\r//g;
			$total_line=~s/\n|\r//g;
			my @inf=split(/\s+|;|=/,$header);
			my $gene=$inf[9];	# value of the first key=value attribute on the annotation line
			foreach my $gc_count (split(/\s/,$gc_line))
			{
				$gene_GC{$gene}+=$gc_count;
				$total_GC+=$gc_count;
			}
			my @totals=split(/\s/,$total_line);
			$gene_reads{$gene}+=$totals[0];
			$total_reads+=$totals[0];
		}
		close($gc_fh);
	}
	if($total_reads*$reads_length<=0)
	{
		die "No reads were counted for any gene; can not compute the GC content\n";
	}
	my $mean_GC=$total_GC/($total_reads*$reads_length);
	print "mean_GC: $mean_GC\n";

	###########################################################################
	## write the features
	###########################################################################
	open(my $feature_fh,">","$output_dir/gene_feature.txt")||die "Can not open file: >$output_dir/gene_feature.txt\n";
	open(my $logfc_fh,">","$output_dir/log-fold-change.txt")||die "Can not open file: >$output_dir/log-fold-change.txt\n";
	foreach my $gene (keys(%mean_reads_sample_1))
	{
		my $mean_1=$mean_reads_sample_1{$gene};
		my $mean_2=$mean_reads_sample_2{$gene};

		# log2 fold change; it stays 0 when both groups are below one read, otherwise each
		# mean is raised to at least 1 first (the raised means are also used for ARPK below)
		my $log_fc=0;
		if(!($mean_1<1&&$mean_2<1))
		{
			if($mean_1<1){$mean_1=1;}
			if($mean_2<1){$mean_2=1;}
			$log_fc=log($mean_1/$mean_2)/log(2);
		}
		print {$logfc_fh} "$gene\t$log_fc\n";

		my $logfc_feature=1/(1+abs($log_fc));	####logFC feature

		my $mean_sum=abs($mean_1+$mean_2);
		my $arpk_feature=1;	####ARPK feature
		if($mean_sum>1)
		{
			$arpk_feature=1/(1+abs(log(500*$mean_sum/$transcript_length{$gene})/log(2)));
		}

		# GC fraction of this gene's reads; 0 when the gene received no reads
		my $gene_gcc=0;
		if(exists($gene_reads{$gene})&&$gene_reads{$gene}>0)
		{
			my $gc_sum=exists($gene_GC{$gene})?$gene_GC{$gene}:0;
			$gene_gcc=$gc_sum/($gene_reads{$gene}*$reads_length);
		}
		# with no G/C base at all every gene has GC fraction 0, i.e. a ratio of 0
		my $gc_feature=($mean_GC>0)?1/(1+$gene_gcc/$mean_GC):1;	##GC-content feature

		print {$feature_fh} join("\t",$gene,$logfc_feature,$arpk_feature,$gc_feature,"DE"),"\n";
	}
	close($feature_fh)||die "Can not write file: $output_dir/gene_feature.txt\n";
	close($logfc_fh)||die "Can not write file: $output_dir/log-fold-change.txt\n";
}

#########################################################################################################
## globle_normal: applies the global (between-sample) normalization to the locally normalized counts.
##
## Reads   $output_dir/<sample>.local_normalized for every positive, then every negative sample.
## Writes  $output_dir/data-for-globle-normal.txt (count matrix, one row per gene, one column per sample)
##         $output_dir/reads-globle-normalized.txt (gene, normalized counts, transcript length).
## Fills   the global @all_gene_name and, through DESeq_normal_factor, @globle_normal_factor.
## Takes no arguments; dies when a file cannot be opened or a normalization factor is unusable.
#########################################################################################################
sub globle_normal
{
	my @all_counts=();		# $all_counts[sample][gene]
	my @transcript_length=();
	my $size=0;			# number of genes in the most recently read sample

	print "Calculating globle normal factor\n";
	# gene names and transcript lengths are taken from the positive samples
	foreach my $sample (@positive_name)
	{
		my($names,$counts,$lengths)=_read_local_normalized($sample);
		@all_gene_name=@{$names};
		@transcript_length=@{$lengths};
		$size=scalar(@{$counts});
		push @all_counts,$counts;
	}
	foreach my $sample (@negative_name)
	{
		my($names,$counts,$lengths)=_read_local_normalized($sample);
		$size=scalar(@{$counts});
		push @all_counts,$counts;
	}
	my $sample_num=scalar(@all_counts);

	my $matrix_file="$output_dir/data-for-globle-normal.txt";
	open(my $matrix_fh,">",$matrix_file)||die "Can not open file: >$matrix_file\n";
	for(my $i=0;$i<$size;$i++)
	{
		# every value is followed by a tab, including the last one of the row
		for(my $j=0;$j<$sample_num;$j++)
		{
			print {$matrix_fh} $all_counts[$j][$i],"\t";
		}
		print {$matrix_fh} "\n";
	}
	# the matrix must be on disk before DESeq_normal_factor reads it back
	close($matrix_fh)||die "Can not write file: $matrix_file\n";

	################################################################################################
	DESeq_normal_factor();
	#################################################################################################
	for(my $j=0;$j<$sample_num;$j++)
	{
		print "normal factor for lane $j: ",$globle_normal_factor[$j],"\n";
		if(!$globle_normal_factor[$j])
		{
			die "Invalid normal factor for lane $j\n";
		}
	}

	my $normalized_file="$output_dir/reads-globle-normalized.txt";
	open(my $normalized_fh,">",$normalized_file)||die "Can not open file: >$normalized_file\n";
	for(my $i=0;$i<$size;$i++)
	{
		print {$normalized_fh} $all_gene_name[$i],"\t";
		for(my $j=0;$j<$sample_num;$j++)
		{
			$all_counts[$j][$i]=$all_counts[$j][$i]/$globle_normal_factor[$j];
			print {$normalized_fh} $all_counts[$j][$i],"\t";
		}
		print {$normalized_fh} "$transcript_length[$i]\n";
	}
	close($normalized_fh)||die "Can not write file: $normalized_file\n";
}

## Reads $output_dir/<sample>.local_normalized and returns three array references in file order:
## gene names (column 0), locally normalized counts (column 1) and transcript lengths (column 3).
sub _read_local_normalized
{
	my($sample)=@_;
	my @names=();
	my @counts=();
	my @lengths=();
	my $file="$output_dir/$sample.local_normalized";
	open(my $in_fh,"<",$file)||die "Can not open file: $file\n";
	while(my $raw=<$in_fh>)
	{
		$raw=~s/\n|\r//g;
		my @raw_inf=split(/\s+/,$raw);
		push @names,$raw_inf[0];
		push @counts,$raw_inf[1];
		push @lengths,$raw_inf[3];
	}
	close($in_fh);
	return(\@names,\@counts,\@lengths);
}

#########################################################################################################
## local_normalization: computes the locally normalized read count of every gene of one sample.
##
## Input   $file_name - sample name; $output_dir/<name>.position-depth is read.
## Output  $output_dir/<name>.local_normalized with one line per gene:
##         gene <TAB> clamped depth sum <TAB> raw read count <TAB> transcript length
## Sets    the global $transcript_length_sd.
## Each per-position depth is clamped into [low bound, up bound] before it is summed; the bounds are
## Poisson quantiles (qpois) for the gene's mean depth per base.
## Dies when a file cannot be opened or the input contains no gene record.
#########################################################################################################
sub local_normalization
{
	my($file_name)=@_;
	my @gene_name=();
	my %position_depth=();
	my %reads_num=();
	my %transcript_length=();
	my $all_transcript_length=0;
	print "Doing local normalization for $file_name\n";

	my $depth_file="$output_dir/$file_name.position-depth";
	my $normalized_file="$output_dir/$file_name.local_normalized";
	open(my $in_fh,"<",$depth_file)||die "Can not open file: $depth_file\n";
	open(my $out_fh,">",$normalized_file)||die "Can not open file: >$normalized_file\n";

	# every gene is a three-line record: annotation line, depths, "reads<TAB>length"
	while(my $header=<$in_fh>)
	{
		my $depth_line=<$in_fh>;
		my $total_line=<$in_fh>;
		last if(!defined($depth_line)||!defined($total_line));	# truncated record
		$header=~s/\n|\r//g;
		$depth_line=~s/\n|\r//g;
		$total_line=~s/\n|\r//g;
		my @header_inf=split(/=|;/,$header);
		my $gene=$header_inf[1];	# value of the first key=value attribute
		my @totals=split(/ |\t/,$total_line);
		push @gene_name,$gene;
		$position_depth{$gene}=$depth_line;
		$reads_num{$gene}=$totals[0];
		$transcript_length{$gene}=$totals[1];
		$all_transcript_length+=$totals[1];
	}
	close($in_fh);

	my $gene_num=scalar(@gene_name);
	if($gene_num==0)
	{
		die "No gene record found in file: $depth_file\n";
	}

	# rounded mean and (population) standard deviation of the transcript lengths
	my $average_transcript_length=int($all_transcript_length/$gene_num+0.5);
	my $square_sum=0;
	foreach my $gene (@gene_name)
	{
		my $deviation=$transcript_length{$gene}-$average_transcript_length;
		$square_sum+=$deviation*$deviation;
	}
	$transcript_length_sd=sqrt($square_sum/$gene_num);
	my $length_cap=$average_transcript_length+2*$transcript_length_sd;

	foreach my $gene (@gene_name)
	{
		# effective length: at least 300, at most mean + 2 standard deviations
		my $len=$transcript_length{$gene};
		if($transcript_length{$gene}<300)
		{
			$len=300;
		}
		if($transcript_length{$gene}>$length_cap)
		{
			$len=$length_cap;
		}
		my $mean_depth=$reads_num{$gene}/$len;
		my $tail=1/(2*100*$len);	# tail probability used on both sides

		my $up_bound=qpois(1-$tail,$mean_depth);
		my $low_bound=qpois($tail,$mean_depth)+0;
		if($up_bound<1)
		{
			$up_bound=1;
		}
		else
		{
			$up_bound=int($up_bound);
		}
		# NOTE(review): an integer-valued quantile is reset to 0 here and only a fractional one is
		# rounded up - this looks like an attempted ceiling; behaviour kept as is, confirm the intent.
		if($low_bound==0||$low_bound==int($low_bound))
		{
			$low_bound=0;
		}
		else
		{
			$low_bound=int($low_bound)+1;
		}

		$position_depth{$gene}=~s/ $//g;
		my $sum=0;
		foreach my $depth (split(/ |\t/,$position_depth{$gene}))
		{
			if($depth>$up_bound)
			{
				$sum+=$up_bound;
			}
			elsif($depth<$low_bound)
			{
				$sum+=$low_bound;
			}
			else
			{
				$sum+=$depth;
			}
		}
		print {$out_fh} $gene,"\t","$sum\t",$reads_num{$gene},"\t",$transcript_length{$gene},"\n";
	}
	close($out_fh)||die "Can not write file: $normalized_file\n";
}



#########################################################################################################
## gene_position_information: counts, for one alignment file, how many reads start at every exonic
## position of every gene.
##
## Input   $sam_file - path of a SAM file.
## Output  $output_dir/<basename>.position-depth and $output_dir/<basename>.GC; both hold one
##         three-line record per "gene" line of the global $gff_file:
##           1. the annotation line itself
##           2. space-separated read-start counts (resp. G/C base counts) of the covered positions
##           3. "total reads <TAB> transcript length"
## Sets    the globals $reads_length (from the first alignment record, doubled for paired-end data)
##         and @transcript_length.
## For paired-end data (global $paired_end_flag eq "YES") a pair is counted once, at the smaller of
## the read and mate start positions.
## Dies when a file cannot be opened.
#########################################################################################################
sub gene_position_information
{
	my($sam_file)=@_;
	my %depth=();		# "chr-start" => number of reads (pairs) starting there
	my %GCC=();		# "chr-start" => number of G/C bases in those reads
	my %paired_end=();	# "read name-start" => times seen; keeps a pair from being counted twice
	my $total_mapped_reads=0;

	print "Reading position depth for $sam_file\n";
	my @sam_inf=split(/\//,$sam_file);
	my $output_file=$output_dir."/".$sam_inf[-1].".position-depth";
	my $output_GC=$output_dir."/".$sam_inf[-1].".GC";

	#####################################################################################
	## pass 1: read-start depth and GC count per position
	#####################################################################################
	my $is_paired=($paired_end_flag eq "YES");
	if($is_paired||$paired_end_flag eq "NO")
	{
		my $unit=$is_paired?"Mate":"Reads";
		open(my $sam_fh,"<",$sam_file)||die "can not open file: $sam_file\n";
		while(my $raw=<$sam_fh>)
		{
			next if($raw=~/^@/);			# header line
			my @inf=split(/\t| /,$raw);
			next if(scalar(@inf)<10);		# blank or malformed line
			# "chromosome" must be tried before "chr", otherwise only its first three letters go
			$inf[2]=~s/chromosome|chr//i;
			if($total_mapped_reads==0)
			{
				$reads_length=$is_paired?2*length($inf[9]):length($inf[9]);
			}
			$total_mapped_reads++;

			my $start=$inf[3];
			if($is_paired)
			{
				$start=min_mate_start($inf[3],$inf[7]);
				my $mate_key=$inf[0]."-".$start;
				if(!exists($paired_end{$mate_key}))
				{
					$depth{$inf[2]."-".$start}+=1;
				}
				$paired_end{$mate_key}+=1;
			}
			else
			{
				$depth{$inf[2]."-".$start}+=1;
			}
			$GCC{$inf[2]."-".$start}+=($inf[9]=~tr/GCgc//);

			if($total_mapped_reads%100000==0)
			{
				print "$unit number: $total_mapped_reads\n";
			}
		}
		close($sam_fh);
		if($is_paired)
		{
			print "Total mate for $sam_file: $total_mapped_reads\n";
		}
		else
		{
			print "Total reads number for $sam_file: $total_mapped_reads\n";
		}
		undef %paired_end;
	}

	#####################################################################################
	## pass 2: walk the annotation and emit one record per gene
	#####################################################################################
	print "Writing position depth for $sam_file .........................\n\n";
	open(my $gff_fh,"<",$gff_file)||die "can not open file: $gff_file\n";
	open(my $out_fh,">",$output_file)||die "can not open file: >$output_file\n";
	open(my $gc_fh,">",$output_GC)||die "can not open file: >$output_GC\n";

	my %transcript_position=();	# exonic positions of the gene currently being read
	my $chr_name=();
	my $gene_num=0;
	my $in_gene=0;

	# writes lines 2 and 3 of the record of the gene collected so far
	my $finish_gene=sub
	{
		my $total_reads=0;
		my $length=0;
		foreach my $position (keys(%transcript_position))
		{
			my $depth_key=$chr_name."-".$position;
			if(exists($depth{$depth_key}))
			{
				print {$out_fh} $depth{$depth_key}," ";
				print {$gc_fh} $GCC{$depth_key}," ";
				$total_reads+=$depth{$depth_key};
			}
			$length++;
		}
		$transcript_length[$gene_num]=$length;
		$gene_num++;
		print {$out_fh} "\n$total_reads\t$length\n";
		print {$gc_fh} "\n$total_reads\t$length\n";
	};

	# Reading until the handle is exhausted (rather than testing eof before each line is used)
	# makes sure the final line of the annotation is processed as well.
	while(my $raw=<$gff_fh>)
	{
		next if($raw=~/^#/);
		my @inf=split(/\t| /,$raw);
		next if(!defined($inf[2]));
		if($inf[2] eq "gene")
		{
			$finish_gene->() if($in_gene);
			%transcript_position=();
			$chr_name=$inf[0];
			$chr_name=~s/chromosome|chr//i;
			$in_gene=1;
			# a last line without line terminator must still end the annotation line
			$raw.="\n" if($raw!~/\n\z/);
			print {$out_fh} $raw;
			print {$gc_fh} $raw;
		}
		elsif($inf[2] eq "exon"&&$in_gene)
		{
			# overlapping exons share hash keys, so every position is counted once
			for(my $position=$inf[3];$position<=$inf[4];$position++)
			{
				$transcript_position{$position}++;
			}
		}
	}
	$finish_gene->() if($in_gene);

	close($gff_fh);
	close($out_fh)||die "can not write file: $output_file\n";
	close($gc_fh)||die "can not write file: $output_GC\n";
}

#########################################################################################################
## check_commond_line: validates the command line and fills the global parameters.
##
## Input   $commond_line - all command-line arguments joined with single spaces.
## Options -g <gff file>            exactly once
##         -p <positive files ...>  exactly once
##         -n <negative files ...>  exactly once
##         -o <output directory>    at most once (the default of $output_dir is kept otherwise)
## Sets    $gff_file, @positive_file, @positive_name, @negative_file, @negative_name, $output_dir,
##         $positive_sample_size and $negative_sample_size.
## Dies on a missing or repeated option; prints a warning and exits when the replicate numbers are
## inconsistent or the annotation does not look like a GFF file.
#########################################################################################################
sub check_commond_line
{
	my($commond_line)=@_;

	# An option only counts when it is a word of its own, so a file name such as "reads-p" is not
	# mistaken for a second -p.
	my $count_option=sub
	{
		my($letter)=@_;
		my $hits=()=$commond_line=~/(?<!\S)-$letter(?=\s)/g;
		return $hits;
	};
	if($count_option->("g")!=1){die "Error -g (gff file)\n";}
	if($count_option->("p")!=1){die "Error -p (positive files)\n";}
	if($count_option->("n")!=1){die "Error -n (negative files)\n";}
	my $count=$count_option->("o");
	if($count==0){print "Warning: -o is not specified. Default value \"results\" is used.\n";}
	if($count>1){die "Error -o (output direction)\n";}

	# Strips the directory part of a path.
	my $base_name=sub
	{
		my($path)=@_;
		$path=~s/^\///;
		my @name_inf=split(/\//,$path);
		return $name_inf[-1];
	};

	$commond_line=~s/^-//;
	foreach my $segment (split(/\s+-/,$commond_line))
	{
		my($option,@values)=split(/\s+/,$segment);
		if($segment=~/^g /)
		{
			$gff_file=$values[0];
		}
		if($segment=~/^p /)
		{
			@positive_file=@values;
			@positive_name=map {$base_name->($_)} @values;
		}
		if($segment=~/^n /)
		{
			@negative_file=@values;
			@negative_name=map {$base_name->($_)} @values;
		}
		if($segment=~/^o /&&defined($values[0]))
		{
			$output_dir=$values[0];
			$output_dir=~s/\/$//;
		}
	}
	if(!defined($gff_file)){die "Error -g (gff file)\n";}
	if(!@positive_file){die "Error -p (positive files)\n";}
	if(!@negative_file){die "Error -n (negative files)\n";}

	####################################################
	###check replicates
	####################################################
	$positive_sample_size=scalar(@positive_name);
	$negative_sample_size=scalar(@negative_name);
	if($positive_sample_size==1&&$negative_sample_size>1)
	{
		print "********\nWarning!!!!\nThe replicate of the positive group is 1, so that of negative group must be 1!\n********\n";
		exit;
	}
	if($positive_sample_size>1&&$negative_sample_size==1)
	{
		print "********\nWarning!!!!\nThe replicate of the negative group is 1, so that of positive group must be 1!\n********\n";
		exit;
	}

	####################################################
	###check GFF file
	####################################################
	# Counts double quotes and semicolons in the first 10000 lines after the leading run of lines
	# that contain '#'; very many of either makes the file be rejected.
	# NOTE(review): the original also tried to count "chromosome" feature lines, but tested a
	# misspelt variable, so that test never fired. It is not revived here because it would reject
	# annotations that are accepted today - confirm whether it was wanted.
	open(my $gff_fh,"<",$gff_file)||die "can not open file: $gff_file\n";
	my $sum1=0;	# double quotes
	my $sum2=0;	# semicolons
	my $started=0;
	my $examined=0;
	while($examined<10000)
	{
		my $raw=<$gff_fh>;
		last if(!defined($raw));	# file shorter than 10000 lines
		if(!$started)
		{
			next if($raw=~/#/);
			$started=1;
		}
		$sum1+=($raw=~tr/"//);
		$sum2+=($raw=~tr/;//);
		$examined++;
	}
	close($gff_fh);
	if($sum1>=40000||$sum2>=20000)
	{
		print "\n********\nWarning!!!!\nPlease check your GFF file\n********\n";
		exit;
	}
}


#########################################################################################################
## DESeq_normal_factor: computes one normalization factor per sample in the manner of DESeq
## (median, over genes, of the ratio of a count to the geometric mean of that gene's counts).
##
## Reads   $output_dir/data-for-globle-normal.txt (one row per gene, one column per sample).
## Fills   the global @globle_normal_factor (index = column).
## Only genes whose counts are all positive and whose mean log count is greater than 0 take part.
## Takes no arguments; dies when the matrix file cannot be opened.
#########################################################################################################
sub DESeq_normal_factor
{
	my @counts=();
	my $sample_num=0;

	my $matrix_file="$output_dir/data-for-globle-normal.txt";
	open(my $in_fh,"<",$matrix_file)||die "Can not open file: $matrix_file\n";
	while(my $raw=<$in_fh>)
	{
		$raw=~s/\n|\r//g;
		my @inf=split(/\s+/,$raw);
		$sample_num=scalar(@inf);	# the column count of the last row wins
		push @counts,[@inf];
	}
	close($in_fh);

	# log ratio of every count to the geometric mean of its gene, for the usable genes only
	my @log_ratios=();
	if($sample_num>0)
	{
		foreach my $gene_counts (@counts)
		{
			my $log_sum=0;
			my $has_zero=0;
			for(my $j=0;$j<$sample_num;$j++)
			{
				if($gene_counts->[$j]>0){$log_sum+=log($gene_counts->[$j]);}else{$has_zero=1;}
			}
			next if($has_zero);
			my $log_mean=$log_sum/$sample_num;
			next if(!($log_mean>0));
			push @log_ratios,[map {log($gene_counts->[$_])-$log_mean} (0..$sample_num-1)];
		}
	}

	# a fresh list per column, so that nothing from an earlier column or call can leak into the median
	@globle_normal_factor=();
	for(my $j=0;$j<$sample_num;$j++)
	{
		my @column=map {$_->[$j]} @log_ratios;
		my $u=median(@column);
		$globle_normal_factor[$j]=exp($u);
	}
}


#########################################################################################################
## check_paired_end: decides whether the first positive alignment file holds paired-end reads.
##
## Looks at up to 10000 mapped records of $positive_file[0]; a record whose columns 8 and 9 (mate
## position and template length) are both non-zero marks the data as paired-end.
## Sets the global $paired_end_flag to "YES" in that case and prints which kind was detected.
## Takes no arguments; dies when the file cannot be opened.
#########################################################################################################
sub check_paired_end
{
	my $flag=0;
	my $i=0;
	open(my $in_fh,"<",$positive_file[0])||die "Can not open file: $positive_name[0] in check_paired_end function\n";
	while($i<10000)
	{
		# a new line is read on every pass, so the scan advances and stops at end of file
		my $raw=<$in_fh>;
		last if(!defined($raw));
		next if($raw=~/^@/);				# header line
		my @inf=split(/\s+/,$raw);
		next if(!defined($inf[2])||$inf[2] eq "*");	# blank line or unmapped read
		$i++;
		if($inf[7]!=0&&$inf[8]!=0)
		{
			$flag++;
		}
	}
	close($in_fh);
	if($flag>0)
	{
		$paired_end_flag="YES";
		print "\n*******************************************************************************************\n";
		print "********** Reads in $positive_file[0] are paired-end, all lanes must be paired-end *********\n";
		print "********************************************************************************************\n\n";
	}
	else
	{
		print "\n*******************************************************************************************\n";
		print "*********** Reads in $positive_file[0] are single-end, all lanes must be single-end ********\n";
		print "********************************************************************************************\n\n";
	}
}

## Returns the smaller of two start positions (numeric comparison); on a tie the first one.
sub min_mate_start
{
	my ($start1,$start2)=@_;
	return(($start1>$start2)?$start2:$start1);
}



























