use strict;
use HTTP::Request::Common qw(POST);
use LWP::UserAgent;
use File::Temp qw/ :POSIX /;
use DBI;
use Statistics::Distributions;
use File::Basename;
use CGI qw/:all :html3/;
use lib 'C:/Scripts/';
use SmPlot;
use GD;
use Bio::Tools::SeqPattern;
use GraphViz;
use Cwd;
use GO::AnnotationProvider::AnnotationParser;
no warnings;
# Unbuffer STDOUT so that progress messages appear as soon as they are printed:
$|=1;

# The command line is a flat list of "Key Value" pairs. Stop with a usage hint
# when nothing was supplied. The array is tested directly because
# defined(@array) is a fatal error on perl 5.22 and later.
if (!@ARGV){
   print "No arguments provided\n";
   print "Please provide: Organism, BkgIdFile, ClusterFile, ExpFile\n";
   exit;
}

my %Config = @ARGV;

# Parameters that have to be given by the user:
my $Organism = $Config{'Organism'};
my $T_MipsFile = $Config{'BkgIdFile'};
my $C_File = $Config{'ClusterFile'};
my $ExpFile = $Config{'ExpFile'};

my %DefaultConfig = ReadDefaultConf($Organism);

# For optional parameters not supplied, set them to the default
foreach my $k (keys %DefaultConfig){
   $Config{$k} = $DefaultConfig{$k} if (!defined $Config{$k});
}

# Parameters that have default values, pulled out of %Config in groups.

# GO annotation source and term mapping:
my ($An_Source, $AnnotationFile, $MapToParents, $ConToSlim, $SlimTerms, $MappingType)
	= @Config{qw(AnnotationSource AnnotationSourceFile MapToParents
				 ConvertToSlim SlimTerms MappingType)};

# Promoter retrieval and motif scanning:
my ($Pr_Source, $P_Type, $PromoterFile, $MotifFile)
	= @Config{qw(PromoterSource PromoterType PromoterSourceFile MotifFile)};

# Simulations and FDR correction:
my ($NumOfSim, $FDRcutoff, $CorrectionType)
	= @Config{qw(NumberOfSim FDRcutoff CorrectionType)};

# Connection settings for the local GO database (handed to DBI->connect):
my ($data_source, $username, $password) = @Config{qw(mygo user password)};

# Miscellaneous:
my ($WantDetail, $Ignore, $DescriptionFile)
	= @Config{qw(LevelOfDetail IgnoreList DescriptionFile)};

# Settings that are not configurable: a local GO db is assumed until the
# connection test fails, duplicate ids are removed, results get no prefix.
my ($HaveLocalGO, $RemDup, $Prefix) = (1, "Y", "");

my @Clusters;
# Find the files to analyze. The cluster file is either a list of file names
# (one per line) or is itself the single cluster to analyze.
my @File;
{
	# Three-argument open with a lexical handle: characters in the file name
	# can not change the open mode, and no global bareword handle is involved.
	open (my $ListFh, '<', $C_File) or die "Cant open $C_File\n";
	@File = <$ListFh>;
	close $ListFh;
}

foreach my $line (@File){
	chomp $line;
	push (@Clusters, $line) if (-e $line);
}

# No line named an existing file, so the cluster file itself is the cluster:
@Clusters = ($C_File) unless scalar@Clusters >= 1;

print "\n * ",scalar(@Clusters)," Cluster/s to analyze\n";

# Configuration problems that make the analysis impossible -- report and stop:
if ($ConToSlim =~m/YC/){
	if (not -e $SlimTerms){
		print "$SlimTerms is not a valid file\n";
		exit;
	}
}
if ($An_Source =~m/Local/){
	if (not -e $AnnotationFile){
		print "Cant find your local annotation source file\n";
		exit;
	}
}

# Configuration problems that can be repaired -- report and carry on:
if ($Pr_Source eq "LocalFile"){
	if (not -e $PromoterFile){
		print "Cant find your local promoter source file\n";
		undef $PromoterFile;
	}
}
if ($An_Source =~m/Local/ and $ConToSlim eq "YT"){
	print "Incompatible Slim and Source, Source reset to TAIR\n";
	$An_Source = "TAIR";
}
if (!defined $P_Type and $Pr_Source eq "TAIR"){
	print "PromoterType is not defined\n";
	$P_Type = 'At_upstream_1000';
}

# Try the local GO database once. Without it neither slim conversion nor
# mapping-to-parents is possible, so both are switched off:
if ($username and $password){
	my $dbh = DBI->connect($data_source,$username,$password);
	if (!$DBI::err){
		$dbh->disconnect;
	}else{
		print "No local GO db found!\n";
		print " ... ignoring slim conversion and mapping-to-parents\n";
		$ConToSlim = "N";
		$MapToParents = "N";
		$HaveLocalGO = 0;
	}
}


# Process the Reference set:
print "\n * Processing ",basename($T_MipsFile),"...\n";
my ($T_FreqRef, $T_NameRef, $T_CategRef, $T_GenesRef, $T_NumGenes,
	$PromoFreqRef,$PromoMatrixRef,$ProcessedAnnotFile,$T_GeneListRef)
	= ProcessReferenceFile($T_MipsFile,$ConToSlim,$MapToParents,
						   $An_Source,$AnnotationFile,
						   $Pr_Source,$PromoterFile,$P_Type,$MotifFile);

# Use the code below to add a custom description for your gene ids using
# a tab delimited text file with just two columns. The first containing the id
# the second containing the description.
# The file is optional: nothing is read (and nothing is reported) when no
# file was configured; a configured file that can not be opened gives a warning.
my %IdToDescription;
if (defined $DescriptionFile and $DescriptionFile ne ""){
	if (open (my $DescFh, '<', $DescriptionFile)){
		while (my $line = <$DescFh>){
			chomp $line;
			my ($id,$desc) = split (/\t/,$line);
			# Skip blank lines and lines without an id:
			next if (!defined $id or $id eq "");
			$IdToDescription{lc$id} = $desc;
		}
		close $DescFh;
	}else{
		warn "Cant open $DescriptionFile";
	}
}

# Process each cluster:
foreach my $C_MipsFile (@Clusters){
	chomp $C_MipsFile;
	# The result is named after the cluster file: prefix added, white space
	# replaced by underscores, ".txt"/".xls" removed.
	my $ResultName = $Prefix.basename($C_MipsFile);
	$ResultName =~s/\s+/_/g;
	$ResultName =~s/\.txt|\.xls//ig;

	print "\n * Analyzing Cluster ",basename($C_MipsFile),"...\n";
	HandleCluster($C_MipsFile, $ResultName, $ExpFile, $ProcessedAnnotFile,
				  $NumOfSim, $FDRcutoff, $CorrectionType);
}

# An overview across clusters only makes sense for more than one cluster:
EasyView(@Clusters) if scalar@Clusters > 1;

print "\n * Your results are ready\n";
# Remove the temporary processed-annotation file. unlink is used instead of
# the shell command "del" so that no shell is started and the cleanup also
# works outside of Windows.
unlink $ProcessedAnnotFile if (defined $ProcessedAnnotFile and -e $ProcessedAnnotFile);
exit;

################################################################################
sub HandleCluster {
################################################################################
# Runs the complete analysis for one cluster file and writes the HTML report
# "<ResultName>.html" together with an image directory "<ResultName>_files".
#
# Arguments (only the first five elements of @_ are unpacked):
#   $C_MipsFile         - file with the gene ids of the cluster
#   $ResultName         - base name for the report and the image directory
#   $ExpFile            - expression file, handed on to DrawTermGenes and
#                         MakeResult
#   $ProcessedAnnotFile - processed annotation file of the reference set
#   $NumberOfSim        - number of simulations to run for the FDR estimate
#
# The FDR cutoff and the correction type are taken from the file-level
# variables $FDRcutoff and $CorrectionType, not from @_. The reference set
# results ($T_FreqRef, $T_NumGenes, $T_CategRef, $T_GeneListRef) and the
# settings $RemDup and $HaveLocalGO are file-level variables as well.
#
# Returns 1. Dies when the cluster file can not be read or the report can not
# be written.
################################################################################
	my ($C_MipsFile, $ResultName, $ExpFile, $ProcessedAnnotFile, $NumberOfSim)=@_;

	open (FILE,$C_MipsFile) or die "cant open $C_MipsFile";
	my @Mips = <FILE>;
	close FILE;

	print "\t - Checking validity of ids\n";
	my ($MipsRef2, $Notmips,$Converted) = CheckIds(@Mips);
	print "\t - Converted $Converted ids to mips\n";
	undef @Mips;
	@Mips = @{$MipsRef2};

	# Drop duplicate ids (always on: $RemDup is fixed to "Y" at file level):
	if ($RemDup eq "Y"){
	   my @tmp = @Mips; undef @Mips;
	   my ($MipsRef, $Redundant) = RemoveRedundant(@tmp);
	   @Mips = @{$MipsRef};
	   print "\t - $Redundant ids are redundant\n";
	}

	my $C_NumGenes = scalar@Mips;
	print "\t - $C_NumGenes to get annotations for\n";

	# $AnnotMatrix keeps the untrimmed annotation data for the drawing and
	# result routines; the trimmed copy is what gets counted:
	my $AnnotationRef = Read_Processed_AnnotationFile($ProcessedAnnotFile,\@Mips);
	my $AnnotMatrix = $AnnotationRef;
	my $Annot1Ref = TrimHashMatrix($AnnotationRef);
	my ($C_FreqRef,$C_GenesRef) = AnalyzeMatrix($Annot1Ref);

#	print "\t - ",scalar(keys%{$AnnotationRef})," unique terms\n";
#	print "\t - ",scalar(keys%{$Annot1Ref})," unique terms\n";
#	print "\t - ",scalar(keys%{$C_FreqRef})," unique terms\n";

	print "\n * Analyzing for enrichment of terms\n";
	my ($ResultRef, $SigRef, $PvaluesRef) = AnalyzeCateg($C_FreqRef,$C_NumGenes,
														 $T_FreqRef,$T_NumGenes);
	my $SigRefp = RetrieveAndAnalyzePromoters(\@Mips);

	# $sc / $sp count the significant categories / promoter sites;
	# $Detailsc{$n} counts significant categories with at least $n genes.
	my $sc = 0;
	my $sp = 0;
	my %Detailsc;

	# Find the number of significant categories, both, total
	# and by category size:
	foreach my $k (keys %{$SigRef}){
		$sc++ if ($$SigRef{$k} == 1);
		for my $ng (2..10){
			$Detailsc{$ng}++ if ($$SigRef{$k} == 1 and $$C_FreqRef{$k} >=$ng);
		}
	}

	foreach my $p (keys %{$SigRefp}){
		$sp++ if ($$SigRefp{$p} == 1);
	}

	# Perform simulations to calculate an FDR and a simulation
	# based p-value for each category:
	my ($Av_scr, $Av_spr, $SimPvalRef, $StoredPvalRef,$StoredFreqRef,$DetailscrRef);

	# No need to do simulations if nothing was significant
	if ($sc == 0) {
		$NumberOfSim = 0;
		print "\n * Skipping simulations: no categories are significant\n";
	}
	if ($NumberOfSim > 0){
		print "\n * Performing $NumberOfSim simulations for FDR\n";
		($Av_scr, $Av_spr, $SimPvalRef, $StoredPvalRef,$StoredFreqRef, $DetailscrRef)
		=PerformSimulations($T_GeneListRef,$T_NumGenes,$C_NumGenes,$ProcessedAnnotFile,
							$T_FreqRef,$C_FreqRef,$NumberOfSim);
		# Divide the per-category value returned by the simulations by the
		# number of simulations to turn it into a fraction:
		foreach my $id (keys %{$SigRef}){
			$$SimPvalRef{$id} = ($$SimPvalRef{$id}/$NumberOfSim);
		}
		# Adjust p-value cutoff to get the FDR under control:
		print "\n * Changing p-value cutoff for reducing FDR\n";
		if ($CorrectionType eq "global"){
			# One FDR over all categories. $sc is > 0 here, otherwise the
			# simulations would have been skipped above.
			my $FDR = sprintf("%.3f",$Av_scr/$sc);
			print "\t - FDR = ",$FDR,"\n";
			if ($FDR > $FDRcutoff){
				($ResultRef, $SigRef, $sc, $Av_scr)
				= OptimizePvalueCutoff($ResultRef, $SigRef,$PvaluesRef,
									   $StoredPvalRef,$FDR,$FDRcutoff);
			}
		}elsif($CorrectionType eq "stepped"){
			# One FDR per minimum category size (2..10 genes); sizes without
			# any significant category get the string "undef".
			my %FDR;
			for my $n (2..10){
				if ($Detailsc{$n} == 0){
					$FDR{$n} = "undef";
				}else{
					$FDR{$n} = sprintf("%.3f",int($$DetailscrRef{$n}/$NumberOfSim)/$Detailsc{$n});
				}
			}
			($ResultRef, $SigRef, $sc, $Av_scr)
			= OptimizePvalueCutoffByCategSize($ResultRef,$SigRef,$PvaluesRef,
											  $C_FreqRef,$StoredPvalRef,
											  $StoredFreqRef,\%FDR,$FDRcutoff);
		}
	}else{
		# Without simulations the report shows "N/A" for everything that
		# would have come out of them:
		$Av_scr = "N/A";
		$Av_spr = "N/A";
		$SimPvalRef = {};
		foreach my $id (keys %{$SigRef}){
			$$SimPvalRef{$id} = "N/A";
		}
	}

    # Make a place to write the images; an existing directory is emptied of
    # the .png and .txt files of an earlier run:
    my $ImageDir = $ResultName."_files";
    if (!-d $ImageDir){
    	system "mkdir $ImageDir";
    }elsif (-d $ImageDir){
		unlink <./$ImageDir/*.png>;
		unlink <./$ImageDir/*.txt>;
	}

	print "\n * Making summary plots\n";
    my ($ExpImage,$SmExp,undef) = DrawTermGenes(\@Mips,'all',$ExpFile,$ImageDir);
    my $ThE = MakeThumbNail($ImageDir,$ExpImage,400,300,190);

    my ($PromImage,$ThP,$AltText) = DrawPromoters(\@Mips,'all',$ImageDir);
    my ($AnnotImage,$ThA) = DrawAnnotations(\@Mips,'all',$SigRef,$ImageDir,undef,$AnnotMatrix);

	# Sort the GO ids by their frequency and classify
	# into the three main aspects:
	print "\n * Making Results:\n";

    my ($P_Result,$F_Result,$C_Result);
    my (@Process, @Function, @Component);
	my %GO_Aspects = ("Process" => \@Process,
					  "Function" => \@Function,
					  "Component" => \@Component);

    # Most frequent terms first; the aspect of a term is looked up in the
    # reference set ($T_CategRef):
    foreach my $id(sort {$$C_FreqRef{$b}<=>$$C_FreqRef{$a}}(keys%{$C_FreqRef})){
    		if ($$T_CategRef{$id}=~m/process/i){push (@Process,$id);}
    		if ($$T_CategRef{$id}=~m/function/i){push (@Function,$id);}
    		if ($$T_CategRef{$id}=~m/component/i){push (@Component,$id);}
    }

    # Process each GO aspect (P|F|C):
    for my $CategName(keys %GO_Aspects){
		print "\t - For category $CategName...\n";
		my $ArrayRef = $GO_Aspects{$CategName};

		# Make the HTML table for each aspect:
    	my ($TableHTML,$InResultRef,$SigCountInResult)
    		= MakeResult($ArrayRef,$ResultRef,$C_GenesRef,$C_FreqRef,
						 $ImageDir,$ExpFile,$SigRef,$AnnotMatrix,$SimPvalRef);

		# Make the DAG for each aspect; this needs the local GO db and more
		# than one row in the result table:
		my ($Graph,$ThG,$ThG_map,$FigureHTML);
		if ($HaveLocalGO == 1 and scalar(keys%{$InResultRef}) > 1){
    		($Graph,$ThG,$ThG_map) = Drawdag($ArrayRef,$SigRef,$C_FreqRef,
    								$CategName,$ImageDir,$InResultRef);
		}
		# The anchor name is the target of the "See Results for" links and
		# the MAP is the image map of the graph thumbnail:
		$FigureHTML .= "<a name=\"$CategName\"></a>";
		$FigureHTML .= "<MAP NAME=$CategName>$ThG_map</MAP>";
		$FigureHTML .= "<br><table width=600>";
		$FigureHTML .= "<tbody style=font-family:verdana;font-size:12><tr>";
		$FigureHTML .= "<td><h3>Results for Category $CategName</h3>".
						scalar(keys%{$InResultRef})." rows in table<br>".
						$SigCountInResult." categories significant</td>";
		if (defined $Graph){
			$FigureHTML .= "<td><img src=$ImageDir/$ThG border=1 usemap=#$CategName><br>";
			$FigureHTML .= "Graph of term relationship ";
			$FigureHTML .= "[ <a href=$ImageDir/$Graph>See Big Graph</a> ]</td>";
		}else{
			$FigureHTML .= "<td>No GO graph</td>";
		}
		$FigureHTML .= "</tr></tbody></table>";

		# Join the DAG and table HTML to create the result
		# for each GO aspect:
		if ($CategName eq "Process"){
			$P_Result = $FigureHTML."\n".$TableHTML;
		}elsif ($CategName eq "Function"){
			$F_Result = $FigureHTML."\n".$TableHTML;
		}elsif ($CategName eq "Component"){
			$C_Result = $FigureHTML."\n".$TableHTML;
		}
	}

    # Write out the HTML results:
    my $now_string = localtime;
	open (OUT,">$ResultName".".html") or die "cant write result";
	print OUT "<Head><Title>CLENCH - ",basename($C_MipsFile),"</Title>";
	print OUT "<style><!--
            	A 			{ color: blue; text-decoration: underline; }
				A:link		{ color: blue; text-decoration: underline; }
				A:visited	{ color: brown; text-decoration: underline; }
				A:active	{ color: red;  }
				A:hover		{ color: red;  }
				--></style></Head><Body>";
	# First the summary box:
	print OUT "<Table bgcolor=#eeeeee width=600>";
	print OUT "<tbody style=font-family:verdana;font-size:11><tr><td>";
	print OUT "<B>Result:</B> $ResultName on -- <TT>".$now_string."</TT><br>";
	print OUT "<B>TotalGenes (TG):</B> ",basename($T_MipsFile)," -- <B>$T_NumGenes</B> Genes<br>";
	print OUT "<B>ChangedGenes (CG):</B> ",basename($C_MipsFile)," -- <B>$C_NumGenes</B> Genes<br>";
	print OUT "<B>Annotation Source:</B> $An_Source<br>";
	print OUT "<B>Mapping to Parents:</B> $MapToParents<br>";
	print OUT "<B>Slim Conversion Choice:</B> $ConToSlim<br>";
	print OUT "<B>SlimTerm File:</B> ",basename($SlimTerms)," (used only if Conversion = YC)<br>";
	print OUT "<B>Total significant categories:</B> $sc<br>";
	print OUT "<B>FDR:</B> $Av_scr categories, based on $NumberOfSim simulations<br>";
	print OUT "<B>FDR Correction type:</B> ",$CorrectionType,"<br>";
	print OUT "<B>Total significant TFBS:</B> $sp<br>";
	print OUT "<B>FDR:</B> $Av_spr TFBS, based on $NumberOfSim simulations<br>";
	print OUT "</td></tr></tbody></Table><br>";
	print OUT "\n";

	# Then the overview images; each cell falls back to a text note when the
	# image or its thumbnail was not produced:
	print OUT "<font color=blue><B>Overview images:</B></font><br>";
	print OUT "<Table bgcolor=#eeeeee width=600>";
	print OUT "<tbody style=font-family:verdana;font-size:12><tr>";
	print OUT "<td>TF Sites in Promoters</td><td>Annotations</td>"
			 ."<td>Expression</td></tr><tr>";
	if (defined $PromImage and defined $ThP){
		print OUT "<td><a href=$ImageDir/$PromImage>"
				."<img src=$ImageDir/$ThP border=1 alt=\"$AltText\"></a></td>";
	}else {print OUT "<td>No promoter image</td>";}

	if (defined $AnnotImage and defined $ThA){
		print OUT "<td><a href=$ImageDir/$AnnotImage>"
				."<img src=$ImageDir/$ThA border=1></a></td>";
	}else {print OUT "<td>No Annotation image</td>";}

	if (defined $ExpImage and defined $ThE){
		print OUT "<td><a href=$ImageDir/$ExpImage>"
				."<img src=$ImageDir/$ThE border=0></a></td>";
	}else {print OUT "<td>No Expression image</td>";}

	print OUT "</tr></tbody></Table>";
	print OUT "<hr width=600 align=left color=black><br>";
	print OUT "\n";

	# Links to the three result sections:
	print OUT "<font color=blue><B>See Results for: </B></font>
			   <a href=#Process>Process</a> |
			   <a href=#Function>Function</a> |
			   <a href=#Component>Component</a><br>";

	# Tables with the detail results:
	print OUT $P_Result,"\n";
	print OUT $F_Result,"\n";
	print OUT $C_Result,"\n";
    print OUT "</Body>";
    close OUT;
	return 1;
}
################################################################################
sub ProcessReferenceFile {
################################################################################
# Prepares the reference (background) gene set: reads the ids, fetches and
# processes their GO annotations and, when a promoter source and a motif file
# are available, scans their promoters for TF sites.
#
# Arguments:
#   $MipsFile      - file with the reference gene ids
#   $ConToSlim     - slim conversion: "YT" (TAIR slim), "YC" (custom slim
#                    terms); any other value means no conversion
#   $MapToParents  - "Y" to add the parent terms of every annotation
#   $A_Source, $Local_A_File          - annotation source and local file
#   $P_Source, $Local_P_File, $P_Type - promoter source, local file and type
#   $MotifFile     - motif file handed to MakeTFMatrix
#
# Returns the list
#   (term frequencies, term names, term categories, term genes, number of
#    genes, promoter site frequencies, promoter matrix,
#    processed annotation file name, reference to the id list).
# The promoter entries are undef when no promoters were analyzed. The
# processed annotation file is a temporary file the caller has to remove.
# Exits the program when no annotations can be obtained.
################################################################################
	my ($MipsFile, $ConToSlim, $MapToParents,$A_Source,$Local_A_File,
		$P_Source,$Local_P_File,$P_Type,$MotifFile) = @_;

	open (my $IdFh, '<', $MipsFile) or die "cant open $MipsFile";
	my @Mips = <$IdFh>;
	close $IdFh;

	print "\t - Checking validity of ids\n";
	my ($MipsRef2, undef, $Converted) = CheckIds(@Mips);
	print "\t - Converted $Converted ids to mips\n";
	@Mips = @{$MipsRef2};

	if ($RemDup eq "Y"){
	   my ($MipsRef, $Redundant) = RemoveRedundant(@Mips);
	   @Mips = @{$MipsRef};
	   print "\t - $Redundant ids are redundant\n";
	}

	my $NumGenes = scalar@Mips;
	print "\t - $NumGenes to get annotations for\n";

	# Get the annotations and process:
	print "\t - Getting annotations from $A_Source\n";
	my $RawAnnotFile = GetAnnotation(\@Mips,$A_Source,$Local_A_File);
	if (!defined $RawAnnotFile){
		warn "\t ** No valid source for Annotations **\n";
		exit;
	}
	print "\t   = Got annotations ... processing\n";
	my $ProcessedAnnotFile;
	my $SFile;
	if ($ConToSlim =~m/YC|YT/){
	   	print "\t   = Converting to Slim terms\n";
		if ($ConToSlim eq "YT"){
	   		$SFile = ConvertTo_TairSlim($RawAnnotFile,$MapToParents);
		}elsif ($ConToSlim eq "YC"){
	   		$SFile = ConvertTo_MySlim($RawAnnotFile,$MapToParents);
	   	}
	   	if ($MapToParents eq "Y"){
			print "\t   = Mapping to Parents\n";
			$ProcessedAnnotFile = MapToParents($SFile);
		}else{
			# In the rare case that slim conversion introduced
			# duplicate entries.
	   		print "\t   = Removing redundant annotations\n";
	   		$ProcessedAnnotFile = RemoveRedundantAnnot($SFile);
		}

	}else{
		if ($MapToParents eq "Y"){
			print "\t   = Mapping to Parents\n";
			my $File = MapToParents($RawAnnotFile);
			$ProcessedAnnotFile = RemoveRedundantAnnot($File);
		}else{
			$ProcessedAnnotFile = $RawAnnotFile;
		}
	}
	print "\t   = Parsing annotations\n";
	my ($FreqRef,$NameRef,$CategRef,$GenesRef) = ParseAnnotation($ProcessedAnnotFile);

	# Get the promoters and process:
	print "\t - Getting promoters from $P_Source\n ";
	my ($PromoterMatrixRef,$BkgPromoterRef,$BkgPromo1Ref,
		$BkgPromFreqRef,$BkgPromGenesRef);
	my $PromoterFile = GetPromoters(\@Mips,$P_Source,$P_Type,$Local_P_File);
	if (!defined $PromoterFile){
		warn "\t ** No valid source for Promoters **\n";
	}
	if (defined $PromoterFile and defined $MotifFile){
		print "\t   = Analyzing $NumGenes promoters for TF sites\n";
		$PromoterMatrixRef = MakeTFMatrix($PromoterFile,$MotifFile);
		$BkgPromoterRef = BalanceMatrix($PromoterMatrixRef,\@Mips);
		$BkgPromo1Ref = TrimHashMatrix($BkgPromoterRef);
		($BkgPromFreqRef,$BkgPromGenesRef) = AnalyzeMatrix($BkgPromo1Ref);
	}

	# Clean up the temp files. unlink replaces the shell command "del": no
	# shell is started and the cleanup is not tied to Windows. The raw file is
	# kept when it doubles as the processed file that is returned.
	if (defined $RawAnnotFile and -e $RawAnnotFile
		and $ProcessedAnnotFile ne $RawAnnotFile){
		unlink $RawAnnotFile;
	}
	unlink $SFile if (defined $SFile and -e $SFile);
	if (defined $PromoterFile and -e $PromoterFile and $P_Source ne "Stored"){
		unlink $PromoterFile;
	}

	return ($FreqRef, $NameRef, $CategRef, $GenesRef, $NumGenes,
			$BkgPromFreqRef,$BkgPromoterRef,$ProcessedAnnotFile,\@Mips);
}
################################################################################
sub BalanceMatrix {
################################################################################
# Turns a matrix of counts into a presence/absence matrix that has an entry
# for every gene of the list.
#
#   $Ref     - reference to a hash of hashes: {row key}{lower-case gene id}
#   $ListRef - reference to the list of gene ids (any case)
#
# Returns a reference to a new hash of hashes holding 1 where the input has
# a defined value >= 1 for the gene and 0 everywhere else.
################################################################################
	my ($Ref,$ListRef)=@_;
	my %Source = %{$Ref};
	# Lower-case the ids once instead of at every lookup:
	my @Ids = map { lc } @{$ListRef};
	my %Balanced;

	for my $row (keys %Source){
		for my $gene (@Ids){
			my $value = $Source{$row}{$gene};
			$Balanced{$row}{$gene} = (defined $value and $value >= 1) ? 1 : 0;
		}
	}
	return \%Balanced;
}
################################################################################
sub RemoveRedundant {
################################################################################
# Removes exact duplicates from a list of ids.
#
# Takes the ids as a flat list. Returns (reference to the list of unique ids,
# number of ids dropped). The unique ids are ordered case-insensitively, as
# before.
#
# Duplicates are tracked in a hash instead of by comparing sorted neighbours:
# the sort ignores case while the comparison does not, so identical ids were
# not always adjacent and could be missed. An empty input now gives an empty
# list instead of a list holding one undefined element.
################################################################################
	my @RawInput = @_;
	my @Output=();
	my $Redun = 0;
	my %Seen;

	foreach my $Id (sort{uc($a) cmp uc($b)}@RawInput){
		if ($Seen{$Id}++){
			$Redun++;
		}else{
			push (@Output, $Id);
		}
	}
	return (\@Output, $Redun);
}

################################################################################
sub GetAnnotation {
################################################################################
# Collects the annotations of a list of gene ids into a temporary file.
#
#   $ListRef        - reference to the list of gene ids
#   $Source         - "LocalFile"   (file read by Read_TAIR_AnnotationFile),
#                     "LocalGOFile" (file read by Read_GO_AnnotationFile) or
#                     "TAIR"        (bulk query of www.arabidopsis.org)
#   $AnnotationFile - the local file for the two local sources
#
# Returns the name of the temporary file, or undef when the source is not
# usable (unknown source, missing local file, failed TAIR request). The
# caller has to remove the file.
################################################################################
	my ($ListRef,$Source,$AnnotationFile)=@_;
	my @Ids = @{$ListRef};
    if($Source eq "LocalFile" and -e $AnnotationFile){
		# Use local file:
		my $Ref = Read_TAIR_AnnotationFile($AnnotationFile);
		my ($FH,$TmpFile) = tmpnam();
		foreach my $id (@Ids){
			chomp $id;
			$id = lc$id;
			next unless (defined $Ref->{$id});
			foreach my $key (keys %{$Ref->{$id}}){
				print $FH $Ref->{$id}{$key};
			}
		}
		close $FH;
		return $TmpFile;
	}elsif($Source eq "LocalGOFile" and -e $AnnotationFile){
		my $Ref = Read_GO_AnnotationFile(\@Ids,$AnnotationFile);
		my ($FH,$TmpFile) = tmpnam();
		foreach my $id (keys %{$Ref}){
			foreach my $key (keys %{$Ref->{$id}}){
				print $FH $Ref->{$id}{$key};
			}
		}
		close $FH;
		return $TmpFile;
	}elsif($Source eq "TAIR"){
		# Otherwise query TAIR:
		my ($FH,$TmpFile) = tmpnam();
		# Only the file name is needed: LWP writes the response body to the
		# file itself.
		close $FH;
		my $UseAg = LWP::UserAgent->new();
		my $Req = POST 'http://www.arabidopsis.org/cgi-bin/bulk/go/getgo.pl',
			[ loci=>"@Ids",
			  output_type=>'text',
			];
		my $Response = $UseAg->request($Req,$TmpFile);
		# A failed request leaves nothing to format, so report it instead
		# of carrying on with an empty annotation file:
		if (!$Response->is_success){
			warn "\t ** TAIR request failed: ",$Response->status_line," **\n";
			unlink $TmpFile;
			return undef;
		}
		my $ResultFile = Format_TAIR_Result($TmpFile);
		# unlink instead of the shell command "del": no shell, not tied to
		# Windows.
		unlink $TmpFile;
		return $ResultFile;
	}else{
		return undef;
	}
}
################################################################################
sub Format_TAIR_Result {
################################################################################
# Rewrites a tab delimited TAIR GO result file into the six column layout
# used by the rest of the script:
#   locus, gene model, GO id, GO term, TAIR slim terms, category
# where the category is normalized to "function", "process" or "component".
#
# Takes the name of the TAIR result file; returns the name of a new
# temporary file holding the reformatted lines. Dies when the input can not
# be opened.
################################################################################
	my $File = $_[0];
	# Three-argument open with a lexical handle; the input is opened first
	# so that no temporary file is left behind when it can not be read.
	open (my $In, '<', $File) or die "Cant open $File\n";
	my ($FH,$FormattedFile) = tmpnam();
    while (my $line = <$In>){
		chomp $line;
		my ($Mips,undef,$GeneModel,$Term,$GO_id,undef,$Cat,$TairSlims)
			=split(/\t/,$line);
		if ($Cat=~m/func/i){$Cat="function";}
		elsif ($Cat=~m/proc/i){$Cat="process";}
		elsif ($Cat=~m/comp/i){$Cat="component";}
		print $FH "$Mips\t$GeneModel\t$GO_id\t$Term\t$TairSlims\t$Cat\n";
    }
    close $In;
    close $FH;
    return $FormattedFile;
}
################################################################################
sub Read_TAIR_AnnotationFile {
################################################################################
# Reads a local, tab delimited TAIR annotation file.
#
# Columns used (1-based): 2 = locus id, 3 = gene name, 5 = GO id,
# 6 = GO term, 10 = category.
#
# Returns a reference to a hash of hashes, {lower-case locus id}{GO id},
# whose values are ready-made annotation lines
#   "locus\tgene name\tGO id\tterm\tx\tcategory\n".
# Prints the number of annotation lines read. Dies when the file can not be
# opened.
################################################################################
	my $File = $_[0];
	open (my $In, '<', $File) or die "Cant open $File\n";
    my %Hash;
	my $count=0;

    while (my $line = <$In>){
		chomp $line;
		my (undef, $Mips, $Gname,undef,$GO_id,$Term,undef,undef,undef,$Cat)=split(/\t/,$line);
		# The category is recognized by its first letter. The test is
		# anchored: an unanchored /P/ also matches the "p" in "comp" or
		# "component" and would file those under "process".
		if ($Cat=~m/^\s*F/i){$Cat="function";}
		elsif ($Cat=~m/^\s*P/i){$Cat="process";}
		elsif ($Cat=~m/^\s*C/i){$Cat="component";}

		$Hash{lc$Mips}{$GO_id}="$Mips\t$Gname\t$GO_id\t$Term\tx\t$Cat\n";
		$count++;
    }
	close $In;
	print "\t  ($count Annotations in local file)\n";
	return \%Hash;
}
################################################################################
sub Read_GO_AnnotationFile {
################################################################################
# Looks up the GO annotations of a list of genes in a GO annotation file,
# using GO::AnnotationProvider::AnnotationParser, and the term names in the
# local GO database.
#
#   $IdRef - reference to the list of gene names
#   $File  - the annotation file
#
# Returns a reference to a hash of hashes, {lower-case gene}{GO id}, whose
# values are annotation lines
#   "gene\tstandard name\tGO id\tterm\tx\tcategory\n".
# Genes without a database id or without annotations are reported and
# skipped. Dies when the GO database can not be reached.
################################################################################
	my ($IdRef,$File) = @_;

	my $annotation = GO::AnnotationProvider::AnnotationParser->new(annotationFile=>$File);
	my $dbh = DBI->connect($data_source,$username,$password) or
	die ("Can't connect to $data_source: $DBI::errstr\n");
	my %Hash;
	my $Skipped =0;
	my $Guessed = 0;

	# Convert each gene into a database id before getting annotations
    foreach my $gene (@{$IdRef}){
		chomp $gene;
		$gene=~s/\s+//;
		my $Dbid;
		my $S_gene;
		# If gene is ambiguous, try to find its database id:
    	if ($annotation->nameIsAmbiguous($gene)){
			my @databaseIds = $annotation->databaseIdsForAmbiguousName($gene);
			foreach my $Candidate (@databaseIds){
				$S_gene = $annotation->standardNameByDatabaseId($Candidate);
				next unless ($S_gene);
				if ($annotation->nameIsAmbiguous($S_gene)!=1){
					# An unambiguous standard name settles it:
					$Dbid = $Candidate;
					last;
				}elsif($annotation->nameIsAmbiguous($S_gene)==1){
					# Still ambiguous: remember it as a guess, keep looking.
					$Dbid = $Candidate;
					$Guessed++;
				}
			}
			# The array is tested directly: defined(@array) is a fatal
			# error on perl 5.22 and later.
			if (!defined $Dbid or !@databaseIds){
    	    	print "Cant find id for ambigous gene $gene\n";
    	    	$Skipped++;
    	    	next;
			}
    	}else{
			$S_gene = $gene;
		}

		# Now get the database ID for the gene:
		if ($annotation->nameIsStandardName($S_gene)){
			$Dbid = $annotation->databaseIdByStandardName($S_gene);
		}elsif($annotation->nameIsDatabaseId($S_gene)){
			$Dbid = $S_gene;
		}else{
			$Dbid = $annotation->databaseIdByName($S_gene);
		}

		# Skip gene if no annotation or no id:
		if ($annotation->nameIsAnnotated(name => $gene)!=1 or !defined $Dbid){
			print "no annotations for\t$gene,$S_gene,$Dbid\n";
			$Skipped++;
			next;
		}

		my $StdName = $annotation->standardNameByDatabaseId($Dbid);
    	foreach my $aspect ('P','F','C'){
			my $goidsRef = $annotation->goIdsByDatabaseId(databaseId => $Dbid,
                                                        aspect     => $aspect);
            my $Cat;
			if ($aspect=~m/F/i){$Cat="function";}
			elsif ($aspect=~m/P/i){$Cat="process";}
			elsif ($aspect=~m/C/i){$Cat="component";}
            foreach my $Id (@{$goidsRef}){
				my $Term = GetGoName($Id,$dbh);
				$Hash{lc$gene}{$Id} = "$gene\t$StdName\t$Id\t$Term\tx\t$Cat\n";
			}
		}
	}
	$dbh->disconnect;
	return \%Hash;
}
################################################################################
sub GetGoName {
################################################################################
# Returns the name of a GO term, looked up by its accession ("GO:nnnnnnn")
# in the "term" table, or undef when the accession is not found.
#
#   $Go_id - the accession
#   $dbh   - an open database handle
#
# The accession is passed as a bind value rather than pasted into the SQL
# text, so quotes in the value can not break or alter the statement.
################################################################################
	my ($Go_id,$dbh) = @_;
	my $sth = $dbh->prepare("select name from term where acc = ?");
	$sth->execute($Go_id);
	my ($GoTerm) = $sth->fetchrow_array;
	$sth->finish;
	return $GoTerm;
}
################################################################################
sub ConvertTo_MySlim {
################################################################################
# Replaces the GO ids of an annotation file by those of their parent terms
# that are listed in the user's slim term file (file-level $SlimTerms).
#
#   $FullAnnotFile - annotation file in the six column layout
#   $MapToParents  - accepted for symmetry with the other converters; not used
#
# For every annotation the parents are fetched with GetGoParents. Each parent
# that is a slim term gives one output line; with the file-level $MappingType
# set to "OneOnly" only the first such parent is written. Annotations without
# any slim parent are written unchanged; annotations for which no parents are
# found are dropped. Both cases are summarized in a warning.
#
# Returns the name of the temporary file holding the result. Dies when a file
# can not be opened or the GO database can not be reached.
################################################################################
	my ($FullAnnotFile,$MapToParents) = @_;
	my ($FH2,$SlimAnnotFile) = tmpnam();

	# Collect every GO id mentioned anywhere in the slim term file:
	my @SlimTerms;
	open (my $SlimFh, '<', $SlimTerms) or die "cant open $SlimTerms";
	while (my $Line=<$SlimFh>){
		push (@SlimTerms, $Line=~m/(GO:\d{7})/g);
	}
	close $SlimFh;
	print "\t = ",scalar@SlimTerms," SlimTerms in ",basename($SlimTerms),"\n";

	# A failed connect is described by $DBI::errstr ($@ is only set by eval):
	my $dbh = DBI->connect($data_source,$username,$password) or
	die ("Can't connect to $data_source: $DBI::errstr\n");

	my %WarnTerms;
	my %NoParents;

	open (my $In, '<', $FullAnnotFile) or die "cant open $FullAnnotFile";
	open ($FH2, '>', $SlimAnnotFile) or die "cant write output";
	while (my $Line = <$In>){
		chomp $Line;
		my ($Mips, $GeneModel, $GO_id, $GO_annot, undef, $GO_categ) = split(/\t/,$Line);
		next if (!defined $GO_id or $GO_id eq "" or $GO_id=~m/null/i);

		# Get the parents and check if there are any:
		my ($ParentIds,$ParentAnnot)=GetGoParents($GO_id,$GO_categ,$dbh);
		if ($$ParentIds[0] eq ""){
			$NoParents{$GO_id}=1;
			next;
		}

		# Find the Slim terms:
		my $SlimTermFound=0;
		PARENT: for my $i(0..$#{$ParentIds}){
			foreach my $SlimTerm (@SlimTerms){
				next unless ($$ParentIds[$i] eq $SlimTerm);
				$SlimTermFound=1;
				print $FH2 "$Mips\t$GeneModel\t$$ParentIds[$i]\t"
						  ."$$ParentAnnot[$i]\tx\t$GO_categ\n";
				# Get out if we want only one slim parent(Hard slim mapping):
				last PARENT if ($MappingType eq "OneOnly");
			}
		}

		# Ideally this should not happen, i.e. you want a slim mapping
		# to a term in your file but cant find any acceptable annotation
		# warn the user and preserve the original annotation
		if ($SlimTermFound==0){
			$WarnTerms{$GO_id}=1;
			print $FH2 "$Mips\t$GeneModel\t$GO_id\t$GO_annot\tx\t$GO_categ\n";
		}
	}
	close $In;
	close $FH2;

	if (scalar(keys%NoParents) >=1){
		warn "No parents found for ",scalar(keys%NoParents),
			 " annotations!\n";
	}
	if (scalar(keys%WarnTerms) >=1){
		warn "No slimterm in ",basename($SlimTerms),
			 " for ",scalar(keys%WarnTerms)," GO ids\n";
	}

	$dbh->disconnect;

	return $SlimAnnotFile;
}
################################################################################
sub GetGoParents {
################################################################################
# Fetches the ancestors of a GO term from the graph_path table. Paths of
# distance 0 are included, so the term itself is part of the result.
#
#   $GO_id    - accession of the term
#   $GO_categ - category; matched case-insensitively against "process",
#               "component" and "function" to select the term type
#   $dbh      - an open database handle
#
# Returns (reference to the list of accessions, reference to the list of
# names), both in the order the rows come back.
################################################################################
	my ($GO_id,$GO_categ,$dbh)=@_;

	# Map the category onto the term type stored in the database. Every
	# pattern is tried, so the last one that matches decides.
	my $GO_categ_full;
	foreach my $Pair ([qr/process/i,   "biological_process"],
					  [qr/component/i, "cellular_component"],
					  [qr/function/i,  "molecular_function"]){
		my ($Pattern,$TermType) = @{$Pair};
		$GO_categ_full = $TermType if ($GO_categ =~ $Pattern);
	}

	my $query = join("",
		"select p.acc, p.name from graph_path INNER JOIN term AS t ON ",
		"(t.id = graph_path.term2_id) INNER JOIN term AS p ON ",
		"(p.id = graph_path.term1_id) where t.acc = ? and ",
		"graph_path.distance >=0 and t.term_type = ? ");

	my $sth = $dbh->prepare($query);
	$sth->execute($GO_id, $GO_categ_full);

	my (@Parent_id,@Parent_annot);
	while (my @Row = $sth->fetchrow_array){
		push (@Parent_id, $Row[0]);
		push (@Parent_annot, $Row[1]);
	}

	return (\@Parent_id,\@Parent_annot);
}
################################################################################
sub ConvertTo_TairSlim {
################################################################################
# Replaces the GO term of every annotation by its TAIR slim term(s), which
# are taken from the fifth column of the annotation file. Several slim terms
# in one field are separated by "|"; each gives its own output line. The GO
# id of a slim term is looked up by name with GetGoID.
#
#   $FullAnnotFile - annotation file in the six column layout
#   $MapToParents  - accepted for symmetry with the other converters; not used
#
# Returns the name of the temporary file holding the result. Dies when a file
# can not be opened or the GO database can not be reached.
################################################################################
	my ($FullAnnotFile,$MapToParents) = @_;
	my ($FH2,$SlimAnnotFile) = tmpnam();

	my $dbh = DBI->connect($data_source,$username,$password) or
	die ("Can't connect to $data_source: $DBI::errstr\n");

	open (my $In, '<', $FullAnnotFile) or die "cant open $FullAnnotFile";
	open ($FH2, '>', $SlimAnnotFile) or die "cant write output";

	while (my $Line = <$In>){
		chomp $Line;
		my ($Mips, $GeneModel, $GO_id, $GO_annot, $TairSlims, $GO_categ) = split(/\t/,$Line);
		next if ($GO_id=~m/null/i);

		my @Terms;
		if ($TairSlims=~m/\|/){
			# Split on the pipe itself. The pipe has to be escaped: a bare
			# "|" is an alternation, which split the field at every run of
			# white space and broke multi-word terms into single words.
			@Terms = grep {$_ ne ""} split(/\s*\|\s*/,$TairSlims);
		}else{
			@Terms = ($TairSlims);
		}
		foreach my $Term (@Terms){
			my $TairSlim_id = GetGoID($Term,$dbh);
			print $FH2 "$Mips\t$GeneModel\t$TairSlim_id\t$Term\tx\t$GO_categ\n";
		}
	}
	close $In;
	close $FH2;
	$dbh->disconnect;

	return $SlimAnnotFile;
}
################################################################################
sub GetGoID {
################################################################################
# Returns the accession of a GO term, looked up by its name in the "term"
# table, or undef when the name is not found.
#
#   $Go_Term - the term name
#   $dbh     - an open database handle
#
# The name is passed as a bind value rather than pasted into the SQL text:
# term names may contain an apostrophe, which would end the quoted string
# early and break the statement.
################################################################################
	my ($Go_Term,$dbh) = @_;
	my $sth = $dbh->prepare("select acc from term where name = ?");
	$sth->execute($Go_Term);
	my ($Go_id) = $sth->fetchrow_array;
	$sth->finish;
	return $Go_id;
}
################################################################################
sub MapToParents{
################################################################################
# Expands every annotation of a file to the term itself plus all of its
# parent terms (as returned by GetGoParents). Each gene/term combination is
# written only once; the comparison ignores case.
#
# Takes the name of an annotation file in the six column layout; returns the
# name of the temporary file holding the expanded annotations. Dies when a
# file can not be opened or the GO database can not be reached.
################################################################################
	my $FullAnnotFile = $_[0];
	my ($FH2,$PAnnotFile) = tmpnam();

	# A failed connect is described by $DBI::errstr ($@ is only set by eval):
	my $dbh = DBI->connect($data_source,$username,$password) or
	die ("Can't connect to $data_source: $DBI::errstr\n");

	open (my $In, '<', $FullAnnotFile) or die "cant open $FullAnnotFile";
	open ($FH2, '>', $PAnnotFile) or die "cant write output";
	my %Unique;
	while (my $Line = <$In>){
		chomp $Line;
		my ($Mips, $GeneModel, $GO_id, $GO_annot, $Slims, $GO_categ) = split(/\t/,$Line);
		next if ($GO_id=~m/null/i);
		# Find the Parents of a term -- (includes the term too):
		my ($ParentIds,$ParentAnnot)=GetGoParents($GO_id,$GO_categ,$dbh);
		for my $i(0..$#{$ParentIds}){
			my $Key = lc($Mips.$$ParentIds[$i]);
			next if (defined $Unique{$Key});
			print $FH2 "$Mips\t$GeneModel\t$$ParentIds[$i]\t"
					  ."$$ParentAnnot[$i]\t$Slims\t$GO_categ\n";
			$Unique{$Key}=1;
		}
	}
	close $In;
	close $FH2;
	$dbh->disconnect;
	return $PAnnotFile;
}
################################################################################
sub RemoveRedundantAnnot{
################################################################################
# Copies an annotation file to a new temporary file, keeping only the first
# line of every gene/GO id combination (columns 1 and 3; case is ignored).
# The input file is deleted afterwards.
#
# Takes the name of the annotation file; returns the name of the new file.
# Dies when the input can not be opened.
################################################################################
	my $File = $_[0];
	my ($FH,$UniqAnnotFile) = tmpnam();
	open (my $In, '<', $File) or die "Cant open $File\n";
	my %Unique;
	while (my $line = <$In>){
		my ($Mips,undef,$GO_id) = split(/\t/,$line);
		my $Key = lc($Mips.$GO_id);
		next if ($Unique{$Key}++);
		print $FH $line;
	}
	close $In;
	close $FH;
	# delete the file with redundant annotations. unlink is used instead of
	# the shell command "del": no shell is started and it works on every
	# platform.
	unlink $File;
	return $UniqAnnotFile;
}

################################################################################
sub ParseAnnotation {
################################################################################
# Summarizes an annotation file in the six column layout by GO id.
#
# Takes the name of the file. Returns four hash references, all keyed by
# GO id:
#   number of annotation lines with the id,
#   term name,
#   term category,
#   genes per term -- always empty here; the slot is kept so that the
#                     return list keeps its shape.
# Dies when the file can not be opened.
################################################################################
	my $AnnotFile = $_[0];
	# Three-argument open with a lexical handle instead of the global
	# bareword handle:
	open (my $In, '<', $AnnotFile) or die "cant open $AnnotFile";
	my (%TermFreq, %TermName, %TermCateg, %TermGenes);
	while (my $Line = <$In>){
		chomp $Line;
		my (undef, undef, $GO_id, $GO_annot, undef, $GO_categ) = split(/\t/,$Line);
		$TermFreq{$GO_id}++;
		$TermName{$GO_id} = $GO_annot;
		$TermCateg{$GO_id} = $GO_categ;
	}
	close $In;
	return (\%TermFreq, \%TermName, \%TermCateg, \%TermGenes);
}
################################################################################
sub GetPromoters{
################################################################################
# Collects the promoter sequences of a list of gene ids into a temporary
# FASTA file.
#
#   $ListRef      - reference to the list of gene ids; the ids are chomped
#                   in place when the local file is used
#   $Source       - "LocalFile" (FASTA file read by ReadPromoterFile) or
#                   "TAIR" (bulk sequence query of www.arabidopsis.org)
#   $Type         - TAIR dataset name; required for the "TAIR" source
#   $PromoterFile - the local FASTA file for the "LocalFile" source
#
# Returns the name of the temporary file, or undef when the source is not
# usable (unknown source, missing local file, missing type, failed request).
################################################################################
	my ($ListRef, $Source, $Type, $PromoterFile)=@_;

	if ($Source eq "LocalFile" and -e $PromoterFile){
		# Return from local file:
		my $Ref = ReadPromoterFile($PromoterFile);
		my ($FH,$TmpFile) = tmpnam();
		foreach my $id (@$ListRef){
			chomp $id;
			next unless (defined $Ref->{lc$id});
			print $FH ">",$id,"\n";
			print $FH $Ref->{lc$id},"\n";
		}
		close $FH;
		return $TmpFile;
	}elsif($Source eq "TAIR" and defined $Type){
		# Get from TAIR:
		my @Ids=@{$ListRef};
		my $UseAg = LWP::UserAgent->new();
		my ($FH,$TmpFile) = tmpnam();
		# Only the file name is needed: LWP writes the response body to the
		# file itself.
		close $FH;
		my $Req = POST 'http://www.arabidopsis.org/cgi-bin/bulk/sequences/getseq.pl',
			[ loci=>"@Ids",
			  outputformat=>'fasta',
			  dataset=>$Type,
			];
		my $Response = $UseAg->request($Req,$TmpFile);
		# Without a successful response there are no sequences to return:
		if (!$Response->is_success){
			warn "\t ** TAIR request failed: ",$Response->status_line," **\n";
			unlink $TmpFile;
			return undef;
		}
		# Clean up the result file: keep everything up to the first line
		# that starts with "--".
		my @Result;
		open (my $In, '<', $TmpFile) or die "Cant read result\n";
		while (my $line=<$In>){
			last if $line=~m/^--/;
			push (@Result, $line);
		}
		close $In;

		open (my $Out, '>', $TmpFile) or die "Cant write result\n";
		print $Out @Result;
		close $Out;
		return $TmpFile;
	}else{
		return undef;
	}
}
################################################################################
sub ReadPromoterFile {
# Loads a FASTA file of promoter sequences into a hash.
# The key of each record is the lower-cased locus id (At<chr>g<digits>) found
# on its '>' header line, or '' when the header holds no such id; the value
# is the record's sequence with the line breaks removed.
# Prints the number of records read and returns a reference to the hash.
# Dies if the file cannot be opened.
################################################################################
	my $File = $_[0];
	open (my $FastaFH, '<', $File) or die "Cant open $File\n";
	my (%Hash, $id);
	my $Promoter = '';
	my $InRecord = 0;
	my $count=0;

	while (my $line = <$FastaFH>){
		if ($line =~m/^>/){
			# A new header closes the record collected so far:
			if ($InRecord){
				$Hash{$id} = $Promoter;
				$count++;
			}
			# Find the Mips ID in the text after the '>':
			my ($Locus) = substr($line,1) =~m/(At(?:\d|c|m)g\d{5,})\.?\d*/i;
			$id = defined $Locus ? lc $Locus : '';
			$Promoter = '';
			$InRecord = 1;
		}
		elsif ($InRecord){
			# Sequence line; text before the first header is ignored.
			chomp $line;
			$Promoter .=$line;
		}
	}
	# Store the final record only after ALL of its lines have been read.
	# (Storing it when eof() first became true lost its last sequence line.)
	if ($InRecord){
		$Hash{$id} = $Promoter;
		$count++;
	}
	close $FastaFH;
	print "\t  ($count promoters in local file)\n";
	return \%Hash;
}
################################################################################
sub MakeTFMatrix{
# Builds a motif-by-gene hit matrix.
#   $upsfile - FASTA file of upstream (promoter) sequences
#   $patfile - tab-delimited file: binding-site pattern, then motif name
# Returns a reference to a hash of hashes: $Matrix{motif name}{gene id} holds
# the hit count returned by MotifSearch for that gene's sequence. Gene ids are
# the lower-cased locus ids found on the FASTA header lines.
# Dies if either file cannot be opened.
################################################################################
	my ($upsfile,$patfile) = @_;

	my %Patterns;
	open (my $PatFH, '<', $patfile) or die ("could not open pattern list");
	while (my $Line=<$PatFH>){
		chomp $Line;
		my($BS,$Name)=split(/\t/,$Line);
		# Skip blank or incomplete lines instead of registering an empty motif:
		next unless (defined $BS and defined $Name and $BS ne "" and $Name ne "");
		$Patterns{$Name} = $BS;
	}
	close $PatFH;

	my %Matrix;
	my $ups_seq = '';
	my $have_seq = 0;
	my $id = '';

	# Scans the sequence collected so far against every motif and resets it.
	my $ScanRecord = sub {
		return unless ($have_seq);
		foreach my $Pat (keys %Patterns){
			$Matrix{$Pat}{$id} = MotifSearch($ups_seq,$Patterns{$Pat});
		}
		$ups_seq = '';
		$have_seq = 0;
	};

	# Now open the upstream sequence file [FASTA Format] and scan each promoter:
	open (my $SeqFH, '<', $upsfile) or die "could not open upstream seq file";
	while (my $line = <$SeqFH>){
		chomp ($line);
		if ($line =~ m/^>/){
			# A header ends the previous record; scan it before moving on.
			$ScanRecord->();
			my ($Locus) = substr($line,1) =~m/(At(?:\d|c|m)g\d{5,})\.?\d*/i;
			$id = defined $Locus ? lc $Locus : '';
		}elsif ($line ne ""){
			$ups_seq .= $line;
			$have_seq = 1;
		}
	}
	# Scan the final record once all of its lines are in. (Scanning it when
	# eof() first became true happened before the last line was appended, so
	# the last promoter was searched without its final sequence line.)
	$ScanRecord->();
	close $SeqFH;
	return \%Matrix;
}
################################################################################
sub MotifSearch {
# Counts how often a motif occurs in a sequence.
#   $ups_seq - sequence to scan (case and whitespace are ignored)
#   $pat     - motif, handed to Bio::Tools::SeqPattern as a 'Dna' pattern
# The sequence is matched against the expanded pattern and against the
# pattern's revcom(1) string; successive matches are counted.
################################################################################
	my ($ups_seq, $pat) = @_;

	# Normalise the sequence: lower case, no whitespace.
	my $query = lc($ups_seq);
	$query =~ s/\s//gm;

	my $pattern = Bio::Tools::SeqPattern->new(-SEQ =>$pat, -TYPE =>'Dna');
	my $expand = $pattern->expand;
	my $revcompexp = $pattern->revcom(1)->str;

	my $hits = 0;
	$hits++ while ($query =~m/$expand|$revcompexp/ig);
	return $hits;
}
################################################################################
sub AnalyzeCateg {
# Tests every GO term seen in a cluster for over-representation.
#   $C_FreqRef - hash ref: GO id => number of cluster genes with the term
#   $C_Genes   - number of genes in the cluster
#   $T_FreqRef - hash ref: GO id => number of background genes with the term
#   $T_Genes   - number of genes in the background
# Terms with an empty id, terms matching the file-level $Ignore pattern and
# terms with fewer than 2 background genes are skipped.
# Returns three hash references keyed by GO id:
#   result string "SigText,RE,Chi_prob,Hyp_prob,p_val",
#   significance flag (1/0, based on the hypergeometric value),
#   hypergeometric value (or "N/A" when the statistics could not be computed).
################################################################################
	my ($C_FreqRef,$C_Genes,$T_FreqRef,$T_Genes) = @_;

	my (%Result, %Sig, %Pvalues);
	# An empty pattern makes Perl reuse the last successfully matched regex,
	# so only apply the ignore filter when there is a pattern to apply.
	my $HaveIgnore = (defined $Ignore and $Ignore ne "");
	foreach my $GO_id(keys %$C_FreqRef){
		next if $GO_id eq "";
		next if ($HaveIgnore and $GO_id=~m/$Ignore/i);
		next if ($$T_FreqRef{$GO_id} < 2);

		my ($SigText, $p_val, $RE, $Chi_prob, $Hyp_prob, $Fisher_prob);

		eval {
			$p_val = Binomial($C_Genes, $$C_FreqRef{$GO_id},
							  $T_Genes, $$T_FreqRef{$GO_id});
			# Relative enrichment: cluster frequency over background frequency.
			$RE = (($$C_FreqRef{$GO_id}/$C_Genes)/
				   ($$T_FreqRef{$GO_id}/$T_Genes));
			$Chi_prob = ChiSqProb($$T_FreqRef{$GO_id},$$C_FreqRef{$GO_id},
								  $T_Genes,$C_Genes);
			$Fisher_prob = FisherExact();
			$Hyp_prob = HyperGeometric($$C_FreqRef{$GO_id},$C_Genes,
									   $$T_FreqRef{$GO_id},$T_Genes);
		};
		my $Failed = $@ ? 1 : 0;
		if ($Failed){
			warn $GO_id,"\t",$@;
			$p_val = $RE = $Chi_prob = $Hyp_prob = "N/A";
		}else {
			$p_val = sprintf("%.3f",$p_val);
			$RE = sprintf("%.2f",$RE);
			$Chi_prob = sprintf("%.3f",$Chi_prob);
			$Hyp_prob = sprintf("%.3f",$Hyp_prob);
		}

		# Decide the significance. A hypergeometric value printed as "-1.#..."
		# is not a usable number; the binomial value decides alone then.
		$SigText = "";
		if (!$Failed){
			my $HypUnusable = ($Hyp_prob =~m/-1\.#/) ? 1 : 0;
			if (!$HypUnusable and $Hyp_prob <= 0.05 and $p_val <= 0.05){
				$SigText="Yes";
			}elsif ($HypUnusable and $p_val <= 0.05){
				$SigText="Yes";
			}
		}
		# A failed computation ("N/A") must not count as significant: compared
		# numerically "N/A" is 0 and would pass the <= 0.05 test.
		# NOTE(review): a "-1.#..." value still passes this test (it numifies
		# to -1), as before - confirm that this is intended.
		if (!$Failed and $Hyp_prob <= 0.05){
			$Sig{$GO_id}=1;
		}else{
			$Sig{$GO_id}=0;
		}
		# Store:
		$Result{$GO_id}="$SigText,$RE,$Chi_prob,$Hyp_prob,$p_val";
		$Pvalues{$GO_id} = $Hyp_prob;

	}
	return (\%Result,\%Sig, \%Pvalues);
}
################################################################################
sub MakeResult {
# Builds the HTML result table for one cluster.
# For every GO id in @$IdArrayRef that passes the detail filter (file-level
# $WantDetail), the expression, promoter and annotation images are drawn and
# one table row is produced by MakeHTML.
# Returns the table HTML, a hash ref of the GO ids that made it into the
# table, and how many of those are flagged in $SigRef.
################################################################################
	my ($IdArrayRef,$ResultRef,$C_GenesRef,$C_FreqRef,
		$ImageDir,$ExpFile,$SigRef,$AnnotMatrix,$SimPvalRef) = @_;
	my (@Rows, %InResult);
	my $SigCountInResult = 0;

	TERM: foreach my $Term (@$IdArrayRef){
		my ($SigText,$RE,$Chi_prob,$Hyp_prob,$p_val) = split(/,/,$ResultRef->{$Term});
		my @GeneList = sort (split(/\t/,$C_GenesRef->{$Term}));
		my $InCluster = $C_FreqRef->{$Term};
		my $NotSig = ($SigText ne "Yes");

		# "S": significant terms with at least two genes only.
		if ($WantDetail eq "S"){
			next TERM if ($NotSig);
			next TERM if ($InCluster < 2);
		}
		# "All": small groups are kept only when they are significant.
		next TERM if ($WantDetail eq "All" and $InCluster < 4 and $NotSig);
		# Single-gene terms are never reported.
		next TERM if ($InCluster==1);

		my $TermName = $$T_NameRef{$Term};
		my ($ExpImage,$SmExp,$DataFile) = DrawTermGenes(\@GeneList,$TermName,$ExpFile,$ImageDir);
		my ($PromImage,$ThP,$Prom_AltText) = DrawPromoters(\@GeneList,$TermName,$ImageDir);
		my ($AnnotImage,$ThA) = DrawAnnotations(\@GeneList,$TermName,$SigRef,
												$ImageDir,$$T_CategRef{$Term},$AnnotMatrix);

		push(@Rows, MakeHTML($Term,$TermName,$SigText,$RE,$Chi_prob,
							 $Hyp_prob,$p_val,\@GeneList,$InCluster,
							 $$T_FreqRef{$Term},$ImageDir,$ExpImage,$PromImage,
							 $AnnotImage,$ThP,$ThA,$SmExp,$DataFile,$Prom_AltText,
							 $SimPvalRef->{$Term}));
		$InResult{$Term}=1;
		$SigCountInResult++ if ($SigRef->{$Term}==1);
	}

	# Gather the table rows into a table:
	my $Table = join('',
		" <Table  border=0>",
		" <tbody style=font-family:verdana;font-size:12>",
		" <tr bgcolor=#FFCC99><td>GO-id & Name</td>",
		" <td>Sig<br>R,C,H,B<br>Sim p-val</td>",
		" <td>MipsID,Description</td>",
		" <td>Profile, CG (TG)</td>",
		" <td>Promoters </td><td>Annotation</td>",
		" </tr> </tbody>",
		" <tbody style=font-family:verdana;font-size:11> ",
		" @Rows </tbody></Table>");
	return ($Table,\%InResult,$SigCountInResult);
}
################################################################################
sub MakeHTML {
# Formats one GO term as an HTML table row (followed by an empty spacer row).
# The row holds, in order: the linked GO id and term name; the significance
# text with the R/C/H/B/Sim values; the linked gene ids with descriptions
# (from GetDescription); the expression profile cell; the promoter image
# cell; the annotation image cell.
#   $ListRef           - array ref of gene ids of the term
#   $C_Freq, $T_Freq   - gene counts shown in the profile cell
#   $ExpImage/$SmExp   - full-size and small expression plot file names
#   $PromoImage/$ThP   - promoter matrix image and its thumbnail (may be undef)
#   $AnnotImage/$ThA   - annotation matrix image and its thumbnail (may be undef)
# Returns the HTML string.
################################################################################
	my ($GO_id,$TermName,$Sig,$RE,$Chi_prob,$Hyp_prob,$p_val,
		$ListRef,$C_Freq,$T_Freq,$ImageDir,$ExpImage,$PromoImage,
		$AnnotImage,$ThP,$ThA,$SmExp,$DataFile,$Prom_AltText,$Simp_val)=@_;

	my $MipsAndDesc;
	my @Description = GetDescription(@{$ListRef});

	foreach my $i(0..$#{$ListRef}){
		# Show ids in the "At1g01010" spelling: lower case with capital A.
		my $id = lc $$ListRef[$i];
		$id =~tr/a/A/;
		$MipsAndDesc .= "<a href=http://www.arabidopsis.org/"
					."servlets/TairObject?type=locus&name=$id>$id</a>"
					."--";
		$MipsAndDesc .= "$Description[$i]<br>";
	}

	# Significant rows get a darker background.
	my $RowColor = "#eeeeee";
	if ($Sig eq "Yes"){
		$RowColor = "#DCDCDC";
	}

	my $TableRow = "<tr bgcolor=$RowColor><td><a name=\"$GO_id\"></a>"
			."<a href=http://www.godatabase.org/"
			."cgi-bin/go.cgi?query=$GO_id>$GO_id</a><br>$TermName</td>"
			."<td><font color=red><B>$Sig</B></font><br>R_ $RE<br> "
			."C_ $Chi_prob<br>H_ $Hyp_prob<br>B_ $p_val<br>Sim_ $Simp_val</td>";

	$TableRow .="<td><font point-size=6>$MipsAndDesc</font></td>";

	# Expression profile cell:
	if (defined $ExpImage and $SmExp){
		$TableRow .= "<td valign=bottom><B>$C_Freq</B> ($T_Freq) "
				  ." [<a href=$ImageDir/$DataFile>Get Data</a>]<br>"
				  ."<a href=$ImageDir/$ExpImage><image src=$ImageDir/$SmExp border=0></a></td>";
	}else{
		$TableRow .= "<td>No Expression Image<br>"
					."<B>$C_Freq</B> ($T_Freq)</td>";
	}

	# Promoter cell: no image, image only, or thumbnail linking to the image.
	if (!defined $PromoImage and !defined $ThP){
		# The cell is now closed like every other cell of the row.
		$TableRow .= "<td>No Promoter Image<br></td>";
	}
	elsif (defined $PromoImage and !defined $ThP){
		$TableRow .= "<td valign=bottom><image src=$ImageDir/$PromoImage alt=\"$Prom_AltText\"></td>";
	}else{
		$TableRow .= "<td valign=bottom><a href=$ImageDir/$PromoImage>"
					."<image src=$ImageDir/$ThP border=0 alt=\"$Prom_AltText\"></a></td>";
	}

	# Annotation cell, same three cases:
	if (!defined $AnnotImage and !defined $ThA){
		$TableRow .= "<td>No Annotation Image<br></td>";
	}
	elsif (defined $AnnotImage and !defined $ThA){
		$TableRow .= "<td valign=bottom><image src=$ImageDir/$AnnotImage></td>";
	}else{
		$TableRow .= "<td valign=bottom><a href=$ImageDir/$AnnotImage>"
					."<image src=$ImageDir/$ThA border=0></a></td>";
	}

	$TableRow .= "</tr>\n<tr><td></td></tr>\n";
	return $TableRow;
}
################################################################################
sub CheckIds {
# Filters a list of gene ids down to those that look like locus ids
# (At<chr>g<digits>, optionally with a version suffix).
# Returns (array ref of accepted ids, number of rejected ids, number of
# converted ids). No conversion is active, so the last value is always 0.
# When not a single id is accepted, the complete input list is returned
# instead of an empty one.
################################################################################
	my @Ids = @_;
	my $Converted = 0;

	chomp @Ids;
	# A simple pattern check to see if the ID is in the right form.
	my @Result = grep { m/At(\d|c|m)g\d{5,}\.?\d*/i } @Ids;
	my $Notmips = scalar(@Ids) - scalar(@Result);

# Use the code below to convert your custom gene ids to public identifiers
# using a local database table
#	my $data_source = "Some Data source";
#	my $username = "user name";
#	my $password = "password";
#
#	my $dbh = DBI->connect($data_source,$username,$password) or
#	die ("Can't connect to $data_source: $@\n");
#	my $query = "select description from TableName where mips = ?";
#
#	foreach my $id (@Ids){
#		chomp $id;
#		if ($id=~m/At(\d|c|m)g\d{5,}\.?\d*/i){
#			push (@Result, $id);
#		}
#		else {
#			my $query = "select mips from TableName where id = \'$id\'";
#    		my $sth = $dbh->prepare("$query");
#    			$sth->execute();
#    		my $row = $sth->fetchrow;
#    		if ($row !~m/none/i and $row ne""){
#    			push (@Result, $row);
#    			$Converted++;
#			}
#			$Notmips++;
#		}
#	}
#
#	$dbh->disconnect;
#
#							OR
#
# Use the code below to convert your custom gene ids to public identifiers
# using a tab delimited text file with just two columns. The first containing
# your internal id and the second containing the Mips ids.
#	my %IdToMips;
#	open (File, $IdConversionFile) or warn "Cant open $IdConversionFile";
#	while (my $line = <File>){
#		chomp $line;
#		my ($id,$mips) = split (/\t/,$line);
#		$IdToMips{lc$id} = $mips;
#	}
#	foreach my $id (@Ids){
#		chomp $id;
#		# A simple pattern check to see if the ID is in the right form.
#		if ($id=~m/At(\d|c|m)g\d{5,}\.?\d*/i){
#			push (@Result, $id);
#		}else{
#			if (defined $IdToMips{lc$id}){
#				push (@Result, $IdToMips{lc$id});
#				$Converted++;
#			}
#			$Notmips++;
#		}
#	}

	# Fall back to the unfiltered list when nothing was recognised:
	my $Accepted = (scalar@Result >=1) ? \@Result : \@Ids;
	return ($Accepted, $Notmips, $Converted);
}
################################################################################
sub GetDescription {
# Returns one description per gene id, in input order. Descriptions are taken
# from the file-level %IdToDescription (keyed by lower-cased id); ids without
# an entry get the text 'none'.
################################################################################
	my @List = @_;

# Use the code below to add a custom description for your gene ids using
# a tab delimited text file indexed in %IdToDescription

	chomp @List;
	my @Description = map {
		defined $IdToDescription{lc$_} ? $IdToDescription{lc$_} : 'none'
	} @List;

#							OR
#
# Use the code below to add a custom description for your gene ids using
# a local database table
#	my $data_source = "Some Data source";
#	my $username = "user name";
#	my $password = "password";
#
#	my $dbh = DBI->connect($data_source,$username,$password) or
#	die ("Can't connect to $data_source: $@\n");
#	my $query = "select description from TableName where mips = ?";
#
#	foreach $_ (@List){
#        my $sth = $dbh->prepare("$query");
#        $sth->execute($_);
#        my $row = $sth->fetchrow;
#        push (@Description,$row);
#	}
#
#	$dbh->disconnect;

	return @Description;
}
################################################################################
sub DrawTermGenes {
# Plots the expression profiles of a group of genes.
#   $GeneListRef    - array ref of gene ids
#   $Annot          - term name; used for the plot title and the file names
#   $ExpressionFile - tab-delimited file: header line, then one row per gene
#                     with a name column, an id column and the data columns
#   $ImageDir       - directory that receives the output files
# Writes a small plot, a big plot ('Big_' prefix) and a text file holding the
# header plus the matching rows. Every matching row is drawn as a blue line;
# the column-wise average of the matching rows is drawn in red.
# Returns (big image name, small image name, data file name), or undef when
# the expression file does not exist.
################################################################################
	my ($GeneListRef, $Annot, $ExpressionFile, $ImageDir) = @_;
	my ($ID, $Name, @expression, $nG, $Data, @List);
	my (@PDX, @PDY, @PDType, @X, @Y, $T, @Av);
	# Set to 1 once the X axis has been derived from the header.
	my $GetX = 0;
	my @GeneList = @{$GeneListRef};

	return undef unless (-e $ExpressionFile);

	# Get the data from the expression file:
    open(FILE,$ExpressionFile) or die ("Cant open $ExpressionFile\n");
    my $Header = <FILE>;
    while(<FILE>){
    	($Name, $ID, $Data) = split (/\t/,$_,3);
    	for my $i (0..$#GeneList){
 			# Try to find the corresponding rows for each gene:
       		if ((lc $GeneList[$i] eq lc $Name) or (lc $GeneList[$i] eq lc $ID)){
				# Store for printing:
				push (@expression, "$_");
				# Get the Y axis:
				# (the line is not chomped, so the last value still carries
				# the line ending)
				my @Row = split (/\t/,$Data);
				# @Av accumulates the column sums for the average profile.
				for my $j(0..$#Row){
					$Av[$j] +=$Row[$j];
				}
				# Build the X-axis:
				# Header columns from the third one on are read as times
				# ("<number> ... h|hr|hour|min", minutes converted to hours).
				# From the first column that does not look like a time
				# onwards, the column position is used instead.
				if ($GetX == 0){
            		my @H = split(/\t/,$Header);
            		my $UseH =1;
            		for my $i (2..$#H){
            			if ($H[$i]=~m/(\d+\.?\d*).*(h|hr|hour|min)/i and $UseH==1){
            				my $x = $1;
            				if ($2=~m/min/i){ $x = $x/60;}
            				push (@X, $x);
            			}else{
            				push (@X, int($i-1));
            				$UseH=0;
            			}
            		}
					$GetX = 1;
   				}
				# Store for plotting and count:
				$nG++;
				# All series share the same X array reference.
				push (@PDX, \@X);
    			push (@PDY, \@Row);
    			push (@PDType, 'NoPoints line blue');
        	}
    	}
    }
    close(FILE);

    # Calculate the average profile of the group:
    # (@Av is empty when no row matched, so $nG is never 0 inside the loop)
    for my $k (0..$#Av){
    	$Av[$k] /=$nG;
    }
    push (@PDX, \@X);
    push (@PDY, \@Av);
    push (@PDType, 'Points solidline red');

    # Turn the term name into a file name: drop commas, slashes, backslashes
    # and quotes, and replace whitespace runs by underscores.
    my $TermName = $Annot;
    $TermName =~ s/\,|\\|\/|\"|\'//g;
    $TermName =~ s/\s+/_/g;

    # Draw an image
    my $Image = MakeImage(\@PDX, \@PDY, \@PDType);
	my $ImageName = $TermName."_Plot.png";
	# Avoid overwriting an existing plot by inserting a random number:
	if (-e $ImageDir."/".$ImageName){
		$ImageName=~s/_Plot\.png//;
		$ImageName .= rand()."_Plot.png";
	}

	open (WR, ">".$ImageDir."/".$ImageName)
	or warn ("Cant write image file: ".$ImageDir."/".$ImageName);
    binmode WR;
    print WR $Image->draw();
    close WR;

	my $BigImage = 'Big_'.$ImageName;
	my $BigIm = MakeBigImage(\@PDX, \@PDY, \@PDType, $Annot,$nG,scalar@GeneList);
	open (WR, ">".$ImageDir."/".$BigImage)
	or warn ("Cant write image file: ".$ImageDir."/".$BigImage);
    binmode WR;
    print WR $BigIm->draw();
    close WR;

	# Write to a text file to plot manually.
	# NOTE(review): unlike the image name, this file name gets no random
	# suffix, so two terms with the same cleaned-up name share one data file.
	my $PlotDataFile = $TermName."_Profile.txt";
	open (DataFile, ">".$ImageDir."/".$PlotDataFile)
	or warn  "Can't write: ".$ImageDir."/".$PlotDataFile;
	print DataFile $Header, @expression;
	close DataFile;

    return ($BigImage,$ImageName,$PlotDataFile);
}

################################################################################
sub MakeImage {
# Creates the small (160x120) SmPlot object for a set of data series.
# The three array refs run in parallel: X values, Y values and the plot type
# string of each series. Returns the SmPlot object; drawing it is left to
# the caller.
################################################################################
	my ($PDXRef, $PDYRef, $PDTypeRef) = @_;
	my $img = SmPlot->new(160,120);

	foreach my $Series (0..$#{$PDXRef}){
		my $Added = $img->setData($PDXRef->[$Series],$PDYRef->[$Series],
								  $PDTypeRef->[$Series]);
		warn ($img->error()) unless ($Added);
	}

	my @Options = ('horGraphOffset' => 25,
				   'vertGraphOffset' => 25,
				   'horAxisLabel' => 'Time Points',
				   'vertAxisLabel' => 'Log Ratio');
	$img->setGraphOptions(@Options);

	return $img;
}
################################################################################
sub MakeBigImage {
# Creates the large (480x360) SmPlot object for a set of data series.
# Same series arguments as MakeImage, plus the term name and two counts; the
# title reads "<$nG> Genes with label <term>" ($nR is not used). The X values
# of the first series double as the X tick labels.
################################################################################
	my ($PDXRef, $PDYRef, $PDTypeRef, $Term, $nR, $nG) = @_;
	my $img = SmPlot->new(480,360);

	foreach my $Series (0..$#{$PDXRef}){
		my $Added = $img->setData($PDXRef->[$Series],$PDYRef->[$Series],
								  $PDTypeRef->[$Series]);
		warn ($img->error()) unless ($Added);
	}

	my @Options = ('horGraphOffset' => 30,
				   'vertGraphOffset' => 30,
				   'title' => "$nG Genes with label ".$Term,
				   'horAxisLabel' => 'Time Points',
				   'vertAxisLabel' => 'Log Ratio');
	$img->setGraphOptions(@Options);

	# Make custom labels: every X value is labelled with itself.
	my %xTickLabels;
	foreach my $Tick (@{$$PDXRef[0]}){
		$xTickLabels{$Tick} = $Tick;
	}
	$img->setGraphOptions('xTickLabels' => \%xTickLabels);

	return $img;
}
################################################################################
sub DrawPromoters {
# Draws the motif-by-gene matrix image for one group of genes.
# Uses the file-level motif matrix ($PromoMatrixRef), background motif
# frequencies ($PromoFreqRef) and background gene count ($T_NumGenes).
# Returns (image name, thumbnail name, alt text listing the enriched
# motifs); returns undef when no motif matrix is available or the group has
# more than 250 genes.
################################################################################
	my ($GeneListRef, $Annot, $ImageDir) = @_;

	if (!defined $PromoMatrixRef){ return undef; }
	if (scalar @$GeneListRef > 250){ return undef; }

	# Cut the motif matrix down to this gene list and to 0/1 entries:
	my $GroupMatrix = GetFromHashMatrix($GeneListRef,$PromoMatrixRef);
	my $Promo1Ref = TrimHashMatrix($GroupMatrix);

	# Calculate the Significant TFs (to color red):
	my ($PromFreqRef) = AnalyzeMatrix($Promo1Ref);
	my ($SigRefp,$AltText)=AnalyzePromoters($PromFreqRef,scalar(@$GeneListRef),
											$PromoFreqRef,$T_NumGenes);

	# Derive a file name from the term name:
	(my $TermName = $Annot) =~ s/\,|\\|\/|\"|\'//g;
	$TermName =~ s/\s+/_/g;
	my $ImageName1 = $TermName."_matrix.png";
	if (-e $ImageDir."/".$ImageName1){
		# Name already taken: insert a random number.
		$ImageName1=~s/_matrix\.png//;
		$ImageName1 .= rand()."_matrix.png";
	}

	# Draw an image
	my ($ImageName,$ThN) = VisualizeMatrix($Promo1Ref,$SigRefp,$ImageDir,$ImageName1);
	return ($ImageName,$ThN,$AltText);
}
################################################################################
sub DrawAnnotations {
# Draws the annotation-by-gene matrix image for one group of genes.
#   $SigRefa     - hash ref of entries to highlight, passed to VisualizeMatrix
#   $Categ       - category handed to TrimHashMatrix as its category filter
#   $AnnotMatrix - hash-of-hashes annotation matrix
# Returns (image name, thumbnail name); returns undef when no annotation
# matrix is given or the group has more than 250 genes.
################################################################################
	my ($GeneListRef, $Annot, $SigRefa, $ImageDir, $Categ, $AnnotMatrix) = @_;

	if (!defined $AnnotMatrix){ return undef; }
	if (scalar @$GeneListRef > 250){ return undef; }

	# Cut the annotation matrix down to this gene list and to 0/1 entries:
	my $GroupMatrix = GetFromHashMatrix($GeneListRef,$AnnotMatrix);
	my $Annot1Ref = TrimHashMatrix($GroupMatrix,$Categ);

	# Derive a file name from the term name:
	(my $TermName = $Annot) =~ s/\,|\\|\/|\"|\'//g;
	$TermName =~ s/\s+/_/g;
	my $ImageName2 = $TermName."_annot.png";
	if (-e $ImageDir."/".$ImageName2){
		# Name already taken: insert a random number.
		$ImageName2=~s/_annot\.png//;
		$ImageName2 .= rand()."_annot.png";
	}

	# Draw an image
	my ($ImageName,$ThN) = VisualizeMatrix($Annot1Ref,$SigRefa,$ImageDir,$ImageName2);
	return ($ImageName,$ThN);
}
################################################################################
sub RetrieveAndAnalyzePromoters {
# Determines which motifs are enriched in a group of genes, without drawing
# anything. Takes an array ref of gene ids and returns the significance hash
# ref produced by AnalyzePromoters, or undef when the file-level motif matrix
# ($PromoMatrixRef) is not available.
################################################################################
	my ($GeneListRef) = @_;

	if (!defined $PromoMatrixRef){ return undef; }

	# Motif matrix of this group, reduced to 0/1 entries, and its row counts:
	my $GroupMatrix = GetFromHashMatrix($GeneListRef,$PromoMatrixRef);
	my $Promo1Ref = TrimHashMatrix($GroupMatrix);
	my ($P_FreqRef) = AnalyzeMatrix($Promo1Ref);

	# Calculate the Significant TFs:
	my ($SigRefp,$AltText) = AnalyzePromoters($P_FreqRef,scalar(@$GeneListRef),
											  $PromoFreqRef,$T_NumGenes);
	return $SigRefp;
}
################################################################################
sub GetFromHashMatrix{
# Extracts the columns of a hash-of-hashes matrix that belong to a gene list.
#   $ListRef - array ref of gene ids (line endings are removed in place)
#   $MRef    - hash ref: row key => { lower-cased gene id => value }
# Returns a hash ref with the same row keys; the columns are the listed
# genes, respelled in lower case with capital 'A'. Genes that are missing
# from a row get the value 0.
################################################################################
	my ($ListRef,$MRef) = @_;
	my %Matrix = %{$MRef};
	my %An;

	foreach my $Row (keys %Matrix){
		foreach my $Gene (@$ListRef){
			chomp $Gene;
			my $Key = lc $Gene;
			(my $id = $Key) =~ tr/a/A/;
			my $Value = $Matrix{$Row}{$Key};
			$An{$Row}{$id} = defined $Value ? $Value : 0;
		}
	}
	return \%An;
}
################################################################################
sub TrimHashMatrix {
# Reduces a hash-of-hashes matrix to its informative rows and to 0/1 values.
#   $Ref   - hash ref: row key => { column key => number }
#   $Categ - optional; when given, rows whose category in the file-level
#            $T_CategRef equals it are dropped
# Rows without any value above 0, rows with an empty key and rows matching
# the file-level $Ignore pattern are dropped as well. In the kept rows every
# value above 0 becomes 1 and everything else 0.
# Returns a reference to the trimmed matrix.
################################################################################
	my ($Ref,$Categ) = @_;
	my %Hash = %{$Ref};
	my %TrimedHash;
	# An empty pattern makes Perl reuse the last successfully matched regex,
	# so only apply the ignore filter when there is a pattern to apply.
	my $HaveIgnore = (defined $Ignore and $Ignore ne "");

	foreach my $H (keys %Hash){
		my $ToPrint=0;
		foreach my $G (keys %{$Hash{$H}}){
			if ($Hash{$H}{$G} > 0){
				$ToPrint=1;
				last;
			}
		}
		next if ($ToPrint==0);
		next if ($HaveIgnore and $H=~m/$Ignore/i);
		next if ($H eq "");
		if (defined $Categ){
			next if ($$T_CategRef{$H} eq $Categ);
		}

		foreach my $G (keys %{$Hash{$H}}){
			$TrimedHash{$H}{$G} = ($Hash{$H}{$G} > 0) ? 1 : 0;
		}
	}
	return \%TrimedHash;
}
################################################################################
sub AnalyzeMatrix {
# Summarises a hash-of-hashes matrix row by row.
# For every row that has at least one value above 0 it records how many
# columns are above 0 and which ones. The second argument is not used.
# Returns two hash refs keyed by row: the count, and the tab-joined column
# keys.
################################################################################
	my ($Ref,$type)=@_;
	my %Matrix = %{$Ref};
	my (%Freq, %Genes);

	foreach my $Row (keys %Matrix){
		my $Cells = $Matrix{$Row};
		my @Present = grep { $Cells->{$_} > 0 } keys %{$Cells};
		next unless (@Present);
		$Freq{$Row} = scalar @Present;
		$Genes{$Row} = join("\t", @Present);
	}
	return (\%Freq,\%Genes);
}
################################################################################
sub AnalyzePromoters {
# Tests every motif found in a group of genes for over-representation.
#   $FreqRef    - hash ref: motif => number of group genes with the motif
#   $C_Genes    - number of genes in the group
#   $BkgFreqRef - hash ref: motif => number of background genes with the motif
#   $BkgGenes   - number of genes in the background
# A motif is significant when both the Binomial and the HyperGeometric value
# are at most 0.05. Motifs for which either statistic fails or yields no
# value are left out.
# Returns (hash ref of significant motifs, text listing them with both
# values, one per line, or "No Promoters enriched").
################################################################################
	my ($FreqRef,$C_Genes,$BkgFreqRef,$BkgGenes)=@_;
	my %Freq = %$FreqRef;
	my %BkgFreq = %$BkgFreqRef;
	my %Sig;
	my @pval_text;
	foreach my $key (keys %Freq){
		# Compute both statistics once; a die inside either one (for example
		# a division by zero) just skips the motif.
		my ($pval, $hypg);
		my $Computed = eval {
			$pval = Binomial($C_Genes,$Freq{$key},$BkgGenes,$BkgFreq{$key});
			$hypg = HyperGeometric($Freq{$key},$C_Genes,$BkgFreq{$key},$BkgGenes);
			1;
		};
		next unless ($Computed);
		# HyperGeometric returns nothing for invalid counts; an undefined
		# value must not be read as a probability of 0.
		next unless (defined $pval and defined $hypg);

		if ($pval <=0.05 and $hypg <=0.05){
			$Sig{$key}=1;
			push (@pval_text, "$key"."..".sprintf("%.3f",$pval)."|".sprintf("%.3f",$hypg));
		}
	}
	# Test the array itself: defined(@array) is a compile-time error in
	# current Perl versions.
	my $AltText = @pval_text ? join ("\n",@pval_text)
							 : "No Promoters enriched";
	return (\%Sig,$AltText);
}
################################################################################
sub VisualizeMatrix {
# Draws a 0/1 hash-of-hashes matrix as a PNG grid of small squares.
#   $Ref       - hash ref: row key => { column key => 0 or 1 }
#   $SigRef    - hash ref; row or column keys with value 1 are drawn in red
#   $ImageDir  - directory the image is written to
#   $ImageName - file name of the image
# Keys that look like GO ids are labelled with their term name
# (GetGoTermName). Column labels are written upwards along the top, row
# labels at the left of each row.
# Returns (image name, thumbnail name as returned by MakeThumbNail).
################################################################################
	my ($Ref,$SigRef,$ImageDir,$ImageName)=@_;
	my %Hash =  %{$Ref};
	my %Sig = %$SigRef;
	# $Y: number of rows, $X: largest number of columns in any row.
	my $Y = scalar(keys %Hash);
	my $X;
	# Longest row label and longest column label, in characters.
	my $xlen;
	my $ylen;
    my $y=0;
    # Side length of a square ($s) and gap between squares ($g), in pixels.
    my $s=6;
    my $g=3;
	# Get information to decide size of image:
	foreach my $r (keys %Hash){
		$X = scalar(keys %{$Hash{$r}}) if ( scalar(keys %{$Hash{$r}})> $X);
        if ($r=~m/(GO:\d{5,})/i){
        	$_ = GetGoTermName($r);
        	#$_ = $r if (length $_ > 25);
        }else{ $_ = $r;}

        $xlen = length$_ if (length$_ > $xlen);
		foreach my $c (keys %{$Hash{$r}}){
			if ($c=~m/(GO:\d{5,})/i){
				$_ = GetGoTermName($c);
				#$_ = $c if (length $_ > 25);
			}else{ $_ =$c;}

			$ylen = length$_ if (length$_ > $ylen);
		}
	}

	# Make a new image:
	# Width: all columns plus 5 pixels per character of the longest row label;
	# height: all rows plus 5 pixels per character of the longest column label.
    my $im = new GD::Image(($X*($s+$g))+($xlen*5)+3,($Y*($s+$g))+($ylen*5)+3);
    my $white = $im->colorAllocate(255,255,255);
    my $grey = $im->colorAllocate(50,50,50);
	my $black = $im->colorAllocate(0,0,0);
    my $blue = $im->colorAllocate(0,0,200);
	my $red = $im->colorAllocate(200,0,0);
	my $green = $im->colorAllocate(0,200,0);
    	$im->interlaced('true');

	# Print the keys for the header:
	# Only the first row (in sorted order) is visited - note the 'last' -
	# so the column labels are taken from that row's keys.
	foreach my $r (sort keys %Hash){
		my $x=3;
		$x+=($xlen*5);
		$y=($ylen*5);
		foreach my $c (sort keys %{$Hash{$r}}){
			my $l = $c;
			if ($c=~m/(GO:\d{5,})/i){
				my $N = GetGoTermName($c);
				$l = $N;
				#if (length $N <25);
			}
			my $color = $blue;
			if ($Sig{$c}==1){
				$color = $red;
			}
			$im->stringUp(gdTinyFont,$x,$y,$l,$color);
			$x +=($s+$g);
		}
		$y+=3;
		last;
	}

	# Print rest of the image:
	# One line per row: the label, then one square per column. A square is
	# filled when the entry is 1: red if its row or column is in %Sig, grey
	# otherwise.
	foreach my $r (sort keys %Hash){
		my $x=3;
		my $l = $r;
        if ($r=~m/(GO:\d{5,})/i){
        	my $N = GetGoTermName($r);
			$l = $N;
			#if (length $N < 25);
        }
        my $color = $blue;
		if ($Sig{$r}==1){
			$color = $red;
		}
		$im->string(gdTinyFont,$x,$y,$l,$color);
		$x+=($xlen*5);
		foreach my $c (sort keys %{$Hash{$r}}){
			my $fillcolor = $white;
			if ($Hash{$r}{$c} == 1){
				if ($Sig{$c}==1 or $Sig{$r}==1){
					$fillcolor = $red;
				}else{
					$fillcolor = $grey;
				}
			}
			$im->filledRectangle($x,$y,$x+$s,$y+$s,$fillcolor);
			$x +=($s+$g);
		}
		$y+=($s+$g);
	}

	# Frame the whole image:
	my ($w, $h) = $im->getBounds();
	$im->rectangle(0,0,$w-1,$h-1,$black);

	# NOTE(review): the result of this open is not checked.
	open (Result, ">".$ImageDir."/".$ImageName);
    binmode Result;
    print Result $im->png;
    close Result;

	# A thumbnail is only produced when the image exceeds 190 pixels in
	# either direction (see MakeThumbNail).
	my $ThN = MakeThumbNail($ImageDir,$ImageName,$w,$h,190);
	return ($ImageName,$ThN);
}
################################################################################
sub GetGoTermName{
# Looks up the name of a GO term in the file-level $T_NameRef hash.
################################################################################
	my ($GoId) = @_;
	return $T_NameRef->{$GoId};
}
################################################################################
sub Binomial {
# Binomial tail probability: the chance of seeing at least $Hits hits in a
# set of $SetSize genes when the hit probability is $BkgHits/$BkgSize.
# Computed as 1 minus the summed probabilities of 0 .. $Hits-1 hits.
################################################################################
	my ($SetSize, $Hits, $BkgSize, $BkgHits)=@_;
	my $bkg_freq = ($BkgHits/$BkgSize);

	# Add up the probabilities of every outcome below $Hits, highest first.
	my $Below = 0;
	my $X = $Hits-1;
	while ($X >= 0){
		$Below += Probability($SetSize, $X, $bkg_freq);
		$X--;
	}
	return 1-$Below;
}

################################################################################
sub Probability {
# Probability of exactly $hits hits in $T trials with hit probability $f.
# The product over n = $hits .. 1 of (f/n)*(remaining trials) is summed in
# log space, exponentiated, and multiplied by (1-f) to the power of the
# number of misses.
################################################################################
	my ($T, $hits, $f)=@_;
	my $Misses = $T-$hits;

	my $LogTerm = 0;
	my $Remaining = $T;
	my $n = $hits;
	while ($n >= 1){
		$LogTerm += log(($f/$n)*$Remaining);
		$Remaining--;
		$n--;
	}

	my $HitPart = exp($LogTerm);
	my $MissPart = (1-$f)**$Misses;
	return $HitPart*$MissPart;
}

################################################################################
sub ChiSqProb {
# Chi-square comparison of two proportions: $X1 hits out of $N against $Y1
# hits out of $M. Returns the probability that
# Statistics::Distributions::chisqrprob reports for the statistic at 1 degree
# of freedom.
################################################################################
# Chi-square = N*M*Sum[(xi/N-yi/M)^2/(xi+yi)]
  my ($X1, $Y1, $N, $M)=@_;

# The two classes compared: hits and non-hits.
  my @Classes = ([$X1, $Y1], [$N-$X1, $M-$Y1]);

# Calculate the Chi-Square:
  my $Sum = 0;
  foreach my $Class (@Classes){
		my ($InFirst, $InSecond) = @$Class;
		my $SqDiff = (($InFirst/$N)-($InSecond/$M))**2;
		# A zero difference contributes nothing (and is never divided).
		my $Term = ($SqDiff>0) ? $SqDiff/($InFirst+$InSecond) : 0;
		$Sum += $Term;
  }
  my $ChiSq = $N*$M*$Sum;

# Get the probability of this ChiSq value at 1 deg of freedom:
  my $Chi_prob=Statistics::Distributions::chisqrprob(1,$ChiSq);
  return $Chi_prob;
}

sub FisherExact {
# Placeholder for Fisher's exact test - not implemented. The body is empty,
# so a call returns no value. AnalyzeCateg calls it without arguments and
# does not use the result.

}
################################################################################
sub HyperGeometric {
# Hypergeometric point probability
#   choose(m, x) * choose(n-m, k-x) / choose(n, k)
# Returns nothing when $m, $n or $k fail the sanity checks below, and 0 when
# $x is larger than $k or is not a whole number.
################################################################################
	my ($x, $k, $m, $n) = @_;

	my $CountsUsable = ($m > 0 && $m == int($m) && $n > 0 && $n == int($n) &&
						$k > 0 && $k <= $m + $n);
	return unless ($CountsUsable);
	return 0 unless ($x <= $k && $x == int($x));

	my $Numerator = choose($m, $x) * choose($n - $m, $k - $x);
	return $Numerator / choose($n, $k);
}

################################################################################
sub choose {
# Binomial coefficient "n choose k"; 0 when $k is negative or larger than $n.
################################################################################
	my ($n, $k) = @_;

	return 0 if ($k > $n || $k < 0);

	# choose(n,k) equals choose(n,n-k): work with the smaller of the two.
	my $Steps = $k;
	$Steps = ($n - $k) if (($n - $k) < $k);

	# Multiply and divide alternately to keep the intermediate value small.
	my $result = 1;
	my $Factor = $n;
	for (my $j = 1; $j <= $Steps; $j++){
		$result *= $Factor;
		$result /= $j;
		$Factor--;
	}
	return $result;
}
################################################################################
sub Drawdag {
# Draws the GO graph (reported terms plus all their ancestors) of one
# category.
#   $ListRef     - array ref of GO ids to consider
#   $SigRef      - hash ref; terms with value 1 are filled red, others cyan
#   $C_FreqRef   - not used in this sub
#   $Cat         - category; used in the file names and passed to AddParents
#   $ImageDir    - directory for the .dot file, the image and the thumbnail
#   $InResultRef - hash ref; only terms with value 1 are drawn
# Connects to the database named by the file-level $data_source, $username
# and $password, and runs the external programs dot.exe and (through
# MakeThumbNail) convert.exe.
# Returns (image name, thumbnail name, image map rescaled for the thumbnail).
################################################################################
	my ($ListRef, $SigRef, $C_FreqRef, $Cat,$ImageDir, $InResultRef) = @_;
	my @GOids = @$ListRef;
	my %NodeInGraph;
	my %Edges;

	# NOTE(review): the message reports $@; DBI normally exposes connection
	# errors through $DBI::errstr - confirm which one is wanted here.
	my $dbh = DBI->connect($data_source,$username,$password) or
	die ("Can't connect to $data_source:$@");

	my $g = GraphViz->new(node=>{shape=>'box'},concentrate=>1);

	# Add every reported term as a filled node that links to its anchor in
	# the result table ("#<GO id>"), then hang its ancestors onto it.
	foreach my $node(@GOids){
		if ($NodeInGraph{$node} !=1 and $$InResultRef{$node} == 1){
			my $color;
			if ($$SigRef{$node} ==1 ){
				$color = 'red';
			}else{
				$color = 'cyan';
			}
			$g->add_node($node, label=>$$T_NameRef{$node},style=>'filled',
						 fillcolor=>$color,fontsize=>8,fontname=>'Arial',
						 URL=>"#$node");
			$NodeInGraph{$node}=1;
			$g = AddParents($node,$dbh,$Cat,\%NodeInGraph,\%Edges,$g);
		}
	}

    $dbh->disconnect;

	# Get the graph in raw form for input to dot
	my $text = $g->_as_debug;

	# Create the dot file
	my $dotName = "graph_$Cat.dot";
	open (Result, ">".$ImageDir."/".$dotName) or die "cant write result\n";
	print Result $text;
	close Result;

	# Make a system call to dot.exe to create a png file
	# ($ImageDir is appended to the current working directory)
    my $ImageName = "graph_$Cat.png";
	my $d = cwd();
	$d =~s/\s+$//;
	system "dot.exe -Tpng -o\"$d/$ImageDir/$ImageName\" \"$d/$ImageDir/$dotName\"";

	# Make a system call to dot.exe to create the image map
    my $image_map = `dot.exe -Tcmap \"$d/$ImageDir/$dotName\"`;

#	This is a simpler alternative, but uses an extra module:
#	use Image::Size;
#	my ($w,$h) = imgsize($ImageDir."/".$ImageName);
	# Load the image only to find out its size:
	my $myImage = newFromPng GD::Image($ImageDir."/".$ImageName);
	my ($w,$h) = $myImage->getBounds();
	undef $myImage;
	my $ThN = MakeThumbNail($ImageDir,$ImageName,$w,$h,360);
#	my $ThN = MakeThumbNail_GD($ImageDir,$ImageName,$myImage,$w,$h,360);

	# Calculate the scaling ratio and adjust the image map:
	# NOTE(review): the map is rescaled even when the image fits into 360
	# pixels and MakeThumbNail therefore made no thumbnail - confirm callers
	# handle that case.
	my $r = $w>$h ? $w/360 : $h/360;
	my $ThN_map = ResizeImageMap($image_map,$r);
	return ($ImageName,$ThN,$ThN_map);
}
################################################################################
sub AddParents{
# Adds all ancestors of a GO term to the graph, recursively.
#   $node     - GO id whose parents are looked up (GetParent)
#   $dbh      - database handle passed on to GetParent
#   $Cat      - category passed on to GetParent
#   $NodesRef - hash ref of nodes the caller has already added; parents found
#               in it are not added again. It is only read here.
#   $EdgesRef - hash ref recording the edges drawn so far ({parent}{child});
#               it is updated in place and shared by the whole recursion
#   $g        - the GraphViz object
# Returns the GraphViz object.
################################################################################
	my ($node,$dbh,$Cat,$NodesRef,$EdgesRef,$g)=@_;
	my ($parentsRef, $parents_labelRef) = GetParent($node,$Cat,$dbh);
	for my $i(0..$#{$parentsRef}){
		my $p = $$parentsRef[$i];
		my $p_label = $$parents_labelRef[$i];
		my $c = $node;

		# The edge table is shared through the reference (it used to be
		# copied on every call, so this check never saw edges drawn in other
		# branches). A known edge means this parent and everything above it
		# were handled when the edge was drawn: no duplicate edge, no second
		# walk up the hierarchy.
		next if ($EdgesRef->{$p}{$c});

		if ($$NodesRef{$p}!=1){
			$g->add_node($p,label=>$p_label,fontsize=>8,fontname=>'Arial');
		}
		$g->add_edge($p=>$c);
		$EdgesRef->{$p}{$c}=1;
		$g = AddParents($p,$dbh,$Cat,$NodesRef,$EdgesRef,$g);
	}
	return $g;
}
################################################################################
sub GetParent{
# Fetches the direct parents (graph_path.distance = 1) of a GO term from the
# database.
#   $GO_id    - accession of the term
#   $GO_categ - category text; it is mapped to the term_type value
#               biological_process, cellular_component or molecular_function
#   $dbh      - database handle
# Returns two parallel array refs: parent accessions and parent names.
################################################################################
	my ($GO_id,$GO_categ,$dbh)=@_;
	my (@Parent_id, @Parent_annot);

	my $query = "select p.acc, p.name from graph_path INNER JOIN term AS t ON "
	   		   ."(t.id = graph_path.term2_id) INNER JOIN term AS p ON "
			   ."(p.id = graph_path.term1_id) where t.acc = ? and "
			   ."graph_path.distance =1 and t.term_type = ? ";

	# Translate the category; when several words match, the last rule wins.
	my @Rules = ([qr/process/i,   "biological_process"],
				 [qr/component/i, "cellular_component"],
				 [qr/function/i,  "molecular_function"]);
	my $GO_categ_full;
	foreach my $Rule (@Rules){
		my ($Word, $FullName) = @$Rule;
		$GO_categ_full = $FullName if ($GO_categ =~ $Word);
	}

	my $sth = $dbh->prepare("$query");
	$sth->execute($GO_id, $GO_categ_full);
	while (my @Row = $sth->fetchrow_array){
		push (@Parent_id, $Row[0]);
		push (@Parent_annot, $Row[1]);
	}

	return (\@Parent_id,\@Parent_annot);
}
################################################################################
sub MakeThumbNail {
# Creates a thumbnail of an image with ImageMagick's convert.exe.
#   $ImageDir, $ImageName - location of the image; the thumbnail is written
#                           next to it with the prefix "Sm"
#   $w, $h                - size of the image
#   $Size                 - largest allowed width and height
# Returns the thumbnail file name, or undef when no image name is given or
# the image already fits into $Size x $Size (no thumbnail is made then).
################################################################################
	my ($ImageDir,$ImageName,$w,$h,$Size)=@_;

	return undef if (!defined $ImageName);

	my $Big = $ImageDir."/".$ImageName;
	my $ThN = "Sm".$ImageName;
	my $Small = $ImageDir."/".$ThN;

	if ($w > $Size or $h >$Size ){
		# use imagemagik to resize:
		# The arguments are passed as a list so that directory or file names
		# containing spaces reach the program as single arguments.
		my @Convert = ('convert.exe', '-geometry', "${Size}x${Size}+0+0",
					   $Big, $Small);
		system(@Convert) == 0
			or warn "Could not create thumbnail $Small (exit status $?)\n";
		return $ThN;
	}else{
		return undef;
	}
}
################################################################################
sub MakeThumbNail_GD {
# Thumbnail creation with GD alone, for use when ImageMagick is unavailable
# (the quality is poor).
#   $orig     - GD image to shrink
#   $ox, $oy  - its width and height
#   $Size     - largest allowed width and height
# The thumbnail is written to $ImageDir with the prefix "Sm_GD". Returns its
# file name, or undef when the image already fits into $Size x $Size.
################################################################################
	my ($ImageDir,$ImageName,$orig,$ox,$oy,$Size) = @_;
	my $ThN = "Sm_GD".$ImageName;
	my $Small = $ImageDir."/".$ThN;

	return undef unless ($ox > $Size or $oy > $Size);

	# Scale so that the longer side becomes $Size:
	my $r = $ox>$oy ? $ox / $Size : $oy / $Size;
	my ($NewW, $NewH) = ($ox/$r, $oy/$r);
	my $thumb = GD::Image->new($NewW,$NewH);
	$thumb->copyResized($orig,0,0,0,0,$NewW,$NewH,$ox,$oy);

	open (my $Out, ">$Small") or warn "cant write thumbnail";
	binmode $Out;
	print {$Out} $thumb->png;
	close $Out;
	return $ThN;
}
################################################################################
sub ResizeImageMap{
# Scales the coordinates of an HTML image map so that it fits a resized image.
#
# Arguments: $Input   - image map text, one tag per line
#            $ResizeF - factor by which every coordinate is divided
#
# Only lines carrying a coords="..." attribute are emitted; every other line
# of the input is dropped. Each emitted line consists of the text preceding
# the coords attribute, the rescaled (integer) coordinates and a closing ">".
# Anything following the coords attribute on the line is discarded.
#
# All coordinates of the attribute are scaled, so circle (x,y,r) and polygon
# (x1,y1,x2,y2,...) areas stay valid as well as rectangles.
#
# Returns the rescaled map as one string, or undef when no line had coords.
################################################################################
	my ($Input, $ResizeF) = @_;
	my $ReSizedMap;
	foreach my $Line (split(/\n/,$Input)){
		# $1 = text in front of the attribute, $2 = the coordinate list.
		next unless ($Line =~ m/^(.*?)coords="([^"]+)"/);
		my ($Before,$Coords) = ($1,$2);
		my @Scaled = map { int($_/$ResizeF) } split(/,/,$Coords);
		$ReSizedMap .= $Before."coords=\"".join(",",@Scaled)."\">\n";
	}
	return $ReSizedMap;
}
################################################################################
sub ReadDefaultConf {
# Reads the default settings for an organism from the file
# "Clench_<Organism>.conf" located in the directory of the running script.
#
# Argument: $Organism - name used to build the configuration file name
#
# File format: one "key=value" pair per line. Lines starting with "#" and
# empty lines are ignored, as are lines that do not contain a key and a value
# separated by "=". The key is everything before the FIRST "=", so values may
# themselves contain "=". All whitespace is removed from keys and values, and
# backslashes in values are turned into forward slashes.
#
# Returns the settings as a hash (flat key/value list).
# Dies when the configuration file cannot be opened.
################################################################################
	my ($Organism) = @_;
	my $dir = dirname($0);
	my $configfile = $dir."\\Clench_".$Organism.".conf";
	my %Conf;
	open (my $fh, "<", $configfile) or die "Cant read $configfile\n";
	while (my $line = <$fh>){
		chomp $line;
		next if ($line =~ m/^#/);
		next if ($line eq "");
		# Skip malformed lines instead of storing a junk entry for them.
		next unless ($line =~ m/^(.+?)=(.+)$/);
		my ($k,$v) = ($1,$2);
		$k =~ s/\s+//g;
		$v =~ s/\\/\//g;
		$v =~ s/\s+//g;
		$Conf{$k} = $v;
	}
	close $fh;
	return %Conf;
}
################################################################################
sub EasyView {
# Writes the navigation pages for browsing the results: the frameset page
# (via GenerateFrameset) and "ClusterList.html", which holds one link per
# cluster file.
#
# Arguments: the list of cluster file names.
#
# For every name, whitespace runs are replaced by "_" and the first ".txt" or
# ".xls" is removed; the link then points to <Prefix><basename>.html and opens
# in the frame named 'result'.
#
# Dies when ClusterList.html cannot be created or written.
################################################################################
	my @List = @_;
	GenerateFrameset();
	open (my $list, ">", "ClusterList.html")
	or die "Cannot create ClusterList.html : $!";
	print {$list} "Cluster Links",hr();
	foreach my $Cluster (@List){
		$Cluster =~ s/\s+/_/g;
		$Cluster =~ s/\.txt|\.xls//;
		my $Name = basename($Cluster);
		print {$list} "<font size=-1>";
		print {$list} a({-href=>$Prefix.$Name.".html",
						 -target=>'result'}, $Name), br();
		print {$list} "</font>";
	}
	# Buffered write errors only surface when the handle is closed.
	close ($list) or die "Cannot write ClusterList.html : $!";
}
################################################################################
sub GenerateFrameset {
# Writes the frameset page that shows the cluster list ("ClusterList.html")
# in a narrow left frame named "list" next to an initially empty frame named
# 'result'. The page is called "index.html"; when a file of that name already
# exists, the process id is inserted before the extension instead of
# overwriting it.
################################################################################
	my $IndexFile = (-e "index.html") ? "index".$$.".html" : "index.html";

	open (my $frames, ">", $IndexFile) or
	die "Cannot create $IndexFile : $!";

	my %Layout = (-cols         => "80, *",
				  -marginheight => '0',
				  -marginwidth  => '0',
				  -frameborder  => '1',
				  -border       => '1');

	my %ListFrame = ('-name'       => "list",
					 -src          => "ClusterList.html",
					 -marginwidth  => 12,
					 -marginheight => 12,
					 -border       => 1);

	my %ResultFrame = ('-name'       => 'result',
					   -marginwidth  => 0,
					   -marginheight => 0,
					   -border       => 0);

	print {$frames} frameset(\%Layout, frame(\%ListFrame), frame(\%ResultFrame));

	close $frames;
}
################################################################################
sub Read_Processed_AnnotationFile {
# Collects, from a tab-separated annotation file, the GO ids attached to a
# given set of genes.
#
# Arguments: $File    - path of the processed annotation file; the first
#                       column is the gene id and the third column the GO id
#            $ListRef - reference to the list of gene ids of interest
#
# Gene ids are compared case-insensitively (both sides are lower-cased).
#
# Returns a reference to a hash of hashes: {GO id}{lower-cased gene id}.
# The value is 1 when that gene/GO pair was seen exactly once in the file and
# 0 when it was seen more than once.
#
# Dies when the annotation file cannot be opened.
################################################################################
	my ($File,$ListRef) = @_;
	# A lexical handle keeps this sub from clobbering the global "File" handle.
	open (my $fh, "<", $File) or die "Cant open $File: $!\n";
	my %Hash;
	my %GenesHash;

	foreach my $g (@{$ListRef}){
		$GenesHash{lc $g} = 1;
	}

	while (my $line = <$fh>){
		chomp $line;
		# Only the gene id (column 1) and the GO id (column 3) are used.
		my ($Mips, undef, $GO_id) = split(/\t/,$line);
		my $Gene = lc $Mips;
		next unless ($GenesHash{$Gene});
		# First sighting stores 1, any repeat of the same pair resets it to 0.
		$Hash{$GO_id}{$Gene} = defined $Hash{$GO_id}{$Gene} ? 0 : 1;
	}
	close $fh;
	return \%Hash;
}
################################################################################
sub PerformSimulations {
# Runs $NumberOfSim simulations on random gene sets to estimate how many
# annotation categories and promoter elements come out "significant" by chance.
#
# Arguments: $T_GeneListRef      - reference to the list of background genes
#            $T_NumGenes         - number of background genes
#            $C_NumGenes         - number of genes to draw per random set
#            $ProcessedAnnotFile - processed annotation file
#            $T_FreqRef          - category frequencies in the background
#            $C_FreqRef          - category frequencies in the real cluster
#            $NumberOfSim        - number of simulations to run
#
# Each random set is drawn with replacement from the background list and is
# analysed with Read_Processed_AnnotationFile, TrimHashMatrix, AnalyzeMatrix,
# AnalyzeCateg and RetrieveAndAnalyzePromoters.
#
# Returns a list of:
#   - the average number (truncated to an integer) of significant categories
#     per random set,
#   - the average number (truncated) of significant promoter elements,
#   - ref to a hash: category => number of simulations in which the random
#     frequency was at least the frequency seen in the real cluster,
#   - ref to an array with the p-value hash ref of every simulation,
#   - ref to an array with the frequency hash ref of every simulation,
#   - ref to a hash: n (2..10) => number of significant categories, summed
#     over all simulations, whose random frequency was at least n.
#
# Dies with a division by zero when $NumberOfSim is 0.
################################################################################
	my ($T_GeneListRef,$T_NumGenes,$C_NumGenes,$ProcessedAnnotFile,
		$T_FreqRef,$C_FreqRef,$NumberOfSim)=@_;
	my $R_scr=0;
	my $R_spr=0;
	my @StoredPval;
	my @StoredFreq;
	my %Detailscr;
	my %SimPval;

	# The background list never changes, so copy it once and not per simulation.
	my @TotalGenes = @{$T_GeneListRef};

	print "\t - Current simulation number  ";
	for my $Simsample (1..$NumberOfSim){
		print $Simsample;
		print "," if ($Simsample < $NumberOfSim);

		# Make a random set (sampling with replacement):
		my @RandomList;
		for my $i (1..$C_NumGenes){
			push (@RandomList, $TotalGenes[int(rand()*$T_NumGenes)]);
		}

		# Analyze annotations for that set:
		my $R_AnnotationRef = Read_Processed_AnnotationFile($ProcessedAnnotFile,\@RandomList);
		my $R_Annot1Ref = TrimHashMatrix($R_AnnotationRef);
		my ($R_FreqRef,$R_GenesRef) = AnalyzeMatrix($R_Annot1Ref);
		my ($R_ResultRef, $R_SigRef, $R_PvaluesRef)
		= AnalyzeCateg($R_FreqRef,scalar@RandomList,$T_FreqRef,$T_NumGenes);

		my $scr = 0;
		foreach my $k (keys %{$R_SigRef}){
			# count how many nodes were significant in a random list
			# this will get us our FDR
			$scr++ if ($$R_SigRef{$k} == 1);
			for my $ngr (2..10){
				$Detailscr{$ngr}++ if ($$R_SigRef{$k} == 1 and $$R_FreqRef{$k} >=$ngr);
			}

			# count how many times a particular category turned up with
			# at least that many genes as in the real cluster. This puts
			# a p-value empirically, without using a distribution like
			# the hypergeometric or the binomial distribution
			if ($$R_FreqRef{$k} >= $$C_FreqRef{$k}){
				$SimPval{$k}++;
			}
		}

		# Analyze promoters for that set:
		my $R_SigRefp = RetrieveAndAnalyzePromoters(\@RandomList);
		my $spr = 0;
		foreach my $p (keys %{$R_SigRefp}){
			# count how many nodes were significant in a random list
			# this will get us our FDR
			$spr++ if ($$R_SigRefp{$p} == 1);
		}
		$R_scr +=$scr;
		$R_spr +=$spr;
		$StoredPval[$Simsample-1] = $R_PvaluesRef;
		$StoredFreq[$Simsample-1] = $R_FreqRef;
	}
	print "\n";
	# Calculate the average number of enriched nodes and TFBS
	# in the random sets
	my $Av_scr = int($R_scr/$NumberOfSim);
	my $Av_spr = int($R_spr/$NumberOfSim);

	return ($Av_scr,$Av_spr,\%SimPval,\@StoredPval,\@StoredFreq,\%Detailscr);
}
################################################################################
sub OptimizePvalueCutoff {
# Searches for a p-value cutoff that lowers the false discovery rate (FDR)
# and re-labels the categories accordingly.
#
# Arguments: $ResultRef        - ref to hash: category => comma separated
#                                result string whose first field is the
#                                significance label ("Yes" or "")
#            $SigRef           - ref to hash: category => 1/0 significance flag
#            $PvaluesRef       - ref to hash: category => p-value
#            $RandomPvaluesRef - ref to array holding one p-value hash ref per
#                                simulation
#            $oldFDR           - FDR obtained with the initial cutoff
#            $FDRcutoff        - FDR regarded as acceptable
#
# The cutoffs 0.05, 0.045, ... 0.005 are tried in that order. At each cutoff
# the FDR is (average number of random p-values <= cutoff per simulation,
# truncated to an integer) / (number of real p-values <= cutoff), rounded to
# three decimals. A cutoff is adopted when its FDR does not exceed $oldFDR.
# The search ends when the FDR reaches $FDRcutoff or when no real category is
# left below the cutoff; in the latter case the last adopted cutoff is kept.
#
# $ResultRef and $SigRef are modified in place for every key of $PvaluesRef.
#
# Returns ($ResultRef, $SigRef, number of significant categories,
#          int(FDR * number of significant categories)).
#
# Dies with a division by zero when $RandomPvaluesRef holds no simulations.
################################################################################
	my ($ResultRef, $SigRef, $PvaluesRef, $RandomPvaluesRef, $oldFDR, $FDRcutoff)=@_;
	my $newcutoff = 0.05;
	my $newFDR = $oldFDR;
	my $NumSim = scalar(@{$RandomPvaluesRef});

	for my $step (0..9){
		# Derive each cutoff from integers: repeatedly subtracting 0.005 drifts
		# away from the nominal value and can mis-classify p-values that lie
		# exactly on a cutoff.
		my $cutoff = (50 - 5*$step)/1000;

		my $sc = grep { $$PvaluesRef{$_} <= $cutoff } keys %{$PvaluesRef};
		my $scr = 0;
		foreach my $Simulation (@{$RandomPvaluesRef}){
			$scr += grep { $_ <= $cutoff } values %{$Simulation};
		}
		my $Av_scr = int ($scr/$NumSim);

		# If the number of significant categories is zero then no point in
		# continuing. $newcutoff already holds the cutoff at which $newFDR
		# was obtained, so it is kept as it is.
		last if ($sc == 0);

		# calculate the current FDR and update the cutoff
		my $FDR = sprintf("%.3f",$Av_scr/$sc);
		if ($FDR <= $oldFDR){
			$newFDR = $FDR;
			$newcutoff = $cutoff;
			if ($newFDR <= $FDRcutoff){
				print "\t - New FDR = $newFDR at p-value $newcutoff\n";
				last;
			}
		}
	}

	if ($newFDR > $FDRcutoff){
		print "\t - Lowest FDR = $newFDR at p-value $newcutoff\n";
	}

	# Modify the ResultRef and SigRef using the newcutoff:
	my $sigcount = 0;
	foreach my $k (keys %{$PvaluesRef}){
		my $IsSig = ($$PvaluesRef{$k} <= $newcutoff) ? 1 : 0;
		$sigcount += $IsSig;
		$$SigRef{$k} = $IsSig;
		# Fields: label, RE, Chi_prob, Hyp_prob, p_val - only the label changes.
		my @Fields = split(/,/,$$ResultRef{$k},6);
		$$ResultRef{$k} = join(",",($IsSig ? "Yes" : ""),@Fields[1..4]);
	}
	return ($ResultRef, $SigRef, $sigcount, int($newFDR*$sigcount));
}
################################################################################
sub OptimizePvalueCutoffByCategSize {
# Like OptimizePvalueCutoff, but searches a separate p-value cutoff for
# categories holding at least 2, 3, ... 10 genes.
#
# Arguments: $ResultRef        - ref to hash: category => comma separated
#                                result string whose first field is the
#                                significance label ("Yes" or "")
#            $SigRef           - ref to hash: category => 1/0 significance flag
#            $PvaluesRef       - ref to hash: category => p-value
#            $C_FreqRef        - ref to hash: category => number of genes of
#                                the cluster in that category
#            $RandomPvaluesRef - ref to array with one p-value hash ref per
#                                simulation
#            $StoredFreqRef    - ref to array with one frequency hash ref per
#                                simulation (same order as $RandomPvaluesRef)
#            $OldFDRRef        - ref to hash: size (2..10) => FDR at the initial
#                                cutoff, or the string "undef" to skip a size
#            $FDRcutoff        - FDR regarded as acceptable
#
# For every size the cutoffs 0.05, 0.045, ... 0.005 are tried as in
# OptimizePvalueCutoff, counting only categories with at least that many
# genes. A category ends up with the cutoff found for the largest size it
# qualifies for; categories without any cutoff are compared against undef.
#
# $ResultRef and $SigRef are modified in place for every key of $PvaluesRef.
#
# Returns ($ResultRef, $SigRef, number of significant categories in $SigRef,
#          int(mean FDR over the processed sizes * that number)).
################################################################################
	my ($ResultRef, $SigRef, $PvaluesRef, $C_FreqRef, $RandomPvaluesRef,
		$StoredFreqRef, $OldFDRRef, $FDRcutoff)=@_;

	my %FDRHash;
	my %AdjustedCutoff;
	for my $CatSize (2..10){
		next if ($$OldFDRRef{$CatSize} eq "undef");
		print "\t - FDR=",$$OldFDRRef{$CatSize}," for terms with $CatSize or more genes|";
		my $newcutoff = 0.05;
		my $Reported = 0;
		$FDRHash{$CatSize} = $$OldFDRRef{$CatSize};

		for my $step (0..9){
			# Derive each cutoff from integers: repeatedly subtracting 0.005
			# drifts away from the nominal value.
			my $cutoff = (50 - 5*$step)/1000;

			my $sc = 0;
			foreach my $k (keys %{$PvaluesRef}){
				$sc++ if ($$PvaluesRef{$k} <= $cutoff and
						  $$C_FreqRef{$k} >=$CatSize);
			}
			my $scr =0;
			for my $Simulation (0..$#{$RandomPvaluesRef}){
				foreach my $k (keys %{$$RandomPvaluesRef[$Simulation]}){
					$scr++ if ($$RandomPvaluesRef[$Simulation]{$k} <= $cutoff and
							   $$StoredFreqRef[$Simulation]{$k} >=$CatSize);
				}
			}
			my $Av_scr = int ($scr/scalar(@{$RandomPvaluesRef}));

			# If the number of significant categories is zero then no point
			# in continuing. $newcutoff already holds the cutoff at which the
			# stored FDR was obtained, so it is kept as it is.
			last if ($sc == 0);

			# calculate the current FDR and update the cutoff
			my $FDR = sprintf("%.3f",$Av_scr/$sc);
			if ($FDR <= $$OldFDRRef{$CatSize}){
				$FDRHash{$CatSize} = $FDR;
				$newcutoff = $cutoff;
				if ($FDRHash{$CatSize} <= $FDRcutoff){
					print $FDRHash{$CatSize}," at cutoff $newcutoff\n";
					$Reported = 1;
					last;
				}
			}
		}

		# Always finish the progress line started above, also when the search
		# ended without reaching the acceptable FDR inside the loop.
		if (!$Reported){
			print $FDRHash{$CatSize}," at cutoff $newcutoff\n";
		}
		# Store the acceptable cutoff for each category:
		foreach my $k (keys %{$PvaluesRef}){
			if ($$C_FreqRef{$k} >= $CatSize){
				$AdjustedCutoff{$k} = $newcutoff;
			}
		}
	}

	# Modify the ResultRef and SigRef using the adjusted cutoffs:
	foreach my $k (keys %{$PvaluesRef}){
		my $IsSig = ($$PvaluesRef{$k} <= $AdjustedCutoff{$k}) ? 1 : 0;
		$$SigRef{$k} = $IsSig;
		# Fields: label, RE, Chi_prob, Hyp_prob, p_val - only the label changes.
		my @Fields = split(/,/,$$ResultRef{$k},6);
		$$ResultRef{$k} = join(",",($IsSig ? "Yes" : ""),@Fields[1..4]);
	}

	# Count the number of categories that are still significant:
	my $Count=0;
	for my $i (keys %{$SigRef}){
		$Count++ if ($$SigRef{$i}==1);
	}

	# Estimate the average FDR across categories of different sizes.
	# Iterate over the keys only: a bare %FDRHash would also yield the values,
	# and a value equal to one of the sizes would be added a second time.
	my $newFDR_acrossCat = 0;
	my $NumSizes = scalar(keys %FDRHash);
	foreach my $key (keys %FDRHash){
		$newFDR_acrossCat += $FDRHash{$key};
	}
	# No size was processed when every old FDR is "undef".
	$newFDR_acrossCat /= $NumSizes if ($NumSizes > 0);
	$newFDR_acrossCat *= $Count;

	return ($ResultRef, $SigRef, $Count, int($newFDR_acrossCat));
}