#!/usr/bin/perl
# Wei Wu
#
# Builds gene annotation tables from Gene Ontology (GO) downloads.
#
#   1. Downloads the three GO ontology files and builds a map from GO id
#      (GO:nnnnnnn) to term name.
#   2. For every organism in %files, downloads and unpacks its gene
#      association file and writes GO_<organism>.txt with two tab-separated
#      columns: "Names" (id, symbol, aliases) and "Description" (GO term
#      names joined with "| ").
#   3. Downloads searchdata.txt and merges selected columns of it into
#      GO_Human.txt, producing HGNC_HumanAnnotation.txt.
#
# All intermediate downloads are removed once they have been processed.
# Requires the external `gunzip` program on PATH.

use strict;
use warnings;
use LWP;

my $GoBase  = 'http://www.geneontology.org/ontology/';
my @GoFiles = ('function.ontology', 'process.ontology', 'component.ontology');
my $urlbase = 'http://www.geneontology.org/gene-associations/';
my %files   = (
    'S_cerevisiae' => 'gene_association.sgd',
    'Drosophila'   => 'gene_association.fb',
    'Mouse'        => 'gene_association.mgi',
    'Arabidopsis'  => 'gene_association.tair',
    'C_elegans'    => 'gene_association.wb',
    'Rat'          => 'gene_association.rgd',
    'S_pombe'      => 'gene_association.GeneDB_Spombe',
    'Zebrafish'    => 'gene_association.zfin',
    'Human'        => 'gene_association.goa_human',
);

my $browser = LWP::UserAgent->new;

# Performs an HTTP GET.
# Parameters: the URL, and then, optionally, any header lines
#             (key, value, key, value).
# Returns:    in list context (content, status_line, is_success, response);
#             in scalar context the content, or nothing on failure.
sub do_GET {
    my $resp = $browser->get(@_);
    return ($resp->content, $resp->status_line, $resp->is_success, $resp)
        if wantarray;
    return unless $resp->is_success;
    return $resp->content;
}

# Downloads $url and stores the raw bytes in $path.
# Dies if the request fails, so an HTTP error page is never mistaken for data.
sub fetch_to_file {
    my ($url, $path) = @_;

    my ($content, $status, $ok) = do_GET($url);
    die "GET $url failed: $status\n" unless $ok;

    open my $out, '>', $path or die "Cannot write $path: $!\n";
    binmode $out;
    print {$out} $content or die "Cannot write $path: $!\n";
    close $out or die "Cannot close $path: $!\n";
    return;
}

# Builds the "Names" cell for a gene: the id, then the symbol unless it is
# already one of the aliases, then the '|'-separated aliases re-joined
# with ", ".
sub gene_names {
    my ($id, $symbol, $alias_field) = @_;

    my @aliases = split /\|/, $alias_field;
    my $names   = $id;

    # Exact comparison: the symbol must not be treated as a regex (symbols
    # may contain metacharacters) nor be suppressed by a mere substring hit.
    $names .= ", $symbol" unless grep { $_ eq $symbol } @aliases;
    $names .= ', ' . join(', ', @aliases);
    return $names;
}

# ---------------------------------------------------------------------------
# Step 1: GO id -> term name, from the ontology files.
# ---------------------------------------------------------------------------
my %go_name_of;

for my $ontology (@GoFiles) {
    fetch_to_file($GoBase . $ontology, $ontology);

    open my $in, '<', $ontology or die "Cannot read $ontology: $!\n";
    while (my $line = <$in>) {
        next if $line =~ /^!/;    # comment line
        chomp $line;

        # "<name> ; <ids...>" -- only the first two ';' fields are used.
        my ($name, $ids) = split /;/, $line;
        next unless defined $name && defined $ids;

        # Take the id from a checked capture; lines without a GO id are
        # skipped instead of being filed under a stale match.
        next unless $ids =~ /(GO:\d{7})/;
        my $go_id = $1;

        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        # Drop the leading relationship marker.  The '$' must be escaped:
        # an unescaped "$\" inside a regex interpolates the output record
        # separator variable rather than matching a dollar sign.
        $name =~ s/^[\$<%]//;

        $go_name_of{$go_id} = $name;
    }
    close $in;
    unlink $ontology;
}

# ---------------------------------------------------------------------------
# Step 2: one GO_<organism>.txt per gene association file.
# ---------------------------------------------------------------------------
for my $organism (sort keys %files) {
    my $assoc   = $files{$organism};
    my $archive = "$assoc.gz";
    my $report  = "GO_${organism}.txt";

    fetch_to_file($urlbase . $archive, $archive);

    # List form avoids the shell; -f overwrites a leftover from an aborted run.
    system('gunzip', '-f', $archive) == 0
        or die "gunzip $archive failed (status $?)\n";

    open my $in,  '<', $assoc  or die "Cannot read $assoc: $!\n";
    open my $out, '>', $report or die "Cannot write $report: $!\n";
    print {$out} "Names\tDescription\n";

    my %names_of;           # gene id -> "Names" cell
    my %desc_of;            # gene id -> accumulated GO term names
    my $last_id;            # gene id of the previous data row
    my $last_go_id = '';    # GO id of the previous data row

    while (my $line = <$in>) {
        next if $line =~ /^!/;    # comment line
        chomp $line;

        my @col = split /\t/, $line;
        next if @col < 5;         # need at least id, symbol and GO id

        # Column 1 is the gene id, column 4 the GO id, column 10 the
        # '|'-separated aliases; column 2 is presumably the gene symbol.
        my ($id, $symbol, $go_id) = @col[1, 2, 4];
        my $alias_field = defined $col[10] ? $col[10] : '';

        # A change of gene id flushes the previous gene's row.
        if (defined $last_id && $last_id ne $id) {
            print {$out} "$names_of{$last_id}\t$desc_of{$last_id}\n";
        }
        $last_id = $id;

        if (!exists $names_of{$id}) {
            $names_of{$id} = gene_names($id, $symbol, $alias_field);
            $desc_of{$id}  = '';
        }

        my $term = defined $go_name_of{$go_id} ? $go_name_of{$go_id} : '';
        if ($desc_of{$id} eq '') {
            $desc_of{$id} = $term;
        }
        elsif ($go_id ne $last_go_id) {
            # Consecutive rows with the same GO id are listed only once.
            $desc_of{$id} .= "| $term";
        }
        $last_go_id = $go_id;
    }

    # Flush the final gene, if the file had any data rows at all.
    if (defined $last_id) {
        print {$out} "$names_of{$last_id}\t$desc_of{$last_id}\n";
    }

    close $out or die "Cannot close $report: $!\n";
    close $in;
    unlink $assoc;
}

# ---------------------------------------------------------------------------
# Step 3: merge searchdata.txt columns into the human table.
# ---------------------------------------------------------------------------
my $search_file = 'searchdata.txt';
fetch_to_file('http://www.gene.ucl.ac.uk/public-files/nomen/searchdata.txt',
    $search_file);

# Whole searchdata row, keyed by its column 17 (presumably the same kind of
# id that appears first in the "Names" cell of GO_Human.txt -- verify).
# NOTE(review): no header line is skipped here; a header row would only add
# one entry keyed by its column title, which no gene id is expected to match.
my %search_row_for;

open my $search, '<', $search_file or die "Cannot read $search_file: $!\n";
while (my $row = <$search>) {
    chomp $row;
    my $key = (split /\t/, $row)[17];
    next unless defined $key && length $key;
    $search_row_for{$key} = $row;
}
close $search;

open my $go, '<', 'GO_Human.txt' or die "Cannot read GO_Human.txt: $!\n";
open my $merged, '>', 'HGNC_HumanAnnotation.txt'
    or die "Cannot write HGNC_HumanAnnotation.txt: $!\n";

# Copy the column header through unchanged.
my $header = <$go>;
print {$merged} $header if defined $header;

while (my $line = <$go>) {
    chomp $line;
    my $out_line = $line;

    my ($names, $desc) = split /\t/, $line, 2;
    if (defined $names) {
        $desc = '' unless defined $desc;

        # The lookup key is the first comma-separated entry of "Names".
        my ($id) = split /,/, $names;
        $id = '' unless defined $id;
        $id =~ s/\s+$//;

        if (exists $search_row_for{$id}) {
            my @info = split /\t/, $search_row_for{$id};

            # Append the non-empty extra name columns, then prefix the
            # description with column 2 of the searchdata row.
            for my $i (3, 7, 11, 16) {
                $names .= ", $info[$i]" if $info[$i];
            }
            my $prefix = defined $info[2] ? $info[2] : '';
            $out_line = "$names\t$prefix:$desc";
        }
    }

    print {$merged} "$out_line\n";
}

close $go;
close $merged or die "Cannot close HGNC_HumanAnnotation.txt: $!\n";
unlink $search_file;