#! /usr/bin/perl -w

use strict;
use DBI;            
use Data::Dumper;              
# Read the UniProt IDs from the database, obtain all their synonyms from Ensembl and store them.


# =======================
# = DATABASE CONNECTION =
# =======================


# my $database='DDR_120115';
# my $hostname='demetriusIB';
# my $dsn = "DBI:mysql:database=$database;host=$hostname";
# my $user='ddr_admin';
# my $password='ddr_2014';
# my $dbh = DBI->connect($dsn, $user,$password);

# Connection settings for the local copy of the DDR database.
my $database='DDR_120115';
my $hostname='localhost';
my $dsn = "DBI:mysql:database=$database;host=$hostname";
my $user='eandres';
my $password='';

# DBI->connect returns undef on failure; stop here with the driver's message
# instead of failing later with a method call on an undefined handle.
my $dbh = DBI->connect($dsn, $user, $password)
	or die "Cannot connect to $dsn as '$user': $DBI::errstr";

# Both lookups map a column value to its row of the `entity` table:
#   $uniprot_ids->{<Uniprot Id>}    = { id_entity => ..., 'Uniprot Id'   => ... }
#   $uniprot_name->{<Uniprot Name>} = { id_entity => ..., 'Uniprot Name' => ... }
my $uniprot_ids = $dbh->selectall_hashref(
	"SELECT id_entity,`Uniprot Id` FROM entity", 'Uniprot Id'
) or die $dbh->errstr;
my $uniprot_name = $dbh->selectall_hashref(
	"SELECT id_entity,`Uniprot Name` FROM entity", 'Uniprot Name'
) or die $dbh->errstr;



# ====================
# = CREATE DATABASE  =
# ====================

# Rebuild the `synonym` table from scratch: drop any previous copy, then
# create it again. The statements run in order and the script aborts with
# the database error message as soon as one of them fails.
for my $ddl (
	"DROP TABLE IF EXISTS `synonym` ",
	"CREATE TABLE  `synonym` (
  `id_entity` MEDIUMINT NOT NULL,
  `synonym` varchar(50) NOT NULL,
  FOREIGN KEY (id_entity) REFERENCES entity(id_entity) ON UPDATE CASCADE ON DELETE CASCADE,
  PRIMARY KEY(id_entity,synonym)
) ENGINE=InnoDB DEFAULT CHARSET=utf8",
) {
	$dbh->do($ddl) || die $dbh->errstr;
}

# Registry of rows already written to `synonym`, indexed as
# $all_saved->{id_entity}->{synonym}; consulted before every INSERT.
my $all_saved;

# ==================
# = FIND SYNONYMS  =
# ==================

use Bio::EnsEMBL::Registry;
my $registry = 'Bio::EnsEMBL::Registry';

# Load the registry from the public Ensembl database server, connecting as
# the anonymous user.
$registry->load_registry_from_db(
    -host => 'ensembldb.ensembl.org', # alternatively 'useastdb.ensembl.org'
    -user => 'anonymous'
);

# Adaptor used below to look genes up by external name. Fail right away with
# a clear message if the registry cannot provide it, rather than crashing on
# the first method call made through an undefined adaptor.
my $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' )
    or die "Could not obtain the Human/Core/Gene adaptor from the Ensembl registry";

#my $uniprot_ids->{"P52701"}->{id_entity}=54;


# Prepared once and reused for every row. The placeholders let the driver do
# the quoting, so names containing quotes or backslashes are stored correctly.
my $insert_synonym_sth = $dbh->prepare("INSERT INTO `synonym` VALUES(?,?)")
	or die $dbh->errstr;

# For every UniProt id read from the `entity` table, collect the id itself,
# the names of the matching Ensembl genes and all synonyms of their database
# cross-references, then store each one once per entity in `synonym`.
foreach my $id_name (sort keys %$uniprot_ids){
	# An empty key (e.g. produced by a NULL `Uniprot Id`) identifies nothing:
	# do not query Ensembl with it and do not store an empty synonym.
	next if $id_name eq '';

	my $db_id = $uniprot_ids->{$id_name}->{id_entity};

	#gathering synonyms
	#print STDERR "For $id_name\n";
	my %saved = ($id_name => 1);
	my @genes = @{ $gene_adaptor->fetch_all_by_external_name($id_name) };
	foreach my $gene (@genes){
		# Gene names are stored as returned, synonyms are upper-cased.
		# NOTE(review): under a case-insensitive collation a mixed-case gene
		# name and its upper-cased synonym may collide on the primary key;
		# confirm the collation of the `synonym` table.
		foreach my $gene_label (scalar $gene->external_name, scalar $gene->display_id){
			next unless defined $gene_label && $gene_label ne '';
			$saved{$gene_label} = 1;
		}
		foreach my $dbe (@{ $gene->get_all_DBEntries() }) {
			foreach my $syn (@{ $dbe->get_all_synonyms }) {
				next unless defined $syn && $syn ne '';
				$saved{uc($syn)} = 1;
			}
		}
	}

	#Saving into database
	foreach my $result (keys %saved){
		next if exists $all_saved->{$db_id}->{$result};
		#print STDERR "LOG:: INSERT INTO `synonym` VALUES($db_id,\"$result\")\n";
		# On failure log the equivalent statement and carry on with the rest.
		$insert_synonym_sth->execute($db_id, $result)
			or print STDERR "INSERT INTO `synonym` VALUES($db_id,\"$result\")\n";
		$all_saved->{$db_id}->{$result}++;
	}
}

# ======================================
# = Secondary Accessions from UniProt  =
# ======================================

require LWP::UserAgent;

my $ua = LWP::UserAgent->new;
#$ua->timeout(10);

#my $all_saved2;

# Prepared once and reused for every accession; the placeholders let the
# driver take care of quoting the values.
my $insert_accession_sth = $dbh->prepare("INSERT INTO `synonym` VALUES(?,?)")
	or die $dbh->errstr;

# For every UniProt entry name read from the `entity` table, download the
# flat-file record, extract its entry name (ID line) and accession numbers
# (AC lines) and store each accession as a synonym of the matching entity.
foreach my $id_name (sort keys %$uniprot_name){
	# An empty key (e.g. produced by a NULL `Uniprot Name`) cannot be fetched.
	next if $id_name eq '';

	my $response = $ua->get("http://www.uniprot.org/uniprot/$id_name.txt");
	die $response->status_line unless $response->is_success;

	# The record is parsed directly from the response body, so no shared
	# temporary file with a predictable name is needed.
	my $entry_text = $response->decoded_content;
	$entry_text = '' unless defined $entry_text;

	my $id;
	my @acc;
	foreach my $line (split /\n/, $entry_text){
		if($line =~ /^ID   ([a-zA-Z0-9_]+)/) {
			# Entry identifier
			$id = $1;
		}elsif($line =~ /^AC   (.+)/) {
			# Accession numbers; they may span several AC lines
			my $ac = $1;
			# Remove all whitespace
			$ac =~ s/\s+//g;

			# Split on semicolons and append to the list of accessions
			push(@acc, split(/;/, $ac));
		}
	}

	# Without an ID line there is nothing to look the entity up by.
	if(!defined $id){
		print STDERR "No ID line found in the UniProt entry for $id_name\n";
		next;
	}

	# exists() avoids autovivifying an entry for a name that is not in the table.
	my $db_id = exists $uniprot_name->{$id} ? $uniprot_name->{$id}->{id_entity} : undef;
	if($db_id){
		#print "$id\t[$db_id]\t". join(",",@acc) ."\n";
		foreach my $ac (@acc){
			#print "$id\t[$db_id]\t$ac\n";#. join(",",@acc) ."\n";
			next if exists $all_saved->{$db_id}->{$ac};
			#print "LOG:: INSERT INTO `synonym` VALUES($db_id,\"$ac\")\n";
			# On failure log the equivalent statement and carry on with the rest.
			$insert_accession_sth->execute($db_id, $ac)
				or print STDERR "INSERT INTO `synonym` VALUES($db_id,\"$ac\")\n";
			$all_saved->{$db_id}->{$ac}++;
		}
	}
	else{
		print STDERR "No hay id para $id\n";
	}
}