#! /usr/bin/perl -w

use strict;
use Cwd;
                 
#read UNIPROT IDs FROM DATABASE;
use DBI;            

# ===========
# = OPTIONS =
# ===========

# Behaviour switches (1 = on, 0 = off).
my ($UPDATE_SIFTS, $DOWNLOAD_PDB, $UPDATE_PDB) = (
    1,    # when on, the SIFTS mapping file is downloaded
    1,    # when on, PDB files are downloaded; when off they are only listed
    0,    # when on, PDB files that are already present are downloaded again
);

# Name of the SIFTS PDB-chain/UniProt mapping file and the remote directory
# it is fetched from.
my $SIFTS_file = "pdb_chain_uniprot.tsv";
my $SIFTS_URL  = 'ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/';

# Base URL the PDB files are fetched from, and the local directory (relative
# to the working directory) they are stored in.
my $PDB_URL  = "http://files.rcsb.org/download/";
my $PDBs_DIR = "../Web/data/pdb/";


# Download a single PDB file into $PDBs_DIR using curl.
#
# Parameter: the file name to fetch (the caller passes "<pdb id>.pdb"); it is
#            appended to $PDB_URL for the remote location and to $PDBs_DIR
#            for the local destination.
# Returns:   1 when the file was downloaded and moved into place, 0 otherwise.
#
# The transfer goes to a temporary "<name>.part" file that is renamed only
# after curl succeeds, so an interrupted or failed download never leaves a
# file that a later existence check would mistake for a complete one.
# The prototype is kept so existing call sites parse exactly as before.
sub download_pdb($){
    my ($pdb_f) = @_;

    my $destination = $PDBs_DIR . $pdb_f;
    my $partial     = $destination . '.part';

    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as if it were the requested file.
    my @command = ('curl', '--fail', '-o', $partial, $PDB_URL . $pdb_f);
    print STDERR join(' ', @command), "\n";

    # List-form system() runs curl directly, without a shell, so nothing in
    # the file name can be interpreted as shell syntax.
    if (system(@command) != 0) {
        warn "download of $pdb_f failed (wait status $?)\n";
        unlink $partial;
        return 0;
    }

    if (!rename($partial, $destination)) {
        warn "cannot move $partial to $destination: $!\n";
        unlink $partial;
        return 0;
    }

    return 1;
}


# =======================
# = DATABASE CONNECTION =
# =======================


# Connection parameters for the MySQL database holding the `entity` table.
my $database='DDR_120115';
my $hostname='demetriusIB';
my $dsn = "DBI:mysql:database=$database;host=$hostname";
# NOTE(review): credentials are hard-coded in the source; consider moving
# them to a protected configuration file or the environment.
my $user='ddr_admin';
my $password='ddr_2014';

# Stop immediately with the driver's message when the connection cannot be
# established; otherwise the first method call on the undefined handle would
# die with an unrelated "Can't call method" error.
my $dbh = DBI->connect($dsn, $user, $password)
    or die "cannot connect to $dsn: $DBI::errstr";

# Hash reference keyed by `Uniprot Id`; each value is the matching row
# (id_entity, Uniprot Id) from the entity table.
my $uniprot_ids = $dbh->selectall_hashref("SELECT id_entity,`Uniprot Id` FROM entity",'Uniprot Id')
    or die $dbh->errstr;


# Download and unpack the SIFTS mapping file when a refresh is requested or
# when no local copy exists yet. (The existence test was previously inverted:
# it downloaded only when the file was already there.)
if ($UPDATE_SIFTS || !-e $SIFTS_file){
    my $archive = $SIFTS_file . ".gz";
    my $url     = $SIFTS_URL . $archive;

    # Run each tool directly (no shell) and check its status: system()
    # returns 0 on success. gunzip is only attempted after a successful
    # download, and -f lets it replace an older unpacked copy.
    if (system('curl', '-O', $url) != 0) {
        warn "ops! something went wrong downloading the uniprot-pdb file $url (wait status $?)\n";
    }
    elsif (system('gunzip', '-f', $archive) != 0) {
        warn "ops! something went wrong unpacking $archive (wait status $?)\n";
    }
}

# CREATE DB 

# Rebuild the pdb_rel table from scratch on every run.
$dbh->do("DROP TABLE IF EXISTS `pdb_rel` ") or die $dbh->errstr;

# start_e/end_e hold the UniProt sequence range, start_pdb/end_pdb the PDB
# residue range of the same segment. end_pdb is signed like start_pdb:
# a segment whose PDB start can be negative can also end on a negative
# residue number, which an unsigned column would reject or clip.
$dbh->do("CREATE TABLE  `pdb_rel` (
  `id_entity` mediumint(9) unsigned NOT NULL,
  `pdb` varchar(45) NOT NULL,
  `start_e` int(10) unsigned NOT NULL,
  `start_pdb` int(10)  NOT NULL,
  `end_e` int(10) unsigned NOT NULL,
  `end_pdb` int(10)  NOT NULL,
  `chain` varchar(45) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8") or die $dbh->errstr;

# INSERT DATABASES  

# Read the SIFTS mapping file, insert one pdb_rel row for every line whose
# UniProt accession is known to the entity table, and make sure the
# corresponding PDB file is available locally.
open(my $sifts_fh, '<', $SIFTS_file)
    or die "cannot open uniprot-pdb file '$SIFTS_file': $!";

# skip 2 lines from header
for (1 .. 2) {
    my $header_line = <$sifts_fh>;
}

# The statement is the same for every row, so prepare it once up front.
my $insert_sth = $dbh->prepare(
    'INSERT INTO `pdb_rel` (id_entity,pdb,chain,start_e,start_pdb,end_e,end_pdb) 
                  VALUES ((select id_entity from entity where `Uniprot Id`=?),?,?,?,?,?,?)'
) or die $dbh->errstr;

# PDB ids whose local file has already been handled during this run. The
# mapping file has one line per chain segment, so the same id recurs.
my %pdb_handled;

while (my $line = <$sifts_fh>) {
    chomp $line;
    my ($pdb,$chain,$sp_id,$res_s,$res_e,$pdb_s,$pdb_e,$sp_s,$sp_e) = split /\t/, $line;

    # Ignore malformed (too short) lines and accessions not in the database.
    next unless defined($sp_id) && $uniprot_ids->{$sp_id};
    #next if ($sp_e-$sp_s<30);

    # A failed insert is reported with the offending line and skipped.
    $insert_sth->execute($sp_id,$pdb,$chain,$sp_s,$pdb_s,$sp_e,$pdb_e)
        or print STDERR join(" ",($pdb,$chain,$sp_id,$res_s,$res_e,$pdb_s,$pdb_e,$sp_s,$sp_e)),"\n";

    # download pdb: decide once per PDB id, not once per chain segment
    next if $pdb_handled{$pdb}++;

    my $pdb_file = $pdb.".pdb";
    if (! -e $PDBs_DIR.$pdb_file) {
        print STDERR $pdb_file," not found. Downloading\n";
        download_pdb($pdb_file) if $DOWNLOAD_PDB;
    }
    else {
        print STDERR $pdb_file," found.";
        if ($UPDATE_PDB) {
            print STDERR "Updating\n";
            download_pdb($pdb_file) if $DOWNLOAD_PDB;
        }
        else {
            print STDERR "Skipping\n";
        }
    }
}

close($sifts_fh);

