merlin_search.pl

{
/* ______________________________________________________________________________
                                            Copyright (C) DataAspects, Corp. 1998
  merlin_search.pl
  Objective: To run a Merlin search on any open pool allowing the 
             user to select the pool, search type, search field, 
             search value and number of hits and to return the structure number, 
             search column, CAS, MW, MF, Similarity index, and SMILES.

  Actions:
    1. Client merlin_search.pl connects to ChemServer and runs listmerlin.c which
       returns a list of Merlin Pools that are online. User specifies the 
       db to search.
    2. Client minimer_gen connects to AppServer and uses the specified db name 
       to select a list from the Merlin table that displays the possible 
       search fields and type of searches. User specifies the search type and 
       field.
    3. User is prompted for the number of hits to return.
    4. Client minimer.pl connects to server chemcop.pl 
       running on ThorServer & passes it three parameters based on the 
       information entered by the user:
         parm.0  db_exec -executes a SQL statement (see trim.h).
         parm.1  'minimer search', flag instructing chemcop to call
                  minimer_gen.c.
         parm.2  list (merlin_list) with one row and five parameters:
                1. 'dbname'         (e.g. 'cis')
                2. 'srch_type'      (e.g. 'simil')
                3. 'srch_column'    (e.g. '$SMI')
                4. 'srch_value'     (e.g. 'Oc1ccccc1')
                5. hits             (e.g.  40)
       An example for parm.2 is: 'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4

    5. Chemcop.pl executes minimer_gen.c, a second server program.
       Minimer_gen.c searches Merlin and returns an integer code 
       and an ascii file and chemcop passes them back to the client.

       Integer Codes:
         0 - search successful
         1 - can't open Merlin server
         2 - pool not open or bad pool name
         3 - can't open Merlin Pool
         4 - can't create hit list from pool
         5 - can't get field(s) for requested datatype
         6 - search error; bad input value?
         7 - target not found; structures not selected  
         8 - CANSMI failed to generate USMILES

       ASCII File:
       If the search was unsuccessful, the ASCII file will contain 
       a one line error message.

       If the search was successful, the ASCII file will contain 
       multiple rows. The 
       first row will be the number of hits.  The remaining rows 
       will be data delimited with a vertical bar:

                  Column 1- WRNO if we searched the cis database
                  Column 2- original search value excluding SMILES which 
                            are too long
                  Column 3- CAS # 
                  Column 4- molecular weight
                  Column 5- molecular formula
                  Column 6- similarity index if a similarity search was done
                  Column 7- SMILES

                  Note that a '~' is substituted for any missing values.

    6. An example of a minimer_gen search:

         merlin_list:    'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4
         integer code:  0
         results_list*: 
                        4
                        ~|~|~|142.24|C6H6S2|0.75|Sc1ccccc1S
                        ~|~|150-76-5|124.15|C7H8O2|0.7317|COc1ccc(O)cc1
                        ~|~|533-75-5|122.13|C7H6O2|0.6829|Oc1cccccc1=O
                        ~|~|~|172.19|C11H8O2|0.6667|Oc1cccc2ccccc(=O)c12
         *Note that the first row is the number of hits returned.

   7. Command Line:
        minimer_gen 'dbname' 'srch_type' 'srch_column' 'srch_value' num_return
        etchem% minimer_gen 'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4

	  dbname is the name of the database to search
	  srch_type is the type of search to run 
		simil=similarity
		super=superstructure
		subst=substructure
		strin=substring
		strex=exact string match
		numer=numeric
	  srch_col is the target column used in the search
	  srch_value is the value to search for
	  num_return is the maximum number of hits to return

      Output:
	  WRNO
	  search_column (= column to search if that column is not SMILES)
        CAS
        molecular weight,
        molecular formula
        similarity index
        SMILES

  8. Notes: 

     (a) After calling the program, there can be a 3 min delay before data 
         is returned. 
     (b) Minimer_gen.c is hardcoded to search for structures with a similarity
         co-efficient of >=0.600. In the future the level of similarity 
         should be included as a parameter to be set by the user.  
         (Bill sets his at .75-.8)

...Pat McGreevy, 18 Feb 99
______________________________________________________________________________ */
/* ------------------------------ Declarations -------------------------------- */
int c[7];            /* positions of the '|' delimiters in a result row;        */
                     /* only c[1]..c[6] are filled in                           */
int hits;            /* maximum number of hits, as entered by the user          */
int i;               /* loop counter                                            */
int rc;              /* return code of the "merlin_search" call (chemcop.pl)    */

char cisserver[15];      /* host used in the CISServer connect string           */
char db[20];             /* Merlin DB name picked by the user                   */
char fld[20];            /* search field taken from the Merlin table pick list  */
char srch[5];            /* search type picked from the same list (e.g. simil)  */
char merlinserver[15];   /* host used in the Chemcop connect string             */
char value[2000];        /* search value entered by the user                    */
list merlin_list;        /* one-row parameter list passed to "merlin_search"    */
list fld_list;           /* searchable fields for the selected DB               */
list temp_list;          /* rows read back through "select results"             */

/* Placeholder addresses: replace with the real hosts before running.           */
cisserver=  "xxx.xxx.xx.xx";
merlinserver= "yyy.yyy.yy.yy";
/* __________________ Connect to Chemcop on the Merlin Server _________________ */
/* Opens connection 0.  A true result from trap() is handled as a failed       */
/* connect: ring the bell, report the failure on the status line, then break.  */
printf("Wait...Connecting to Chemcop on MerlinServer");
if (trap({
   connect(0,"net:chemcop@"^^merlinserver^^"!vtxhost.trm");
   status("Connected to Chemcop");
   })){
       bell();
       status("Failed to connect to Chemcop");
       /* NOTE(review): no enclosing loop is visible; presumably this break    */
       /* ends the script -- confirm against the TRIMpl reference.             */
       break;
       }
/* ________ Get Names of Merlin DBs Currently Online using list_merlin _______ */
/* A zero result from exec_sql() is treated as success.  The rows are then     */
/* read through "select results" and offered as a pick list; the value         */
/* returned by list_view3() becomes the DB name.  If the call fails or no rows */
/* come back, db is left unassigned and the script carries on.                 */
if (!exec_sql("merlin_open")){                       /* RPC Call               */
    temp_list= list_open("select results",20, "Select Merlin DB (i.e. nci96)");
    if (list_rows(temp_list))                       /* display open Merlin DBs */
      db= list_view3(temp_list,-2,-2,-1,key_f3,opt_highlight,0);
    }

/* __________________ Get Query Fields for Selected Merlin DB ________________ */
/* Opens connection 1 to the CIS server.  As with the Chemcop connect, a true  */
/* result from trap() is handled as failure: bell, status message, break.      */
/* NOTE(review): the user/password pair is embedded in the connect string.     */
printf("Wait...Connecting to CISServer");
if (trap({
   connect(1,"net:sql$database@1958:"^^cisserver^^"(user/pass)!ljk");
   status("  Connected to CISServer");
   })){
       bell();
       status("  Failed to connect to CISServer");
       break;
       }
/* Look up the searchable fields recorded in the merlin table for the chosen   */
/* DB and let the user pick one.  &db refers to the db variable above.         */
db= translate(db," ","");                                     /* remove spaces */
fld_list= list_open("select descrip,srch_type,field_name from merlin "
                    "where dbname= &db ",
                    20,"Select Query Field:");

  if (list_rows(fld_list)){
    /* NOTE(review): this call passes one more argument than the other         */
    /* list_view3() calls in this script; presumably it selects the srch_type  */
    /* column as the returned value -- confirm.                                */
    srch= list_view3(fld_list,-2,-2,-1,key_f3,opt_highlight,1,0); 
    /* presumably column 2 is field_name (0-based) -- confirm                  */
    fld= list_curr(fld_list,2);
    }
  else {
    /* no rows came back for this dbname: report it and stop                   */
    printf("  db not found in Merlin table");
    break;
    }

/* ___________________ Get Search Value for Selected Field ___________________ */
/* The three "nci96 ..." strings are example text passed to prompt2().         */
value= prompt2(-2,-2,
               "Enter Search Value.  For example: ",
               "nci96 $NSC 84241",
               "nci96 $CAS 81-30-1",
               "nci96 $SMI O=C1OC(=O)c2ccc3C(=O)OC(=O)c4ccc1c2c43",
               "",100,vis_normal,1); 

/* ______________________ Get Maximum Number of Hits _________________________ */
/* The prompt result is converted with to_int(); it is not range-checked here. */
hits= to_int(prompt2(-2,-2,
               "Enter number of hits: ",
               "",4,vis_normal,1)); 

/* __________________________ Format & Execute Query ___________________________ */
/* minimer_gen 'dbname' 'srch_type' 'srch_column' 'srch_value' num_return */
/* Build the parameter list for the search.  The third list_open() argument     */
/* sits where the other list_open() calls in this script pass a title string;   */
/* here it carries the db/type/field plus placeholders for the last two items.  */
/* NOTE(review): "200" is presumably a list specification -- confirm.           */
merlin_list= list_open("200",1,"'"^^db^^"' '"^^srch^^"' '"
                       ^^fld^^"' 'srch_value' num_return");
/* Row 1 receives the parameter string: four single-quoted items, then hits.    */
/* NOTE(review): value is quoted as entered; a value containing a single quote  */
/* would unbalance the string.                                                  */
list_mod(merlin_list,1,"'"^^db^^"' '"^^srch^^"' '"^^fld^^"' '"^^value^^"' "^^hits);
/* Show the assembled row to the user; the return value is not used.            */
list_view3(merlin_list,-2,-2,-1,key_f3,opt_highlight,0);
/* presumably makes connection 0 (Chemcop) current again after connect(1,...)   */
/* -- confirm                                                                   */
connect(0);

/* ______________________________ Run the Search _______________________________ */
/* Call "merlin_search" with the parameter list and report the outcome.  The     */
/* decode() keys are multiples of 256: key 256*k is reported as code k, matching */
/* the integer codes 0-8 listed in the file header.  Any other value falls       */
/* through to the "Can't interpret" message.                                     */
/* NOTE(review): presumably the server program's exit code arrives in the high   */
/* byte of rc -- confirm.                                                        */
rc= (exec_sql("merlin_search",merlin_list));      /* TRIMrpc-remote procedure call */
printf(decode(rc,
         0,"rc=0: search successful",
       256,"rc=1: can't open Merlin server",
       512,"rc=2: pool not open or bad pool name",
       768,"rc=3: can't open Merlin pool",
       1024,"rc=4: can't create hit list from pool",
       1280,"rc=5 can't get field(s) for requested datatype",
       1536,"rc=6: Search error or bad input value",
       1792,"rc=7: Target not found/structures not selected",
       2048,"rc=8:  CANSMI failed to generate USMILES",
           "  Can't interpret '"^^rc^^"' from ChemCop"));

/* ____________________________ Display the Results ____________________________ */
/* On rc == 0 the rows are read through "select results" and shown.  The first   */
/* row is printed as the hit count, then the fields of the next row are printed  */
/* one by one.                                                                   */
if (rc == 0){ 
  temp_list= list_open("select results",1000);   /* open file returned fm chemcop */
  /* NOTE(review): this list_view() runs before temp_list is tested below.        */
  list_view(temp_list,0);
  if (temp_list){                                     /* list opened successfully */
    /*(list_rows(temp_list)<=list_curr(temp_list,0))){  hits <= requested hits    */
    printf("Hits Returned: "^^list_curr(temp_list,0));      /* row 1 = hit count  */
    list_next(temp_list);                                   /* move to data row   */
/* _______________________________ Parse Fields _________________________________ */
    /* Only the row reached by the single list_next() above is parsed.            */
    /* presumably instr(s,"|",1,i) is the position of the i-th '|' -- confirm     */
    for(i=1;i<=6;i++)                                       /* find pos of '|'s   */
      c[i]= instr(list_curr(temp_list,0),"|",1,i);

    printf(substr(list_curr(temp_list,0),1,c[1]-1));              /* WRNO        */
    printf(substr(list_curr(temp_list,0),c[1]+1, c[2]-c[1]-1));   /* search value*/
    printf(substr(list_curr(temp_list,0),c[2]+1, c[3]-c[2]-1));   /* CAS         */
    printf(substr(list_curr(temp_list,0),c[3]+1, c[4]-c[3]-1));   /* MW          */
    printf(substr(list_curr(temp_list,0),c[4]+1, c[5]-c[4]-1));   /* MF          */
    printf(substr(list_curr(temp_list,0),c[5]+1, c[6]-c[5]-1));   /* similarity  */
    printf(substr(list_curr(temp_list,0),c[6]+1));                /* SMILES      */
    }                                             /* end if (temp_list)           */
  else
    /* NOTE(review): reached only when temp_list tests false, yet it views that   */
    /* same list; the header says the error text is returned when the search      */
    /* fails, i.e. when rc != 0 -- confirm the intended placement.                */
    list_view(temp_list,0);                       /* show error msg from Daylight */
  }                                               /* end if (rc == 0)             */
}