merlin_search.pl
{
/* ______________________________________________________________________________
Copyright (C) DataAspects, Corp. 1998
merlin_search.pl
Objective: To run a Merlin search on any open pool allowing the
user to select the pool, search type, search field,
search value and number of hits and to return the structure number,
search column, CAS, MW, MF, Similarity index, and SMILES.
Actions:
1. Client merlin_search.pl connects to ChemServer and runs listmerlin.c which
returns a list of Merlin Pools that are online. User specifies the
db to search.
2. Client minimer_gen connects to AppServer and uses the specified db name
to select a list from the Merlin table that displays the possible
search fields and type of searches. User specifies the search type and
field.
3. User is prompted for the number of hits to return.
4. Client minimer.pl connects to server chemcop.pl
running on ThorServer & passes it three parameters based on the
information entered by the user:
parm.0 db_exec -executes a SQL statement (see trim.h).
parm.1 'minimer search', flag instructing chemcop to call
minimer_gen.c.
parm.2 list (merlin_list) with one row and five parameters:
1. 'dbname' (i.e. 'cis')
2. 'srch_type' (i.e. 'simil')
3. 'srch_column' (i.e. '$SMI')
4. 'srch_value' (i.e. 'Oc1ccccc1')
5. hits (i.e. 40)
An example for parm.2 is: 'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4
5. Chemcop.pl executes minimer_gen.c, a second server program.
Minimer_gen.c searches Merlin and returns an integer code
and an ascii file and chemcop passes them back to the client.
Integer Codes:
0 - search successful
1 - can't open Merlin server
2 - pool not open or bad pool name
3 - can't open Merlin Pool
4 - can't create hit list from pool
5 - can't get field(s) for requested datatype
6 - search error; bad input value?
7 - target not found; structures not selected
8 - CANSMI failed to generate USMILES
ASCII File:
If the search was unsuccessful, the ASCII file will contain
a one line error message.
If the search was successful, the ASCII file will contain
multiple rows. The
first row will be the number of hits. The remaining rows
will be data delimited with a vertical bar:
Column 1- WRNO if we searched the cis database
Column 2- original search value excluding SMILES which
are too long
Column 3- CAS #
Column 4- molecular weight
Column 5- molecular formula
Column 6- similarity index if a similarity search was done
Column 7- SMILES
Note that a '~' is substituted for any missing values.
6. An example of a minimer_gen search:
merlin_list: 'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4
integer code: 0
results_list*:
4
~|~|~|142.24|C6H6S2|0.75|Sc1ccccc1S
~|~|150-76-5|124.15|C7H8O2|0.7317|COc1ccc(O)cc1
~|~|533-75-5|122.13|C7H6O2|0.6829|Oc1cccccc1=O
~|~|~|172.19|C11H8O2|0.6667|Oc1cccc2ccccc(=O)c12
*Note that the first row is the number of hits returned.
7. Command Line:
minimer_gen 'dbname' 'srch_type' 'srch_column' 'srch_value' num_return
etchem% minimer_gen 'nci96' 'simil' '$SMI' 'Oc1ccccc1' 4
dbname is the name of the database to search
srch_type is the type of search to run
simil=similarity
super=superstructure
subst=substructure
strin=substring
strex=exact string match
numer=numeric
srch_col is the target column used in the search
srch_value is the value to search for
num_return is the maximum number of hits to return
Output:
WRNO
search_column (= column to search if that column is not SMILES)
CAS
molecular weight,
molecular formula
similarity index
SMILES
8. Notes:
(a) After calling the program, there can be a 3 min delay before data
is returned.
(b) Minimer_gen.c is hardcoded to search for structures with a similarity
co-efficient of >=0.600. In the future the level of similarity
should be included as a parameter to be set by the user.
(Bill sets his at .75-.8)
...Pat McGreevy, 18 Feb 99
______________________________________________________________________________ */
/* Working storage for the whole script, followed by the two server
   addresses used in the connect() strings below. */
int c[7]; /* positions of the "|" delimiters in a result row; slots 1..6 are filled */
int hits; /* number of hits the user asks for */
int i; /* counter */
int rc; /* return code of the merlin_search call to server chemcop.pl */
char cisserver[15]; /* host part of the connection-1 (CISServer) string */
char db[20]; /* Merlin database name picked by the user */
char fld[20]; /* column 2 of the chosen fld_list row (field_name in the select) */
char smi[2000]; /* NOTE(review): never referenced elsewhere in this script */
char srch[5]; /* value returned by list_view3() on fld_list; used as the search type */
char merlinserver[15]; /* host part of the connection-0 (Chemcop) string */
char value[2000]; /* search value */
list merlin_list; /* one-row list carrying the argument string for merlin_search */
list fld_list; /* rows selected from the merlin table for the chosen db */
list temp_list; /* reused for each "select results" list */
cisserver= "xxx.xxx.xx.xx"; /* placeholder address */
merlinserver= "yyy.yyy.yy.yy"; /* placeholder address */
/* Open connection 0 to the chemcop server on merlinserver. The connect and
   the success message run inside trap(); when trap() yields a true value the
   failure path below rings the bell, reports the failure and breaks. */
printf("Wait...Connecting to Chemcop on MerlinServer");
if (trap({
connect(0,"net:chemcop@"^^merlinserver^^"!vtxhost.trm");
status("Connected to Chemcop");
})){
bell();
status("Failed to connect to Chemcop");
break; /* NOTE(review): not inside a loop; presumably ends the script - confirm */
}
/* ________ Get Names of Merlin DBs Currently Online using list_merlin _______ */
/* Run the merlin_open RPC, open its results as a pick list and store the
   user's choice in db.
   exec_sql() returning non-zero is treated as failure, as it is for the
   merlin_search call later in this script (rc 0 = success).
   A failed RPC or an empty result list stops the script here with its own
   message instead of carrying on with db unset. */
if (exec_sql("merlin_open")){ /* RPC Call */
    bell();
    status("Failed to get list of open Merlin DBs from Chemcop");
    break;
}
temp_list= list_open("select results",20, "Select Merlin DB (i.e. nci96)");
if (!list_rows(temp_list)){ /* nothing to choose from */
    bell();
    status("No Merlin DBs are currently open");
    break;
}
db= list_view3(temp_list,-2,-2,-1,key_f3,opt_highlight,0); /* display open DBs */
/* __________________ Get Query Fields for Selected Merlin DB ________________ */
/* Open connection 1 to the CIS database server, read the rows of the merlin
   table that belong to the chosen db, and let the user pick one. The pick
   supplies the search type (srch) and the search column (fld). */
printf("Wait...Connecting to CISServer");
if (trap({
connect(1,"net:sql$database@1958:"^^cisserver^^"(user/pass)!ljk");
status(" Connected to CISServer");
})){
bell();
status(" Failed to connect to CISServer");
break;
}
db= translate(db," ",""); /* remove spaces */
/* db appears inside the SQL text as &db - presumably a host-variable
   reference resolved by list_open(); confirm against the TRIMpl reference. */
fld_list= list_open("select descrip,srch_type,field_name from merlin "
"where dbname= &db ",
20,"Select Query Field:");
if (list_rows(fld_list)){
/* NOTE(review): this call passes 8 arguments while the other list_view3()
   calls in this script pass 7 - confirm which form is intended. */
srch= list_view3(fld_list,-2,-2,-1,key_f3,opt_highlight,1,0);
fld= list_curr(fld_list,2); /* third selected item: field_name */
}
else {
printf(" db not found in Merlin table"); /* no rows came back for this db */
break;
}
/* ___________________ Get Search Value for Selected Field ___________________ */
/* The three "nci96 ..." strings are passed to prompt2() along with the
   prompt text; the result is stored as the search value. */
value= prompt2(-2,-2,
"Enter Search Value. For example: ",
"nci96 $NSC 84241",
"nci96 $CAS 81-30-1",
"nci96 $SMI O=C1OC(=O)c2ccc3C(=O)OC(=O)c4ccc1c2c43",
"",100,vis_normal,1);
/* ______________________ Get Number of Hits to Return _______________________ */
/* The reply is converted with to_int(); no range check is applied here. */
hits= to_int(prompt2(-2,-2,
"Enter number of hits: ",
"",4,vis_normal,1));
/* __________________________ Format & Execute Query ___________________________ */
/* minimer_gen 'dbname' 'srch_type' 'srch_column' 'srch_value' num_return */
/* Build a one-row list holding the quoted argument string, show it to the
   user, switch to connection 0 and pass the list to the merlin_search RPC. */
/* The list is opened with a template row ('srch_value' num_return are
   literal text at this point) ... */
merlin_list= list_open("200",1,"'"^^db^^"' '"^^srch^^"' '"
^^fld^^"' 'srch_value' num_return");
/* ... and row 1 is then overwritten with the real search value and hit count. */
list_mod(merlin_list,1,"'"^^db^^"' '"^^srch^^"' '"^^fld^^"' '"^^value^^"' "^^hits);
list_view3(merlin_list,-2,-2,-1,key_f3,opt_highlight,0); /* display the argument row */
connect(0); /* connection 0 is the one opened to Chemcop */
rc= (exec_sql("merlin_search",merlin_list)); /* TRIMrpc-remote procedure call */
/* Translate rc into a message. The non-zero keys are 256 times the codes
   1..8 listed in the file header; any other value falls through to the
   final "Can't interpret" message.
   NOTE(review): the factor of 256 looks like an exit status carried in the
   high byte - confirm. */
printf(decode(rc,
0,"rc=0: search successful",
256,"rc=1: can't open Merlin server",
512,"rc=2: pool not open or bad pool name",
768,"rc=3: can't open Merlin pool",
1024,"rc=4: can't create hit list from pool",
1280,"rc=5 can't get field(s) for requested datatype",
1536,"rc=6: Search error or bad input value",
1792,"rc=7: Target not found/structures not selected",
2048,"rc=8: CANSMI failed to generate USMILES",
" Can't interpret '"^^rc^^"' from ChemCop"));
/* On a successful search, open the returned results, display them, then
   print the hit count and the seven "|"-separated fields of the first data
   row. Only that one data row is parsed; there is no loop over the rest. */
if (rc == 0){
temp_list= list_open("select results",1000); /* open file returned fm chemcop */
/* NOTE(review): the list is displayed here before the if (temp_list) test below. */
list_view(temp_list,0);
if (temp_list){ /* list opened successfully */
/*(list_rows(temp_list)<=list_curr(temp_list,0))){ hits <= requested hits */
printf("Hits Returned: "^^list_curr(temp_list,0)); /* first row holds the hit count (see file header) */
list_next(temp_list); /* move to data row */
/* _______________________________ Parse Fields _________________________________ */
for(i=1;i<=6;i++) /* find pos of the six "|" delimiters */
c[i]= instr(list_curr(temp_list,0),"|",1,i);
printf(substr(list_curr(temp_list,0),1,c[1]-1)); /* WRNO */
printf(substr(list_curr(temp_list,0),c[1]+1, c[2]-c[1]-1)); /* search field*/
printf(substr(list_curr(temp_list,0),c[2]+1, c[3]-c[2]-1)); /* CAS */
printf(substr(list_curr(temp_list,0),c[3]+1, c[4]-c[3]-1)); /* MW */
printf(substr(list_curr(temp_list,0),c[4]+1, c[5]-c[4]-1)); /* MF */
printf(substr(list_curr(temp_list,0),c[5]+1, c[6]-c[5]-1)); /* similarity index */
printf(substr(list_curr(temp_list,0),c[6]+1)); /* SMILES */
} /* end if (temp_list) */
else
/* NOTE(review): this branch runs when temp_list tested false, yet it views
   that same list; it is also only reachable when rc == 0. Confirm where the
   error message from the server was meant to be shown. */
list_view(temp_list,0); /* show error msg from Daylight */
} /* end if (rc == 0) */
}