#!/bin/tcsh -f
#(ie run the tshell on this but don't read the .cshrc or .tcshrc)
#
# topaper: move the browser to a paper based on a pubmed url.
#
# usage: topaper [pubmed web page]
#
# The pubmed page is fetched with wget, every fragment of the page that
# mentions the key "dx.doi.org" is collected, and the first such fragment
# is handed to mozillaurl.
#
# External commands used: wget, sed, tr, grep, wc, head, whoami, plus the
# helpers sound, number and mozillaurl, which are not defined in this file
# (presumably local utilities: play a named sound, number the lines of
# stdin, and point the browser at a url -- confirm against their sources).
#
# Exit status: 1 for a usage error, a failed download, or no link found;
# otherwise the status of the final exit (0).

echo version = 1.11 of topaper 2006 Aug 02
# 2006 Aug 02, 1.11: duck sound to klaxon
# 2006 Jan 12, 1.10: clean up
# 2006 Jan 12, 1.09: clean up and fix; key for external link: dx.doi.org
# 2005 Dec 14, 1.08: break fused urls
# 2005 Dec 13, 1.07: fused urls with:
#   pms 257 286 1982 martin
# 2005 Apr 7, 1.06: crash on "pms vergani gadsby 876" at openline
# 2005 Mar 5, 1.05: give line with PMID if failure
# 2004 May 11, 1.04: identification sounds, duck, wolf
# 2004 Apr 26, 1.03: error: pubmedsearch "Jarzynski C": 'Word too long'
# 2004 Apr 9, 1.01: functional!!!!
# 2004 Apr 9, 1.00: origin

echo 'Move the browser to a paper based on a pubmed url\!'

# Exactly one argument is required: the url of the pubmed page.
if ($#argv != 1) then
    echo 'usage: topaper [pubmed web page]'
    exit 1
endif
echo pubmed web page given is: "$1"

# baseurl is only referenced by the disabled fused-url debugging code below.
set baseurl = 'http://www.ncbi.nlm.nih.gov'

# NOTE(review): these names are predictable and the files are left behind
# after the run (handy for debugging).  On a shared machine another user
# could pre-create them; consider mktemp if that matters.
set tmp1 = /tmp/`whoami`1.topaper.html
set tmp2 = /tmp/`whoami`2.topaper.html

# Fetch the pubmed page.  Stop here if the download fails; otherwise the
# failure would be misreported below as "no links on the page".
wget -O "$tmp1" "$1"
if ($status != 0) then
    echo "wget could not fetch $1"
    sound klaxon > /dev/null &
    exit 1
endif
# cat $tmp1

# The key is the way to identify the relevant URLs on the page!
set key = "dx.doi.org"

# Put each candidate url on a line of its own: the first "url=" on a line
# and every double quote become line breaks (via '@'), then only the lines
# containing the key are kept.  grep -F matches the key literally, so its
# dots are not treated as regular-expression wildcards.
sed -e 's/url=/@@/' -e 's/"/@/g' "$tmp1" |\
    tr '@' '\n' |\
    grep -F "$key" > "$tmp2"

echo ---- $key links:
cat "$tmp2"

set count = `wc -l < "$tmp2"`

# More than one candidate: warn audibly and list them all, numbered.
# The first one is still the one that gets used.
if ("$count" > 1) then
    echo "***********************************************"
    echo "* The wolfchorus means that there are multiple links"
#   echo "* $count Multiple links\!"
    sound wolfchorus > /dev/null &
    number < "$tmp2"
    echo "***********************************************"
endif

# No candidate at all: nothing to open.
if ("$count" == '0') then
    echo "No $key line found\!"
    echo This probably means that there are no links
    sound klaxon > /dev/null &
    exit 1
endif
sound towerclock > /dev/null &

# Take the first candidate.  noglob keeps the shell from applying filename
# substitution to the text read from the file: a url containing ?, *, [
# or { would otherwise be expanded or abort the script with "No match".
set noglob
set url = `head -n 1 "$tmp2"`
unset noglob

echo --------------------------------- the next url is:
echo "$url"
echo --------------------------------------------------

# fused urls detection code
# set urls = `echo "$url"|sed 's,http://,@http:,g'|tr '@' '\n'|grep 'http:'|wc -l`
# if ("$urls" != 1) then
#   echo "urls = $urls"
#   echo "url = $url"
#   echo "baseurl = $baseurl"
#   # echo "$url"|sed 's,http://,@,g'|tr '@' '\n'
#   exit
# endif

mozillaurl "$url"
echo "********************************************************************"

# The script always stops here.  Everything below this exit is never
# executed: first an unfinished attempt to fetch the paper itself, then an
# archived e-mail describing the wget problem that blocked that attempt.
exit

# ok, try to find a PDF and move to that!!
# NOTE(review): tmp3 uses the same file name as tmp2 -- possibly intended
# to be a third file; confirm before reviving this code.
set tmp3 = /tmp/`whoami`2.topaper.html
echo the tmp3 file is: "$tmp3"
echo "$tmp3"
echo the url we are trying is:
echo "$url"
echo --- FINAL WGET DOES NOT WORK YET:
wget -O "$tmp3" "$url"
echo ---

exit

********************************************************************************

From toms Fri Apr 9 19:47:58 2004
To: custserv@nlm.nih.gov
Subject: unknown protocol
Content-Length: 1342

Hi:

A pubmed entry is at:

http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=14755570

The link on the pubmed entry is:

http://www.ncbi.nlm.nih.gov//entrez/utils/lofref.fcgi?PrId=3058&uid=14755570&db=pubmed&url=http://dx.doi.org/10.1002/bip.10525

I can paste this in to my Mozilla browser and it works. However,
If I use wget, it gives me an error:

--19:45:22-- http://www.ncbi.nlm.nih.gov:80/entrez/utils/lofref.fcgi?PrId=3058&uid=14755570&db=pubmed&url=http%3A/dx.doi.org/10.1002/bip.10525
=> `/tmp/toms2.topaper.html'
Connecting to www.ncbi.nlm.nih.gov:80... connected!
HTTP request sent, fetching headers... done.
Location: http%3A/dx.doi.org/10.1002/bip.10525 [following]
http%3A/dx.doi.org/10.1002/bip.10525: Unknown/unsupported protocol.

It should go (through a hop) to

http://www3.interscience.wiley.com/cgi-bin/abstract/106565357/ABSTRACT

In theory the Elink should do this:

http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html

But most of the examples fail!

How do I do this?

Tom

Dr. Thomas D. Schneider
National Cancer Institute
Laboratory of Experimental and Computational Biology
Frederick, Maryland 21702-1201
toms@ncifcrf.gov
permanent email: toms@alum.mit.edu (use only if first address fails)
http://www.lecb.ncifcrf.gov/~toms/