#!/bin/tcsh -f
#(ie run the tshell on this but don't read the .cshrc or .tcshrc)
#
# pmid2pmcid: print the PubMed Central ID (PMCID) that belongs to a
# PubMed ID (PMID).
#
# usage:   pmid2pmcid [PMID] [turn-on-verbose]
# stdout:  the PMCID (for example PMC2189734); nothing is printed when the
#          MEDLINE record fetched for the PMID has no "PMC - " field.
# stderr:  a diagnostic when the PMID argument is not made only of digits.
# status:  1 when the PMID argument is rejected; otherwise the status of
#          the final pipeline stage.
# needs:   wget and sed on the PATH, and network access to NCBI E-utilities.

set ver = "version = 1.06 of pmid2pmcid 2009 Dec 19"

# 2009 Dec 19, 1.06: fix variable name to be ver instead of version
# 2008 Jul 25, 1.05: switch to Monica Romiti's method
# 2008 Jul 24, 1.04: set agent
# 2008 Jul 24, 1.03: handle error
# 2008 Mar 25, 1.02: official documentation
# 2008 Mar 25, 1.01: turn on verbosemode flag
# 2008 Mar 25, 1.00: origin

# Any second argument turns verbose mode on (the usage text promises
# "any second argument", so extra arguments must not switch it off again).
if ($#argv >= 2) then
  set verbosemode = 1
else
  set verbosemode = 0
endif
# swap these lines to force verbose mode:
#set verbosemode = 1
#set verbosemode = 0

# With exactly one argument verbosemode is 0 unless it was forced on just
# above, so this banner only appears when verbose mode is forced.
if ($#argv == 1) then
  if ($verbosemode) then
    echo "$ver"
  endif
endif

# No arguments: print the version and the documentation, then stop.
if ($#argv == 0) then
  echo "$ver"
  echo 'usage: pmid2pmcid [PMID] [turn-on-verbose]'
  echo 'Convert the PubMed ID (PMID) number of a paper from PubMed'
  echo 'to a PubMed Central ID (PMCID) number.'
  echo 'The program uses wget to get web pages.'
  echo 'The last line output is the PMCID.'
  echo ''
  echo 'If there is any second argument, the program runs in'
  echo 'verbose mode, showing results. Use this for testing.'
  echo ''
  echo 'If there is no PubMed Central paper and no PMCID is found,'
  echo 'A blank line is returned.'
  echo ''
  echo 'Pubmed:'
  echo 'http://www.ncbi.nlm.nih.gov/sites/entrez'
  echo ''
  echo 'Pubmed Central:'
  echo 'http://www.pubmedcentral.nih.gov/'
  echo ''
  echo 'wget:'
  echo 'http://www.ccrnp.ncifcrf.gov/~toms/wget.html'
  echo ''
  echo 'PMCID is required by NIH:'
  echo 'Policy No. 118 3/10/08 Public Access Policy'
  echo 'http://web.ncifcrf.gov/campus/administrative/policies/100admin/118.asp'
  echo ''
  echo 'Examples:'
  echo ''
  echo '=== no argument ==================================================='
  echo 'pmid2pmcid'
  echo '=== argument for existing PMID ===================================='
  echo 'pmid2pmcid 15130839'
  echo '=== argument for existing PMID with verbose ======================='
  echo 'pmid2pmcid 15130839 verbose'
  echo '=== argument for non-existing PMID ================================'
  echo 'pmid2pmcid 17377584'
  echo '==================================================================='
  echo ''
  echo 'See also: bibtex2pmcid'
  echo ''
  echo '---'
  echo ''
  echo 'Dr. Thomas D. Schneider'
  echo 'National Institutes of Health'
  echo 'National Cancer Institute'
  echo 'Center for Cancer Research Nanobiology Program'
  echo 'Molecular Information Theory Group'
  echo 'Frederick, Maryland 21702-1201'
  echo 'toms@ncifcrf.gov'
  echo 'permanent email: toms@alum.mit.edu'
  echo 'http://www.ccrnp.ncifcrf.gov/~toms/'
  exit
endif

# 2008 Jul 25 New method from Monica Romiti:
# wget -O - -q "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=txt&rettype=medline&id=17921503" | grep "PMC - "
#
# PMC - PMC2189734
# echo ----
# wget -O - -q "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=txt&rettype=medline&id=1792" | grep "PMC - "

# Quote the argument so the shell does not glob or word-split it.
set PMID = "$1"

# The PMID is pasted into a URL, so accept digits only.  The leading "x"
# on both sides keeps the expression parser from mistaking an argument
# such as "-e" or "(" for an operator.
if ("x$PMID" == "x" || "x$PMID" =~ x*[^0-9]*) then
  echo "pmid2pmcid: the PMID argument must consist only of digits" > /dev/stderr
  exit 1
endif

# NCBI documents the E-utilities base URL with https.
set url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=txt&rettype=medline&id=${PMID}"

# Keep only the line that starts with the "PMC - " tag and strip the tag.
# Anchoring at the start of the line avoids matching the same characters
# inside an indented continuation line of the record.
wget -O - -q "$url" |\
  sed -n 's/^PMC - //p'

exit

# Nothing below the exit above is ever executed; it is the author's
# historical material (old method and working notes), kept for reference.
********************************************************************************

# Old method:

# set agent = ""
# set tmp1 = /tmp/`whoami`1.pmid2pmc
# set tmp2 = /tmp/`whoami`2.pmid2pmc

set source = "http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=${PMID}"

if ($verbosemode) then
  # give the wget message
  echo wget $agent -O "$tmp1" "$source"
  wget $agent -O "$tmp1" "$source"
else
  # silence the wget message
  wget $agent -O "$tmp1" "$source" >& $tmp1
endif

# head $tmp1
# set PMCID = `cat $tmp1 | grep 'PMCID:'`
# echo "$PMCID"

#cat $tmp1 | grep 'PMCID:' > $tmp2
#cat $tmp2

# NOTE(review): in this copy the sed pattern between "s," and ",${g},g" is
# only a line break, and g is set to an empty string; presumably an HTML tag
# and a separator character were lost in transit -- confirm against an
# archived copy before reviving this code.
set g = ""
cat $tmp1 |\
grep 'PMCID:' |\
sed "s,
,${g},g" |\
tr "$g" "\n" |\
grep 'PMCID:' |\
cat > $tmp2

if ($verbosemode) then
  echo ----- isolated code for the PMCID:
  cat $tmp2
  echo ----- the final PMCID:
endif

set PMCID = `cat $tmp2|tr '>' '\n'|sed 's/PMC//' |tail -1`

# handle error
if ("$PMCID" == "") then
  # report the error
  grep ERROR $tmp1 | head -1
else
  # give the PMCID
  echo $PMCID
endif

exit

********************************************************************************

Example:

http://www.ncbi.nlm.nih.gov/pubmed/15130839?ordinalpos=1&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum

points to

http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=15130839

that page gives

PMCID: PMC1852464

which is the desired number.

The line on the page is:
PMCID: PMC1852464
NIHMSID: NIHMS8281

strawberry 7% grep PMCID toms.pmid2pmc | wc
 1 338 11028
strawberry 8% wc toms.pmid2pmc
 4 7861 116441 toms.pmid2pmc

So since they are using really long lines (UGH!!) this cut it down
only 4 fold.

# for possible use later with wget:
# --user-agent=netscape
set agent = ""
set agent = "--user-agent=netscape"

# Error message:
# % pmid2pmcid 17921503
#
# --16:32:55-- http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=17921503
# => `/tmp/toms1.pmid2pmc'
# Resolving www.pubmedcentral.nih.gov... 130.14.29.110
# Connecting to www.pubmedcentral.nih.gov[130.14.29.110]:80... connected.
# HTTP request sent, awaiting response... 503 Service Unavailable
# 16:32:55 ERROR 503: Service Unavailable.