#!/bin/tcsh -f
#(ie run the tshell on this but don't read the .cshrc or .tcshrc)
#
# pmid2pmcid: convert a PubMed ID (PMID) to a PubMed Central ID (PMCID).
#
# usage: pmid2pmcid [PMID] [turn-on-verbose]
#
#   PMID             the PubMed ID to look up
#   turn-on-verbose  any further argument switches on verbose mode, which
#                    shows the wget messages and the intermediate text
#
# The PubMed Central page for the PMID is fetched with wget, the text
# following 'PMCID:' is isolated, and the number (without the 'PMC'
# prefix) is printed as the last line of output.  If no PMCID is found
# the last line is blank.  With no arguments the help text is printed.

set version = "version = 1.02 of pmid2pmcid 2008 Mar 25"
# 2008 Mar 25, 1.02: official documentation
# 2008 Mar 25, 1.01: turn on verbosemode flag
# 2008 Mar 25, 1.00: origin

# The usage text promises verbose mode for "any second argument", so
# test for two or more arguments rather than exactly two.
if ($#argv >= 2) then
    set verbosemode = 1
else
    set verbosemode = 0
endif

# swap these lines to force verbose mode:
#set verbosemode = 1
#set verbosemode = 0

# Only reachable with a single argument when verbose mode is forced above.
if ($#argv == 1) then
    if ($verbosemode) then
        echo "$version"
    endif
endif

if ($#argv == 0) then
    echo "$version"
    echo 'usage: pmid2pmcid [PMID] [turn-on-verbose]'
    echo 'Convert the PubMed ID (PMID) number of a paper from PubMed'
    echo 'to a PubMed Central ID (PMCID) number.'
    echo 'The program uses wget to get web pages.'
    echo 'The last line output is the PMCID.'
    echo ''
    echo 'If there is any second argument, the program runs in'
    echo 'verbose mode, showing results. Use this for testing.'
    echo ''
    echo 'If there is no PubMed Central paper and no PMCID is found,'
    echo 'A blank line is returned.'
    echo ''
    echo 'Pubmed:'
    echo 'http://www.ncbi.nlm.nih.gov/sites/entrez'
    echo ''
    echo 'Pubmed Central:'
    echo 'http://www.pubmedcentral.nih.gov/'
    echo ''
    echo 'wget:'
    echo 'http://www.ccrnp.ncifcrf.gov/~toms/wget.html'
    echo ''
    echo 'PMCID is required by NIH:'
    echo 'Policy No. 118 3/10/08 Public Access Policy'
    echo 'http://web.ncifcrf.gov/campus/administrative/policies/100admin/118.asp'
    echo ''
    echo 'Examples:'
    echo ''
    echo '=== no argument ==================================================='
    echo 'pmid2pmcid'
    echo '=== argument for existing PMID ===================================='
    echo 'pmid2pmcid 15130839'
    echo '=== argument for existing PMID with verbose ======================='
    echo 'pmid2pmcid 15130839 verbose'
    echo '=== argument for non-existing PMID ================================'
    echo 'pmid2pmcid 17377584'
    echo '==================================================================='
    echo ''
    echo 'See also: bibtex2pmcid'
    echo ''
    echo '---'
    echo ''
    echo 'Dr. Thomas D. Schneider'
    echo 'National Institutes of Health'
    echo 'National Cancer Institute'
    echo 'Center for Cancer Research Nanobiology Program'
    echo 'Molecular Information Theory Group'
    echo 'Frederick, Maryland 21702-1201'
    echo 'toms@ncifcrf.gov'
    echo 'permanent email: toms@alum.mit.edu'
    echo 'http://www.ccrnp.ncifcrf.gov/~toms/'
    exit
endif

set PMID = "$1"

# Work in a private, unpredictably named directory instead of fixed
# /tmp/<user>N.pmid2pmc files, so concurrent runs cannot clobber each
# other and nothing is left behind afterwards.
set tmpdir = `mktemp -d /tmp/pmid2pmc.XXXXXX`
if ("$tmpdir" == "") then
    # tcsh has no direct stderr-only redirection, so borrow sh for it.
    sh -c 'echo "pmid2pmcid: cannot create a temporary directory" 1>&2'
    exit 1
endif
set tmp1 = "$tmpdir/page"
set tmp2 = "$tmpdir/pmcid"

# Remove the temporary directory if the run is interrupted.
onintr interrupted

set url = "http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=${PMID}"

if ($verbosemode) then
    # give the wget message
    wget -O "$tmp1" "$url"
else
    # Silence the wget message with -q.  Redirecting the messages into
    # $tmp1 (as was done before) made the shell and wget write to the
    # same file at once, which can corrupt the downloaded page.
    wget -q -O "$tmp1" "$url"
endif

# The page uses very long lines, so break the lines holding 'PMCID:'
# into pieces at a tag and keep only the piece that mentions the PMCID.
# NOTE(review): the tag (and the control character formerly used as a
# temporary separator for sed/tr) did not survive in the copy of this
# script under review; '<br>' is reconstructed from the sample page
# lines in the notes at the end of this file -- confirm against a live
# page.  awk does the split directly, so no invisible separator
# character has to be stored in this file.
grep 'PMCID:' "$tmp1" \
    | awk '{ gsub(/<br>/, "\n"); print }' \
    | grep 'PMCID:' \
    > "$tmp2"

if ($verbosemode) then
    echo '----- isolated code for the PMCID:'
    cat "$tmp2"
    echo '----- the final PMCID:'
endif

# Take the text after the last '>' and drop the 'PMC' prefix.  The
# parentheses make the result a word list, so empty output gives an
# empty value and stray surrounding blanks are dropped.
set PMCID = ( `tr '>' '\n' < "$tmp2" | sed 's/PMC//' | tail -1` )
echo "$PMCID"

rm -rf "$tmpdir"
exit

interrupted:
rm -rf "$tmpdir"
exit 1

********************************************************************************

Example:

http://www.ncbi.nlm.nih.gov/pubmed/15130839?ordinalpos=1&itool=EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum

points to

http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=15130839

that page gives

PMCID: PMC1852464

which is the desired number.  The line on the page is:
PMCID: PMC1852464
NIHMSID: NIHMS8281

strawberry 7% grep PMCID toms.pmid2pmc | wc
      1     338   11028
strawberry 8% wc toms.pmid2pmc
      4    7861  116441 toms.pmid2pmc

So since they are using really long lines (UGH!!) the grep cut the line
count down only 4 fold (from 4 lines to 1); the one matching line is
still about 11000 characters long.