#!/bin/tcsh -f
#(ie run the cshell on this but don't read the .cshrc)
# The interpreter line must stand alone: anything after the first option
# would be handed to tcsh as part of a single argument.
echo version = 1.20 of medquery 2004 May 15
# 2004 May 15, 1.20: pubmedgrab 12086598 crashes - [] problem
# 2004 May 13, 1.19: handle 'PMID: 13918161 PubMed - OLDMEDLINE for Pre1966'
# 2004 Apr 22, 1.18: eutils now functional
# 2004 Apr 7, 1.17: use E-utilities to get entry more cleanly (failed)
# 2004 Mar 18, 1.16: handle html markup (the tag named in this note was lost)
#                    to get PMID properly
# 2003 Jul 15, 1.15: now working with "E-Utilities"
# 2003 Jul 15, 1.14: Pubmed format changed to "E-Utilities" - broke this script!
# 2002 May 4, 1.13: handle bibquery being empty when medlinebib fails
# 2001 May 24, 1.12: pubmed format changed! This fixes it
# 2001 Mar 29, 1.11: make medquery handle html if the person uses that to save.
# 2001 Mar 29, 1.10: rename query0 query2
# 2000 Jan 24, 1.05: use pmid preferentially
# 1999 Nov 22, 1.03: medquery now uses query.fcgi from the new pubmed
# origin 1999 Sep 5 from mq
# This medquery script converts a saved PubMed reference
# into BibTex format.
# PubMed is a database of biology-related references at
# http://www.ncbi.nlm.nih.gov/PubMed/medline.html
# Information about LaTeX (a typesetting language) and bibtex
# (a database language for references in papers) is at:
# http://www.lecb.ncifcrf.gov/~toms/latex.html
# Medquery works with medlinebib:
# http://www.lecb.ncifcrf.gov/~toms/delila/medlinebib.html
# Dr. Thomas D. Schneider
# National Cancer Institute
# Laboratory of Experimental and Computational Biology
# Frederick, Maryland 21702-1201
# toms@ncifcrf.gov
# permanent email: toms@alum.mit.edu (use only if first address fails)
# http://www.lecb.ncifcrf.gov/~toms/
# ******************************************************************************
# The conversion program from pubmed to bibtex format
# now accepts the new pubmed format. To use it, put an automate
# in your home directory containing three lines:
#
# query.fcgi medquery
#
# query medquery
#
# (The space between them is important, remove the # of course.)
#
# Start the automation by typing
#
# au
#
# Then find your reference in pubmed and simply save it to your
# home directory. If you use the old pubmed you will generate the query
# file; the new pubmed generates query.fcgi. In either case the above
# lines will trigger medquery to process the reference.
#
# Reference for au and atchange to automate:
# http://www.lecb.ncifcrf.gov/~toms/atchange.html
#
# ******************************************************************************
# ******************************************************************************
# 2003 July 15
# http://www.nlm.nih.gov/pubs/techbull/ma03/ma03_technote.html#eutil
# PubMed(R) to Complete Transition to E-Utilities
# and Manually Constructed URLs
#
# April 03, 2003 [posted]
#
# In July 2002, NCBI announced the availability of new
# programming for the Entrez Utilities (E-Utilities) and informed
# utility users that they should convert URLs to the new format by
# the end of 2002.
#
# NCBI will phase out the old utilities completely in June 2003. This
# may affect customers of some products such as EndNote(R),
# ProCite(R), and Reference Manager(R). Please contact user support
# for your respective product if you have questions. Questions
# concerning the use of E-Utilities can be sent to:
# eutilities@ncbi.nlm.nih.gov.
#
# If you have manually created links to PubMed that contain the
# string: /htbin-post/, these should be changed to follow the
# specifications provided on the page, Linking to PubMed and other
# Entrez Databases. These changes must be in place prior to June
# 2003.
# Entrez Utilities (E-Utilities)
# http://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
# Linking to PubMed and other Entrez Databases.
# http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
# ******************************************************************************
# Working files:
#   query  - the saved PubMed reference in the current directory
#   query1 - per-user scratch file in /tmp that receives the downloaded entry
#   query2 - per-user file in /tmp where the processed query is parked
set query = query
set me = `whoami`
set query1 = /tmp/${me}.query1
set query2 = /tmp/${me}.query2
# A file saved under the name query.fcgi is treated exactly like query.
if (-f query.fcgi) mv query.fcgi query
if (-f $query) then
# Write a default medlinebibp parameter file when none exists in the
# current directory (presumably read by the medlinebib program run later).
if (! -f medlinebibp) then
  echo 'creating new medlinebibp file'
  echo ''
  # first line creates the file, the remaining lines are appended
  echo "1.20 version of medlinebibp that this parameter file is designed for." > medlinebibp
  echo "n 'd' = debug" >> medlinebibp
  echo "n 'e' = do everything" >> medlinebibp
  echo "f 'f' = use final author, otherwise second author" >> medlinebibp
endif
# ---------------------------------------------------------------------------
# Extract the identifiers from the saved reference in $query.
# Result: $pmid (PubMed ID) and $ui (the UI number, when the PMID line
# carries one).  Either may be left empty; both are always defined.
# ---------------------------------------------------------------------------
set pmid = ""
set ui = ""
# detect html form
# Count the lines carrying an <html tag.  A count is a single word, so the
# assignment is safe whatever the file contains (an empty grep pattern would
# match every line and hand the whole file to "set").
set htmllines = `grep -i -c '<html' $query`
if ("$htmllines" == "0") then
  echo the file is not html
  # echo '$line' was blank
  # echo '$line' was "$line"
  # the tr changes control M's to returns
  # in case the mac format was used
  # cat $query
  # extract the id line:
  # 2001 May 24: bug version:
  # (the first tr argument was a literal control-M, written here as \r)
  # set line = `cat "$query" | tr "\r" "\n" | tr ";" "," | grep "PMID:"`
  # remove brackets they just introduced!!
  # set line = `cat "$query" | tr "\r" "\n" | tr -d '[]' | tr ";" "," | grep "PMID:"`
  # normal line:
  # PMID: 3357886 PubMed - indexed for MEDLINE
  # ancient line:
  # PMID: 13918161 PubMed - OLDMEDLINE for Pre1966
  #echo "*******************************************************************"
  #cat "$query"
  #echo "*******************************************************************"
  #exit
  set line = "`grep '^PMID: ' $query`"
  # tr -d '[]' | tr ";" "," | `
  echo The PMID containing line is:
  echo "$line"
  # clean the line:
  # set pmid0 = `echo $line | tr "," "\n" | grep "PMID: " | sed -e "s/PMID: //"`
  # remove the new junk they put in just before 2001 May 24
  # set pmid = `echo $pmid0 | sed -e "s/PubMed - indexed for MEDLINE//"`
  # that's not enough, they change the message,
  # eg PMID: 11358999 [PubMed - in process]
  # echo "PMID line is '$pmid0'"
  # set pmid = `echo $pmid0 | tr -d "A-Z"`
  # naw... try again:
  # from man tr:
  # When the -c option is specified with -d, all characters except those
  # specified by string1 will be deleted. The contents of string2 will be
  # ignored, unless the -s option is also specified.
  # so... delete everything on the line EXCEPT digits:
  # set pmid = `echo $line | tr -cd "[:digit:]"`
  # that fails on the ancient line that has numbers!!
  # The PMID is the second blank-separated word of the line.
  set pmid = `echo "$line" | tr " " '\012' | head -2 | tail -1`
  echo "PMID is '$pmid'"
  # find the UI if it is on the line:
  # "$line" is quoted so that the [ ] on the PMID line are not taken as a
  # filename pattern.
  set ui = `echo "$line" | tr "," "\n" | grep "UI: " | sed -e "s/UI: //"`
  echo " UI is '$ui'"
else
  echo the file is html
  # Put every tag and bracket on its own line, keep the first line that
  # holds "PMID: <digits>", and reduce it to the digits alone so that exactly
  # one word is assigned to pmid.
  set pmid = `cat $query | tr '<>[]' "\n\n\n\n" | grep 'PMID: *[0-9]' | head -1 | sed -e 's/.*PMID: *//' -e 's/[^0-9].*//'`
  # `cat $query | tr ">" "\n" | tr "<" "\n" | grep PMID | sed -e "s/PMID: //"`
  echo 'PMID is "'"$pmid"'"'
endif
# see what we found and act accordingly: give preference to PMID
# Sets $uid to the PMID when there is one, otherwise to the UI; stops the
# script when neither identifier was found.
# Not every path above defines both variables, so define any missing one as
# empty; the quoted comparisons below then stay valid for empty values too.
if (! $?pmid) set pmid = ""
if (! $?ui) set ui = ""
if ("$pmid" != "") then
  set uid = "$pmid"
else if ("$ui" != "") then
  set uid = "$ui"
else
  echo "PMID and UI are missing"
  echo "HALT"
  exit
endif
echo "ID used is: $uid"
# ***************************************************************************
# given the $uid, grab the entry
# ***************************************************************************
# Downloads the MEDLINE-format text of entry $uid into $query1, replaces the
# contents of $query with it, shows it, and runs medlinebib.
#
# OLD FUNCTIONAL METHOD: using entrez
# wget -O $query1 "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=$uid&dopt=Medline"
# NON FUNCTIONAL ATTEMPTS:
# You really should not be using web query to retrieve PubMed
# citations in text format, but should be using E-Utilities. Your
# query in EFetch would be:
#
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=14602927&rettype=medline
#
# This will provide you with a clean text file. For more information
# about E-Utilities, please go to:
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
#
# Sincerely,
# N. Ruiz
# National Library of Medicine
#
# 2004 Apr 7: Using E-utils:
#wget -O $query1 "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline"
# NEW FUNCTIONAL METHOD: using eutils
# 2004 Apr 22
# Retmode & rettype are outlined in the EFetch documentation at:
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html#Retrieval Mode
#
# If you add retmode=text, you should get what you want.
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=14602927&rettype=medline&retmode=text
#
# Please forward any questions about e-utilities to:
# eutilities@ncbi.nlm.nih.gov.
#
# Sincerely, N. Ruiz National Library of Medicine
# using retmode=text (requested over https):
wget -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline&retmode=text"
# Stop before touching $query when the download failed, so that the saved
# reference is not replaced by an empty or partial file.
if ($status != 0) then
  echo "wget failed to retrieve entry $uid; $query was left unchanged"
  exit 1
endif
# 2004 Mar 18: Keep the html so that one keeps the PMID:
# this is now handled by medlinebib
cat $query1 > $query
echo ----=============================
cat $query
echo ----=============================
medlinebib
# The medlinebib run above is expected to leave a non-empty bibformat file
# in the current directory; stop with a message when it is missing or empty.
if (! -f bibformat) then
  echo 'The medlinebib program failed to produce a bibformat file!'
  exit
endif
# -z is true for a file of zero size
if (-z bibformat) then
  echo 'The medlinebib program failed: the bibformat file is empty!'
  exit
endif
# Show the new entry between two spacer lines and, in the same pass, append
# it to the cumulative file bib.
echo " "
tee -a bib < bibformat
echo " "
echo "THE BIBLIOGRAPHY is IN FILE ~/bibformat"
echo "CONCATENATED BIBLIOGRAPHIES ARE IN FILE ~/bib"
# remove query file so that it is not in the way for the next file
echo "query file at $query was moved to $query2"
mv $query $query2
else
# No query file is present.  That is not reported as a problem: the script
# is meant to be driven by atchange, which calls medquery again once the
# query file has been moved away, and that call should simply end quietly.
echo "Medquery is DONE"
echo ""
endif
# Finish with a blank line and stop here; the text that follows this exit
# is archival material and is never executed.
echo ""
exit
# ******************************************************************************
# OLD MATERIAL FOR THE wget (kept for reference only; never reached):
# 2003 July 15: functional again!
wget -O $query1 "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=$uid&dopt=Medline"
#
# original htbin-post method NOW OBSOLETE:
# wget -O $query1 "http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&uid=$uid&Dopt=l&html=no&title=no"
#
# tests:
# wget -O zzz.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12177305&dopt=Medline"
# (gave web page - html)
# wget -O yyy.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&uid=12177305&dopt=Medline"
#
# works:
# wget -O uuu.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=12177305&dopt=Medline"
#
# 2001 Mar 29: For some reason they now have html stuff
# surrounding the medline, despite html=no!!
# so clear that out:
# grep -v "Entrez Reports" $query1 |\
# grep -v -- '----------------' |\
# grep -v -- '<' |\
# grep -v -- '>' |\
# grep -v -- '^$' |\
# cat > query