#!/bin/tcsh -f
#(ie run the cshell on this but don't read the .cshrc)
echo version = 1.20 of medquery 2004 May 15
# 2004 May 15, 1.20: pubmedgrab 12086598 crashes - [] problem
# 2004 May 13, 1.19: handle 'PMID: 13918161 PubMed - OLDMEDLINE for Pre1966'
# 2004 Apr 22, 1.18: eutils now functional
# 2004 Apr 7, 1.17: use E-utilities to get entry more cleanly (failed)
# 2004 Mar 18, 1.16: handle [word lost in transcription - likely an HTML tag] to get PMID properly
# 2003 Jul 15, 1.15: now working with "E-Utilities"
# 2003 Jul 15, 1.14: Pubmed format changed to "E-Utilities" - broke this script!
# 2002 May  4, 1.13: handle bibquery being empty when medlinebib fails
# 2001 May 24, 1.12: pubmed format changed!  This fixes it
# 2001 Mar 29, 1.11: make medquery handle html if the person uses that to save.
# 2001 Mar 29, 1.10: rename query0 query2
# 2000 Jan 24, 1.05: use pmid preferentially
# 1999 Nov 22, 1.03: medquery now uses query.fcgi from the new pubmed
# origin 1999 Sep 5 from mq

# This medquery script converts a saved PubMed reference
# into BibTex format.

# PubMed is a database of biology-related references at
# http://www.ncbi.nlm.nih.gov/PubMed/medline.html

# Information about LaTeX (a typesetting language) and bibtex
# (a database language for references in papers) is at:
# http://www.lecb.ncifcrf.gov/~toms/latex.html

# Medquery works with medlinebib:
# http://www.lecb.ncifcrf.gov/~toms/delila/medlinebib.html

#   Dr. Thomas D. Schneider
#   National Cancer Institute
#   Laboratory of Experimental and Computational Biology
#   Frederick, Maryland  21702-1201
#   toms@ncifcrf.gov
#   permanent email: toms@alum.mit.edu (use only if first address fails)
#   http://www.lecb.ncifcrf.gov/~toms/

# ******************************************************************************
# The conversion program from pubmed to bibtex format
# now accepts the new pubmed format.  To use it, put an automate
# in your home directory containing three lines:
# 
# query.fcgi medquery
# 
# query medquery
# 
# (The space between them is important, remove the # of course.)
# 
# Start the automation by typing
# 
#   au
#
# Then find your reference in pubmed and simply save it to your
# home directory.  If you use the old pubmed you will generate the query
# file; the new pubmed generates query.fcgi.  In either case the above
# lines will trigger medquery to process the reference.
#
# Reference for au and atchange to automate:
# http://www.lecb.ncifcrf.gov/~toms/atchange.html
#
# ******************************************************************************

# ******************************************************************************
# 2003 July 15

# http://www.nlm.nih.gov/pubs/techbull/ma03/ma03_technote.html#eutil
# PubMed(R) to Complete Transition to E-Utilities
#     and Manually Constructed URLs
# 
#     April 03, 2003 [posted] 
# 
#         In July 2002, NCBI announced the availability of new
#         programming for the Entrez Utilities (E-Utilities) and informed
#         utility users that they should convert URLs to the new format by
#         the end of 2002. 
# 
#         NCBI will phase out the old utilities completely in June 2003. This
#         may affect customers of some products such as EndNote(R),
#         ProCite(R), and Reference Manager(R). Please contact user support
#         for your respective product if you have questions. Questions
#         concerning the use of E-Utilities can be sent to:
#         eutilities@ncbi.nlm.nih.gov. 
# 
#         If you have manually created links to PubMed that contain the
#         string: /htbin-post/, these should be changed to follow the
#         specifications provided on the page, Linking to PubMed and other
#         Entrez Databases. These changes must be in place prior to June
#         2003. 

# Entrez Utilities (E-Utilities) 
# http://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html

# Linking to PubMed and other Entrez Databases.
# http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html

# ******************************************************************************

# Working file names.  query is the saved PubMed reference in the current
# directory; query1 receives the downloaded entry and query2 is where the
# processed query file is parked at the end of a run.
set me = `whoami`
set query = query
set query1 = /tmp/${me}.query1
set query2 = /tmp/${me}.query2

# The new pubmed saves the reference as query.fcgi; treat it as query.
if (-f query.fcgi) mv query.fcgi query

if (-f $query) then

   # When no medlinebibp parameter file exists yet in this directory, write
   # a default one (presumably read by medlinebib - confirm).
   set paramfile = medlinebibp
   if (! -f $paramfile) then
      echo 'creating new medlinebibp file'
      echo ''

      # The first line creates the file, the remaining lines append to it.
      echo "1.20  version of medlinebibp that this parameter file is designed for." > $paramfile
      echo "n     'd' = debug" >> $paramfile
      echo "n     'e' = do everything" >> $paramfile
      echo "f     'f' = use final author, otherwise second author" >> $paramfile
   endif

   # Work out whether the saved reference is html or plain text, then pull
   # the PMID (and the UI, when present) out of it.
   # Leaves its results in: pmid, ui (either may be empty).

   # Start both empty so that later tests never meet an undefined variable;
   # the html path below never assigns ui.
   set pmid = ""
   set ui = ""

   # detect html form
   # NOTE(review): the pattern of this test had been lost, leaving grep "",
   # which matches every line, so every non-empty file was taken to be html.
   # Markup is assumed here to announce itself with an <html or <pre tag;
   # confirm against real saved files.
   set htmlcount = `grep -i -c -e '<html' -e '<pre' $query`

   if ("$htmlcount" == 0) then
      echo the file is not html
# echo '$line' was blank
# echo '$line' was "$line"

      # the tr changes control M's to returns
      # in case the mac format was used

# cat $query

      # extract the id line:
# 2001 May 24: bug version:
#     set line = `cat "$query" | tr "\r" "\n" | tr ";" "," | grep "PMID:"`
# remove brackets they just introduced!!
#      set line = `cat "$query" | tr "\r" "\n" | tr -d '[]' | tr ";" "," | grep "PMID:"`

# normal line:
# PMID: 3357886 PubMed - indexed for MEDLINE
# ancient line:
# PMID: 13918161 PubMed - OLDMEDLINE for Pre1966

#echo "*******************************************************************"
#cat "$query"
#echo "*******************************************************************"
#exit

      set line = "`grep '^PMID: ' $query`"

# tr -d '[]' | tr ";" "," | `
      echo The PMID containing line is:
      echo "$line"

      # clean the line:
#      set pmid0 = `echo $line | tr "," "\n" | grep "PMID: " | sed -e "s/PMID: //"`
      # remove the new junk they put in just before 2001 May 24
#      set pmid = `echo $pmid0 | sed -e "s/PubMed - indexed for MEDLINE//"`
# that's not enough, they change the message,
# eg PMID: 11358999 [PubMed - in process] 
#      echo "PMID line is '$pmid0'"
#      set pmid = `echo $pmid0 | tr -d "A-Z"`
# naw... try again:

# from man tr:
# When the -c option is specified with -d,  all  characters   except  those
# specified  by  string1  will  be deleted. The contents  of string2  will  be
# ignored, unless the -s option is also specified.
# so... delete everything on the line EXCEPT digits:
#      set pmid = `echo $line | tr -cd "[:digit:]"`
# that fails on the ancient line that has numbers!!

      # The PMID is the second blank-separated word of the line:
      # one word per output line, then keep the second line.
      set pmid = `echo "$line" | tr " " '\012' | head -2| tail -1`

      echo "PMID is '$pmid'"

      # find the UI if it is on the line:
      # $line is quoted so that brackets in it are never taken as a
      # file name pattern.
      set ui = `echo "$line" | tr "," "\n" | grep "UI: " | sed -e "s/UI: //"`
      echo "  UI is '$ui'"

   else
      echo the file is html
      # Break the markup at every < > [ ] so the PMID text sits on a line
      # of its own, then strip the label.
      set pmid = `tr '<>[]' "\n\n\n\n" < $query | grep PMID | sed -e "s/PMID: //"`
#          `cat $query | tr ">" "\n" | tr "<" "\n" | grep PMID | sed -e "s/PMID: //"`
      # Several words can come back (trailing text, or more than one
      # match); keep only the first so the id stays usable in a URL.
      if ($#pmid > 1) set pmid = "$pmid[1]"
      echo 'PMID is "'"$pmid"'"'
   endif

   # see what we found and act accordingly: give preference to PMID
   # Sets uid to the PMID when there is one, otherwise to the UI; stops
   # the script with a non-zero status when neither was found.

   # ui is only assigned on the plain-text path, so make sure it exists
   # before it is tested.
   if (! $?ui) set ui = ""

   # Both values are quoted: an empty or multi-word value would otherwise
   # break the expression.
   if ("$pmid" != "") then
      set uid = "$pmid"
   else if ("$ui" != "") then
      set uid = "$ui"
   else
      echo "PMID and UI are missing"
      echo "HALT"
      exit 1
   endif

   echo "ID used is: $uid"

# ***************************************************************************
# given the $uid, grab the entry
# ***************************************************************************

# OLD FUNCTIONAL METHOD: using entrez
# wget -O $query1 "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=$uid&dopt=Medline"

# NON FUNCTIONAL ATTEMPTS:
# You really should not be using web query to retrieve PubMed
# citations in text format, but should be using E-Utilities. Your
# query in EFetch would be:
#
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=14602927&rettype=medline
#
# This will provide you with a clean text file. For more information
# about E-Utilities, please go to:
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
# 
# Sincerely,
# N. Ruiz
# National Library of Medicine
#
# 2004 Apr 7: Using E-utils:
#wget -O $query1 "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline"

# NEW FUNCTIONAL METHOD: using eutils
# 2004 Apr 22
# Retmode & rettype are outlined in the EFetch documentation at:
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html#Retrieval Mode
#
# If you add retmode=text, you should get what you want.
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=14602927&rettype=medline&retmode=text
# 
# Please forward any questions about e-utilities to:
# eutilities@ncbi.nlm.nih.gov.
#
# Sincerely, N. Ruiz National Library of Medicine

# using retmode=text:
# Fetch the MEDLINE-format entry for $uid into $query1.  The request goes
# over https (NOTE(review): NCBI is understood to serve E-utilities over
# https only - confirm).
wget -O $query1 "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&rettype=medline&retmode=text"

# Stop here when the download failed, so that the saved query file is not
# overwritten with an empty or partial result.
if ($status != 0) then
   echo "wget failed to fetch PubMed entry ${uid}"
   exit 1
endif

    # 2004 Mar 18: Keep the html so that one keeps the PMID:
    # this is now handled by medlinebib
    # The fetched entry replaces the contents of the query file.
    cat $query1 > $query

# Show what was fetched.
echo ----=============================
cat $query
echo ----=============================

   # Run medlinebib, check that it produced a non-empty bibformat file,
   # show the result, append it to bib and move the query file away.
   # (presumably medlinebib reads the query file - confirm)

   # Remove any bibformat left by an earlier run; otherwise the checks
   # below would pass on the stale file and it would be appended to bib
   # a second time.
   rm -f bibformat

   medlinebib

   if (! -f bibformat) then
      echo 'The medlinebib program failed to produce a bibformat file!'
      exit 1
   endif

   # -z is true for a file of zero size
   if (-z bibformat) then
      echo 'The medlinebib program failed: the bibformat file is empty!'
      exit 1
   endif

   echo " "
   cat bibformat
   echo " "
   cat bibformat >> bib
   echo "THE BIBLIOGRAPHY is IN FILE ~/bibformat"
   echo "CONCATENATED BIBLIOGRAPHIES ARE IN FILE ~/bib"

   # remove query file so that it is not in the way for the next file
   echo query file at $query was moved to $query2
   mv $query $query2
else
   # There is no query file, but we don't want to say this because
   # it is designed to be used with atchange.  When the file is moved away
   # atchange will call medquery and we should just end gracefully.
   echo Medquery is DONE
   echo 
endif

# Print a closing blank line and stop.  The script ends here: the text
# below this point is archived notes and is never reached.
echo ''

exit
********************************************************************************

OLD MATERIAL FOR THE wget:

# 2003 July 15: functional again!
wget -O $query1 "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=$uid&dopt=Medline"
#
# original htbin-post method NOW OBSOLETE:
#   wget -O $query1 "http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&uid=$uid&Dopt=l&html=no&title=no"
#
# tests:
# wget -O zzz.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12177305&dopt=Medline"
# (gave web page - html)
# wget -O yyy.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&uid=12177305&dopt=Medline"
#
# works:
# wget -O uuu.html "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=12177305&dopt=Medline"
#
   # 2001 Mar 29: For some reason they now have html stuff
   # surrounding the medline, despite html=no!!
   # so clear that out:
#    grep -v "Entrez Reports" $query1 |\
#    grep -v -- '----------------' |\
#    grep -v -- '<' |\
#    grep -v -- '>' |\
#    grep -v -- '^$' |\
#    cat > query