move to github

2026-02-27 21:51:52 +00:00 · 2018-04-06 14:00:35 -04:00
commit b0b69367c8
7 changed files with 849 additions and 0 deletions
--- a/templates/cron.erb
+++ b/templates/cron.erb
@@ -0,0 +1 @@
+20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall
--- a/templates/http.pubcrawler.conf.erb
+++ b/templates/http.pubcrawler.conf.erb
@@ -0,0 +1,7 @@
+Alias /pubcrawler /usr/share/pubcrawler/html
+<Directory /usr/share/pubcrawler/html>
+    Options FollowSymLinks Includes
+    AllowOverride None
+    Order allow,deny
+    Allow from all
+</Directory>
--- a/templates/product.config.erb
+++ b/templates/product.config.erb
@@ -0,0 +1,255 @@
+        ############   PubCrawler configuration file         ############
+        #######   (for PubCrawler Version higher than 0.53)        ######
+        ############################################################
+        #                                                                #
+        #   lines beginning with hash marks (#) are ignored.             #
+        #                                                                #
+        #   PubCrawler home page:                                        #
+        #               http://www.pubcrawler.ie                         #
+        #                                                                #
+        #   Specify your file locations and search options here.         #
+        #   Each line is in the format  FIELD space VALUE.               #
+        #   Any leading or trailing quotes will be chopped off.          # 
+        #   Hash marks separate comments from data.                      #
+        #   You must specify a value for all 6 mandatory fields.         #
+        #                                                                #
+        ############################################################
+        ############################################################
+        ################# MANDATORY SETTINGS #######################
+        ############################################################
+project <%= @name %>
+        # Identifier for a project. This value is displayed in the header
+        # of the results page.
+
+background_color <%= @bgcolor %>
+        # background color for sectional headers of the result page.
+
+header_icon <%= @header_icon %>
+        # relative or absolute URL for icon to display in results page
+        # header.
+
+<%- if @html_file -%>
+html_file <%= @html_file %>
+<%- else -%>
+html_file /usr/share/pubcrawler/html/<%= @name %>/index.html
+<%- end -%>
+        # html_file is the name of the output HTML file for results
+        # it will be written to the specified working directory
+        # unless an absolute pathname is given
+
+viewdays <%= @viewdays %>   
+        # viewdays is the number of days each document will be shown.
+
+relentrezdate <%= @relentrezdate %>
+        # relentrezdate (relative date of insertion into Entrez) 
+        # is the maximum age (in days) of database entries to be reported.
+        # NOTE: sometimes records first appear in the databases several
+        # days or even weeks later than indicated by their database
+        # date-stamp, i.e. with non-zero values of relentrezdate.
+        # Therefore relentrezdate needs to be high enough to find these
+        # records.  A relentrezdate of 90 days is suggested (if you make 
+        # relentrezdate too huge the searches will be very slow.)
+        # other valid entries are: 
+        # '1 year', '2 years', '5 years', '10 years', and 'no limit'
+
+getmax <%= @getmax %>
+        # getmax is the maximum number of documents to be retrieved
+        # for each search carried out.
+
+fullmax <%= @fullmax %>
+        # fullmax is the maximum number of documents for which a full
+        # report is being presented
+        # if more documents were retrieved, these can be accessed
+        # through a hyperlink (in groups of up to fullmax articles)
+
+include_config <%= @include_config %>
+        # include_config (yes/no) specifies whether or not to append 
+        # this config-file to the end of the output file
+        #-------------------------------------------------------------------#
+        ############################################################
+        ################## OPTIONAL SETTINGS #######################
+        ############################################################
+search_URL <%= @search_URL %>
+        # URL where searches are being sent to
+        # defaults to 
+        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
+
+neighbour_URL <%= @neighbour_URL %>
+        # URL where neighbourhood searches are being sent to
+        # defaults to 
+        # https://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi
+
+retrieve_URL <%= @retrieve_URL %>
+        # URL where documents are retrieved from
+        # defaults to 
+        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi
+
+<%- if @work_dir -%>
+work_dir <%= @work_dir %>
+<%- else -%>
+work_dir /var/lib/pubcrawler/<%= @name %>
+<%- end -%>
+        # specify a directory in which databases, output and log file 
+        # will be located
+        # if no value given, the current working directory will be used
+
+extra_range <%= @extra_range %>
+        # specifies the number of documents combined in a link
+        # minimum value is 1, defaults to 'fullmax'
+
+check <%= @check %>
+        # if set to '1' program will just check all settings
+        # without performing the actual search
+        # RECOMMENDED FOR THE FIRST RUN!
+
+prompt <%= @prompt %>
+        # for Mac-users only:
+        # if this option is set to '1' the program will ask you 
+        # explicitly for command line options
+        # NOTE: THIS IS NOT SUITABLE FOR AUTOMATED USE!
+
+verbose <%= @verbose %>
+        # verbose 0 runs silently and makes log file                 
+        # verbose 1 writes log output on screen 
+
+mute <%= @mute %>
+        # mute 0 writes some messages to STDERR
+        # mute 1 stops ALL messages going to STDERR
+        # unless an error was encountered                    
+
+log_file <%= @log_file %>
+        # name of file for log-output
+        # (verbose has to be set to '0')   
+        #base_URL 'http://www.gen.tcd.ie/pubcrawler/pubcrawler_output.html'
+base_URL <%= @base_URL %>
+        # specify a URL, that will be used for 
+        # the 'Back to Top' link in the output page   
+        # 'local_file' makes links relative to results file
+        # mail joe@hotmail.earth.com
+        # if the hash mark ('#') at the beginning of the above line
+        # is removed, PubCrawler will send the results file to
+        # the given address at the end of each run. You can specify
+        # multiple addresses using commas (no spaces!).
+        # notify jfk@hotmail.earth.com#joe
+        # if the hash mark ('#') at the beginning of the above line
+        # is removed, PubCrawler will send a notification to
+        # the given address (minus '#joe') at the end of each run
+        # The recipient will be addressed with joe (optional).
+        # You can specify multiple addresses using commas (no spaces!).
+
+mail_features <%= @mail_features %>
+        # comma-separated list of extra features for the mail
+        # to be sent (without them it will be plain text). These are:
+        # css,javascript,entrez_links,pubcrawler_links,images,html,description
+        # or simply 'all' for everything
+
+lynx <%= @lynx %>
+        # for Unix-users only:
+        # if you don't want to use the libwww-Perl module and
+        # have an alternative browser installed, that works from the 
+        # command line, like 'Lynx', you can use it by entering the
+        # command that evokes it (e.g. lynx '/usr/bin/lynx')
+        # NOTE: THIS OVERRIDES ANY PROXY SETTINGS!
+        #header 'head.html'
+        # specify a location of a header (in HTML-style) that will be used
+        # for the output file (disabled unless hash mark is removed)
+
+prefix <%= @prefix %>
+        # if you would like a different prefix to be used 
+        # for standard files (configuration, database, log)
+        # insert it here (default is program name up to first dot):
+
+system <%= @system %>
+        # name of operating system
+        # might need the explicit assignment of an adequate value
+        # ('MacOS','Win','Unix', or 'Linux')
+        # if Perl is not configured properly
+        #### PROXY SETTING (if desired and/or necessary) ####
+        #proxy www.tcd.ie/proxy.cgi
+        # insert either a proxy server (eg. 'proxy.domain.com')
+        # or the address of a proxy configuration file
+        # if known (eg. 'www.domain.com/proxy.cgi')
+        # and uncomment
+
+proxy_port <%= @proxy_port %>
+        # port of the proxy server,defaults to '80'
+
+proxy_auth <%= @proxy_auth %>
+proxy_pass <%= @proxy_pass %>
+        # in case you need to submit a username and a password
+        # for accessing your proxy, you can fill it in here:
+        # CAUTION! Having passwords stored in a file means a
+        # possible security risk! Please delete after usage
+        # or use the according command line option!
+        # !!! Please make sure that the module MIME::Base64 is 
+        # installed for the proxy authorization to work!!!
+
+time_out <%= @time_out %>
+        # specify how many SECONDS to give remote servers 
+        # in creating responses before the library disconnects
+        # (defaults to 180 seconds if no value is given) 
+
+test_URL <%= @test_URL %>
+        # test-URL for proxy-test
+
+no_test <%= @no_test %>
+        # if a proxy is given, the internet connection is tested
+        # at the start of the program by default; this can be
+        # suppressed if a value of '1' is given here
+
+indent <%= @indent %>
+        # amount of pixels that output is being shifted to the right
+
+no_decap <%= @no_decap %>
+        # put 1 inbetween single quotes if you want to disable 
+        # processing of the entrez documents (chopping of head and tail
+        # and collecting UIs)
+
+spacer <%= @spacer %>
+        # specify a gif that will be inserted in the output to shift
+        # text past the left, blue column
+        # (defaults to: http://www.gen.tcd.ie/pubcrawler/pics/spacer.gif)
+
+        #-----------------------------------------------------------------------------#
+
+        ############################################################
+        ################# SEARCH SPECIFICATION #####################
+        ############################################################
+        ###########################################################################
+        ######  Entrez abbreviations for fields                                     #
+        ######  (see http://www4.ncbi.nlm.nih.gov/PubMed/linking.html).             #
+        ######  combine fields with AND, OR, BUTNOT and parentheses.                #
+        #                                                                           #
+        # for PubMed : one of AFFL, ALL, AUTH, ECNO, JOUR, MESH, MAJR, PAGE,        #
+        #                     PDAT, PTYP, KYWD, WORD, TITLE, or VOL.                #
+        # for Nucleotide : one of ACCN, AUTH, PDAT, ECNO, FKEY, GENE, JOUR, KYWD,   #
+        #                         MDAT, ORGN, PROP, PROT, SQID, SLEN, SUBS, or      #
+        #                         WORD.                                             #
+        #                                                                           #
+        # where ACCN = Accession Number, AFFL = Afilliation, ALL = all fields,      #
+        #       AUTH = Author Name, ECNO = E. C. Number, FKEY = Feature Key,        #
+        #       GENE = gene name, JOUR =journal name, KYWD = Keywords,              #
+        #       MAJR = MeSH major topic, MDAT = modification date,                  #
+        #       MESH = mesh term,ORGN = organism, PACC = Primary Accesion Number,   #
+        #       PAGE = first page, PDAT = publication/creation date,                #
+        #       PROP = Properties,  PROT = protein name, PTYP = Publication Type,   #
+        #       SUBS = Substance, TITL = title word, WORD = text word,              #
+        #       VOL = volume.                                                       #
+        ###########################################################################
+        ##### Each search-specification has to be written on one line.
+        ##### The first word must specify the database: 
+        ##### pubmed, pm_neighbour, genbank, or gb_neighbour
+        ##### Any following words enclosed in single quotes (') will be used
+        ##### as an alias for this query, otherwise they will be considered
+        ##### Entrez-search-terms, as will the rest of the line.
+        ##### You can have as many different searches as you wish. The results of all
+        ##### searches will be combined according to their aliases.
+        ##### You CAN NOT use the same alias for searches at different databases!
+        ##### Write your search descriptions below this line.  
+        ##### (Upper/lower case does not matter.)
+
+<%- if @searches and @searches.kind_of?(Array) -%>
+<%- @searches.each do |search| -%>
+<%= search['database'] %> '<%= search['alias'] %>' <%= search['term'] %>
+<%- end -%>
+<%- end -%>
				`@@ -0,0 +1 @@`
				`20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall`