move to github

2026-01-11 15:03:15 +00:00 · 2018-04-06 14:00:35 -04:00
commit b0b69367c8
7 changed files with 849 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,72 @@
 Installs Pubcrawler from EuPathDB-specific RPM and manages
 `/etc/pubcrawler/` configuration files.
 The RPM includes a `/etc/cron.d` job that runs all *.config in `/etc/pubcrawler`.
 `pubcrawler::params` defines the common set of configuration parameters.
 See `templates/product.config.erb` in this module for documentation on
 each parameter.
 A configuration file will be generated in `/etc/pubcrawler` for each
 key in the `$site_specific_params` hash. All site-specific configuration
 files will use the default values from `pubcrawler::params` unless they
 are overridden in `$site_specific_params`. For example, AmoebaDB will use
 '14' for `viewdays` whereas CryptoDB will use '120' because it has been
 overridden in the `$site_specific_params` hash for the `CryptoDB` key.
    ...
    $viewdays         = '14'
    ...
    $site_specific_params = {
      'AmoebaDB' => {
        'bgcolor' => '#C8C5A2',
        'header_icon' => '/a/images/AmoebaDB/title_s.png',
        'searches' => [
          ...
        ],
      },
      'CryptoDB' => {
        'bgcolor' => '#ffcccc',
        'header_icon' => '/a/images/CryptoDB/title_s.png',
        'viewdays' => '120',
        'searches' => [
          ...
        ],
      },
    ...
    }
 ### Hiera
 The values in `::pubcrawler::params` can be overridden in Hiera.
    pubcrawler::site_specific_params:
      AmoebaDB:
        bgcolor: '#C8C5A2'
        header_icon: '/a/images/AmoebaDB/title_s.png'
        searches: 
          - database: pubmed
            alias: Entamoeba
            term: Entamoeba [ALL]
          - database: pubmed
            alias: Acanthamoeba
            term: Acanthamoeba [ALL]
 In this example, the `$site_specific_params` in  `::pubcrawler::params`
 will be replaced with a hash containing only AmoebaDB - so only
 AmoebaDB will be configured.
 Other individual params can be set in Hiera.
    pubcrawler::fullmax: 666
 ### Configuration Testing
 To run a single configuration manually, use
    PRODUCT=TrichDB
    sudo -u nobody  /usr/share/pubcrawler/bin/pubcrawler.pl -c /etc/pubcrawler/${PRODUCT}.config
 Results are written to `/usr/share/pubcrawler/html/${PRODUCT}/`
 (for the example above, `/usr/share/pubcrawler/html/TrichDB/`).
--- a/manifests/config.pp
+++ b/manifests/config.pp
@@ -0,0 +1,42 @@
 # Generate one Pubcrawler configuration file.
 #
 # Writes /etc/pubcrawler/${name}.config from templates/product.config.erb.
 # The resource title ($name) is used as the project name and in the
 # default output paths written by the template.
 #
 # Every parameter except $html_file defaults to the same-named parameter
 # of the pubcrawler class, so that class must be declared first; it also
 # declares the Package['pubcrawler'] resource required below.
 # See templates/product.config.erb for the meaning of each setting.
 #
 # $html_file - optional output HTML file. When left undef the template
 #              writes /usr/share/pubcrawler/html/${name}/index.html.
 # $work_dir  - optional working directory. When undef the template
 #              writes /var/lib/pubcrawler/${name}.
 # $searches  - array of hashes with the keys 'database', 'alias' and
 #              'term'; one search line is emitted per element.
 define pubcrawler::config (
  $bgcolor           = $pubcrawler::bgcolor,
  $header_icon       = $pubcrawler::header_icon,
  $searches          = $pubcrawler::searches,
  $viewdays          = $pubcrawler::viewdays,
  $relentrezdate     = $pubcrawler::relentrezdate,
  $getmax            = $pubcrawler::getmax,
  $fullmax           = $pubcrawler::fullmax,
  $include_config    = $pubcrawler::include_config,
  $search_URL        = $pubcrawler::search_URL,
  $neighbour_URL     = $pubcrawler::neighbour_URL,
  $retrieve_URL      = $pubcrawler::retrieve_URL,
  $work_dir          = $pubcrawler::work_dir,
  $extra_range       = $pubcrawler::extra_range,
  $check             = $pubcrawler::check,
  $prompt            = $pubcrawler::prompt,
  $verbose           = $pubcrawler::verbose,
  $mute              = $pubcrawler::mute,
  $log_file          = $pubcrawler::log_file,
  $base_URL          = $pubcrawler::base_URL,
  $mail_features     = $pubcrawler::mail_features,
  $lynx              = $pubcrawler::lynx,
  $prefix            = $pubcrawler::prefix,
  $system            = $pubcrawler::system,
  $proxy_port        = $pubcrawler::proxy_port,
  $proxy_auth        = $pubcrawler::proxy_auth,
  $proxy_pass        = $pubcrawler::proxy_pass,
  $time_out          = $pubcrawler::time_out,
  $test_URL          = $pubcrawler::test_URL,
  $no_test           = $pubcrawler::no_test,
  $indent            = $pubcrawler::indent,
  $no_decap          = $pubcrawler::no_decap,
  $spacer            = $pubcrawler::spacer,
  # Read by the template as @html_file; previously there was no way to set it.
  $html_file         = undef,
 ) {
  file {"/etc/pubcrawler/${name}.config":
    content => template('pubcrawler/product.config.erb'),
    require => Package['pubcrawler'],
  }
 }
--- a/manifests/init.pp
+++ b/manifests/init.pp
@@ -0,0 +1,59 @@
 # Install pubcrawler and configuration files.
 #
 # Installs the pubcrawler package, the Apache configuration that exposes
 # /usr/share/pubcrawler/html under /pubcrawler, and the cron job, then
 # declares one pubcrawler::config resource per key of $site_specific_params.
 #
 # $site_specific_params - hash of { 'SiteName' => { param => value, ... } }.
 #                         Each key becomes the title of a pubcrawler::config
 #                         resource and its hash overrides the defaults below
 #                         for that site only.
 #
 # All remaining parameters are the defaults picked up by every
 # pubcrawler::config resource; see templates/product.config.erb for the
 # meaning of each setting.
 class pubcrawler (
  $site_specific_params = $pubcrawler::params::site_specific_params,
  $bgcolor              = $pubcrawler::params::bgcolor,
  $header_icon          = $pubcrawler::params::header_icon,
  $searches             = $pubcrawler::params::searches,
  $viewdays             = $pubcrawler::params::viewdays,
  $relentrezdate        = $pubcrawler::params::relentrezdate,
  $getmax               = $pubcrawler::params::getmax,
  $fullmax              = $pubcrawler::params::fullmax,
  $include_config       = $pubcrawler::params::include_config,
  $search_URL           = $pubcrawler::params::search_URL,
  $neighbour_URL        = $pubcrawler::params::neighbour_URL,
  $retrieve_URL         = $pubcrawler::params::retrieve_URL,
  $work_dir             = $pubcrawler::params::work_dir,
  $extra_range          = $pubcrawler::params::extra_range,
  $check                = $pubcrawler::params::check,
  $prompt               = $pubcrawler::params::prompt,
  $verbose              = $pubcrawler::params::verbose,
  $mute                 = $pubcrawler::params::mute,
  $log_file             = $pubcrawler::params::log_file,
  $base_URL             = $pubcrawler::params::base_URL,
  $mail_features        = $pubcrawler::params::mail_features,
  $lynx                 = $pubcrawler::params::lynx,
  $prefix               = $pubcrawler::params::prefix,
  $system               = $pubcrawler::params::system,
  $proxy_port           = $pubcrawler::params::proxy_port,
  $proxy_auth           = $pubcrawler::params::proxy_auth,
  $proxy_pass           = $pubcrawler::params::proxy_pass,
  $time_out             = $pubcrawler::params::time_out,
  $test_URL             = $pubcrawler::params::test_URL,
  $no_test              = $pubcrawler::params::no_test,
  $indent               = $pubcrawler::params::indent,
  $no_decap             = $pubcrawler::params::no_decap,
  $spacer               = $pubcrawler::params::spacer,
 ) inherits pubcrawler::params {
  package { 'pubcrawler':
    ensure => installed,
  }
  # Mode is set explicitly so this file is managed the same way as the
  # cron file below.
  file { '/etc/httpd/conf.d/pubcrawler.conf':
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    content => template('pubcrawler/http.pubcrawler.conf.erb'),
  }
  file { '/etc/cron.d/pubcrawler':
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    content => template('pubcrawler/cron.erb'),
  }
  # One /etc/pubcrawler/<key>.config per key of $site_specific_params.
  create_resources(pubcrawler::config, $site_specific_params)
 }
--- a/manifests/params.pp
+++ b/manifests/params.pp
@@ -0,0 +1,413 @@
 # Default parameters for Pubcrawler
 class pubcrawler::params {
  # Scalar defaults shared by every generated configuration file.
  # See templates/product.config.erb for the meaning of each setting.
  $bgcolor          = '#fff'
  $header_icon      = ''
  $viewdays         = '14'
  $relentrezdate    = '90'
  $getmax           = '800'
  $fullmax          = '500'
  $include_config   = 'no'
  $search_URL       = ''
  $neighbour_URL    = ''
  $retrieve_URL     = ''
  # Deliberately undef: the template then falls back to
  # /var/lib/pubcrawler/<name>. It is declared here because the pubcrawler
  # class reads $pubcrawler::params::work_dir, which would otherwise be an
  # unknown variable (a compilation error when strict_variables is enabled).
  $work_dir         = undef
  $extra_range      = '1000'
  $check            = '0'
  $prompt           = '1'
  $verbose          = '0'
  $mute             = '0'
  $log_file         = ''
  $base_URL         = 'local_file'
  $mail_features    = 'all'
  $lynx             = ''
  $prefix           = ''
  $system           = ''
  $proxy_port       = ''
  $proxy_auth       = ''
  $proxy_pass       = ''
  $time_out         = '180'
  $test_URL         = 'http://www.ncbi.nlm.nih.gov/'
  $no_test          = '0'
  $indent           = '125'
  $no_decap         = ''
  $spacer           = ''
  # No default searches; each site supplies its own list.
  $searches         = undef
  # override some of the above defaults in hash form to make
  # configurations specific for a site.
  $site_specific_params = {
    'AmoebaDB' => {
      bgcolor     => '#C8C5A2',
      header_icon => '/a/images/AmoebaDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Acanthamoeba',
          term     => 'Acanthamoeba [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Acanthamoeba',
          term     => 'Acanthamoeba [ALL]',
        },
      ],
    },
    'CryptoDB' => {
      bgcolor     => '#ffcccc',
      header_icon => '/a/images/CryptoDB/title_s.png',
      searches => [
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. parvum',
          term     => 'Cryptosporidium parvum [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. hominis',
          term     => 'Cryptosporidium hominis [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. muris',
          term     => 'Cryptosporidium muris [ORGN]',
        },
      ],
    },
    'EuPathDB' => {
      bgcolor     => '#507494',
      header_icon => '/a/images/EuPathDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Cryptosporidium',
          term     => 'Cryptosporidium [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Encephalitozoon or Enterocytozoon',
          term     => 'Enterocytozoon [ALL] or Encephalitozoon [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Giardia',
          term     => 'Giardia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Microsporidia',
          term     => 'Edhazardia [ALL]  OR Encephalitozoon [ALL]  OR Enterocytozoon [ALL]  OR Hamiltosporidium [ALL]  OR Nematocida [ALL]  OR Nosema [ALL]  OR Vavraia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Piroplasma genera',
          term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Plasmodium',
          term     => 'Plasmodium [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Toxoplasma or Eimeria or Neospora',
          term     => 'Toxoplasma [ALL] or Eimeria [ALL] or Neospora [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trichomonas',
          term     => 'Trichomonas [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on other Apicomplexan organisms',
          term     => 'Apicomplexa OR Apicomplexan OR Eimeria OR Gregarina OR Neospora OR Sarcocystis OR Theileria NOT Cryptosporidium NOT Plasmodium NOT Toxoplasma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Leishmania or Trypanosoma or Crithidia',
          term     => 'Trypanosoma [ALL] or  Leishmania [ALL] or Crithidia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Cryptosporidium',
          term     => 'Cryptosporidium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Giardia',
          term     => 'Giardia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Microsporidia',
          term     => 'Edhazardia [ORGN]  OR Encephalitozoon [ORGN]  OR Enterocytozoon [ORGN]  OR Hamiltosporidium [ORGN]  OR Nematocida [ORGN]  OR Nosema [ORGN]  OR Vavraia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Piroplasma genera',
          term     => 'Anthemosoma OR Babesia OR Cristalloidophora OR Dactylosoma OR Echinozoon OR Haemohormidium OR Sauroplasma OR Theileria [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Plasmodium',
          term     => 'Plasmodium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Toxoplasma',
          term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trichomonas',
          term     => 'Trichomonas [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for other Apicomplexan organisms',
          term     => 'Apicomplexa NOT Toxoplasma NOT Plasmodium NOT Cryptosporidium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Leishmania or Trypanosoma or Crithidia',
          term     => 'Leishmania [ORGN] or Trypanosoma [ORGN] or Crithidia [ORGN]',
        },
      ],
    },
    'FungiDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/FungiDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces',
          term     => 'Aspergillus [ALL] or Candida [ALL] or Coccidioides [ALL] or Cryptococcus [ALL] or Fusarium [ALL] or Gibberella [ALL] or Magnaporthe [ALL] or Neurospora [ALL] or Puccinia [ALL] or Rhizopus [ALL] or Saccharomyces [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces',
          term     => 'Aspergillus [ORGN] or Candida [ORGN] or Coccidioides [ORGN] or Cryptococcus [ORGN] or Fusarium [ORGN] or Gibberella [ORGN] or Magnaporthe [ORGN] or Neurospora [ORGN] or Puccinia [ORGN] or Rhizopus [ORGN] or Saccharomyces [ORGN]',
        },
      ],
    },
    'GiardiaDB' => {
      bgcolor     => '#993333',
      header_icon => '/a/images/GiardiaDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Giardia',
          term     => 'Giardia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Giardia',
          term     => 'Giardia [ORGN]',
        },
      ],
    },
    'HostDB' => {
      bgcolor     => '#e08265',
      header_icon => '/a/images/HostDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on host parasite interaction',
          term     => 'host parasite interaction [ALL]',
        },
      ],
    },
    'MicrosporidiaDB' => {
      bgcolor     => '#C4BAD3',
      header_icon => '/a/images/MicrosporidiaDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Microsporidia',
          term     => 'Edhazardia [ALL]  OR Encephalitozoon [ALL]  OR Enterocytozoon [ALL]  OR Hamiltosporidium [ALL]  OR Nematocida [ALL]  OR Nosema [ALL]  OR Vavraia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Microsporidia',
          term     => 'Edhazardia [ORGN]  OR Encephalitozoon [ORGN]  OR Enterocytozoon [ORGN]  OR Hamiltosporidium [ORGN]  OR Nematocida [ORGN]  OR Nosema [ORGN]  OR Vavraia [ORGN] or Anncaliia  [ORGN] or Vittaforma [ORGN]',
        },
      ],
    },
    'PiroplasmaDB' => {
      bgcolor     => '#e08265',
      header_icon => '/a/images/PiroplasmaDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Piroplasma genera',
          term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]',
        },
        {
          # Each genus carries exactly one [ORGN] field tag.
          database => 'genbank',
          alias    => 'New Genbank sequences for Piroplasma genera',
          term     => 'Anthemosoma [ORGN] OR Babesia [ORGN] OR Cristalloidophora [ORGN] OR Dactylosoma [ORGN] OR Echinozoon [ORGN] OR Haemohormidium [ORGN] OR Sauroplasma [ORGN] OR Theileria [ORGN]',
        },
      ],
    },
    'PlasmoDB' => {
      bgcolor     => '#bbaacc',
      header_icon => '/a/images/PlasmoDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Plasmodium',
          term     => 'Plasmodium [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Plasmodium',
          term     => 'Plasmodium [ORGN]',
        },
      ],
    },
    'SchistoDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/SchistoDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles related to Schistosoma',
          term     => 'Schistosoma [ALL] or schistosoma [ALL] or blood-fluke [ALL] or Schistosomatidae [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Schistosoma',
          term     => 'Schistosoma [ORGN]',
        },
      ],
    },
    'ToxoDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/ToxoDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Toxoplasma',
          term     => 'Toxoplasma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Eimeria',
          term     => 'Eimeria [ALL]',
        },
        {
          # Results are grouped by alias, so this search needs its own alias
          # to keep Gregarina hits out of the Eimeria section.
          database => 'pubmed',
          alias    => 'New PubMed articles on Gregarina',
          term     => 'Gregarina [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Neospora',
          term     => 'Neospora [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Toxoplasma, Eimeria, or Neospora',
          term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN] or Gregarina [ORGN]',
        },
      ],
    },
    'TrichDB' => {
      bgcolor     => '#993333',
      header_icon => '/a/images/TrichDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trichomonas',
          term     => 'Trichomonas [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trichomonas',
          term     => 'Trichomonas [ORGN]',
        },
      ],
    },
    'TriTrypDB' => {
      bgcolor     => '#dfbba6',
      header_icon => '/a/images/TriTrypDB/title_s.png',
      searches => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trypanosoma',
          term     => 'Trypanosoma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Leishmania',
          term     => 'Leishmania [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Crithidia',
          term     => 'Crithidia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Endotrypanum',
          term     => 'Endotrypanum [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trypanosoma',
          term     => 'Trypanosoma [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Crithidia',
          term     => 'Crithidia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Endotrypanum',
          term     => 'Endotrypanum [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Leishmania',
          term     => 'Leishmania [ORGN]',
        },
      ],
    },
  }
 }
--- a/templates/cron.erb
+++ b/templates/cron.erb
@@ -0,0 +1 @@
 20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall
--- a/templates/http.pubcrawler.conf.erb
+++ b/templates/http.pubcrawler.conf.erb
@@ -0,0 +1,7 @@
 Alias /pubcrawler /usr/share/pubcrawler/html
 <Directory /usr/share/pubcrawler/html>
    Options FollowSymLinks Includes
    AllowOverride None
    <IfModule mod_authz_core.c>
        # Apache 2.4 and later
        Require all granted
    </IfModule>
    <IfModule !mod_authz_core.c>
        # Apache 2.2
        Order allow,deny
        Allow from all
    </IfModule>
 </Directory>
--- a/templates/product.config.erb
+++ b/templates/product.config.erb
@@ -0,0 +1,255 @@
        ############   PubCrawler configuration file         ############
        #######   (for PubCrawler Version higher than 0.53)        ######
        ############################################################
        #                                                                #
        #   lines beginning with hash marks (#) are ignored.             #
        #                                                                #
        #   PubCrawler home page:                                        #
        #               http://www.pubcrawler.ie                         #
        #                                                                #
        #   Specify your file locations and search options here.         #
        #   Each line is in the format  FIELD space VALUE.               #
        #   Any leading or trailing quotes will be chopped off.          # 
        #   Hash marks separate comments from data.                      #
        #   You must specify a value for all 6 mandatory fields.         #
        #                                                                #
        ############################################################
        ############################################################
        ################# MANDATORY SETTINGS #######################
        ############################################################
 project <%= @name %>
        # Identifier for a project. This value is displayed in the header
        # of the results page.
 background_color <%= @bgcolor %>
        # background color for sectional headers of the result page.
 header_icon <%= @header_icon %>
        # relative or absolute URL for icon to display in results page
        # header.
 <%- if @html_file -%>
 html_file <%= @html_file %>
 <%- else -%>
 html_file /usr/share/pubcrawler/html/<%= @name %>/index.html
 <%- end -%>
        # html_file is the name of the output HTML file for results
        # it will be written to the specified working directory
        # unless an absolute pathname is given
 viewdays <%= @viewdays %>   
        # viewdays is the number of days each document will be shown.
 relentrezdate <%= @relentrezdate %>
        # relentrezdate (relative date of insertion into Entrez) 
        # is the maximum age (in days) of database entries to be reported.
        # NOTE: sometimes records first appear in the databases several
        # days or even weeks later than indicated by their database
        # date-stamp, i.e. with non-zero values of relentrezdate.
        # Therefore relentrezdate needs to be high enough to find these
        # records.  A relentrezdate of 90 days is suggested (if you make 
        # relentrezdate too huge the searches will be very slow.)
        # other valid entries are: 
        # '1 year', '2 years', '5 years', '10 years', and 'no limit'
 getmax <%= @getmax %>
        # getmax is the maximum number of documents to be retrieved
        # for each search carried out.
 fullmax <%= @fullmax %>
        # fullmax is the maximum number of documents for which a full
        # report is being presented
        # if more documents were retrieved, these can be accessed
        # through a hyperlink (in groups of up to fullmax articles)
 include_config <%= @include_config %>
        # include_config (yes/no) specifies whether or not to append 
        # this config-file to the end of the output file
        #-------------------------------------------------------------------#
        ############################################################
        ################## OPTIONAL SETTINGS #######################
        ############################################################
 search_URL <%= @search_URL %>
        # URL where searches are being sent to
        # defaults to 
        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
 neighbour_URL <%= @neighbour_URL %>
        # URL where neighbourhood searches are being sent to
        # defaults to 
        # https://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi
 retrieve_URL <%= @retrieve_URL %>
        # URL where documents are retrieved from
        # defaults to 
        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi
 <%- if @work_dir -%>
 work_dir <%= @work_dir %>
 <%- else -%>
 work_dir /var/lib/pubcrawler/<%= @name %>
 <%- end -%>
        # specify a directory in which databases, output and log file 
        # will be located
        # if no value given, the current working directory will be used
 extra_range <%= @extra_range %>
        # specifies the number of documents combined in a link
        # minimum value is 1, defaults to 'fullmax'
 check <%= @check %>
        # if set to '1' program will just check all settings
        # without performing the actual search
        # RECOMMENDED FOR THE FIRST RUN!
 prompt <%= @prompt %>
        # for Mac-users only:
        # if this option is set to '1' the program will ask you 
        # explicitly for command line options
        # NOTE: THIS IS NOT SUITABLE FOR AUTOMATED USE!
 verbose <%= @verbose %>
        # verbose 0 runs silently and makes log file                 
        # verbose 1 writes log output on screen 
 mute <%= @mute %>
        # mute 0 writes some messages to STDERR
        # mute 1 stops ALL messages going to STDERR
        # unless an error was encountered                    
 log_file <%= @log_file %>
        # name of file for log-output
        # (verbose has to be set to '0')   
        #base_URL 'http://www.gen.tcd.ie/pubcrawler/pubcrawler_output.html'
 base_URL <%= @base_URL %>
        # specify a URL, that will be used for 
        # the 'Back to Top' link in the output page   
        # 'local_file' makes links relative to results file
        # mail joe@hotmail.earth.com
        # if the hash mark ('#') at the beginning of the above line
        # is removed, PubCrawler will send the results file to
        # the given address at the end of each run. You can specify
        # multiple addresses using commas (no spaces!).
        # notify jfk@hotmail.earth.com#joe
        # if the hash mark ('#') at the beginning of the above line
        # is removed, PubCrawler will send a notification to
        # the given address (minus '#joe') at the end of each run
        # The recipient will be addressed with joe (optional).
        # You can specify multiple addresses using commas (no spaces!).
 mail_features <%= @mail_features %>
        # comma-separated list of extra features for the mail
        # to be sent (without them it will be plain text). These are:
        # css,javascript,entrez_links,pubcrawler_links,images,html,description
        # or simply 'all' for everything
 lynx <%= @lynx %>
        # for Unix-users only:
        # if you don't want to use the libwww-Perl module and
        # have an alternative browser installed, that works from the 
        # command line, like 'Lynx', you can use it by entering the
        # command that evokes it (e.g. lynx '/usr/bin/lynx')
        # NOTE: THIS OVERRIDES ANY PROXY SETTINGS!
        #header 'head.html'
        # specify a location of a header (in HTML-style) that will be used
        # for the output file (disabled unless hash mark is removed)
 prefix <%= @prefix %>
        # if you would like a different prefix to be used 
        # for standard files (configuration, database, log)
        # insert it here (default is program name up to first dot):
 system <%= @system %>
        # name of operating system
        # might need the explicit assignment of an adequate value
        # ('MacOS','Win','Unix', or 'Linux')
        # if Perl is not configured properly
        #### PROXY SETTING (if desired and/or necessary) ####
        #proxy www.tcd.ie/proxy.cgi
        # insert either a proxy server (eg. 'proxy.domain.com')
        # or the address of a proxy configuration file
        # if known (eg. 'www.domain.com/proxy.cgi')
        # and uncomment
 proxy_port <%= @proxy_port %>
        # port of the proxy server,defaults to '80'
 proxy_auth <%= @proxy_auth %>
 proxy_pass <%= @proxy_pass %>
        # in case you need to submit a username and a password
        # for accessing your proxy, you can fill it in here:
        # CAUTION! Having passwords stored in a file means a
        # possible security risk! Please delete after usage
        # or use the according command line option!
        # !!! Please make sure that the module MIME::Base64 is 
        # installed for the proxy authorization to work!!!
 time_out <%= @time_out %>
        # specify how many SECONDS to give remote servers 
        # in creating responses before the library disconnects
        # (defaults to 180 seconds if no value is given) 
 test_URL <%= @test_URL %>
        # test-URL for proxy-test
 no_test <%= @no_test %>
        # if a proxy is given, the internet connection is tested
        # at the start of the program by default; this can be
        # suppressed if a value of '1' is given here
 indent <%= @indent %>
        # amount of pixels that output is being shifted to the right
 no_decap <%= @no_decap %>
        # put 1 inbetween single quotes if you want to disable 
        # processing of the entrez documents (chopping of head and tail
        # and collecting UIs)
 spacer <%= @spacer %>
        # specify a gif that will be inserted in the output to shift
        # text past the left, blue column
        # (defaults to: http://www.gen.tcd.ie/pubcrawler/pics/spacer.gif)
        #-----------------------------------------------------------------------------#
        ############################################################
        ################# SEARCH SPECIFICATION #####################
        ############################################################
        ###########################################################################
        ######  Entrez abbreviations for fields                                     #
        ######  (see http://www4.ncbi.nlm.nih.gov/PubMed/linking.html).             #
        ######  combine fields with AND, OR, BUTNOT and parentheses.                #
        #                                                                           #
        # for PubMed : one of AFFL, ALL, AUTH, ECNO, JOUR, MESH, MAJR, PAGE,        #
        #                     PDAT, PTYP, KYWD, WORD, TITLE, or VOL.                #
        # for Nucleotide : one of ACCN, AUTH, PDAT, ECNO, FKEY, GENE, JOUR, KYWD,   #
        #                         MDAT, ORGN, PROP, PROT, SQID, SLEN, SUBS, or      #
        #                         WORD.                                             #
        #                                                                           #
        # where ACCN = Accession Number, AFFL = Afilliation, ALL = all fields,      #
        #       AUTH = Author Name, ECNO = E. C. Number, FKEY = Feature Key,        #
        #       GENE = gene name, JOUR =journal name, KYWD = Keywords,              #
        #       MAJR = MeSH major topic, MDAT = modification date,                  #
        #       MESH = mesh term,ORGN = organism, PACC = Primary Accesion Number,   #
        #       PAGE = first page, PDAT = publication/creation date,                #
        #       PROP = Properties,  PROT = protein name, PTYP = Publication Type,   #
        #       SUBS = Substance, TITL = title word, WORD = text word,              #
        #       VOL = volume.                                                       #
        ###########################################################################
        ##### Each search-specification has to be written on one line.
        ##### The first word must specify the database: 
        ##### pubmed, pm_neighbour, genbank, or gb_neighbour
        ##### Any following words enclosed in single quotes (') will be used
        ##### as an alias for this query, otherwise they will be considered
        ##### Entrez-search-terms, as will the rest of the line.
        ##### You can have as many different searches as you wish. The results of all
        ##### searches will be combined according to their aliases.
        ##### You CAN NOT use the same alias for searches at different databases!
        ##### Write your search descriptions below this line.  
        ##### (Upper/lower case does not matter.)
 <%- if @searches.is_a?(Array) -%>
 <%- @searches.each do |query| -%>
 <%= query['database'] %> '<%= query['alias'] %>' <%= query['term'] %>
 <%- end -%>
 <%- end -%>
		`@@ -0,0 +1 @@`
							`20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall`