From b0b69367c85316a2306e6f7f8c683a9085fa926b Mon Sep 17 00:00:00 2001 From: drew Date: Fri, 6 Apr 2018 14:00:35 -0400 Subject: [PATCH] move to github --- README.md | 72 +++++ manifests/config.pp | 42 +++ manifests/init.pp | 59 +++++ manifests/params.pp | 413 +++++++++++++++++++++++++++++ templates/cron.erb | 1 + templates/http.pubcrawler.conf.erb | 7 + templates/product.config.erb | 255 ++++++++++++++++++ 7 files changed, 849 insertions(+) create mode 100644 README.md create mode 100644 manifests/config.pp create mode 100644 manifests/init.pp create mode 100644 manifests/params.pp create mode 100644 templates/cron.erb create mode 100644 templates/http.pubcrawler.conf.erb create mode 100644 templates/product.config.erb diff --git a/README.md b/README.md new file mode 100644 index 0000000..900fa8c --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +Installs Pubcrawler from EuPathDB-specific RPM and manages +`/etc/pubcrawler/` configuration files. + +The RPM includes a `/etc/cron.d` job that runs all *.config in `/etc/pubcrawler`. + +`pubcrawler::params` defines the common set of configuration parameters. +See `templates/product.config.erb` in this module for documentation on +each parameter. + +A configuration file will be generated in `/etc/pubcrawler` for each +key in the `$site_specific_params` hash. All site-specific configuration +files will use the default values in `pubcrawler::params` unless overridden in the +`$site_specific_params`. For example, AmoebaDB will use '14' for +`viewdays` whereas CryptoDB will use '120' because it has been +overridden in the `$site_specific_params` hash for the `CryptoDB` key. + + ... + $viewdays = '14' + ... + + $site_specific_params = { + 'AmoebaDB' => { + 'bgcolor' => '#C8C5A2', + 'header_icon' => '/a/images/AmoebaDB/title_s.png', + 'searches' => [ + ... + ], + }, + + 'CryptoDB' => { + 'bgcolor' => '#ffcccc', + 'header_icon' => '/a/images/CryptoDB/title_s.png', + 'viewdays' => '120', + 'searches' => [ + ... + ], + }, + ... 
+ } + +### Hiera + +The values in `::pubcrawler::params` can be overridden in Hiera. + + pubcrawler::site_specific_params: + AmoebaDB: + bgcolor: '#C8C5A2' + header_icon: '/a/images/AmoebaDB/title_s.png' + searches: + - database: pubmed + alias: Entamoeba + term: Entamoeba [ALL] + - database: pubmed + alias: Acanthamoeba + term: Acanthamoeba [ALL] + +In this example, the `$site_specific_params` in `::pubcrawler::params` +will be replaced with a hash containing only AmoebaDB - so only +AmoebaDB will be configured. + +Other individual params can be set in Hiera. + + pubcrawler::fullmax: 666 + +### Configuration Testing + +To run a single configuration manually, use + + PRODUCT=TrichDB + sudo -u nobody /usr/share/pubcrawler/bin/pubcrawler.pl -c /etc/pubcrawler/${PRODUCT}.config + +Results are written to `/usr/share/pubcrawler/html/${PRODUCT}/` \ No newline at end of file diff --git a/manifests/config.pp b/manifests/config.pp new file mode 100644 index 0000000..13f91b4 --- /dev/null +++ b/manifests/config.pp @@ -0,0 +1,42 @@ +# Generate /etc/pubcrawler/<name>.config for one site; every parameter defaults to the value set on the pubcrawler class. +define pubcrawler::config ( + $bgcolor = $pubcrawler::bgcolor, + $header_icon = $pubcrawler::header_icon, + $searches = $pubcrawler::searches, + $viewdays = $pubcrawler::viewdays, + $relentrezdate = $pubcrawler::relentrezdate, + $getmax = $pubcrawler::getmax, + $fullmax = $pubcrawler::fullmax, + $include_config = $pubcrawler::include_config, + $search_URL = $pubcrawler::search_URL, + $neighbour_URL = $pubcrawler::neighbour_URL, + $retrieve_URL = $pubcrawler::retrieve_URL, + $work_dir = $pubcrawler::work_dir, + $extra_range = $pubcrawler::extra_range, + $check = $pubcrawler::check, + $prompt = $pubcrawler::prompt, + $verbose = $pubcrawler::verbose, + $mute = $pubcrawler::mute, + $log_file = $pubcrawler::log_file, + $base_URL = $pubcrawler::base_URL, + $mail_features = $pubcrawler::mail_features, + $lynx = $pubcrawler::lynx, + $prefix = $pubcrawler::prefix, + $system = $pubcrawler::system, + $proxy_port = 
$pubcrawler::proxy_port, + $proxy_auth = $pubcrawler::proxy_auth, + $proxy_pass = $pubcrawler::proxy_pass, + $time_out = $pubcrawler::time_out, + $test_URL = $pubcrawler::test_URL, + $no_test = $pubcrawler::no_test, + $indent = $pubcrawler::indent, + $no_decap = $pubcrawler::no_decap, + $spacer = $pubcrawler::spacer, +) { + + file {"/etc/pubcrawler/${name}.config": + content => template('pubcrawler/product.config.erb'), + require => Package['pubcrawler'], + } + +} \ No newline at end of file diff --git a/manifests/init.pp b/manifests/init.pp new file mode 100644 index 0000000..51fc112 --- /dev/null +++ b/manifests/init.pp @@ -0,0 +1,59 @@ +# Install the pubcrawler package, its Apache conf.d and cron.d files, and declare one pubcrawler::config per key of $site_specific_params. +class pubcrawler ( + $site_specific_params = $pubcrawler::params::site_specific_params, + $bgcolor = $pubcrawler::params::bgcolor, + $header_icon = $pubcrawler::params::header_icon, + $searches = $pubcrawler::params::searches, + $viewdays = $pubcrawler::params::viewdays, + $relentrezdate = $pubcrawler::params::relentrezdate, + $getmax = $pubcrawler::params::getmax, + $fullmax = $pubcrawler::params::fullmax, + $include_config = $pubcrawler::params::include_config, + $search_URL = $pubcrawler::params::search_URL, + $neighbour_URL = $pubcrawler::params::neighbour_URL, + $retrieve_URL = $pubcrawler::params::retrieve_URL, + $work_dir = $pubcrawler::params::work_dir, + $extra_range = $pubcrawler::params::extra_range, + $check = $pubcrawler::params::check, + $prompt = $pubcrawler::params::prompt, + $verbose = $pubcrawler::params::verbose, + $mute = $pubcrawler::params::mute, + $log_file = $pubcrawler::params::log_file, + $base_URL = $pubcrawler::params::base_URL, + $mail_features = $pubcrawler::params::mail_features, + $lynx = $pubcrawler::params::lynx, + $prefix = $pubcrawler::params::prefix, + $system = $pubcrawler::params::system, + $proxy_port = $pubcrawler::params::proxy_port, + $proxy_auth = $pubcrawler::params::proxy_auth, + $proxy_pass = $pubcrawler::params::proxy_pass, + $time_out 
= $pubcrawler::params::time_out, + $test_URL = $pubcrawler::params::test_URL, + $no_test = $pubcrawler::params::no_test, + $indent = $pubcrawler::params::indent, + $no_decap = $pubcrawler::params::no_decap, + $spacer = $pubcrawler::params::spacer, +) inherits pubcrawler::params { + + package { 'pubcrawler': + ensure => installed, + } + + file { '/etc/httpd/conf.d/pubcrawler.conf': + owner => 'root', + group => 'root', + content => template('pubcrawler/http.pubcrawler.conf.erb'), + } + + file { '/etc/cron.d/pubcrawler': + owner => 'root', + group => 'root', + mode => '0644', + content => template('pubcrawler/cron.erb'), + } + + create_resources(pubcrawler::config, $site_specific_params) +} + + + diff --git a/manifests/params.pp b/manifests/params.pp new file mode 100644 index 0000000..10053f5 --- /dev/null +++ b/manifests/params.pp @@ -0,0 +1,413 @@ +# Default parameters for Pubcrawler +class pubcrawler::params { + + $bgcolor = '#fff' + $header_icon = '' + $viewdays = '14' + $relentrezdate = '90' + $getmax = '800' + $fullmax = '500' + $include_config = 'no' + $search_URL = '' + $neighbour_URL = '' + $retrieve_URL = '' + $extra_range = '1000' + $check = '0' + $prompt = '1' + $verbose = '0' + $mute = '0' + $log_file = '' + $base_URL = 'local_file' + $mail_features = 'all' + $lynx = '' + $prefix = '' + $system = '' + $proxy_port = '' + $proxy_auth = '' + $proxy_pass = '' + $time_out = '180' + $test_URL = 'http://www.ncbi.nlm.nih.gov/' + $no_test = '0' + $indent = '125' + $no_decap = '' + $spacer = '' + $searches = undef + $work_dir = undef # read by init.pp; undef makes product.config.erb fall back to /var/lib/pubcrawler/<name> + # override some of the above defaults in hash form to make + # configurations specific for a site. 
+ $site_specific_params = { + 'AmoebaDB' => { + bgcolor => '#C8C5A2', + header_icon => '/a/images/AmoebaDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Entamoeba', + term => 'Entamoeba [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Acanthamoeba', + term => 'Acanthamoeba [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Entamoeba', + term => 'Entamoeba [ALL]', + }, + + { + database => 'genbank', + alias => 'New Genbank sequences for Acanthamoeba', + term => 'Acanthamoeba [ALL]', + }, + ], + }, + + 'CryptoDB' => { + bgcolor => '#ffcccc', + header_icon => '/a/images/CryptoDB/title_s.png', + searches => [ + { + database => 'genbank', + alias => 'New Genbank sequences, C. parvum', + term => 'Cryptosporidium parvum [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences, C. hominis', + term => 'Cryptosporidium hominis [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences, C. 
muris', + term => 'Cryptosporidium muris [ORGN]', + }, + ], + }, + + 'EuPathDB' => { + bgcolor => '#507494', + header_icon => '/a/images/EuPathDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Cryptosporidium', + term => 'Cryptosporidium [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Encephalitozoon or Enterocytozoon', + term => 'Enterocytozoon [ALL] or Encephalitozoon [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Entamoeba', + term => 'Entamoeba [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Giardia', + term => 'Giardia [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Microsporidia', + term => 'Edhazardia [ALL] OR Encephalitozoon [ALL] OR Enterocytozoon [ALL] OR Hamiltosporidium [ALL] OR Nematocida [ALL] OR Nosema [ALL] OR Vavraia [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Piroplasma genera', + term => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Plasmodium', + term => 'Plasmodium [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Toxoplasma or Eimeria or Neospora', + term => 'Toxoplasma [ALL] or Eimeria [ALL] or Neospora [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Trichomonas', + term => 'Trichomonas [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on other Apicomplexan organisms', + term => 'Apicomplexa OR Apicomplexan OR Eimeria OR Gregarina OR Neospora OR Sarcocystis OR Theileria NOT Cryptosporidium NOT Plasmodium NOT Toxoplasma [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Leishmania or Trypanosoma or Crithidia', + term => 'Trypanosoma [ALL] or Leishmania [ALL] or 
Crithidia [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Cryptosporidium', + term => 'Cryptosporidium [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Entamoeba', + term => 'Entamoeba [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Giardia', + term => 'Giardia [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Microsporidia', + term => 'Edhazardia [ORGN] OR Encephalitozoon [ORGN] OR Enterocytozoon [ORGN] OR Hamiltosporidium [ORGN] OR Nematocida [ORGN] OR Nosema [ORGN] OR Vavraia [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Piroplasma genera', + term => 'Anthemosoma OR Babesia OR Cristalloidophora OR Dactylosoma OR Echinozoon OR Haemohormidium OR Sauroplasma OR Theileria [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Plasmodium', + term => 'Plasmodium [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Toxoplasma', + term => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Trichomonas', + term => 'Trichomonas [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for other Apicomplexan organisms', + term => 'Apicomplexa NOT Toxoplasma NOT Plasmodium NOT Cryptosporidium [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Leishmania or Trypanosoma or Crithidia', + term => 'Leishmania [ORGN] or Trypanosoma [ORGN] or Crithidia [ORGN]', + }, + ], + }, + + 'FungiDB' => { + bgcolor => '#cd919e', + header_icon => '/a/images/FungiDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces', + term => 'Aspergillus [ALL] or Candida [ALL] or 
Coccidioides [ALL] or Cryptococcus [ALL] or Fusarium [ALL] or Gibberella [ALL] or Magnaporthe [ALL] or Neurospora [ALL] or Puccinia [ALL] or Rhizopus [ALL] or Saccharomyces [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces', + term => 'Aspergillus [ORGN] or Candida [ORGN] or Coccidioides [ORGN] or Cryptococcus [ORGN] or Fusarium [ORGN] or Gibberella [ORGN] or Magnaporthe [ORGN] or Neurospora [ORGN] or Puccinia [ORGN] or Rhizopus [ORGN] or Saccharomyces [ORGN]', + }, + ], + }, + + 'GiardiaDB' => { + bgcolor => '#993333', + header_icon => '/a/images/GiardiaDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Giardia', + term => 'Giardia [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Giardia', + term => 'Giardia [ORGN]', + }, + ], + }, + + 'HostDB' => { + bgcolor => '#e08265', + header_icon => '/a/images/HostDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on host parasite interaction', + term => 'host parasite interaction [ALL]', + }, + ], + }, + + 'MicrosporidiaDB' => { + bgcolor => '#C4BAD3', + header_icon => '/a/images/MicrosporidiaDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Microsporidia', + term => 'Edhazardia [ALL] OR Encephalitozoon [ALL] OR Enterocytozoon [ALL] OR Hamiltosporidium [ALL] OR Nematocida [ALL] OR Nosema [ALL] OR Vavraia [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Microsporidia', + term => 'Edhazardia [ORGN] OR Encephalitozoon [ORGN] OR Enterocytozoon [ORGN] OR Hamiltosporidium [ORGN] OR Nematocida [ORGN] OR Nosema [ORGN] OR Vavraia [ORGN] or Anncaliia [ORGN] or Vittaforma [ORGN]', + }, + ], + }, + + 'PiroplasmaDB' => { + bgcolor => '#e08265', + header_icon => 
'/a/images/PiroplasmaDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Piroplasma genera', + term => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Piroplasma genera', + term => 'Anthemosoma [ORGN] OR Babesia [ORGN] OR Cristalloidophora [ORGN] OR Dactylosoma [ORGN] OR Echinozoon [ORGN] OR Haemohormidium [ORGN] OR Sauroplasma [ORGN] OR Theileria [ORGN]', + }, + ], + }, + + 'PlasmoDB' => { + bgcolor => '#bbaacc', + header_icon => '/a/images/PlasmoDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Plasmodium', + term => 'Plasmodium [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Plasmodium', + term => 'Plasmodium [ORGN]', + }, + ], + }, + + 'SchistoDB' => { + bgcolor => '#cd919e', + header_icon => '/a/images/SchistoDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles related to Schistosoma', + term => 'Schistosoma [ALL] or schistosoma [ALL] or blood-fluke [ALL] or Schistosomatidae [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Schistosoma', + term => 'Schistosoma [ORGN]', + }, + ], + }, + + 'ToxoDB' => { + bgcolor => '#cd919e', + header_icon => '/a/images/ToxoDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Toxoplasma', + term => 'Toxoplasma [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Eimeria', + term => 'Eimeria [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Gregarina', # results are grouped by alias, so it must name the genus in term + term => 'Gregarina [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Neospora', + term => 'Neospora [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences 
for Toxoplasma, Eimeria, or Neospora', + term => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN] or Gregarina [ORGN]', + }, + ], + }, + + 'TrichDB' => { + bgcolor => '#993333', + header_icon => '/a/images/TrichDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Trichomonas', + term => 'Trichomonas [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Trichomonas', + term => 'Trichomonas [ORGN]', + }, + ], + }, + + 'TriTrypDB' => { + bgcolor => '#dfbba6', + header_icon => '/a/images/TriTrypDB/title_s.png', + searches => [ + { + database => 'pubmed', + alias => 'New PubMed articles on Trypanosoma', + term => 'Trypanosoma [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Leishmania', + term => 'Leishmania [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Crithidia', + term => 'Crithidia [ALL]', + }, + { + database => 'pubmed', + alias => 'New PubMed articles on Endotrypanum', + term => 'Endotrypanum [ALL]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Trypanosoma', + term => 'Trypanosoma [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Crithidia', + term => 'Crithidia [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Endotrypanum', + term => 'Endotrypanum [ORGN]', + }, + { + database => 'genbank', + alias => 'New Genbank sequences for Leishmania', + term => 'Leishmania [ORGN]', + }, + ], + }, + + } + +} \ No newline at end of file diff --git a/templates/cron.erb b/templates/cron.erb new file mode 100644 index 0000000..995e9f6 --- /dev/null +++ b/templates/cron.erb @@ -0,0 +1 @@ +20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall diff --git a/templates/http.pubcrawler.conf.erb b/templates/http.pubcrawler.conf.erb new file mode 100644 index 0000000..b2d2d88 --- /dev/null +++ b/templates/http.pubcrawler.conf.erb @@ -0,0 +1,7 @@ +Alias 
/pubcrawler /usr/share/pubcrawler/html +<Directory /usr/share/pubcrawler/html> + Options FollowSymLinks Includes + AllowOverride None + Order allow,deny + Allow from all +</Directory> diff --git a/templates/product.config.erb b/templates/product.config.erb new file mode 100644 index 0000000..ea31f24 --- /dev/null +++ b/templates/product.config.erb @@ -0,0 +1,255 @@ + ############ PubCrawler configuration file ############ + ####### (for PubCrawler Version higher than 0.53) ###### + ############################################################ + # # + # lines beginning with hash marks (#) are ignored. # + # # + # PubCrawler home page: # + # http://www.pubcrawler.ie # + # # + # Specify your file locations and search options here. # + # Each line is in the format FIELD space VALUE. # + # Any leading or trailing quotes will be chopped off. # + # Hash marks separate comments from data. # + # You must specify a value for all 6 mandatory fields. # + # # + ############################################################ + ############################################################ + ################# MANDATORY SETTINGS ####################### + ############################################################ +project <%= @name %> + # Identifier for a project. This value is displayed in the header + # of the results page. + +background_color <%= @bgcolor %> + # background color for sectional headers of the result page. + +header_icon <%= @header_icon %> + # relative or absolute URL for icon to display in results page + # header. + +<%- if @html_file -%> +html_file <%= @html_file %> +<%- else -%> +html_file /usr/share/pubcrawler/html/<%= @name %>/index.html +<%- end -%> + # html_file is the name of the output HTML file for results + # it will be written to the specified working directory + # unless an absolute pathname is given + +viewdays <%= @viewdays %> + # viewdays is the number of days each document will be shown. 
+ +relentrezdate <%= @relentrezdate %> + # relentrezdate (relative date of insertion into Entrez) + # is the maximum age (in days) of database entries to be reported. + # NOTE: sometimes records first appear in the databases several + # days or even weeks later than indicated by their database + # date-stamp, i.e. with non-zero values of relentrezdate. + # Therefore relentrezdate needs to be high enough to find these + # records. A relentrezdate of 90 days is suggested (if you make + # relentrezdate too huge the searches will be very slow.) + # other valid entries are: + # '1 year', '2 years', '5 years', '10 years', and 'no limit' + +getmax <%= @getmax %> + # getmax is the maximum number of documents to be retrieved + # for each search carried out. + +fullmax <%= @fullmax %> + # fullmax is the maximum number of documents for which a full + # report is being presented + # if more documents were retrieved, these can be accessed + # through a hyperlink (in groups of up to fullmax articles) + +include_config <%= @include_config %> + # include_config (yes/no) specifies whether or not to append + # this config-file to the end of the output file + #-------------------------------------------------------------------# + ############################################################ + ################## OPTIONAL SETTINGS ####################### + ############################################################ +search_URL <%= @search_URL %> + # URL where searches are being sent to + # defaults to + # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi + +neighbour_URL <%= @neighbour_URL %> + # URL where neighbourhood searches are being sent to + # defaults to + # https://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi + +retrieve_URL <%= @retrieve_URL %> + # URL where documents are retrieved from + # defaults to + # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi + +<%- if @work_dir -%> +work_dir <%= @work_dir %> +<%- else -%> +work_dir /var/lib/pubcrawler/<%= 
@name %> +<%- end -%> + # specify a directory in which databases, output and log file + # will be located + # if no value given, the current working directory will be used + +extra_range <%= @extra_range %> + # specifies the number of documents combined in a link + # minimum value is 1, defaults to 'fullmax' + +check <%= @check %> + # if set to '1' program will just check all settings + # without performing the actual search + # RECOMMENDED FOR THE FIRST RUN! + +prompt <%= @prompt %> + # for Mac-users only: + # if this option is set to '1' the program will ask you + # explicitly for command line options + # NOTE: THIS IS NOT SUITABLE FOR AUTOMATED USE! + +verbose <%= @verbose %> + # verbose 0 runs silently and makes log file + # verbose 1 writes log output on screen + +mute <%= @mute %> + # mute 0 writes some messages to STDERR + # mute 1 stops ALL messages going to STDERR + # unless an error was encountered + +log_file <%= @log_file %> + # name of file for log-output + # (verbose has to be set to '0') + #base_URL 'http://www.gen.tcd.ie/pubcrawler/pubcrawler_output.html' +base_URL <%= @base_URL %> + # specify a URL, that will be used for + # the 'Back to Top' link in the output page + # 'local_file' makes links relative to results file + # mail joe@hotmail.earth.com + # if the hash mark ('#') at the beginning of the above line + # is removed, PubCrawler will send the results file to + # the given address at the end of each run. You can specify + # multiple addresses using commas (no spaces!). + # notify jfk@hotmail.earth.com#joe + # if the hash mark ('#') at the beginning of the above line + # is removed, PubCrawler will send a notification to + # the given address (minus '#joe') at the end of each run + # The recipient will be addressed with joe (optional). + # You can specify multiple addresses using commas (no spaces!). 
+ +mail_features <%= @mail_features %> + # comma-separated list of extra features for the mail + # to be sent (without them it will be plain text). These are: + # css,javascript,entrez_links,pubcrawler_links,images,html,description + # or simply 'all' for everything + +lynx <%= @lynx %> + # for Unix-users only: + # if you don't want to use the libwww-Perl module and + # have an alternative browser installed, that works from the + # command line, like 'Lynx', you can use it by entering the + # command that evokes it (e.g. lynx '/usr/bin/lynx') + # NOTE: THIS OVERRIDES ANY PROXY SETTINGS! + #header 'head.html' + # specify a location of a header (in HTML-style) that will be used + # for the output file (disabled unless hash mark is removed) + +prefix <%= @prefix %> + # if you would like a different prefix to be used + # for standard files (configuration, database, log) + # insert it here (default is program name up to first dot): + +system <%= @system %> + # name of operating system + # might need the explicit assignment of an adequate value + # ('MacOS','Win','Unix', or 'Linux') + # if Perl is not configured properly + #### PROXY SETTING (if desired and/or necessary) #### + #proxy www.tcd.ie/proxy.cgi + # insert either a proxy server (eg. 'proxy.domain.com') + # or the address of a proxy configuration file + # if known (eg. 'www.domain.com/proxy.cgi') + # and uncomment + +proxy_port <%= @proxy_port %> + # port of the proxy server,defaults to '80' + +proxy_auth <%= @proxy_auth %> +proxy_pass <%= @proxy_pass %> + # in case you need to submit a username and a password + # for accessing your proxy, you can fill it in here: + # CAUTION! Having passwords stored in a file means a + # possible security risk! Please delete after usage + # or use the according command line option! + # !!! Please make sure that the module MIME::Base64 is + # installed for the proxy authorization to work!!! 
+ +time_out <%= @time_out %> + # specify how many SECONDS to give remote servers + # in creating responses before the library disconnects + # (defaults to 180 seconds if no value is given) + +test_URL <%= @test_URL %> + # test-URL for proxy-test + +no_test <%= @no_test %> + # if a proxy is given, the internet connection is tested + # at the start of the program by default; this can be + # suppressed if a value of '1' is given here + +indent <%= @indent %> + # amount of pixels that output is being shifted to the right + +no_decap <%= @no_decap %> + # put 1 inbetween single quotes if you want to disable + # processing of the entrez documents (chopping of head and tail + # and collecting UIs) + +spacer <%= @spacer %> + # specify a gif that will be inserted in the output to shift + # text past the left, blue column + # (defaults to: http://www.gen.tcd.ie/pubcrawler/pics/spacer.gif) + + #-----------------------------------------------------------------------------# + + ############################################################ + ################# SEARCH SPECIFICATION ##################### + ############################################################ + ########################################################################### + ###### Entrez abbreviations for fields # + ###### (see http://www4.ncbi.nlm.nih.gov/PubMed/linking.html). # + ###### combine fields with AND, OR, BUTNOT and parentheses. # + # # + # for PubMed : one of AFFL, ALL, AUTH, ECNO, JOUR, MESH, MAJR, PAGE, # + # PDAT, PTYP, KYWD, WORD, TITLE, or VOL. # + # for Nucleotide : one of ACCN, AUTH, PDAT, ECNO, FKEY, GENE, JOUR, KYWD, # + # MDAT, ORGN, PROP, PROT, SQID, SLEN, SUBS, or # + # WORD. # + # # + # where ACCN = Accession Number, AFFL = Afilliation, ALL = all fields, # + # AUTH = Author Name, ECNO = E. C. 
Number, FKEY = Feature Key, # + # GENE = gene name, JOUR =journal name, KYWD = Keywords, # + # MAJR = MeSH major topic, MDAT = modification date, # + # MESH = mesh term,ORGN = organism, PACC = Primary Accesion Number, # + # PAGE = first page, PDAT = publication/creation date, # + # PROP = Properties, PROT = protein name, PTYP = Publication Type, # + # SUBS = Substance, TITL = title word, WORD = text word, # + # VOL = volume. # + ########################################################################### + ##### Each search-specification has to be written on one line. + ##### The first word must specify the database: + ##### pubmed, pm_neighbour, genbank, or gb_neighbour + ##### Any following words enclosed in single quotes (') will be used + ##### as an alias for this query, otherwise they will be considered + ##### Entrez-search-terms, as will the rest of the line. + ##### You can have as many different searches as you wish. The results of all + ##### searches will be combined according to their aliases. + ##### You CAN NOT use the same alias for searches at different databases! + ##### Write your search descriptions below this line. + ##### (Upper/lower case does not matter.) + +<%- if @searches and @searches.kind_of?(Array) -%> +<%- @searches.each do |search| -%> +<%= search['database'] %> '<%= search['alias'] %>' <%= search['term'] %> +<%- end -%> +<%- end -%>