mirror of
https://github.com/krislamo/puppet-pubcrawler
synced 2024-11-10 01:40:35 +00:00
move to github
This commit is contained in:
commit
b0b69367c8
72
README.md
Normal file
72
README.md
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
Installs Pubcrawler from EuPathDB-specific RPM and manages
|
||||||
|
`/etc/pubcrawler/` configuration files.
|
||||||
|
|
||||||
|
The RPM includes a `/etc/cron.d` job that runs all *.config in `/etc/pubcrawler`.
|
||||||
|
|
||||||
|
`pubcrawler::params` defines the common set of configuration parameters.
|
||||||
|
See `templates/product.config.erb` in this module for documentation on
|
||||||
|
each parameter.
|
||||||
|
|
||||||
|
A configuration file will be generated in `/etc/pubcrawler` for each
|
||||||
|
key in the `$site_specific_params` hash. All site-specific configuration
|
||||||
|
files will use values in `$default_params` unless overridden in the
|
||||||
|
`$site_specific_params`. For example, AmoebaDB will use '14' for
|
||||||
|
`viewdays` whereas CryptoDB will use '120' because it has been
|
||||||
|
overridden in the `$site_specific_params` hash for the `CryptoDB` key.
|
||||||
|
|
||||||
|
...
|
||||||
|
$viewdays = '14'
|
||||||
|
...
|
||||||
|
|
||||||
|
$site_specific_params = {
|
||||||
|
'AmoebaDB' => {
|
||||||
|
'bgcolor' => '#C8C5A2',
|
||||||
|
'header_icon' => '/a/images/AmoebaDB/title_s.png',
|
||||||
|
'searches' => [
|
||||||
|
...
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
|
'CryptoDB' => {
|
||||||
|
'bgcolor' => '#ffcccc',
|
||||||
|
'header_icon' => '/a/images/CryptoDB/title_s.png',
|
||||||
|
'viewdays' => '120',
|
||||||
|
'searches' => [
|
||||||
|
...
|
||||||
|
],
|
||||||
|
},
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
### Hiera
|
||||||
|
|
||||||
|
The values in `::pubcrawler::params` can be overridden in Hiera.
|
||||||
|
|
||||||
|
pubcrawler::site_specific_params:
|
||||||
|
AmoebaDB:
|
||||||
|
bgcolor: '#C8C5A2'
|
||||||
|
header_icon: '/a/images/AmoebaDB/title_s.png'
|
||||||
|
searches:
|
||||||
|
- database: pubmed
|
||||||
|
alias: Entamoeba
|
||||||
|
term: Entamoeba [ALL]
|
||||||
|
- database: pubmed
|
||||||
|
alias: Acanthamoeba
|
||||||
|
term: Acanthamoeba [ALL]
|
||||||
|
|
||||||
|
In this example, the `$site_specific_params` in `::pubcrawler::params`
|
||||||
|
will be replaced with a hash containing only AmoebaDB - so only
|
||||||
|
AmoebaDB will be configured.
|
||||||
|
|
||||||
|
Other individual params can be set in Hiera.
|
||||||
|
|
||||||
|
pubcrawler::fullmax: 666
|
||||||
|
|
||||||
|
### Configuration Testing
|
||||||
|
|
||||||
|
To run a single configuration manually, use
|
||||||
|
|
||||||
|
PRODUCT=TrichDB
|
||||||
|
sudo -u nobody /usr/share/pubcrawler/bin/pubcrawler.pl -c /etc/pubcrawler/${PRODUCT}.config
|
||||||
|
|
||||||
|
Results are written to `/usr/share/pubcrawler/html/${PRODUCT}/` (`/usr/share/pubcrawler/html/TrichDB/` in this example).
|
42
manifests/config.pp
Normal file
42
manifests/config.pp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# Generates one PubCrawler configuration file, /etc/pubcrawler/${name}.config,
# by rendering templates/product.config.erb.
#
# The resource title ($name) is used by the template as the project name and
# as the directory component of the default html_file and work_dir paths.
#
# Every parameter except $html_file defaults to the same-named parameter of
# the pubcrawler class, so that class must be declared for the defaults to
# resolve. See templates/product.config.erb for the meaning of each setting.
#
# $html_file is optional: when left undef the template falls back to
# /usr/share/pubcrawler/html/${name}/index.html.
define pubcrawler::config (
  $bgcolor        = $pubcrawler::bgcolor,
  $header_icon    = $pubcrawler::header_icon,
  $searches       = $pubcrawler::searches,
  $viewdays       = $pubcrawler::viewdays,
  $relentrezdate  = $pubcrawler::relentrezdate,
  $getmax         = $pubcrawler::getmax,
  $fullmax        = $pubcrawler::fullmax,
  $include_config = $pubcrawler::include_config,
  $search_URL     = $pubcrawler::search_URL,
  $neighbour_URL  = $pubcrawler::neighbour_URL,
  $retrieve_URL   = $pubcrawler::retrieve_URL,
  $work_dir       = $pubcrawler::work_dir,
  $extra_range    = $pubcrawler::extra_range,
  $check          = $pubcrawler::check,
  $prompt         = $pubcrawler::prompt,
  $verbose        = $pubcrawler::verbose,
  $mute           = $pubcrawler::mute,
  $log_file       = $pubcrawler::log_file,
  $base_URL       = $pubcrawler::base_URL,
  $mail_features  = $pubcrawler::mail_features,
  $lynx           = $pubcrawler::lynx,
  $prefix         = $pubcrawler::prefix,
  $system         = $pubcrawler::system,
  $proxy_port     = $pubcrawler::proxy_port,
  $proxy_auth     = $pubcrawler::proxy_auth,
  $proxy_pass     = $pubcrawler::proxy_pass,
  $time_out       = $pubcrawler::time_out,
  $test_URL       = $pubcrawler::test_URL,
  $no_test        = $pubcrawler::no_test,
  $indent         = $pubcrawler::indent,
  $no_decap       = $pubcrawler::no_decap,
  $spacer         = $pubcrawler::spacer,
  # The template already branches on @html_file; exposing it here makes that
  # override reachable. Added last so existing declarations are unaffected.
  $html_file      = undef,
) {

  # Ownership and mode are spelled out to match the other files this module
  # manages. The file has to stay readable by the unprivileged account that
  # runs pubcrawler (the cron job and the README example both use 'nobody').
  # NOTE(review): the rendered file contains proxy_pass, so a non-empty proxy
  # password ends up world-readable - confirm that is acceptable.
  file { "/etc/pubcrawler/${name}.config":
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    content => template('pubcrawler/product.config.erb'),
    require => Package['pubcrawler'],
  }

}
|
59
manifests/init.pp
Normal file
59
manifests/init.pp
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# Installs the pubcrawler package, its Apache alias configuration and its
# cron job, then declares one pubcrawler::config resource (one
# /etc/pubcrawler/<key>.config file) per key of $site_specific_params.
#
# $site_specific_params is a hash of hashes: each key is a site name and each
# value holds the pubcrawler::config parameters to override for that site.
# All other parameters are the module-wide defaults that pubcrawler::config
# falls back to; their initial values come from pubcrawler::params and each
# one is documented in templates/product.config.erb.
class pubcrawler (
  $site_specific_params = $pubcrawler::params::site_specific_params,
  $bgcolor              = $pubcrawler::params::bgcolor,
  $header_icon          = $pubcrawler::params::header_icon,
  $searches             = $pubcrawler::params::searches,
  $viewdays             = $pubcrawler::params::viewdays,
  $relentrezdate        = $pubcrawler::params::relentrezdate,
  $getmax               = $pubcrawler::params::getmax,
  $fullmax              = $pubcrawler::params::fullmax,
  $include_config       = $pubcrawler::params::include_config,
  $search_URL           = $pubcrawler::params::search_URL,
  $neighbour_URL        = $pubcrawler::params::neighbour_URL,
  $retrieve_URL         = $pubcrawler::params::retrieve_URL,
  $work_dir             = $pubcrawler::params::work_dir,
  $extra_range          = $pubcrawler::params::extra_range,
  $check                = $pubcrawler::params::check,
  $prompt               = $pubcrawler::params::prompt,
  $verbose              = $pubcrawler::params::verbose,
  $mute                 = $pubcrawler::params::mute,
  $log_file             = $pubcrawler::params::log_file,
  $base_URL             = $pubcrawler::params::base_URL,
  $mail_features        = $pubcrawler::params::mail_features,
  $lynx                 = $pubcrawler::params::lynx,
  $prefix               = $pubcrawler::params::prefix,
  $system               = $pubcrawler::params::system,
  $proxy_port           = $pubcrawler::params::proxy_port,
  $proxy_auth           = $pubcrawler::params::proxy_auth,
  $proxy_pass           = $pubcrawler::params::proxy_pass,
  $time_out             = $pubcrawler::params::time_out,
  $test_URL             = $pubcrawler::params::test_URL,
  $no_test              = $pubcrawler::params::no_test,
  $indent               = $pubcrawler::params::indent,
  $no_decap             = $pubcrawler::params::no_decap,
  $spacer               = $pubcrawler::params::spacer,
) inherits pubcrawler::params {

  package { 'pubcrawler':
    ensure => installed,
  }

  # Apache alias exposing the generated result pages under /pubcrawler.
  # Mode is explicit so this file is managed the same way as the cron job.
  file { '/etc/httpd/conf.d/pubcrawler.conf':
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    content => template('pubcrawler/http.pubcrawler.conf.erb'),
  }

  # The job runs a script under /usr/share/pubcrawler, so it is ordered after
  # the package. The README states the RPM ships a cron.d job of its own;
  # writing this file afterwards keeps the package install from replacing the
  # managed content within the same run (presumably the same path - verify
  # against the RPM file list).
  file { '/etc/cron.d/pubcrawler':
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    content => template('pubcrawler/cron.erb'),
    require => Package['pubcrawler'],
  }

  # One pubcrawler::config per site; the hash key becomes the resource title
  # and the nested hash supplies that site's parameter overrides.
  create_resources('pubcrawler::config', $site_specific_params)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
413
manifests/params.pp
Normal file
413
manifests/params.pp
Normal file
@ -0,0 +1,413 @@
|
|||||||
|
# Default parameters for Pubcrawler.
#
# The scalar variables are the module-wide defaults consumed by the
# pubcrawler class; templates/product.config.erb documents what each setting
# means. $site_specific_params lists, per site, the values that differ from
# those defaults together with the site's searches.
class pubcrawler::params {

  $bgcolor        = '#fff'
  $header_icon    = ''
  $viewdays       = '14'
  $relentrezdate  = '90'
  $getmax         = '800'
  $fullmax        = '500'
  $include_config = 'no'
  $search_URL     = ''
  $neighbour_URL  = ''
  $retrieve_URL   = ''
  # The pubcrawler class reads $pubcrawler::params::work_dir, so it has to be
  # defined here. undef keeps the template's per-site fallback,
  # /var/lib/pubcrawler/<name>.
  $work_dir       = undef
  $extra_range    = '1000'
  $check          = '0'
  $prompt         = '1'
  $verbose        = '0'
  $mute           = '0'
  $log_file       = ''
  $base_URL       = 'local_file'
  $mail_features  = 'all'
  $lynx           = ''
  $prefix         = ''
  $system         = ''
  $proxy_port     = ''
  $proxy_auth     = ''
  $proxy_pass     = ''
  $time_out       = '180'
  $test_URL       = 'http://www.ncbi.nlm.nih.gov/'
  $no_test        = '0'
  $indent         = '125'
  $no_decap       = ''
  $spacer         = ''
  $searches       = undef

  # override some of the above defaults in hash form to make
  # configurations specific for a site.
  # Each search is a hash with the keys database, alias and term.
  $site_specific_params = {
    'AmoebaDB' => {
      bgcolor     => '#C8C5A2',
      header_icon => '/a/images/AmoebaDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Acanthamoeba',
          term     => 'Acanthamoeba [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Entamoeba',
          term     => 'Entamoeba [ALL]',
        },

        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Acanthamoeba',
          term     => 'Acanthamoeba [ALL]',
        },
      ],
    },

    'CryptoDB' => {
      bgcolor     => '#ffcccc',
      header_icon => '/a/images/CryptoDB/title_s.png',
      searches    => [
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. parvum',
          term     => 'Cryptosporidium parvum [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. hominis',
          term     => 'Cryptosporidium hominis [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences, C. muris',
          term     => 'Cryptosporidium muris [ORGN]',
        },
      ],
    },

    'EuPathDB' => {
      bgcolor     => '#507494',
      header_icon => '/a/images/EuPathDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Cryptosporidium',
          term     => 'Cryptosporidium [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Encephalitozoon or Enterocytozoon',
          term     => 'Enterocytozoon [ALL] or Encephalitozoon [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Giardia',
          term     => 'Giardia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Microsporidia',
          term     => 'Edhazardia [ALL] OR Encephalitozoon [ALL] OR Enterocytozoon [ALL] OR Hamiltosporidium [ALL] OR Nematocida [ALL] OR Nosema [ALL] OR Vavraia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Piroplasma genera',
          term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Plasmodium',
          term     => 'Plasmodium [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Toxoplasma or Eimeria or Neospora',
          term     => 'Toxoplasma [ALL] or Eimeria [ALL] or Neospora [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trichomonas',
          term     => 'Trichomonas [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on other Apicomplexan organisms',
          term     => 'Apicomplexa OR Apicomplexan OR Eimeria OR Gregarina OR Neospora OR Sarcocystis OR Theileria NOT Cryptosporidium NOT Plasmodium NOT Toxoplasma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Leishmania or Trypanosoma or Crithidia',
          term     => 'Trypanosoma [ALL] or Leishmania [ALL] or Crithidia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Cryptosporidium',
          term     => 'Cryptosporidium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Entamoeba',
          term     => 'Entamoeba [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Giardia',
          term     => 'Giardia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Microsporidia',
          term     => 'Edhazardia [ORGN] OR Encephalitozoon [ORGN] OR Enterocytozoon [ORGN] OR Hamiltosporidium [ORGN] OR Nematocida [ORGN] OR Nosema [ORGN] OR Vavraia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Piroplasma genera',
          # Every genus carries its own [ORGN] tag, matching the PiroplasmaDB
          # entry for the same search; previously only Theileria was tagged.
          term     => 'Anthemosoma [ORGN] OR Babesia [ORGN] OR Cristalloidophora [ORGN] OR Dactylosoma [ORGN] OR Echinozoon [ORGN] OR Haemohormidium [ORGN] OR Sauroplasma [ORGN] OR Theileria [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Plasmodium',
          term     => 'Plasmodium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Toxoplasma',
          term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trichomonas',
          term     => 'Trichomonas [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for other Apicomplexan organisms',
          term     => 'Apicomplexa NOT Toxoplasma NOT Plasmodium NOT Cryptosporidium [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Leishmania or Trypanosoma or Crithidia',
          term     => 'Leishmania [ORGN] or Trypanosoma [ORGN] or Crithidia [ORGN]',
        },
      ],
    },

    'FungiDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/FungiDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces',
          term     => 'Aspergillus [ALL] or Candida [ALL] or Coccidioides [ALL] or Cryptococcus [ALL] or Fusarium [ALL] or Gibberella [ALL] or Magnaporthe [ALL] or Neurospora [ALL] or Puccinia [ALL] or Rhizopus [ALL] or Saccharomyces [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces',
          term     => 'Aspergillus [ORGN] or Candida [ORGN] or Coccidioides [ORGN] or Cryptococcus [ORGN] or Fusarium [ORGN] or Gibberella [ORGN] or Magnaporthe [ORGN] or Neurospora [ORGN] or Puccinia [ORGN] or Rhizopus [ORGN] or Saccharomyces [ORGN]',
        },
      ],
    },

    'GiardiaDB' => {
      bgcolor     => '#993333',
      header_icon => '/a/images/GiardiaDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Giardia',
          term     => 'Giardia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Giardia',
          term     => 'Giardia [ORGN]',
        },
      ],
    },

    'HostDB' => {
      bgcolor     => '#e08265',
      header_icon => '/a/images/HostDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on host parasite interaction',
          term     => 'host parasite interaction [ALL]',
        },
      ],
    },

    'MicrosporidiaDB' => {
      bgcolor     => '#C4BAD3',
      header_icon => '/a/images/MicrosporidiaDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Microsporidia',
          term     => 'Edhazardia [ALL] OR Encephalitozoon [ALL] OR Enterocytozoon [ALL] OR Hamiltosporidium [ALL] OR Nematocida [ALL] OR Nosema [ALL] OR Vavraia [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Microsporidia',
          term     => 'Edhazardia [ORGN] OR Encephalitozoon [ORGN] OR Enterocytozoon [ORGN] OR Hamiltosporidium [ORGN] OR Nematocida [ORGN] OR Nosema [ORGN] OR Vavraia [ORGN] or Anncaliia [ORGN] or Vittaforma [ORGN]',
        },
      ],
    },

    'PiroplasmaDB' => {
      bgcolor     => '#e08265',
      header_icon => '/a/images/PiroplasmaDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Piroplasma genera',
          term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Piroplasma genera',
          # A stray "[ORGN]" that followed the first OR (a field tag with no
          # term in front of it) has been removed.
          term     => 'Anthemosoma [ORGN] OR Babesia [ORGN] OR Cristalloidophora [ORGN] OR Dactylosoma [ORGN] OR Echinozoon [ORGN] OR Haemohormidium [ORGN] OR Sauroplasma [ORGN] OR Theileria [ORGN]',
        },
      ],
    },

    'PlasmoDB' => {
      bgcolor     => '#bbaacc',
      header_icon => '/a/images/PlasmoDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Plasmodium',
          term     => 'Plasmodium [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Plasmodium',
          term     => 'Plasmodium [ORGN]',
        },
      ],
    },

    'SchistoDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/SchistoDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles related to Schistosoma',
          term     => 'Schistosoma [ALL] or schistosoma [ALL] or blood-fluke [ALL] or Schistosomatidae [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Schistosoma',
          term     => 'Schistosoma [ORGN]',
        },
      ],
    },

    'ToxoDB' => {
      bgcolor     => '#cd919e',
      header_icon => '/a/images/ToxoDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Toxoplasma',
          term     => 'Toxoplasma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Eimeria',
          term     => 'Eimeria [ALL]',
        },
        {
          database => 'pubmed',
          # The alias now names the genus actually searched; it previously
          # duplicated the Eimeria alias from the entry above.
          alias    => 'New PubMed articles on Gregarina',
          term     => 'Gregarina [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Neospora',
          term     => 'Neospora [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Toxoplasma, Eimeria, or Neospora',
          term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN] or Gregarina [ORGN]',
        },
      ],
    },

    'TrichDB' => {
      bgcolor     => '#993333',
      header_icon => '/a/images/TrichDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trichomonas',
          term     => 'Trichomonas [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trichomonas',
          term     => 'Trichomonas [ORGN]',
        },
      ],
    },

    'TriTrypDB' => {
      bgcolor     => '#dfbba6',
      header_icon => '/a/images/TriTrypDB/title_s.png',
      searches    => [
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Trypanosoma',
          term     => 'Trypanosoma [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Leishmania',
          term     => 'Leishmania [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Crithidia',
          term     => 'Crithidia [ALL]',
        },
        {
          database => 'pubmed',
          alias    => 'New PubMed articles on Endotrypanum',
          term     => 'Endotrypanum [ALL]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Trypanosoma',
          term     => 'Trypanosoma [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Crithidia',
          term     => 'Crithidia [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Endotrypanum',
          term     => 'Endotrypanum [ORGN]',
        },
        {
          database => 'genbank',
          alias    => 'New Genbank sequences for Leishmania',
          term     => 'Leishmania [ORGN]',
        },
      ],
    },

  }

}
|
1
templates/cron.erb
Normal file
1
templates/cron.erb
Normal file
@ -0,0 +1 @@
|
|||||||
|
20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall
|
7
templates/http.pubcrawler.conf.erb
Normal file
7
templates/http.pubcrawler.conf.erb
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
Alias /pubcrawler /usr/share/pubcrawler/html
|
||||||
|
<Directory /usr/share/pubcrawler/html>
|
||||||
|
Options FollowSymLinks Includes
|
||||||
|
AllowOverride None
|
||||||
|
Order allow,deny
|
||||||
|
Allow from all
|
||||||
|
</Directory>
|
255
templates/product.config.erb
Normal file
255
templates/product.config.erb
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
############ PubCrawler configuration file ############
|
||||||
|
####### (for PubCrawler Version higher than 0.53) ######
|
||||||
|
############################################################
|
||||||
|
# #
|
||||||
|
# lines beginning with hash marks (#) are ignored. #
|
||||||
|
# #
|
||||||
|
# PubCrawler home page: #
|
||||||
|
# http://www.pubcrawler.ie #
|
||||||
|
# #
|
||||||
|
# Specify your file locations and search options here. #
|
||||||
|
# Each line is in the format FIELD space VALUE. #
|
||||||
|
# Any leading or trailing quotes will be chopped off. #
|
||||||
|
# Hash marks separate comments from data. #
|
||||||
|
# You must specify a value for all 6 mandatory fields. #
|
||||||
|
# #
|
||||||
|
############################################################
|
||||||
|
############################################################
|
||||||
|
################# MANDATORY SETTINGS #######################
|
||||||
|
############################################################
|
||||||
|
project <%= @name %>
|
||||||
|
# Identifier for a project. This value is displayed in the header
|
||||||
|
# of the results page.
|
||||||
|
|
||||||
|
background_color <%= @bgcolor %>
|
||||||
|
# background color for sectional headers of the result page.
|
||||||
|
|
||||||
|
header_icon <%= @header_icon %>
|
||||||
|
# relative or absolute URL for icon to display in results page
|
||||||
|
# header.
|
||||||
|
|
||||||
|
<%- if @html_file -%>
|
||||||
|
html_file <%= @html_file %>
|
||||||
|
<%- else -%>
|
||||||
|
html_file /usr/share/pubcrawler/html/<%= @name %>/index.html
|
||||||
|
<%- end -%>
|
||||||
|
# html_file is the name of the output HTML file for results
|
||||||
|
# it will be written to the specified working directory
|
||||||
|
# unless an absolute pathname is given
|
||||||
|
|
||||||
|
viewdays <%= @viewdays %>
|
||||||
|
# viewdays is the number of days each document will be shown.
|
||||||
|
|
||||||
|
relentrezdate <%= @relentrezdate %>
|
||||||
|
# relentrezdate (relative date of insertion into Entrez)
|
||||||
|
# is the maximum age (in days) of database entries to be reported.
|
||||||
|
# NOTE: sometimes records first appear in the databases several
|
||||||
|
# days or even weeks later than indicated by their database
|
||||||
|
# date-stamp, i.e. with non-zero values of relentrezdate.
|
||||||
|
# Therefore relentrezdate needs to be high enough to find these
|
||||||
|
# records. A relentrezdate of 90 days is suggested (if you make
|
||||||
|
# relentrezdate too huge the searches will be very slow.)
|
||||||
|
# other valid entries are:
|
||||||
|
# '1 year', '2 years', '5 years', '10 years', and 'no limit'
|
||||||
|
|
||||||
|
getmax <%= @getmax %>
|
||||||
|
# getmax is the maximum number of documents to be retrieved
|
||||||
|
# for each search carried out.
|
||||||
|
|
||||||
|
fullmax <%= @fullmax %>
|
||||||
|
# fullmax is the maximum number of documents for which a full
|
||||||
|
# report is being presented
|
||||||
|
# if more documents were retrieved, these can be accessed
|
||||||
|
# through a hyperlink (in groups of up to fullmax articles)
|
||||||
|
|
||||||
|
include_config <%= @include_config %>
|
||||||
|
# include_config (yes/no) specifies whether or not to append
|
||||||
|
# this config-file to the end of the output file
|
||||||
|
#-------------------------------------------------------------------#
|
||||||
|
############################################################
|
||||||
|
################## OPTIONAL SETTINGS #######################
|
||||||
|
############################################################
|
||||||
|
search_URL <%= @search_URL %>
|
||||||
|
# URL where searches are being sent to
|
||||||
|
# defaults to
|
||||||
|
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
|
||||||
|
|
||||||
|
neighbour_URL <%= @neighbour_URL %>
|
||||||
|
# URL where neighbourhood searches are being sent to
|
||||||
|
# defaults to
|
||||||
|
# https://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi
|
||||||
|
|
||||||
|
retrieve_URL <%= @retrieve_URL %>
|
||||||
|
# URL where documents are retrieved from
|
||||||
|
# defaults to
|
||||||
|
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi
|
||||||
|
|
||||||
|
<%- if @work_dir -%>
|
||||||
|
work_dir <%= @work_dir %>
|
||||||
|
<%- else -%>
|
||||||
|
work_dir /var/lib/pubcrawler/<%= @name %>
|
||||||
|
<%- end -%>
|
||||||
|
# specify a directory in which databases, output and log file
|
||||||
|
# will be located
|
||||||
|
# if no value given, the current working directory will be used
|
||||||
|
|
||||||
|
extra_range <%= @extra_range %>
|
||||||
|
# specifies the number of documents combined in a link
|
||||||
|
# minimum value is 1, defaults to 'fullmax'
|
||||||
|
|
||||||
|
check <%= @check %>
|
||||||
|
# if set to '1' program will just check all settings
|
||||||
|
# without performing the actual search
|
||||||
|
# RECOMMENDED FOR THE FIRST RUN!
|
||||||
|
|
||||||
|
prompt <%= @prompt %>
|
||||||
|
# for Mac-users only:
|
||||||
|
# if this option is set to '1' the program will ask you
|
||||||
|
# explicitly for command line options
|
||||||
|
# NOTE: THIS IS NOT SUITABLE FOR AUTOMATED USE!
|
||||||
|
|
||||||
|
verbose <%= @verbose %>
|
||||||
|
# verbose 0 runs silently and makes log file
|
||||||
|
# verbose 1 writes log output on screen
|
||||||
|
|
||||||
|
mute <%= @mute %>
|
||||||
|
# mute 0 writes some messages to STDERR
|
||||||
|
# mute 1 stops ALL messages going to STDERR
|
||||||
|
# unless an error was encountered
|
||||||
|
|
||||||
|
log_file <%= @log_file %>
|
||||||
|
# name of file for log-output
|
||||||
|
# (verbose has to be set to '0')
|
||||||
|
#base_URL 'http://www.gen.tcd.ie/pubcrawler/pubcrawler_output.html'
|
||||||
|
base_URL <%= @base_URL %>
|
||||||
|
# specify a URL, that will be used for
|
||||||
|
# the 'Back to Top' link in the output page
|
||||||
|
# 'local_file' makes links relative to results file
|
||||||
|
# mail joe@hotmail.earth.com
|
||||||
|
# if the hash mark ('#') at the beginning of the above line
|
||||||
|
# is removed, PubCrawler will send the results file to
|
||||||
|
# the given address at the end of each run. You can specify
|
||||||
|
# multiple addresses using commas (no spaces!).
|
||||||
|
# notify jfk@hotmail.earth.com#joe
|
||||||
|
# if the hash mark ('#') at the beginning of the above line
|
||||||
|
# is removed, PubCrawler will send a notification to
|
||||||
|
# the given address (minus '#joe') at the end of each run
|
||||||
|
# The recipient will be addressed with joe (optional).
|
||||||
|
# You can specify multiple addresses using commas (no spaces!).
|
||||||
|
|
||||||
|
mail_features <%= @mail_features %>
|
||||||
|
# comma-separated list of extra features for the mail
|
||||||
|
# to be sent (without them it will be plain text). These are:
|
||||||
|
# css,javascript,entrez_links,pubcrawler_links,images,html,description
|
||||||
|
# or simply 'all' for everything
|
||||||
|
|
||||||
|
lynx <%= @lynx %>
|
||||||
|
# for Unix-users only:
|
||||||
|
# if you don't want to use the libwww-Perl module and
|
||||||
|
# have an alternative browser installed, that works from the
|
||||||
|
# command line, like 'Lynx', you can use it by entering the
|
||||||
|
# command that invokes it (e.g. lynx '/usr/bin/lynx')
|
||||||
|
# NOTE: THIS OVERRIDES ANY PROXY SETTINGS!
|
||||||
|
#header 'head.html'
|
||||||
|
# specify a location of a header (in HTML-style) that will be used
|
||||||
|
# for the output file (disabled unless hash mark is removed)
|
||||||
|
|
||||||
|
prefix <%= @prefix %>
|
||||||
|
# if you would like a different prefix to be used
|
||||||
|
# for standard files (configuration, database, log)
|
||||||
|
# insert it here (default is program name up to first dot):
|
||||||
|
|
||||||
|
system <%= @system %>
|
||||||
|
# name of operating system
|
||||||
|
# might need the explicit assignment of an adequate value
|
||||||
|
# ('MacOS','Win','Unix', or 'Linux')
|
||||||
|
# if Perl is not configured properly
|
||||||
|
#### PROXY SETTING (if desired and/or necessary) ####
|
||||||
|
#proxy www.tcd.ie/proxy.cgi
|
||||||
|
# insert either a proxy server (eg. 'proxy.domain.com')
|
||||||
|
# or the address of a proxy configuration file
|
||||||
|
# if known (eg. 'www.domain.com/proxy.cgi')
|
||||||
|
# and uncomment
|
||||||
|
|
||||||
|
proxy_port <%= @proxy_port %>
|
||||||
|
# port of the proxy server, defaults to '80'
|
||||||
|
|
||||||
|
proxy_auth <%= @proxy_auth %>
|
||||||
|
proxy_pass <%= @proxy_pass %>
|
||||||
|
# in case you need to submit a username and a password
|
||||||
|
# for accessing your proxy, you can fill it in here:
|
||||||
|
# CAUTION! Having passwords stored in a file means a
|
||||||
|
# possible security risk! Please delete after usage
|
||||||
|
# or use the corresponding command line option!
|
||||||
|
# !!! Please make sure that the module MIME::Base64 is
|
||||||
|
# installed for the proxy authorization to work!!!
|
||||||
|
|
||||||
|
time_out <%= @time_out %>
|
||||||
|
# specify how many SECONDS to give remote servers
|
||||||
|
# in creating responses before the library disconnects
|
||||||
|
# (defaults to 180 seconds if no value is given)
|
||||||
|
|
||||||
|
test_URL <%= @test_URL %>
|
||||||
|
# test-URL for proxy-test
|
||||||
|
|
||||||
|
no_test <%= @no_test %>
|
||||||
|
# if a proxy is given, the internet connection is tested
|
||||||
|
# at the start of the program by default; this can be
|
||||||
|
# suppressed if a value of '1' is given here
|
||||||
|
|
||||||
|
indent <%= @indent %>
|
||||||
|
# number of pixels by which the output is shifted to the right
|
||||||
|
|
||||||
|
no_decap <%= @no_decap %>
|
||||||
|
# put 1 between single quotes if you want to disable
|
||||||
|
# processing of the entrez documents (chopping of head and tail
|
||||||
|
# and collecting UIs)
|
||||||
|
|
||||||
|
spacer <%= @spacer %>
|
||||||
|
# specify a gif that will be inserted in the output to shift
|
||||||
|
# text past the left, blue column
|
||||||
|
# (defaults to: http://www.gen.tcd.ie/pubcrawler/pics/spacer.gif)
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------#
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
################# SEARCH SPECIFICATION #####################
|
||||||
|
############################################################
|
||||||
|
###########################################################################
|
||||||
|
###### Entrez abbreviations for fields #
|
||||||
|
###### (see http://www4.ncbi.nlm.nih.gov/PubMed/linking.html). #
|
||||||
|
###### combine fields with AND, OR, BUTNOT and parentheses. #
|
||||||
|
# #
|
||||||
|
# for PubMed : one of AFFL, ALL, AUTH, ECNO, JOUR, MESH, MAJR, PAGE, #
|
||||||
|
# PDAT, PTYP, KYWD, WORD, TITL, or VOL. #
|
||||||
|
# for Nucleotide : one of ACCN, AUTH, PDAT, ECNO, FKEY, GENE, JOUR, KYWD, #
|
||||||
|
# MDAT, ORGN, PROP, PROT, SQID, SLEN, SUBS, or #
|
||||||
|
# WORD. #
|
||||||
|
# #
|
||||||
|
# where ACCN = Accession Number, AFFL = Affiliation, ALL = all fields, #
|
||||||
|
# AUTH = Author Name, ECNO = E. C. Number, FKEY = Feature Key, #
|
||||||
|
# GENE = gene name, JOUR = journal name, KYWD = Keywords, #
|
||||||
|
# MAJR = MeSH major topic, MDAT = modification date, #
|
||||||
|
# MESH = mesh term, ORGN = organism, PACC = Primary Accession Number, #
|
||||||
|
# PAGE = first page, PDAT = publication/creation date, #
|
||||||
|
# PROP = Properties, PROT = protein name, PTYP = Publication Type, #
|
||||||
|
# SUBS = Substance, TITL = title word, WORD = text word, #
|
||||||
|
# VOL = volume. #
|
||||||
|
###########################################################################
|
||||||
|
##### Each search-specification has to be written on one line.
|
||||||
|
##### The first word must specify the database:
|
||||||
|
##### pubmed, pm_neighbour, genbank, or gb_neighbour
|
||||||
|
##### Any following words enclosed in single quotes (') will be used
|
||||||
|
##### as an alias for this query, otherwise they will be considered
|
||||||
|
##### Entrez-search-terms, as will the rest of the line.
|
||||||
|
##### You can have as many different searches as you wish. The results of all
|
||||||
|
##### searches will be combined according to their aliases.
|
||||||
|
##### You CANNOT use the same alias for searches in different databases!
|
||||||
|
##### Write your search descriptions below this line.
|
||||||
|
##### (Upper/lower case does not matter.)
|
||||||
|
|
||||||
|
<%- if @searches and @searches.kind_of?(Array) -%>
|
||||||
|
<%- @searches.each do |search| -%>
|
||||||
|
<%= search['database'] %> '<%= search['alias'] %>' <%= search['term'] %>
|
||||||
|
<%- end -%>
|
||||||
|
<%- end -%>
|
Loading…
Reference in New Issue
Block a user