mirror of
				https://github.com/krislamo/puppet-pubcrawler
				synced 2025-10-30 16:18:35 +00:00 
			
		
		
		
	move to github
This commit is contained in:
		
							
								
								
									
										72
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| Installs Pubcrawler from EuPathDB-specific RPM and manages | ||||
| `/etc/pubcrawler/` configuration files. | ||||
|  | ||||
| The RPM includes a `/etc/cron.d` job that runs every `*.config` file in `/etc/pubcrawler`. | ||||
|  | ||||
| `pubcrawler::params` defines the common set of configuration parameters. | ||||
| See `templates/product.config.erb` in this module for documentation on | ||||
| each parameter. | ||||
|  | ||||
| A configuration file will be generated in `/etc/pubcrawler` for each | ||||
| key in the `$site_specific_params` hash. All site-specific configuration | ||||
| files will use values in `$default_params` unless overridden in the | ||||
| `$site_specific_params`. For example, AmoebaDB will use '14' for | ||||
| `viewdays` whereas CryptoDB will use '120' because it has been | ||||
| overridden in the `$site_specific_params` hash for the `CryptoDB` key. | ||||
|  | ||||
|     ... | ||||
|     $viewdays         = '14' | ||||
|     ... | ||||
|  | ||||
|     $site_specific_params = { | ||||
|       'AmoebaDB' => { | ||||
|         'bgcolor' => '#C8C5A2', | ||||
|         'header_icon' => '/a/images/AmoebaDB/title_s.png', | ||||
|         'searches' => [ | ||||
|           ... | ||||
|         ], | ||||
|       }, | ||||
|  | ||||
|       'CryptoDB' => { | ||||
|         'bgcolor' => '#ffcccc', | ||||
|         'header_icon' => '/a/images/CryptoDB/title_s.png', | ||||
|         'viewdays' => '120', | ||||
|         'searches' => [ | ||||
|           ... | ||||
|         ], | ||||
|       }, | ||||
|     ... | ||||
|     } | ||||
|  | ||||
| ### Hiera | ||||
|  | ||||
| The values in `::pubcrawler::params` can be overridden in Hiera. | ||||
|  | ||||
|     pubcrawler::site_specific_params: | ||||
|       AmoebaDB: | ||||
|         bgcolor: '#C8C5A2' | ||||
|         header_icon: '/a/images/AmoebaDB/title_s.png' | ||||
|         searches:  | ||||
|           - database: pubmed | ||||
|             alias: Entamoeba | ||||
|             term: Entamoeba [ALL] | ||||
|           - database: pubmed | ||||
|             alias: Acanthamoeba | ||||
|             term: Acanthamoeba [ALL] | ||||
|  | ||||
| In this example, the `$site_specific_params` in  `::pubcrawler::params` | ||||
| will be replaced with a hash containing only AmoebaDB - so only | ||||
| AmoebaDB will be configured. | ||||
|  | ||||
| Other individual params can be set in Hiera. | ||||
|  | ||||
|     pubcrawler::fullmax: 666 | ||||
|  | ||||
| ### Configuration Testing | ||||
|  | ||||
| To run a single configuration manually, use | ||||
|  | ||||
|     PRODUCT=TrichDB | ||||
|     sudo -u nobody  /usr/share/pubcrawler/bin/pubcrawler.pl -c /etc/pubcrawler/${PRODUCT}.config | ||||
|  | ||||
| Results are written to `/usr/share/pubcrawler/html/${PRODUCT}/` (here, `TrichDB`). | ||||
							
								
								
									
										42
									
								
								manifests/config.pp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								manifests/config.pp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| # Generate one PubCrawler configuration file, /etc/pubcrawler/${name}.config, | ||||
| # from templates/product.config.erb. | ||||
| # | ||||
| # The resource title ($name) is the product/site name (e.g. 'AmoebaDB'); it | ||||
| # is used for the file name and is available to the template as @name. | ||||
| # | ||||
| # Every parameter defaults to the value of the same-named parameter of the | ||||
| # pubcrawler class, so a declaration only needs to list the settings that | ||||
| # differ for that site. See templates/product.config.erb for the meaning of | ||||
| # each setting. | ||||
| # | ||||
| # NOTE(review): the template also tests @html_file, which is not a parameter | ||||
| # of this define - confirm whether an $html_file parameter was intended. | ||||
| define pubcrawler::config ( | ||||
|   $bgcolor           = $pubcrawler::bgcolor, | ||||
|   $header_icon       = $pubcrawler::header_icon, | ||||
|   $searches          = $pubcrawler::searches, | ||||
|   $viewdays          = $pubcrawler::viewdays, | ||||
|   $relentrezdate     = $pubcrawler::relentrezdate, | ||||
|   $getmax            = $pubcrawler::getmax, | ||||
|   $fullmax           = $pubcrawler::fullmax, | ||||
|   $include_config    = $pubcrawler::include_config, | ||||
|   $search_URL        = $pubcrawler::search_URL, | ||||
|   $neighbour_URL     = $pubcrawler::neighbour_URL, | ||||
|   $retrieve_URL      = $pubcrawler::retrieve_URL, | ||||
|   $work_dir          = $pubcrawler::work_dir, | ||||
|   $extra_range       = $pubcrawler::extra_range, | ||||
|   $check             = $pubcrawler::check, | ||||
|   $prompt            = $pubcrawler::prompt, | ||||
|   $verbose           = $pubcrawler::verbose, | ||||
|   $mute              = $pubcrawler::mute, | ||||
|   $log_file          = $pubcrawler::log_file, | ||||
|   $base_URL          = $pubcrawler::base_URL, | ||||
|   $mail_features     = $pubcrawler::mail_features, | ||||
|   $lynx              = $pubcrawler::lynx, | ||||
|   $prefix            = $pubcrawler::prefix, | ||||
|   $system            = $pubcrawler::system, | ||||
|   $proxy_port        = $pubcrawler::proxy_port, | ||||
|   $proxy_auth        = $pubcrawler::proxy_auth, | ||||
|   $proxy_pass        = $pubcrawler::proxy_pass, | ||||
|   $time_out          = $pubcrawler::time_out, | ||||
|   $test_URL          = $pubcrawler::test_URL, | ||||
|   $no_test           = $pubcrawler::no_test, | ||||
|   $indent            = $pubcrawler::indent, | ||||
|   $no_decap          = $pubcrawler::no_decap, | ||||
|   $spacer            = $pubcrawler::spacer, | ||||
| ) { | ||||
|  | ||||
|   # Ownership and mode are stated explicitly (as for the file resources in | ||||
|   # the pubcrawler class) so Puppet also enforces them on a file that already | ||||
|   # exists; the cron job runs pubcrawler as 'nobody', which must be able to | ||||
|   # read the configuration. | ||||
|   file { "/etc/pubcrawler/${name}.config": | ||||
|     ensure  => file, | ||||
|     owner   => 'root', | ||||
|     group   => 'root', | ||||
|     mode    => '0644', | ||||
|     content => template('pubcrawler/product.config.erb'), | ||||
|     require => Package['pubcrawler'], | ||||
|   } | ||||
|  | ||||
| } | ||||
							
								
								
									
										59
									
								
								manifests/init.pp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								manifests/init.pp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| # Install pubcrawler and its configuration files. | ||||
| # | ||||
| # Manages the pubcrawler package, the Apache alias configuration, the cron | ||||
| # job, and one pubcrawler::config resource per key of $site_specific_params. | ||||
| # | ||||
| # $site_specific_params - hash of { site name => { pubcrawler::config | ||||
| #   parameter => value } }, handed to create_resources; each key produces | ||||
| #   /etc/pubcrawler/<site name>.config. | ||||
| # All other parameters - defaults taken from pubcrawler::params; they are | ||||
| #   the fallback values pubcrawler::config uses for any setting a site does | ||||
| #   not override. | ||||
| class pubcrawler ( | ||||
|   $site_specific_params = $pubcrawler::params::site_specific_params, | ||||
|   $bgcolor              = $pubcrawler::params::bgcolor, | ||||
|   $header_icon          = $pubcrawler::params::header_icon, | ||||
|   $searches             = $pubcrawler::params::searches, | ||||
|   $viewdays             = $pubcrawler::params::viewdays, | ||||
|   $relentrezdate        = $pubcrawler::params::relentrezdate, | ||||
|   $getmax               = $pubcrawler::params::getmax, | ||||
|   $fullmax              = $pubcrawler::params::fullmax, | ||||
|   $include_config       = $pubcrawler::params::include_config, | ||||
|   $search_URL           = $pubcrawler::params::search_URL, | ||||
|   $neighbour_URL        = $pubcrawler::params::neighbour_URL, | ||||
|   $retrieve_URL         = $pubcrawler::params::retrieve_URL, | ||||
|   $work_dir             = $pubcrawler::params::work_dir, | ||||
|   $extra_range          = $pubcrawler::params::extra_range, | ||||
|   $check                = $pubcrawler::params::check, | ||||
|   $prompt               = $pubcrawler::params::prompt, | ||||
|   $verbose              = $pubcrawler::params::verbose, | ||||
|   $mute                 = $pubcrawler::params::mute, | ||||
|   $log_file             = $pubcrawler::params::log_file, | ||||
|   $base_URL             = $pubcrawler::params::base_URL, | ||||
|   $mail_features        = $pubcrawler::params::mail_features, | ||||
|   $lynx                 = $pubcrawler::params::lynx, | ||||
|   $prefix               = $pubcrawler::params::prefix, | ||||
|   $system               = $pubcrawler::params::system, | ||||
|   $proxy_port           = $pubcrawler::params::proxy_port, | ||||
|   $proxy_auth           = $pubcrawler::params::proxy_auth, | ||||
|   $proxy_pass           = $pubcrawler::params::proxy_pass, | ||||
|   $time_out             = $pubcrawler::params::time_out, | ||||
|   $test_URL             = $pubcrawler::params::test_URL, | ||||
|   $no_test              = $pubcrawler::params::no_test, | ||||
|   $indent               = $pubcrawler::params::indent, | ||||
|   $no_decap             = $pubcrawler::params::no_decap, | ||||
|   $spacer               = $pubcrawler::params::spacer, | ||||
| ) inherits pubcrawler::params { | ||||
|  | ||||
|   package { 'pubcrawler': | ||||
|     ensure => installed, | ||||
|   } | ||||
|  | ||||
|   # Apache alias exposing the generated result pages. The mode is stated | ||||
|   # explicitly, as for the cron file below, so it is enforced on an existing | ||||
|   # file as well as on a newly created one. | ||||
|   file { '/etc/httpd/conf.d/pubcrawler.conf': | ||||
|     ensure  => file, | ||||
|     owner   => 'root', | ||||
|     group   => 'root', | ||||
|     mode    => '0644', | ||||
|     content => template('pubcrawler/http.pubcrawler.conf.erb'), | ||||
|   } | ||||
|  | ||||
|   # Scheduled run of all configurations. | ||||
|   file { '/etc/cron.d/pubcrawler': | ||||
|     ensure  => file, | ||||
|     owner   => 'root', | ||||
|     group   => 'root', | ||||
|     mode    => '0644', | ||||
|     content => template('pubcrawler/cron.erb'), | ||||
|   } | ||||
|  | ||||
|   # One pubcrawler::config (i.e. one /etc/pubcrawler/<site>.config) per key. | ||||
|   create_resources(pubcrawler::config, $site_specific_params) | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										413
									
								
								manifests/params.pp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										413
									
								
								manifests/params.pp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,413 @@ | ||||
| # Default parameters for Pubcrawler | ||||
| class pubcrawler::params { | ||||
|  | ||||
|   # Module-wide defaults. Each value is the fallback for the same-named | ||||
|   # parameter of the pubcrawler class; see templates/product.config.erb for | ||||
|   # what each setting means. | ||||
|   $bgcolor          = '#fff' | ||||
|   $header_icon      = '' | ||||
|   $viewdays         = '14' | ||||
|   $relentrezdate    = '90' | ||||
|   $getmax           = '800' | ||||
|   $fullmax          = '500' | ||||
|   $include_config   = 'no' | ||||
|   $search_URL       = '' | ||||
|   $neighbour_URL    = '' | ||||
|   $retrieve_URL     = '' | ||||
|   # The pubcrawler class reads $pubcrawler::params::work_dir, so it must be | ||||
|   # assigned here; undef keeps the previous (unset) value while avoiding an | ||||
|   # unknown-variable error when strict_variables is enabled. | ||||
|   $work_dir         = undef | ||||
|   $extra_range      = '1000' | ||||
|   $check            = '0' | ||||
|   $prompt           = '1' | ||||
|   $verbose          = '0' | ||||
|   $mute             = '0' | ||||
|   $log_file         = '' | ||||
|   $base_URL         = 'local_file' | ||||
|   $mail_features    = 'all' | ||||
|   $lynx             = '' | ||||
|   $prefix           = '' | ||||
|   $system           = '' | ||||
|   $proxy_port       = '' | ||||
|   $proxy_auth       = '' | ||||
|   $proxy_pass       = '' | ||||
|   $time_out         = '180' | ||||
|   $test_URL         = 'http://www.ncbi.nlm.nih.gov/' | ||||
|   $no_test          = '0' | ||||
|   $indent           = '125' | ||||
|   $no_decap         = '' | ||||
|   $spacer           = '' | ||||
|   # No module-wide searches; each site supplies its own list. | ||||
|   $searches         = undef | ||||
|  | ||||
|   # override some of the above defaults in hash form to make | ||||
|   # configurations specific for a site. | ||||
|   $site_specific_params = { | ||||
|     'AmoebaDB' => { | ||||
|       bgcolor     => '#C8C5A2', | ||||
|       header_icon => '/a/images/AmoebaDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Entamoeba', | ||||
|           term     => 'Entamoeba [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Acanthamoeba', | ||||
|           term     => 'Acanthamoeba [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Entamoeba', | ||||
|           term     => 'Entamoeba [ALL]', | ||||
|         }, | ||||
|  | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Acanthamoeba', | ||||
|           term     => 'Acanthamoeba [ALL]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'CryptoDB' => { | ||||
|       bgcolor     => '#ffcccc', | ||||
|       header_icon => '/a/images/CryptoDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences, C. parvum', | ||||
|           term     => 'Cryptosporidium parvum [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences, C. hominis', | ||||
|           term     => 'Cryptosporidium hominis [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences, C. muris', | ||||
|           term     => 'Cryptosporidium muris [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'EuPathDB' => { | ||||
|       bgcolor     => '#507494', | ||||
|       header_icon => '/a/images/EuPathDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Cryptosporidium', | ||||
|           term     => 'Cryptosporidium [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Encephalitozoon or Enterocytozoon', | ||||
|           term     => 'Enterocytozoon [ALL] or Encephalitozoon [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Entamoeba', | ||||
|           term     => 'Entamoeba [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Giardia', | ||||
|           term     => 'Giardia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Microsporidia', | ||||
|           term     => 'Edhazardia [ALL]  OR Encephalitozoon [ALL]  OR Enterocytozoon [ALL]  OR Hamiltosporidium [ALL]  OR Nematocida [ALL]  OR Nosema [ALL]  OR Vavraia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Piroplasma genera', | ||||
|           term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Plasmodium', | ||||
|           term     => 'Plasmodium [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Toxoplasma or Eimeria or Neospora', | ||||
|           term     => 'Toxoplasma [ALL] or Eimeria [ALL] or Neospora [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Trichomonas', | ||||
|           term     => 'Trichomonas [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on other Apicomplexan organisms', | ||||
|           term     => 'Apicomplexa OR Apicomplexan OR Eimeria OR Gregarina OR Neospora OR Sarcocystis OR Theileria NOT Cryptosporidium NOT Plasmodium NOT Toxoplasma [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Leishmania or Trypanosoma or Crithidia', | ||||
|           term     => 'Trypanosoma [ALL] or  Leishmania [ALL] or Crithidia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Cryptosporidium', | ||||
|           term     => 'Cryptosporidium [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Entamoeba', | ||||
|           term     => 'Entamoeba [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Giardia', | ||||
|           term     => 'Giardia [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Microsporidia', | ||||
|           term     => 'Edhazardia [ORGN]  OR Encephalitozoon [ORGN]  OR Enterocytozoon [ORGN]  OR Hamiltosporidium [ORGN]  OR Nematocida [ORGN]  OR Nosema [ORGN]  OR Vavraia [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Piroplasma genera', | ||||
|           term     => 'Anthemosoma OR Babesia OR Cristalloidophora OR Dactylosoma OR Echinozoon OR Haemohormidium OR Sauroplasma OR Theileria [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Plasmodium', | ||||
|           term     => 'Plasmodium [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Toxoplasma', | ||||
|           term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Trichomonas', | ||||
|           term     => 'Trichomonas [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for other Apicomplexan organisms', | ||||
|           term     => 'Apicomplexa NOT Toxoplasma NOT Plasmodium NOT Cryptosporidium [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Leishmania or Trypanosoma or Crithidia', | ||||
|           term     => 'Leishmania [ORGN] or Trypanosoma [ORGN] or Crithidia [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'FungiDB' => { | ||||
|       bgcolor     => '#cd919e', | ||||
|       header_icon => '/a/images/FungiDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces', | ||||
|           term     => 'Aspergillus [ALL] or Candida [ALL] or Coccidioides [ALL] or Cryptococcus [ALL] or Fusarium [ALL] or Gibberella [ALL] or Magnaporthe [ALL] or Neurospora [ALL] or Puccinia [ALL] or Rhizopus [ALL] or Saccharomyces [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Aspergillus or Candida or Coccidioides or Cryptococcus or Fusarium or Gibberella or Magnaporthe or Neurospora or Puccinia or Rhizopus or Saccharomyces', | ||||
|           term     => 'Aspergillus [ORGN] or Candida [ORGN] or Coccidioides [ORGN] or Cryptococcus [ORGN] or Fusarium [ORGN] or Gibberella [ORGN] or Magnaporthe [ORGN] or Neurospora [ORGN] or Puccinia [ORGN] or Rhizopus [ORGN] or Saccharomyces [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'GiardiaDB' => { | ||||
|       bgcolor     => '#993333', | ||||
|       header_icon => '/a/images/GiardiaDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Giardia', | ||||
|           term     => 'Giardia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Giardia', | ||||
|           term     => 'Giardia [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'HostDB' => { | ||||
|       bgcolor     => '#e08265', | ||||
|       header_icon => '/a/images/HostDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on host parasite interaction', | ||||
|           term     => 'host parasite interaction [ALL]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'MicrosporidiaDB' => { | ||||
|       bgcolor     => '#C4BAD3', | ||||
|       header_icon => '/a/images/MicrosporidiaDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Microsporidia', | ||||
|           term     => 'Edhazardia [ALL]  OR Encephalitozoon [ALL]  OR Enterocytozoon [ALL]  OR Hamiltosporidium [ALL]  OR Nematocida [ALL]  OR Nosema [ALL]  OR Vavraia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Microsporidia', | ||||
|           term     => 'Edhazardia [ORGN]  OR Encephalitozoon [ORGN]  OR Enterocytozoon [ORGN]  OR Hamiltosporidium [ORGN]  OR Nematocida [ORGN]  OR Nosema [ORGN]  OR Vavraia [ORGN] or Anncaliia  [ORGN] or Vittaforma [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     # PiroplasmaDB: PubMed and GenBank searches across the Piroplasma genera. | ||||
|     'PiroplasmaDB' => { | ||||
|       bgcolor     => '#e08265', | ||||
|       header_icon => '/a/images/PiroplasmaDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Piroplasma genera', | ||||
|           term     => 'Anthemosoma [ALL] OR Babesia [ALL] OR Cristalloidophora [ALL] OR Dactylosoma [ALL] OR Echinozoon [ALL] OR Haemohormidium [ALL] OR Sauroplasma [ALL] OR Theileria [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Piroplasma genera', | ||||
|           term     => 'Anthemosoma [ORGN] OR Babesia [ORGN] OR Cristalloidophora [ORGN] OR Dactylosoma [ORGN] OR Echinozoon [ORGN] OR Haemohormidium [ORGN] OR Sauroplasma [ORGN] OR Theileria [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'PlasmoDB' => { | ||||
|       bgcolor     => '#bbaacc', | ||||
|       header_icon => '/a/images/PlasmoDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Plasmodium', | ||||
|           term     => 'Plasmodium [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Plasmodium', | ||||
|           term     => 'Plasmodium [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'SchistoDB' => { | ||||
|       bgcolor     => '#cd919e', | ||||
|       header_icon => '/a/images/SchistoDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles related to Schistosoma', | ||||
|           term     => 'Schistosoma [ALL] or schistosoma [ALL] or blood-fluke [ALL] or Schistosomatidae [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Schistosoma', | ||||
|           term     => 'Schistosoma [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     # ToxoDB: one PubMed search per genus plus a combined GenBank search. | ||||
|     'ToxoDB' => { | ||||
|       bgcolor     => '#cd919e', | ||||
|       header_icon => '/a/images/ToxoDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Toxoplasma', | ||||
|           term     => 'Toxoplasma [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Eimeria', | ||||
|           term     => 'Eimeria [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Gregarina', | ||||
|           term     => 'Gregarina [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Neospora', | ||||
|           term     => 'Neospora [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Toxoplasma, Eimeria, or Neospora', | ||||
|           term     => 'Toxoplasma [ORGN] or Eimeria [ORGN] or Neospora [ORGN] or Gregarina [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'TrichDB' => { | ||||
|       bgcolor     => '#993333', | ||||
|       header_icon => '/a/images/TrichDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Trichomonas', | ||||
|           term     => 'Trichomonas [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Trichomonas', | ||||
|           term     => 'Trichomonas [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|     'TriTrypDB' => { | ||||
|       bgcolor     => '#dfbba6', | ||||
|       header_icon => '/a/images/TriTrypDB/title_s.png', | ||||
|       searches => [ | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Trypanosoma', | ||||
|           term     => 'Trypanosoma [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Leishmania', | ||||
|           term     => 'Leishmania [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Crithidia', | ||||
|           term     => 'Crithidia [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'pubmed', | ||||
|           alias    => 'New PubMed articles on Endotrypanum', | ||||
|           term     => 'Endotrypanum [ALL]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Trypanosoma', | ||||
|           term     => 'Trypanosoma [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Crithidia', | ||||
|           term     => 'Crithidia [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Endotrypanum', | ||||
|           term     => 'Endotrypanum [ORGN]', | ||||
|         }, | ||||
|         { | ||||
|           database => 'genbank', | ||||
|           alias    => 'New Genbank sequences for Leishmania', | ||||
|           term     => 'Leishmania [ORGN]', | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|  | ||||
|   } | ||||
|  | ||||
| } | ||||
							
								
								
									
										1
									
								
								templates/cron.erb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								templates/cron.erb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| # Installed as /etc/cron.d/pubcrawler by the pubcrawler Puppet class. | ||||
| # Runs pubcrawler-runall every day at 04:20 as user nobody. | ||||
| 20 4 * * * nobody /usr/share/pubcrawler/bin/pubcrawler-runall | ||||
							
								
								
									
										7
									
								
								templates/http.pubcrawler.conf.erb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								templates/http.pubcrawler.conf.erb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| # Serve the generated PubCrawler result pages under /pubcrawler. | ||||
| Alias /pubcrawler /usr/share/pubcrawler/html | ||||
| <Directory /usr/share/pubcrawler/html> | ||||
|     Options FollowSymLinks Includes | ||||
|     AllowOverride None | ||||
|     # Apache 2.4+ (mod_authz_core) uses Require; older releases use Order/Allow. | ||||
|     <IfModule mod_authz_core.c> | ||||
|         Require all granted | ||||
|     </IfModule> | ||||
|     <IfModule !mod_authz_core.c> | ||||
|         Order allow,deny | ||||
|         Allow from all | ||||
|     </IfModule> | ||||
| </Directory> | ||||
							
								
								
									
										255
									
								
								templates/product.config.erb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										255
									
								
								templates/product.config.erb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,255 @@ | ||||
|         ############   PubCrawler configuration file         ############ | ||||
|         #######   (for PubCrawler Version higher than 0.53)        ###### | ||||
|         ############################################################ | ||||
|         #                                                                # | ||||
|         #   lines beginning with hash marks (#) are ignored.             # | ||||
|         #                                                                # | ||||
|         #   PubCrawler home page:                                        # | ||||
|         #               http://www.pubcrawler.ie                         # | ||||
|         #                                                                # | ||||
|         #   Specify your file locations and search options here.         # | ||||
|         #   Each line is in the format  FIELD space VALUE.               # | ||||
|         #   Any leading or trailing quotes will be chopped off.          #  | ||||
|         #   Hash marks separate comments from data.                      # | ||||
|         #   You must specify a value for all 6 mandatory fields.         # | ||||
|         #                                                                # | ||||
|         ############################################################ | ||||
|         ############################################################ | ||||
|         ################# MANDATORY SETTINGS ####################### | ||||
|         ############################################################ | ||||
| project <%= @name %> | ||||
|         # Identifier for a project. This value is displayed in the header | ||||
|         # of the results page. | ||||
|  | ||||
| background_color <%= @bgcolor %> | ||||
|         # background color for sectional headers of the result page. | ||||
|  | ||||
| header_icon <%= @header_icon %> | ||||
|         # relative or absolute URL for icon to display in results page | ||||
|         # header. | ||||
|  | ||||
| <%- if @html_file -%> | ||||
| html_file <%= @html_file %> | ||||
| <%- else -%> | ||||
| html_file /usr/share/pubcrawler/html/<%= @name %>/index.html | ||||
| <%- end -%> | ||||
|         # html_file is the name of the output HTML file for results | ||||
|         # it will be written to the specified working directory | ||||
|         # unless an absolute pathname is given | ||||
|  | ||||
| viewdays <%= @viewdays %>    | ||||
|         # viewdays is the number of days each document will be shown. | ||||
|  | ||||
| relentrezdate <%= @relentrezdate %> | ||||
|         # relentrezdate (relative date of insertion into Entrez)  | ||||
|         # is the maximum age (in days) of database entries to be reported. | ||||
|         # NOTE: sometimes records first appear in the databases several | ||||
|         # days or even weeks later than indicated by their database | ||||
|         # date-stamp, i.e. with non-zero values of relentrezdate. | ||||
|         # Therefore relentrezdate needs to be high enough to find these | ||||
|         # records.  A relentrezdate of 90 days is suggested (if you make  | ||||
|         # relentrezdate too huge the searches will be very slow.) | ||||
|         # other valid entries are:  | ||||
|         # '1 year', '2 years', '5 years', '10 years', and 'no limit' | ||||
|  | ||||
| getmax <%= @getmax %> | ||||
|         # getmax is the maximum number of documents to be retrieved | ||||
|         # for each search carried out. | ||||
|  | ||||
| fullmax <%= @fullmax %> | ||||
|         # fullmax is the maximum number of documents for which a full | ||||
|         # report is being presented | ||||
|         # if more documents were retrieved, these can be accessed | ||||
|         # through a hyperlink (in groups of up to fullmax articles) | ||||
|  | ||||
| include_config <%= @include_config %> | ||||
|         # include_config (yes/no) specifies whether or not to append  | ||||
|         # this config-file to the end of the output file | ||||
|         #-------------------------------------------------------------------# | ||||
|         ############################################################ | ||||
|         ################## OPTIONAL SETTINGS ####################### | ||||
|         ############################################################ | ||||
| search_URL <%= @search_URL %> | ||||
|         # URL where searches are being sent to | ||||
|         # defaults to  | ||||
|         # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi | ||||
|  | ||||
| neighbour_URL <%= @neighbour_URL %> | ||||
|         # URL where neighbourhood searches are being sent to | ||||
|         # defaults to  | ||||
|         # https://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi | ||||
|  | ||||
| retrieve_URL <%= @retrieve_URL %> | ||||
|         # URL where documents are retrieved from | ||||
|         # defaults to  | ||||
|         # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi | ||||
|  | ||||
| <%- if @work_dir -%> | ||||
| work_dir <%= @work_dir %> | ||||
| <%- else -%> | ||||
| work_dir /var/lib/pubcrawler/<%= @name %> | ||||
| <%- end -%> | ||||
|         # specify a directory in which databases, output and log file  | ||||
|         # will be located | ||||
|         # if no value given, the current working directory will be used | ||||
|  | ||||
| extra_range <%= @extra_range %> | ||||
|         # specifies the number of documents combined in a link | ||||
|         # minimum value is 1, defaults to 'fullmax' | ||||
|  | ||||
| check <%= @check %> | ||||
|         # if set to '1' program will just check all settings | ||||
|         # without performing the actual search | ||||
|         # RECOMMENDED FOR THE FIRST RUN! | ||||
|  | ||||
| prompt <%= @prompt %> | ||||
|         # for Mac-users only: | ||||
|         # if this option is set to '1' the program will ask you  | ||||
|         # explicitly for command line options | ||||
|         # NOTE: THIS IS NOT SUITABLE FOR AUTOMATED USE! | ||||
|  | ||||
| verbose <%= @verbose %> | ||||
|         # verbose 0 runs silently and makes log file                  | ||||
|         # verbose 1 writes log output on screen  | ||||
|  | ||||
| mute <%= @mute %> | ||||
|         # mute 0 writes some messages to STDERR | ||||
|         # mute 1 stops ALL messages going to STDERR | ||||
|         # unless an error was encountered                     | ||||
|  | ||||
| log_file <%= @log_file %> | ||||
|         # name of file for log-output | ||||
|         # (verbose has to be set to '0')    | ||||
|         #base_URL 'http://www.gen.tcd.ie/pubcrawler/pubcrawler_output.html' | ||||
| base_URL <%= @base_URL %> | ||||
|         # specify a URL, that will be used for  | ||||
|         # the 'Back to Top' link in the output page    | ||||
|         # 'local_file' makes links relative to results file | ||||
|         # mail joe@hotmail.earth.com | ||||
|         # if the hash mark ('#') at the beginning of the above line | ||||
|         # is removed, PubCrawler will send the results file to | ||||
|         # the given address at the end of each run. You can specify | ||||
|         # multiple addresses using commas (no spaces!). | ||||
|         # notify jfk@hotmail.earth.com#joe | ||||
|         # if the hash mark ('#') at the beginning of the above line | ||||
|         # is removed, PubCrawler will send a notification to | ||||
|         # the given address (minus '#joe') at the end of each run | ||||
|         # The recipient will be addressed with joe (optional). | ||||
|         # You can specify multiple addresses using commas (no spaces!). | ||||
|  | ||||
| mail_features <%= @mail_features %> | ||||
|         # comma-separated list of extra features for the mail | ||||
|         # to be sent (without them it will be plain text). These are: | ||||
|         # css,javascript,entrez_links,pubcrawler_links,images,html,description | ||||
|         # or simply 'all' for everything | ||||
|  | ||||
| lynx <%= @lynx %> | ||||
|         # for Unix-users only: | ||||
|         # if you don't want to use the libwww-Perl module and | ||||
|         # have an alternative browser installed, that works from the  | ||||
|         # command line, like 'Lynx', you can use it by entering the | ||||
|         # command that invokes it (e.g. lynx '/usr/bin/lynx') | ||||
|         # NOTE: THIS OVERRIDES ANY PROXY SETTINGS! | ||||
|         #header 'head.html' | ||||
|         # specify a location of a header (in HTML-style) that will be used | ||||
|         # for the output file (disabled unless hash mark is removed) | ||||
|  | ||||
| prefix <%= @prefix %> | ||||
|         # if you would like a different prefix to be used  | ||||
|         # for standard files (configuration, database, log) | ||||
|         # insert it here (default is program name up to first dot): | ||||
|  | ||||
| system <%= @system %> | ||||
|         # name of operating system | ||||
|         # might need the explicit assignment of an adequate value | ||||
|         # ('MacOS','Win','Unix', or 'Linux') | ||||
|         # if Perl is not configured properly | ||||
|         #### PROXY SETTING (if desired and/or necessary) #### | ||||
|         #proxy www.tcd.ie/proxy.cgi | ||||
|         # insert either a proxy server (eg. 'proxy.domain.com') | ||||
|         # or the address of a proxy configuration file | ||||
|         # if known (eg. 'www.domain.com/proxy.cgi') | ||||
|         # and uncomment | ||||
|  | ||||
| proxy_port <%= @proxy_port %> | ||||
|         # port of the proxy server, defaults to '80' | ||||
|  | ||||
| proxy_auth <%= @proxy_auth %> | ||||
| proxy_pass <%= @proxy_pass %> | ||||
|         # in case you need to submit a username and a password | ||||
|         # for accessing your proxy, you can fill it in here: | ||||
|         # CAUTION! Having passwords stored in a file means a | ||||
|         # possible security risk! Please delete after usage | ||||
|         # or use the according command line option! | ||||
|         # !!! Please make sure that the module MIME::Base64 is  | ||||
|         # installed for the proxy authorization to work!!! | ||||
|  | ||||
| time_out <%= @time_out %> | ||||
|         # specify how many SECONDS to give remote servers  | ||||
|         # in creating responses before the library disconnects | ||||
|         # (defaults to 180 seconds if no value is given)  | ||||
|  | ||||
| test_URL <%= @test_URL %> | ||||
|         # test-URL for proxy-test | ||||
|  | ||||
| no_test <%= @no_test %> | ||||
|         # if a proxy is given, the internet connection is tested | ||||
|         # at the start of the program by default; this can be | ||||
|         # suppressed if a value of '1' is given here | ||||
|  | ||||
| indent <%= @indent %> | ||||
|         # amount of pixels that output is being shifted to the right | ||||
|  | ||||
| no_decap <%= @no_decap %> | ||||
|         # put 1 in between single quotes if you want to disable  | ||||
|         # processing of the entrez documents (chopping off head and tail | ||||
|         # and collecting UIs) | ||||
|  | ||||
| spacer <%= @spacer %> | ||||
|         # specify a gif that will be inserted in the output to shift | ||||
|         # text past the left, blue column | ||||
|         # (defaults to: http://www.gen.tcd.ie/pubcrawler/pics/spacer.gif) | ||||
|  | ||||
|         #-----------------------------------------------------------------------------# | ||||
|  | ||||
|         ############################################################ | ||||
|         ################# SEARCH SPECIFICATION ##################### | ||||
|         ############################################################ | ||||
|         ########################################################################### | ||||
|         ######  Entrez abbreviations for fields                                     # | ||||
|         ######  (see http://www4.ncbi.nlm.nih.gov/PubMed/linking.html).             # | ||||
|         ######  combine fields with AND, OR, BUTNOT and parentheses.                # | ||||
|         #                                                                           # | ||||
|         # for PubMed : one of AFFL, ALL, AUTH, ECNO, JOUR, MESH, MAJR, PAGE,        # | ||||
|         #                     PDAT, PTYP, KYWD, WORD, TITL, or VOL.                 # | ||||
|         # for Nucleotide : one of ACCN, AUTH, PDAT, ECNO, FKEY, GENE, JOUR, KYWD,   # | ||||
|         #                         MDAT, ORGN, PROP, PROT, SQID, SLEN, SUBS, or      # | ||||
|         #                         WORD.                                             # | ||||
|         #                                                                           # | ||||
|         # where ACCN = Accession Number, AFFL = Affiliation, ALL = all fields,      # | ||||
|         #       AUTH = Author Name, ECNO = E. C. Number, FKEY = Feature Key,        # | ||||
|         #       GENE = gene name, JOUR = journal name, KYWD = Keywords,             # | ||||
|         #       MAJR = MeSH major topic, MDAT = modification date,                  # | ||||
|         #       MESH = mesh term, ORGN = organism, PACC = Primary Accession Number, # | ||||
|         #       PAGE = first page, PDAT = publication/creation date,                # | ||||
|         #       PROP = Properties,  PROT = protein name, PTYP = Publication Type,   # | ||||
|         #       SUBS = Substance, TITL = title word, WORD = text word,              # | ||||
|         #       VOL = volume.                                                       # | ||||
|         ########################################################################### | ||||
|         ##### Each search-specification has to be written on one line. | ||||
|         ##### The first word must specify the database:  | ||||
|         ##### pubmed, pm_neighbour, genbank, or gb_neighbour | ||||
|         ##### Any following words enclosed in single quotes (') will be used | ||||
|         ##### as an alias for this query, otherwise they will be considered | ||||
|         ##### Entrez-search-terms, as will the rest of the line. | ||||
|         ##### You can have as many different searches as you wish. The results of all | ||||
|         ##### searches will be combined according to their aliases. | ||||
|         ##### You CAN NOT use the same alias for searches at different databases! | ||||
|         ##### Write your search descriptions below this line.   | ||||
|         ##### (Upper/lower case does not matter.) | ||||
|  | ||||
| <%- if @searches and @searches.kind_of?(Array) -%> | ||||
| <%- @searches.each do |search| -%> | ||||
| <%= search['database'] %> '<%= search['alias'] %>' <%= search['term'] %> | ||||
| <%- end -%> | ||||
| <%- end -%> | ||||
		Reference in New Issue
	
	Block a user