sci-biology/TransDecoder: properly install Perl modules; fix @INC module paths by adding the TransDecoder:: prefix; ebuild cleanup

Package-Manager: portage-2.2.18
author: Martin Mokrejš <mmokrejs@fold.natur.cuni.cz> 2015-11-19 21:28:40 +0100
committer: Martin Mokrejš <mmokrejs@fold.natur.cuni.cz> 2015-11-19 21:28:40 +0100
commit: 8f7a916bbdbb62d44a963c77692e3895d6b838ba (patch)
tree: b9f2cb4c2c1a55e231687afc19058d3fb885e3b7 /sci-biology/TransDecoder
parent: sci-biology/BBmap: version bump (diff)
download: sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.tar.gz
sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.tar.bz2
sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.zip
5 files changed, 63 insertions, 157 deletions
diff --git a/sci-biology/TransDecoder/ChangeLog b/sci-biology/TransDecoder/ChangeLog
index 680a908e8..67b410e0f 100644
--- a/sci-biology/TransDecoder/ChangeLog
+++ b/sci-biology/TransDecoder/ChangeLog
@@ -2,6 +2,12 @@
 # Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2
 # $Id$
 
+  19 Nov 2015; Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
+  +files/TransDecoder-2.0.1__fix_paths.patch, -files/TransDecoder.patch,
+  TransDecoder-2.0.1.ebuild, files/pfam_runner.pl.patch:
+  sci-biology/TransDecoder: properly install Perl modules; fix @INC module
+  paths by adding the TransDecoder:: prefix; ebuild cleanup
+
   06 Jun 2015; Justin Lecher <jlec@gentoo.org> metadata.xml:
   sci-biology/TransDecoder: Add github to remote-id in metadata.xml
 
diff --git a/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild
index b99375f44..5894f2ae5 100644
--- a/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild
+++ b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild
@@ -4,43 +4,56 @@
 
 EAPI=5
 
+[ "$PV" == "9999" ] && inherit git-r3
+
 PERL_EXPORT_PHASE_FUNCTIONS=no
 inherit perl-module eutils toolchain-funcs
 
 DESCRIPTION="Extract ORF/CDS regions from FASTA sequences"
 HOMEPAGE="http://sourceforge.net/projects/transdecoder/"
-SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/"${PV}".tar.gz -> ${P}.tar.gz"
+if [ "$PV" == "9999" ]; then
+	EGIT_REPO_URI="https://github.com/TransDecoder/TransDecoder.git"
+	KEYWORDS=""
+else
+    SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/"${PV}".tar.gz -> ${P}.tar.gz"
+	KEYWORDS="~amd64"
+	S="${WORKDIR}"/TransDecoder-"${PV}"
+fi
 
 LICENSE="BSD-BroadInstitute"
 SLOT="0"
-KEYWORDS="~amd64"
 IUSE=""
 
 DEPEND=""
 RDEPEND="${DEPEND}
 	sci-biology/cd-hit
-	sci-biology/hmmer
 	sci-biology/parafly
 	sci-biology/ffindex"
-# cdhit-4.6.1 is a real dependency, at least hmmer is optional (also ncbi-tools++ is now used for ORF searches)
 
-S="${WORKDIR}"/TransDecoder-2.0.1
+src_prepare(){
+	rm -rf transdecoder_plugins/cd-hit
+	for f in PerlLib/*.pm; do
+		p=`basename $f .pm`;
+		sed -e "s#use $p;#use TransDecoder::$p;#" -i PerlLib/*.pm util/*.pl TransDecoder.LongOrfs TransDecoder.Predict;
+	done
+	epatch "${FILESDIR}"/"${P}"__fix_paths.patch
+	epatch "${FILESDIR}"/pfam_runner.pl.patch
+}
 
-##src_prepare(){
-#	#mv Makefile Makefile.old
-#	#epatch "${FILESDIR}"/TransDecoder.patch
-#	#epatch "${FILESDIR}"/pfam_runner.pl.patch
-#}
+src_compile(){
+	einfo "Skipping compilation of bundled cd-hit code, nothing else to do"
+}
 
 # avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see
-# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704" thread in archives
-#
-# you cna get it from http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin
+# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704"
+# thread in the archives. You can get the file from
+# http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin
 
 src_install(){
 	dobin TransDecoder.Predict TransDecoder.LongOrfs
 	insinto /usr/share/${PN}/util
-	dobin util/*.pl
+	doins util/*.pl
+	chmod -R a+rx "${D}"/usr/share/${PN}/util
 	# zap the bundled cdhit binaries copied from transdecoder_plugins/cdhit/ to util/bin
 	rm -rf util/bin
 	#
@@ -55,15 +68,16 @@ src_install(){
 	#
 	perl_set_version
 	insinto ${VENDOR_LIB}/${PN}
-	dobin PerlLib/*.pm # BUG: install into /usr/bin but wanted to have it readable and executable in ${VENDOR_LIB}/${PN} instead
+	doins PerlLib/*.pm
+	dodoc Release.Notes
 	einfo "Fetch on your own:"
 	einfo "wget --mirror -nH -nd http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin"
 	einfo "hmmpress Pfam-AB.hmm.bin"
 }
 
 pkg_postinst(){
-	einfo "It is recommended to use TransDecoder with hmmer-3 or at least NCBI blast"
-	einfo "from either sci-biology/ncbi-blast+ (released more often) or"
-	einfo "from sci-biology/ncbi-toolkit++ (huge bundle with releases and less frequent bugfixes)"
-	einfo "Author says the minimum requirement is sci-biology/cd-hit"
+	einfo "It is recommended to use TransDecoder with sci-biology/hmmer-3 or"
+	einfo "at least with NCBI blast from either:"
+	einfo "    sci-biology/ncbi-blast+ (released more often) or from"
+	einfo "    sci-biology/ncbi-toolkit++ (a huge bundle with releases and less frequent bugfixes)"
 }
diff --git a/sci-biology/TransDecoder/files/TransDecoder-2.0.1__fix_paths.patch b/sci-biology/TransDecoder/files/TransDecoder-2.0.1__fix_paths.patch
new file mode 100644
index 000000000..0a6fca050
--- /dev/null
+++ b/sci-biology/TransDecoder/files/TransDecoder-2.0.1__fix_paths.patch
@@ -0,0 +1,22 @@
+--- TransDecoder-2.0.1/TransDecoder.LongOrfs.ori	2015-11-19 21:05:53.340219051 +0100
++++ TransDecoder-2.0.1/TransDecoder.LongOrfs	2015-11-19 21:20:44.870221380 +0100
+@@ -64,7 +64,7 @@
+ use TransDecoder::Fasta_reader;
+ use TransDecoder::Longest_orf;
+ 
+-my $UTIL_DIR = "$FindBin::RealBin/util";
++my $UTIL_DIR = "/usr/share/TransDecoder/util/";
+ $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+ 
+ 
+--- TransDecoder-2.0.1/TransDecoder.Predict.ori	2015-11-19 21:06:04.280219080 +0100
++++ TransDecoder-2.0.1/TransDecoder.Predict	2015-11-19 21:21:22.560221479 +0100
+@@ -52,7 +52,7 @@
+ use TransDecoder::Fasta_reader;
+ use TransDecoder::Longest_orf;
+ 
+-my $UTIL_DIR = "$FindBin::RealBin/util";
++my $UTIL_DIR = "/usr/share/TransDecoder/util/";
+ $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+ 
+ 
diff --git a/sci-biology/TransDecoder/files/TransDecoder.patch b/sci-biology/TransDecoder/files/TransDecoder.patch
deleted file mode 100644
index c0cff94d4..000000000
--- a/sci-biology/TransDecoder/files/TransDecoder.patch
+++ /dev/null
@@ -1,136 +0,0 @@
---- /usr/bin/TransDecoder	2015-01-09 11:22:55.000000000 +0100
-+++ TransDecoder	2015-01-09 14:31:44.095839522 +0100
-@@ -48,7 +48,7 @@
-  --prepare_pfam                         Prepare data for PFAM search and then quit (for running PFAM on HPC/computing cluster
-                                          with or without MPI )
- 
-- --CPU <int>                            number of threads to use; (default: 2)
-+ --CPU <int>                            number of threads to use; (default: 1)
- 
-  --MPI                                  use MPI w/ execution of hmmscan
- 
-@@ -76,7 +76,7 @@
- 
- =head1 PFAM 
- 
--You will need hmmer installed. Use hmmpress to prepare the database for hmmer.
-+You will need hmmer installed. Use hmmpress from >=hmmer-3.0 to prepare the database for hmmer.
-  L<See|https://sourceforge.net/projects/transdecoder/files/Pfam-AB.hmm.bin> for downloading the database.
- 
- =head1 CD-HIT
-@@ -105,7 +105,6 @@
- use Longest_orf;
- 
- my $UTIL_DIR = "$FindBin::RealBin/util";
--$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
- $ENV{LD_LIBRARY_PATH} .= ":$FindBin::RealBin/util/lib64";
- 
- my ($cd_hit_est_exec) = &check_program('cd-hit-est');
-@@ -124,7 +123,7 @@
- my $verbose;
- my $search_pfam = "";
- my ($reuse,$pfam_out);
--my $CPU = 2;
-+my $CPU = 1;
- my $RETAIN_LONG_ORFS = 900;
- my $MPI = 0;
- 
-@@ -330,15 +329,15 @@
- my $top_cds_file = $train_file && -s $train_file ? $train_file : "$cds_file.top_${top_ORFs_train}_longest";
- if (!-s $top_cds_file) {
-     # get longest entries
--    my $cmd = "$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
-+    my $cmd = "get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
-     
-     unless ($reuse && -s $top_cds_file){
-         if ($cd_hit_est_exec){
-             # to speed things up only check for redundancy up to 4x the number of entries we want
-             my $red_num = $top_ORFs_train * 4 ;
--            &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
-+            &process_cmd("get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
-             &process_cmd("$cd_hit_est_exec -r 1 -i $workdir/redundant_top -o $workdir/redundant_top.nr90 -M 0 -T $CPU >/dev/null 2>/dev/null");
--            &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
-+            &process_cmd("get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
-             unlink("$workdir/redundant_top");
-             unlink("$workdir/redundant_top.nr90");
-             unlink("$workdir/redundant_top.nr90.bak.clstr");
-@@ -349,20 +348,20 @@
-     }
- }
- 
--$cmd = "$UTIL_DIR/compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
-+$cmd = "compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
- &process_cmd($cmd) unless $reuse && -s "$workdir/base_freqs.dat";
- 
- 
- # get hexamer scores
--#$cmd = "$UTIL_DIR/seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
-+#$cmd = "seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
- #&process_cmd($cmd) unless ($reuse && -s "hexamer.scores");
- 
--$cmd = "$UTIL_DIR/seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
-+$cmd = "seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
- &process_cmd($cmd) unless $reuse && -s "$workdir/hexamer.scores";
- 
- 
- # score all cds entries
--$cmd = "$UTIL_DIR/score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
-+$cmd = "score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
- &process_cmd($cmd) unless ($reuse && -s "$cds_file.scores");
- 
- 
-@@ -440,18 +439,18 @@
- }
- 
- # index the current gff file:
--$cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file";
-+$cmd = "index_gff3_files_by_isoform.pl $gff3_file";
- &process_cmd($cmd);
- 
- # retrieve the best entries:
--$cmd = "$UTIL_DIR/gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
-+$cmd = "gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
- &process_cmd($cmd);
- 
- {
-     my $final_output_prefix = basename($transcripts_file) . ".transdecoder";
-     
-     # exclude shadow orfs (smaller orfs in different reading frame that are eclipsed by longer orfs)
--    $cmd = "$UTIL_DIR/remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
-+    $cmd = "remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
-     &process_cmd($cmd);
-     
- 
-@@ -462,14 +461,14 @@
-     my $gff3_file = "$final_output_prefix.gff3";
-     my $bed_file = $gff3_file;
-     $bed_file =~ s/\.gff3$/\.bed/;
--    $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file";
-+    $cmd = "gff3_file_to_bed.pl $gff3_file > $bed_file";
-     &process_cmd($cmd);
-     
-     
-     # make a peptide file:
-     my $best_pep_file = $gff3_file;
-     $best_pep_file =~ s/\.gff3$/\.pep/;
--    $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
-+    $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
-     &process_cmd($cmd);
- 
- 
-@@ -477,13 +476,13 @@
-     # make a CDS file:
-     my $best_cds_file = $best_pep_file;
-     $best_cds_file =~ s/\.pep$/\.cds/;
--    $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
-+    $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
-     &process_cmd($cmd);
- 
-     # make a CDS file:
-     my $best_cdna_file = $best_pep_file;
-     $best_cdna_file =~ s/\.pep$/\.mRNA/;
--    $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
-+    $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
-     &process_cmd($cmd);
-     
- }
diff --git a/sci-biology/TransDecoder/files/pfam_runner.pl.patch b/sci-biology/TransDecoder/files/pfam_runner.pl.patch
index 7809b1aaa..fbc6f6d92 100644
--- a/sci-biology/TransDecoder/files/pfam_runner.pl.patch
+++ b/sci-biology/TransDecoder/files/pfam_runner.pl.patch
@@ -1,5 +1,5 @@
---- /usr/bin/pfam_runner.pl	2015-01-09 11:22:55.000000000 +0100
-+++ pfam_runner.pl	2015-01-09 14:25:43.385838579 +0100
+--- util/pfam_runner.pl	2015-01-09 11:22:55.000000000 +0100
++++ util/pfam_runner.pl	2015-01-09 14:25:43.385838579 +0100
 @@ -24,7 +24,7 @@
  my $workdir;
  my $verbose;
author	Martin Mokrejš <mmokrejs@fold.natur.cuni.cz>	2015-11-19 21:28:40 +0100
committer	Martin Mokrejš <mmokrejs@fold.natur.cuni.cz>	2015-11-19 21:28:40 +0100
commit	8f7a916bbdbb62d44a963c77692e3895d6b838ba (patch)
tree	b9f2cb4c2c1a55e231687afc19058d3fb885e3b7 /sci-biology/TransDecoder
parent	sci-biology/BBmap: version bump (diff)
download	sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.tar.gz sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.tar.bz2 sci-8f7a916bbdbb62d44a963c77692e3895d6b838ba.zip