bioinformatics.scm 397 KB


  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
  3. ;;; Copyright © 2015, 2016, 2017 Ben Woodcroft <donttrustben@gmail.com>
  4. ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
  5. ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
  6. ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
  7. ;;; Copyright © 2016, 2017 Efraim Flashner <efraim@flashner.co.il>
  8. ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
  9. ;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
  10. ;;;
  11. ;;; This file is part of GNU Guix.
  12. ;;;
  13. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  14. ;;; under the terms of the GNU General Public License as published by
  15. ;;; the Free Software Foundation; either version 3 of the License, or (at
  16. ;;; your option) any later version.
  17. ;;;
  18. ;;; GNU Guix is distributed in the hope that it will be useful, but
  19. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  20. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. ;;; GNU General Public License for more details.
  22. ;;;
  23. ;;; You should have received a copy of the GNU General Public License
  24. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  25. (define-module (gnu packages bioinformatics)
  26. #:use-module ((guix licenses) #:prefix license:)
  27. #:use-module (guix packages)
  28. #:use-module (guix utils)
  29. #:use-module (guix download)
  30. #:use-module (guix git-download)
  31. #:use-module (guix hg-download)
  32. #:use-module (guix build-system ant)
  33. #:use-module (guix build-system gnu)
  34. #:use-module (guix build-system cmake)
  35. #:use-module (guix build-system ocaml)
  36. #:use-module (guix build-system perl)
  37. #:use-module (guix build-system python)
  38. #:use-module (guix build-system r)
  39. #:use-module (guix build-system ruby)
  40. #:use-module (guix build-system trivial)
  41. #:use-module (gnu packages)
  42. #:use-module (gnu packages autotools)
  43. #:use-module (gnu packages algebra)
  44. #:use-module (gnu packages base)
  45. #:use-module (gnu packages bash)
  46. #:use-module (gnu packages bison)
  47. #:use-module (gnu packages boost)
  48. #:use-module (gnu packages compression)
  49. #:use-module (gnu packages cpio)
  50. #:use-module (gnu packages cran)
  51. #:use-module (gnu packages curl)
  52. #:use-module (gnu packages documentation)
  53. #:use-module (gnu packages databases)
  54. #:use-module (gnu packages datastructures)
  55. #:use-module (gnu packages file)
  56. #:use-module (gnu packages flex)
  57. #:use-module (gnu packages gawk)
  58. #:use-module (gnu packages gcc)
  59. #:use-module (gnu packages gd)
  60. #:use-module (gnu packages gtk)
  61. #:use-module (gnu packages glib)
  62. #:use-module (gnu packages graph)
  63. #:use-module (gnu packages groff)
  64. #:use-module (gnu packages guile)
  65. #:use-module (gnu packages haskell)
  66. #:use-module (gnu packages image)
  67. #:use-module (gnu packages imagemagick)
  68. #:use-module (gnu packages java)
  69. #:use-module (gnu packages ldc)
  70. #:use-module (gnu packages linux)
  71. #:use-module (gnu packages logging)
  72. #:use-module (gnu packages machine-learning)
  73. #:use-module (gnu packages man)
  74. #:use-module (gnu packages maths)
  75. #:use-module (gnu packages mpi)
  76. #:use-module (gnu packages ncurses)
  77. #:use-module (gnu packages ocaml)
  78. #:use-module (gnu packages pcre)
  79. #:use-module (gnu packages parallel)
  80. #:use-module (gnu packages pdf)
  81. #:use-module (gnu packages perl)
  82. #:use-module (gnu packages pkg-config)
  83. #:use-module (gnu packages popt)
  84. #:use-module (gnu packages protobuf)
  85. #:use-module (gnu packages python)
  86. #:use-module (gnu packages readline)
  87. #:use-module (gnu packages ruby)
  88. #:use-module (gnu packages serialization)
  89. #:use-module (gnu packages shells)
  90. #:use-module (gnu packages statistics)
  91. #:use-module (gnu packages swig)
  92. #:use-module (gnu packages tbb)
  93. #:use-module (gnu packages tex)
  94. #:use-module (gnu packages texinfo)
  95. #:use-module (gnu packages textutils)
  96. #:use-module (gnu packages time)
  97. #:use-module (gnu packages tls)
  98. #:use-module (gnu packages vim)
  99. #:use-module (gnu packages web)
  100. #:use-module (gnu packages xml)
  101. #:use-module (gnu packages xorg)
  102. #:use-module (srfi srfi-1)
  103. #:use-module (ice-9 match))
  104. (define-public r-ape
  105. (package
  106. (name "r-ape")
  107. (version "4.1")
  108. (source
  109. (origin
  110. (method url-fetch)
  111. (uri (cran-uri "ape" version))
  112. (sha256
  113. (base32
  114. "0959fiiy11rzfzrzaknmgrx64bhszj02l0ycz79k5a6bmpfzanlk"))))
  115. (build-system r-build-system)
  116. (propagated-inputs
  117. `(("r-lattice" ,r-lattice)
  118. ("r-nlme" ,r-nlme)))
  119. (home-page "http://ape-package.ird.fr/")
  120. (synopsis "Analyses of phylogenetics and evolution")
  121. (description
  122. "This package provides functions for reading, writing, plotting, and
  123. manipulating phylogenetic trees, analyses of comparative data in a
  124. phylogenetic framework, ancestral character analyses, analyses of
  125. diversification and macroevolution, computing distances from DNA sequences,
  126. and several other tools.")
  127. (license license:gpl2+)))
  128. (define-public aragorn
  129. (package
  130. (name "aragorn")
  131. (version "1.2.38")
  132. (source (origin
  133. (method url-fetch)
  134. (uri (string-append
  135. "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
  136. version ".tgz"))
  137. (sha256
  138. (base32
  139. "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
  140. (build-system gnu-build-system)
  141. (arguments
  142. `(#:tests? #f ; there are no tests
  143. #:phases
  144. (modify-phases %standard-phases
  145. (delete 'configure)
  146. (replace 'build
  147. (lambda _
  148. (zero? (system* "gcc"
  149. "-O3"
  150. "-ffast-math"
  151. "-finline-functions"
  152. "-o"
  153. "aragorn"
  154. (string-append "aragorn" ,version ".c")))))
  155. (replace 'install
  156. (lambda* (#:key outputs #:allow-other-keys)
  157. (let* ((out (assoc-ref outputs "out"))
  158. (bin (string-append out "/bin"))
  159. (man (string-append out "/share/man/man1")))
  160. (mkdir-p bin)
  161. (install-file "aragorn" bin)
  162. (mkdir-p man)
  163. (install-file "aragorn.1" man))
  164. #t)))))
  165. (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
  166. (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
  167. (description
  168. "Aragorn identifies transfer RNA, mitochondrial RNA and
  169. transfer-messenger RNA from nucleotide sequences, based on homology to known
  170. tRNA consensus sequences and RNA structure. It also outputs the secondary
  171. structure of the predicted RNA.")
  172. (license license:gpl2)))
  173. (define-public bamm
  174. (package
  175. (name "bamm")
  176. (version "1.7.3")
  177. (source (origin
  178. (method url-fetch)
  179. ;; BamM is not available on pypi.
  180. (uri (string-append
  181. "https://github.com/Ecogenomics/BamM/archive/"
  182. version ".tar.gz"))
  183. (file-name (string-append name "-" version ".tar.gz"))
  184. (sha256
  185. (base32
  186. "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
  187. (modules '((guix build utils)))
  188. (snippet
  189. `(begin
  190. ;; Delete bundled htslib.
  191. (delete-file-recursively "c/htslib-1.3.1")
  192. #t))))
  193. (build-system python-build-system)
  194. (arguments
  195. `(#:python ,python-2 ; BamM is Python 2 only.
  196. ;; Do not use bundled libhts. Do use the bundled libcfu because it has
  197. ;; been modified from its original form.
  198. #:configure-flags
  199. (let ((htslib (assoc-ref %build-inputs "htslib")))
  200. (list "--with-libhts-lib" (string-append htslib "/lib")
  201. "--with-libhts-inc" (string-append htslib "/include/htslib")))
  202. #:phases
  203. (modify-phases %standard-phases
  204. (add-after 'unpack 'autogen
  205. (lambda _
  206. (with-directory-excursion "c"
  207. (let ((sh (which "sh")))
  208. ;; Use autogen so that 'configure' works.
  209. (substitute* "autogen.sh" (("/bin/sh") sh))
  210. (setenv "CONFIG_SHELL" sh)
  211. (substitute* "configure" (("/bin/sh") sh))
  212. (zero? (system* "./autogen.sh"))))))
  213. (delete 'build)
  214. ;; Run tests after installation so compilation only happens once.
  215. (delete 'check)
  216. (add-after 'install 'wrap-executable
  217. (lambda* (#:key outputs #:allow-other-keys)
  218. (let* ((out (assoc-ref outputs "out"))
  219. (path (getenv "PATH")))
  220. (wrap-program (string-append out "/bin/bamm")
  221. `("PATH" ":" prefix (,path))))
  222. #t))
  223. (add-after 'wrap-executable 'post-install-check
  224. (lambda* (#:key inputs outputs #:allow-other-keys)
  225. (setenv "PATH"
  226. (string-append (assoc-ref outputs "out")
  227. "/bin:"
  228. (getenv "PATH")))
  229. (setenv "PYTHONPATH"
  230. (string-append
  231. (assoc-ref outputs "out")
  232. "/lib/python"
  233. (string-take (string-take-right
  234. (assoc-ref inputs "python") 5) 3)
  235. "/site-packages:"
  236. (getenv "PYTHONPATH")))
  237. ;; There are 2 errors printed, but they are safe to ignore:
  238. ;; 1) [E::hts_open_format] fail to open file ...
  239. ;; 2) samtools view: failed to open ...
  240. (zero? (system* "nosetests")))))))
  241. (native-inputs
  242. `(("autoconf" ,autoconf)
  243. ("automake" ,automake)
  244. ("libtool" ,libtool)
  245. ("zlib" ,zlib)
  246. ("python-nose" ,python2-nose)
  247. ("python-pysam" ,python2-pysam)))
  248. (inputs
  249. `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
  250. ("samtools" ,samtools)
  251. ("bwa" ,bwa)
  252. ("grep" ,grep)
  253. ("sed" ,sed)
  254. ("coreutils" ,coreutils)))
  255. (propagated-inputs
  256. `(("python-numpy" ,python2-numpy)))
  257. (home-page "http://ecogenomics.github.io/BamM/")
  258. (synopsis "Metagenomics-focused BAM file manipulator")
  259. (description
  260. "BamM is a C library, wrapped in python, to efficiently generate and
  261. parse BAM files, specifically for the analysis of metagenomic data. For
  262. instance, it implements several methods to assess contig-wise read coverage.")
  263. (license license:lgpl3+)))
  264. (define-public bamtools
  265. (package
  266. (name "bamtools")
  267. (version "2.4.1")
  268. (source (origin
  269. (method url-fetch)
  270. (uri (string-append
  271. "https://github.com/pezmaster31/bamtools/archive/v"
  272. version ".tar.gz"))
  273. (file-name (string-append name "-" version ".tar.gz"))
  274. (sha256
  275. (base32
  276. "0jr024kcrhjb82cm69i7p5fcg5375zlc1h3qh2n1v368hcd0qflk"))))
  277. (build-system cmake-build-system)
  278. (arguments
  279. `(#:tests? #f ;no "check" target
  280. #:phases
  281. (modify-phases %standard-phases
  282. (add-before
  283. 'configure 'set-ldflags
  284. (lambda* (#:key outputs #:allow-other-keys)
  285. (setenv "LDFLAGS"
  286. (string-append
  287. "-Wl,-rpath="
  288. (assoc-ref outputs "out") "/lib/bamtools")))))))
  289. (inputs `(("zlib" ,zlib)))
  290. (home-page "https://github.com/pezmaster31/bamtools")
  291. (synopsis "C++ API and command-line toolkit for working with BAM data")
  292. (description
  293. "BamTools provides both a C++ API and a command-line toolkit for handling
  294. BAM files.")
  295. (license license:expat)))
  296. (define-public bcftools
  297. (package
  298. (name "bcftools")
  299. (version "1.5")
  300. (source (origin
  301. (method url-fetch)
  302. (uri (string-append
  303. "https://github.com/samtools/bcftools/releases/download/"
  304. version "/bcftools-" version ".tar.bz2"))
  305. (sha256
  306. (base32
  307. "0093hkkvxmbwfaa7905s6185jymynvg42kq6sxv7fili11l5mxwz"))
  308. (patches (search-patches "bcftools-regidx-unsigned-char.patch"))
  309. (modules '((guix build utils)))
  310. (snippet
  311. ;; Delete bundled htslib.
  312. '(delete-file-recursively "htslib-1.5"))))
  313. (build-system gnu-build-system)
  314. (arguments
  315. `(#:test-target "test"
  316. #:configure-flags (list "--with-htslib=system")
  317. #:make-flags
  318. (list
  319. "USE_GPL=1"
  320. "LIBS=-lgsl -lgslcblas"
  321. (string-append "prefix=" (assoc-ref %outputs "out"))
  322. (string-append "HTSDIR=" (assoc-ref %build-inputs "htslib") "/include")
  323. (string-append "HTSLIB=" (assoc-ref %build-inputs "htslib") "/lib/libhts.so")
  324. (string-append "BGZIP=" (assoc-ref %build-inputs "htslib") "/bin/bgzip")
  325. (string-append "TABIX=" (assoc-ref %build-inputs "htslib") "/bin/tabix")
  326. (string-append "PACKAGE_VERSION=" ,version))
  327. #:phases
  328. (modify-phases %standard-phases
  329. (add-before 'check 'patch-tests
  330. (lambda _
  331. (substitute* "test/test.pl"
  332. (("/bin/bash") (which "bash")))
  333. #t)))))
  334. (native-inputs
  335. `(("htslib" ,htslib)
  336. ("perl" ,perl)))
  337. (inputs
  338. `(("gsl" ,gsl)
  339. ("zlib" ,zlib)))
  340. (home-page "https://samtools.github.io/bcftools/")
  341. (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
  342. (description
  343. "BCFtools is a set of utilities that manipulate variant calls in the
  344. Variant Call Format (VCF) and its binary counterpart BCF. All commands work
  345. transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
  346. ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
  347. (license (list license:gpl3+ license:expat))))
  348. (define-public bedops
  349. (package
  350. (name "bedops")
  351. (version "2.4.14")
  352. (source (origin
  353. (method url-fetch)
  354. (uri (string-append "https://github.com/bedops/bedops/archive/v"
  355. version ".tar.gz"))
  356. (file-name (string-append name "-" version ".tar.gz"))
  357. (sha256
  358. (base32
  359. "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
  360. (build-system gnu-build-system)
  361. (arguments
  362. '(#:tests? #f
  363. #:make-flags (list (string-append "BINDIR=" %output "/bin"))
  364. #:phases
  365. (alist-cons-after
  366. 'unpack 'unpack-tarballs
  367. (lambda _
  368. ;; FIXME: Bedops includes tarballs of minimally patched upstream
  369. ;; libraries jansson, zlib, and bzip2. We cannot just use stock
  370. ;; libraries because at least one of the libraries (zlib) is
  371. ;; patched to add a C++ function definition (deflateInit2cpp).
  372. ;; Until the Bedops developers offer a way to link against system
  373. ;; libraries we have to build the in-tree copies of these three
  374. ;; libraries.
  375. ;; See upstream discussion:
  376. ;; https://github.com/bedops/bedops/issues/124
  377. ;; Unpack the tarballs to benefit from shebang patching.
  378. (with-directory-excursion "third-party"
  379. (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
  380. (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
  381. (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
  382. ;; Disable unpacking of tarballs in Makefile.
  383. (substitute* "system.mk/Makefile.linux"
  384. (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
  385. (("\\./configure") "CONFIG_SHELL=bash ./configure"))
  386. (substitute* "third-party/zlib-1.2.7/Makefile.in"
  387. (("^SHELL=.*$") "SHELL=bash\n")))
  388. (alist-delete 'configure %standard-phases))))
  389. (home-page "https://github.com/bedops/bedops")
  390. (synopsis "Tools for high-performance genomic feature operations")
  391. (description
  392. "BEDOPS is a suite of tools to address common questions raised in genomic
  393. studies---mostly with regard to overlap and proximity relationships between
  394. data sets. It aims to be scalable and flexible, facilitating the efficient
  395. and accurate analysis and management of large-scale genomic data.
  396. BEDOPS provides tools that perform highly efficient and scalable Boolean and
  397. other set operations, statistical calculations, archiving, conversion and
  398. other management of genomic data of arbitrary scale. Tasks can be easily
  399. split by chromosome for distributing whole-genome analyses across a
  400. computational cluster.")
  401. (license license:gpl2+)))
  402. (define-public bedtools
  403. (package
  404. (name "bedtools")
  405. (version "2.26.0")
  406. (source (origin
  407. (method url-fetch)
  408. (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
  409. version ".tar.gz"))
  410. (file-name (string-append name "-" version ".tar.gz"))
  411. (sha256
  412. (base32
  413. "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
  414. (build-system gnu-build-system)
  415. (native-inputs `(("python" ,python-2)))
  416. (inputs `(("samtools" ,samtools)
  417. ("zlib" ,zlib)))
  418. (arguments
  419. '(#:test-target "test"
  420. #:phases
  421. (modify-phases %standard-phases
  422. (delete 'configure)
  423. (replace 'install
  424. (lambda* (#:key outputs #:allow-other-keys)
  425. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  426. (for-each (lambda (file)
  427. (install-file file bin))
  428. (find-files "bin" ".*")))
  429. #t)))))
  430. (home-page "https://github.com/arq5x/bedtools2")
  431. (synopsis "Tools for genome analysis and arithmetic")
  432. (description
  433. "Collectively, the bedtools utilities are a swiss-army knife of tools for
  434. a wide-range of genomics analysis tasks. The most widely-used tools enable
  435. genome arithmetic: that is, set theory on the genome. For example, bedtools
  436. allows one to intersect, merge, count, complement, and shuffle genomic
  437. intervals from multiple files in widely-used genomic file formats such as BAM,
  438. BED, GFF/GTF, VCF.")
  439. (license license:gpl2)))
  440. ;; Later releases of bedtools produce files with more columns than
  441. ;; what Ribotaper expects.
  442. (define-public bedtools-2.18
  443. (package (inherit bedtools)
  444. (name "bedtools")
  445. (version "2.18.0")
  446. (source (origin
  447. (method url-fetch)
  448. (uri (string-append "https://github.com/arq5x/bedtools2/"
  449. "archive/v" version ".tar.gz"))
  450. (file-name (string-append name "-" version ".tar.gz"))
  451. (sha256
  452. (base32
  453. "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
  454. (define-public ribotaper
  455. (package
  456. (name "ribotaper")
  457. (version "1.3.1")
  458. (source (origin
  459. (method url-fetch)
  460. (uri (string-append "https://ohlerlab.mdc-berlin.de/"
  461. "files/RiboTaper/RiboTaper_Version_"
  462. version ".tar.gz"))
  463. (sha256
  464. (base32
  465. "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
  466. (build-system gnu-build-system)
  467. (inputs
  468. `(("bedtools" ,bedtools-2.18)
  469. ("samtools" ,samtools-0.1)
  470. ("r-minimal" ,r-minimal)
  471. ("r-foreach" ,r-foreach)
  472. ("r-xnomial" ,r-xnomial)
  473. ("r-domc" ,r-domc)
  474. ("r-multitaper" ,r-multitaper)
  475. ("r-seqinr" ,r-seqinr)))
  476. (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
  477. (synopsis "Define translated ORFs using ribosome profiling data")
  478. (description
  479. "Ribotaper is a method for defining translated @dfn{open reading
  480. frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
  481. provides the Ribotaper pipeline.")
  482. (license license:gpl3+)))
  483. (define-public ribodiff
  484. (package
  485. (name "ribodiff")
  486. (version "0.2.2")
  487. (source
  488. (origin
  489. (method url-fetch)
  490. (uri (string-append "https://github.com/ratschlab/RiboDiff/"
  491. "archive/v" version ".tar.gz"))
  492. (file-name (string-append name "-" version ".tar.gz"))
  493. (sha256
  494. (base32
  495. "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
  496. (build-system python-build-system)
  497. (arguments
  498. `(#:python ,python-2
  499. #:phases
  500. (modify-phases %standard-phases
  501. ;; Generate an installable executable script wrapper.
  502. (add-after 'unpack 'patch-setup.py
  503. (lambda _
  504. (substitute* "setup.py"
  505. (("^(.*)packages=.*" line prefix)
  506. (string-append line "\n"
  507. prefix "scripts=['scripts/TE.py'],\n")))
  508. #t)))))
  509. (inputs
  510. `(("python-numpy" ,python2-numpy)
  511. ("python-matplotlib" ,python2-matplotlib)
  512. ("python-scipy" ,python2-scipy)
  513. ("python-statsmodels" ,python2-statsmodels)))
  514. (native-inputs
  515. `(("python-mock" ,python2-mock)
  516. ("python-nose" ,python2-nose)))
  517. (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
  518. (synopsis "Detect translation efficiency changes from ribosome footprints")
  519. (description "RiboDiff is a statistical tool that detects the protein
  520. translational efficiency change from Ribo-Seq (ribosome footprinting) and
  521. RNA-Seq data. It uses a generalized linear model to detect genes showing
  522. difference in translational profile taking mRNA abundance into account. It
  523. facilitates us to decipher the translational regulation that behave
  524. independently with transcriptional regulation.")
  525. (license license:gpl3+)))
  526. (define-public bioawk
  527. (package
  528. (name "bioawk")
  529. (version "1.0")
  530. (source (origin
  531. (method url-fetch)
  532. (uri (string-append "https://github.com/lh3/bioawk/archive/v"
  533. version ".tar.gz"))
  534. (file-name (string-append name "-" version ".tar.gz"))
  535. (sha256
  536. (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
  537. (build-system gnu-build-system)
  538. (inputs
  539. `(("zlib" ,zlib)))
  540. (native-inputs
  541. `(("bison" ,bison)))
  542. (arguments
  543. `(#:tests? #f ; There are no tests to run.
  544. ;; Bison must generate files, before other targets can build.
  545. #:parallel-build? #f
  546. #:phases
  547. (modify-phases %standard-phases
  548. (delete 'configure) ; There is no configure phase.
  549. (replace 'install
  550. (lambda* (#:key outputs #:allow-other-keys)
  551. (let* ((out (assoc-ref outputs "out"))
  552. (bin (string-append out "/bin"))
  553. (man (string-append out "/share/man/man1")))
  554. (mkdir-p man)
  555. (copy-file "awk.1" (string-append man "/bioawk.1"))
  556. (install-file "bioawk" bin)))))))
  557. (home-page "https://github.com/lh3/bioawk")
  558. (synopsis "AWK with bioinformatics extensions")
  559. (description "Bioawk is an extension to Brian Kernighan's awk, adding the
  560. support of several common biological data formats, including optionally gzip'ed
  561. BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
  562. also adds a few built-in functions and a command line option to use TAB as the
  563. input/output delimiter. When the new functionality is not used, bioawk is
  564. intended to behave exactly the same as the original BWK awk.")
  565. (license license:x11)))
  566. (define-public python2-pybedtools
  567. (package
  568. (name "python2-pybedtools")
  569. (version "0.6.9")
  570. (source (origin
  571. (method url-fetch)
  572. (uri (string-append
  573. "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
  574. version ".tar.gz"))
  575. (sha256
  576. (base32
  577. "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
  578. (build-system python-build-system)
  579. (arguments `(#:python ,python-2)) ; no Python 3 support
  580. (inputs
  581. `(("python-matplotlib" ,python2-matplotlib)))
  582. (propagated-inputs
  583. `(("bedtools" ,bedtools)
  584. ("samtools" ,samtools)))
  585. (native-inputs
  586. `(("python-cython" ,python2-cython)
  587. ("python-pyyaml" ,python2-pyyaml)
  588. ("python-nose" ,python2-nose)))
  589. (home-page "https://pythonhosted.org/pybedtools/")
  590. (synopsis "Python wrapper for BEDtools programs")
  591. (description
  592. "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
  593. which are widely used for genomic interval manipulation or \"genome algebra\".
  594. pybedtools extends BEDTools by offering feature-level manipulations from with
  595. Python.")
  596. (license license:gpl2+)))
  597. (define-public python-biom-format
  598. (package
  599. (name "python-biom-format")
  600. (version "2.1.6")
  601. (source
  602. (origin
  603. (method url-fetch)
  604. ;; Use GitHub as source because PyPI distribution does not contain
  605. ;; test data: https://github.com/biocore/biom-format/issues/693
  606. (uri (string-append "https://github.com/biocore/biom-format/archive/"
  607. version ".tar.gz"))
  608. (file-name (string-append name "-" version ".tar.gz"))
  609. (sha256
  610. (base32
  611. "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
  612. (build-system python-build-system)
  613. (propagated-inputs
  614. `(("python-numpy" ,python-numpy)
  615. ("python-scipy" ,python-scipy)
  616. ("python-future" ,python-future)
  617. ("python-click" ,python-click)
  618. ("python-h5py" ,python-h5py)
  619. ("python-pandas" ,python-pandas)))
  620. (native-inputs
  621. `(("python-nose" ,python-nose)))
  622. (home-page "http://www.biom-format.org")
  623. (synopsis "Biological Observation Matrix (BIOM) format utilities")
  624. (description
  625. "The BIOM file format is designed to be a general-use format for
  626. representing counts of observations e.g. operational taxonomic units, KEGG
  627. orthology groups or lipid types, in one or more biological samples
  628. e.g. microbiome samples, genomes, metagenomes.")
  629. (license license:bsd-3)
  630. (properties `((python2-variant . ,(delay python2-biom-format))))))
  631. (define-public python2-biom-format
  632. (let ((base (package-with-python2 (strip-python2-variant python-biom-format))))
  633. (package
  634. (inherit base)
  635. (arguments
  636. `(#:phases
  637. (modify-phases %standard-phases
  638. ;; Do not require the unmaintained pyqi library.
  639. (add-after 'unpack 'remove-pyqi
  640. (lambda _
  641. (substitute* "setup.py"
  642. (("install_requires.append\\(\"pyqi\"\\)") "pass"))
  643. #t)))
  644. ,@(package-arguments base))))))
  645. (define-public bioperl-minimal
  646. (let* ((inputs `(("perl-module-build" ,perl-module-build)
  647. ("perl-data-stag" ,perl-data-stag)
  648. ("perl-libwww" ,perl-libwww)
  649. ("perl-uri" ,perl-uri)))
  650. (transitive-inputs
  651. (map (compose package-name cadr)
  652. (delete-duplicates
  653. (concatenate
  654. (map (compose package-transitive-target-inputs cadr) inputs))))))
  655. (package
  656. (name "bioperl-minimal")
  657. (version "1.7.0")
  658. (source
  659. (origin
  660. (method url-fetch)
  661. (uri (string-append "https://github.com/bioperl/bioperl-live/"
  662. "archive/release-"
  663. (string-map (lambda (c)
  664. (if (char=? c #\.)
  665. #\- c)) version)
  666. ".tar.gz"))
  667. (sha256
  668. (base32
  669. "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
  670. (build-system perl-build-system)
  671. (arguments
  672. `(#:phases
  673. (modify-phases %standard-phases
  674. (add-after
  675. 'install 'wrap-programs
  676. (lambda* (#:key outputs #:allow-other-keys)
  677. ;; Make sure all executables in "bin" find the required Perl
  678. ;; modules at runtime. As the PERL5LIB variable contains also
  679. ;; the paths of native inputs, we pick the transitive target
  680. ;; inputs from %build-inputs.
  681. (let* ((out (assoc-ref outputs "out"))
  682. (bin (string-append out "/bin/"))
  683. (path (string-join
  684. (cons (string-append out "/lib/perl5/site_perl")
  685. (map (lambda (name)
  686. (assoc-ref %build-inputs name))
  687. ',transitive-inputs))
  688. ":")))
  689. (for-each (lambda (file)
  690. (wrap-program file
  691. `("PERL5LIB" ":" prefix (,path))))
  692. (find-files bin "\\.pl$"))
  693. #t))))))
  694. (inputs inputs)
  695. (native-inputs
  696. `(("perl-test-most" ,perl-test-most)))
  697. (home-page "http://search.cpan.org/dist/BioPerl")
  698. (synopsis "Bioinformatics toolkit")
  699. (description
  700. "BioPerl is the product of a community effort to produce Perl code which
  701. is useful in biology. Examples include Sequence objects, Alignment objects
  702. and database searching objects. These objects not only do what they are
  703. advertised to do in the documentation, but they also interact - Alignment
  704. objects are made from the Sequence objects, Sequence objects have access to
  705. Annotation and SeqFeature objects and databases, Blast objects can be
  706. converted to Alignment objects, and so on. This means that the objects
  707. provide a coordinated and extensible framework to do computational biology.")
  708. (license license:perl-license))))
  709. (define-public python-biopython
  710. (package
  711. (name "python-biopython")
  712. (version "1.68")
  713. (source (origin
  714. (method url-fetch)
  715. ;; use PyPi rather than biopython.org to ease updating
  716. (uri (pypi-uri "biopython" version))
  717. (sha256
  718. (base32
  719. "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
  720. (build-system python-build-system)
  721. (arguments
  722. `(#:phases
  723. (modify-phases %standard-phases
  724. (add-before 'check 'set-home
  725. ;; Some tests require a home directory to be set.
  726. (lambda _ (setenv "HOME" "/tmp") #t)))))
  727. (propagated-inputs
  728. `(("python-numpy" ,python-numpy)))
  729. (home-page "http://biopython.org/")
  730. (synopsis "Tools for biological computation in Python")
  731. (description
  732. "Biopython is a set of tools for biological computation including parsers
  733. for bioinformatics files into Python data structures; interfaces to common
  734. bioinformatics programs; a standard sequence class and tools for performing
  735. common operations on them; code to perform data classification; code for
  736. dealing with alignments; code making it easy to split up parallelizable tasks
  737. into separate processes; and more.")
  738. (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
  739. (define-public python2-biopython
  740. (package-with-python2 python-biopython))
  741. ;; An outdated version of biopython is required for seqmagick, see
  742. ;; https://github.com/fhcrc/seqmagick/issues/59
  743. ;; When that issue has been resolved this package should be removed.
  744. (define python2-biopython-1.66
  745. (package
  746. (inherit python2-biopython)
  747. (version "1.66")
  748. (source (origin
  749. (method url-fetch)
  750. (uri (pypi-uri "biopython" version))
  751. (sha256
  752. (base32
  753. "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
  ;; Bio++ core library.  Upstream's last release dates from 2014 and the
  ;; recommended way to install from source is to clone the git repository,
  ;; so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (define-public bpp-core
    (let ((upstream-commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
      (package
        (name "bpp-core")
        ;; The version embeds the first seven characters of the commit id.
        (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-core")
                 (commit upstream-commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
        (build-system cmake-build-system)
        (arguments
         (list #:parallel-build? #f))
        ;; Compilation of bpp-phyl fails with GCC 4.9, so all of the bpp
        ;; packages are compiled with GCC 5.
        (inputs
         (list (list "gcc" gcc-5)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "C++ libraries for Bioinformatics")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
        (license license:cecill-c))))
  ;; Bio++ phylogenetics library.  Upstream's last release dates from 2014 and
  ;; the recommended way to install from source is to clone the git
  ;; repository, so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (define-public bpp-phyl
    (let ((upstream-commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
      (package
        (name "bpp-phyl")
        ;; The version embeds the first seven characters of the commit id.
        (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
                 (commit upstream-commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
        (build-system cmake-build-system)
        (arguments
         (list #:parallel-build? #f
               ;; An out-of-source build does not copy the test data into
               ;; the build directory, which makes the tests fail.
               #:out-of-source? #f))
        (inputs
         (list (list "bpp-core" bpp-core)
               (list "bpp-seq" bpp-seq)
               ;; GCC 4.8 fails with an 'internal compiler error', so a more
               ;; modern GCC is used.
               (list "gcc" gcc-5)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ phylogenetic Library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
        (license license:cecill-c))))
  ;; Bio++ population genetics library.  Upstream's last release dates from
  ;; 2014 and the recommended way to install from source is to clone the git
  ;; repository, so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (define-public bpp-popgen
    (let ((upstream-commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
      (package
        (name "bpp-popgen")
        ;; The version embeds the first seven characters of the commit id.
        (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
                 (commit upstream-commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
        (build-system cmake-build-system)
        (arguments
         (list #:parallel-build? #f
               #:tests? #f))            ; There are no tests.
        (inputs
         (list (list "bpp-core" bpp-core)
               (list "bpp-seq" bpp-seq)
               (list "gcc" gcc-5)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ population genetics library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
        (license license:cecill-c))))
  ;; Bio++ sequence library.  Upstream's last release dates from 2014 and the
  ;; recommended way to install from source is to clone the git repository,
  ;; so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (define-public bpp-seq
    (let ((upstream-commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
      (package
        (name "bpp-seq")
        ;; The version embeds the first seven characters of the commit id.
        (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-seq")
                 (commit upstream-commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
        (build-system cmake-build-system)
        (arguments
         (list #:parallel-build? #f
               ;; An out-of-source build does not copy the test data into
               ;; the build directory, which makes the tests fail.
               #:out-of-source? #f))
        (inputs
         (list (list "bpp-core" bpp-core)
               (list "gcc" gcc-5)))     ; Use GCC 5 as per 'bpp-core'.
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ sequence library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
        (license license:cecill-c))))
  ;; Command-line tools built on the Bio++ libraries.  Upstream's last release
  ;; was in 2014 and the recommended way to install from source is to clone
  ;; the git repository, so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (define-public bppsuite
    (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
      (package
        (name "bppsuite")
        ;; The version embeds the first seven characters of the commit id.
        (version (string-append "2.2.0-1." (string-take commit 7)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "http://biopp.univ-montp2.fr/git/bppsuite")
                        (commit commit)))
                  (file-name (string-append name "-" version "-checkout"))
                  (sha256
                   (base32
                    "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f
           #:tests? #f))                ; There are no tests.
        (native-inputs
         `(("groff" ,groff)
           ("man-db" ,man-db)
           ("texinfo" ,texinfo)))
        (inputs
         `(("bpp-core" ,bpp-core)
           ("bpp-seq" ,bpp-seq)
           ("bpp-phyl" ,bpp-phyl)
           ;; Each input needs its own label; this one was previously
           ;; mislabelled "bpp-phyl", duplicating the entry above.
           ("bpp-popgen" ,bpp-popgen)
           ("gcc" ,gcc-5)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bioinformatics tools written with the Bio++ libraries")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
        (license license:cecill-c))))
  ;; NCBI BLAST+, built from the upstream source tarball with the bundled
  ;; bzip2, zlib and pcre removed.  The package is split into three outputs
  ;; ("out", "lib" and "include").
  (define-public blast+
    (package
      (name "blast+")
      (version "2.6.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
               version "/ncbi-blast-" version "+-src.tar.gz"))
         (sha256
          (base32
           "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
         (patches (search-patches "blast+-fix-makefile.patch"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove bundled bzip2, zlib and pcre.
             (for-each delete-file-recursively
                       '("c++/src/util/compress/bzip2"
                         "c++/src/util/compress/zlib"
                         "c++/src/util/regexp"))
             (substitute* "c++/src/util/compress/Makefile.in"
               (("bzip2 zlib api") "api"))
             ;; Remove useless msbuild directory
             (delete-file-recursively
              "c++/src/build-system/project_tree_builder/msbuild")
             #t))))
      (build-system gnu-build-system)
      (outputs '("out"                  ; 21 MB
                 "lib"                  ; 226 MB
                 "include"))            ; 33 MB
      (native-inputs
       (list (list "cpio" cpio)))
      (inputs
       (list (list "bzip2" bzip2)
             (list "zlib" zlib)
             (list "pcre" pcre)
             (list "perl" perl)
             (list "python" python-wrapper)))
      (arguments
       '(;; There are two(!) tests for this massive library, and both fail with
         ;; "unparsable timing stats".
         ;; ERR [127] --   [serial/datatool] datatool.sh (unparsable timing stats)
         ;; ERR [127] --   [serial/datatool] datatool_xml.sh (unparsable timing stats)
         #:tests? #f
         #:out-of-source? #t
         #:parallel-build? #f           ; not supported
         #:phases
         (modify-phases %standard-phases
           ;; The build system lives in the "c++" sub-directory.
           (add-after 'unpack 'enter-dir
             (lambda _
               (chdir "c++")
               #t))
           (add-after 'enter-dir 'fix-build-system
             (lambda _
               ;; Return the replacement text for COMMAND: a fixed "date"
               ;; invocation, the absolute path found by 'which', or #f
               ;; (after printing a warning) when the command is not found.
               (define (resolve-command command)
                 (if (string=? command "date")
                     ;; make call to "date" deterministic
                     "date -d @0"
                     (or (which command)
                         (begin
                           (format (current-error-port)
                                   "WARNING: Unable to find absolute path for ~s~%"
                                   command)
                           #f))))

               ;; Rewrite hardcoded paths to various tools
               (let ((build-system-files
                      '("src/build-system/configure.ac"
                        "src/build-system/configure"
                        "src/build-system/helpers/run_with_lock.c"
                        "scripts/common/impl/if_diff.sh"
                        "scripts/common/impl/run_with_lock.sh"
                        "src/build-system/Makefile.configurables.real"
                        "src/build-system/Makefile.in.top"
                        "src/build-system/Makefile.meta.gmake=no"
                        "src/build-system/Makefile.meta.in"
                        "src/build-system/Makefile.meta_l"
                        "src/build-system/Makefile.meta_p"
                        "src/build-system/Makefile.meta_r"
                        "src/build-system/Makefile.mk.in"
                        "src/build-system/Makefile.requirements"
                        "src/build-system/Makefile.rules_with_autodep.in"))
                     (check-scripts
                      (find-files "scripts/common/check" "\\.sh$")))
                 (substitute* (append build-system-files check-scripts)
                   (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory command)
                    ;; Keep the original text when no replacement is known.
                    (or (resolve-command command) whole))))

               (substitute* (find-files "src/build-system" "^config.*")
                 (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
                 (("^PATH=.*") ""))

               ;; rewrite "/var/tmp" in check script
               (substitute* "scripts/common/check/check_make_unix.sh"
                 (("/var/tmp") "/tmp"))

               ;; do not reset PATH
               (substitute* (find-files "scripts/common/impl/" "\\.sh$")
                 (("^ *PATH=.*") "")
                 (("action=/bin/") "action=")
                 (("export PATH") ":"))
               #t))
           ;; $HOME needs to be set at some point during the configure phase
           (add-before 'configure 'set-HOME
             (lambda _
               (setenv "HOME" "/tmp")
               #t))
           (replace 'configure
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((prefix-dir  (assoc-ref outputs "out"))
                      (lib-dir     (string-append (assoc-ref outputs "lib")
                                                  "/lib"))
                      (include-dir (string-append (assoc-ref outputs "include")
                                                  "/include/ncbi-tools++"))
                      (configure-flags
                       (list
                        (string-append "--with-build-root=" (getcwd) "/build")
                        (string-append "--prefix=" prefix-dir)
                        (string-append "--libdir=" lib-dir)
                        (string-append "--includedir=" include-dir)
                        (string-append "--with-bz2="
                                       (assoc-ref inputs "bzip2"))
                        (string-append "--with-z="
                                       (assoc-ref inputs "zlib"))
                        (string-append "--with-pcre="
                                       (assoc-ref inputs "pcre"))
                        ;; Each library is built twice by default, once
                        ;; with "-static" in its name, and again
                        ;; without.
                        "--without-static"
                        "--with-dll")))
                 ;; The 'configure' script doesn't recognize things like
                 ;; '--enable-fast-install'.
                 (zero? (apply system* "./configure.orig"
                               configure-flags))))))))
      (home-page "http://blast.ncbi.nlm.nih.gov")
      (synopsis "Basic local alignment search tool")
      (description
       "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
      ;; Most of the sources are in the public domain, with the following
      ;; exceptions:
      ;;   * Expat:
      ;;     * ./c++/include/util/bitset/
      ;;     * ./c++/src/html/ncbi_menu*.js
      ;;   * Boost license:
      ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
      ;;   * LGPL 2+:
      ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
      ;;   * ASL 2.0:
      ;;     * ./c++/src/corelib/teamcity_*
      (license (list license:public-domain
                     license:expat
                     license:boost1.0
                     license:lgpl2.0+
                     license:asl2.0))))
  ;; BLESS error-correction tool for sequencing reads.  The tarball bundles
  ;; several third-party sources; those that are packaged separately are
  ;; deleted in the snippet, and the bundled kmc binary is installed alongside
  ;; 'bless'.
  (define-public bless
    (package
      (name "bless")
      (version "1p02")
      (source (origin
                (method url-fetch)
                (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                    version ".tgz"))
                (sha256
                 (base32
                  "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
                (modules '((guix build utils)))
                (snippet
                 `(begin
                    ;; Remove bundled boost, pigz, sparsehash, zlib, and the
                    ;; .git directory.
                    ;; FIXME: also remove bundled sources for murmurhash3 and
                    ;; kmc once packaged.
                    (delete-file-recursively "boost")
                    (delete-file-recursively "pigz")
                    (delete-file-recursively "google-sparsehash")
                    (delete-file-recursively "zlib")
                    (delete-file-recursively ".git")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ;no "check" target
         #:make-flags
         (list (string-append "ZLIB="
                              (assoc-ref %build-inputs "zlib")
                              "/lib/libz.a")
               (string-append "LDFLAGS="
                              (string-join '("-lboost_filesystem"
                                             "-lboost_system"
                                             "-lboost_iostreams"
                                             "-lz"
                                             "-fopenmp"
                                             "-std=c++11"))))
         #:phases
         (modify-phases %standard-phases
           ;; The bundled pigz sources were deleted in the snippet, so the
           ;; Makefile must not try to build them.
           (add-after 'unpack 'do-not-build-bundled-pigz
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (substitute* "Makefile"
                 (("cd pigz/pigz-2.3.3; make") ""))
               #t))
           ;; Hard-code absolute paths to the helper programs: kmc is
           ;; installed into this package's own "out", pigz comes from the
           ;; "pigz" input.
           (add-after 'unpack 'patch-paths-to-executables
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (substitute* "parse_args.cpp"
                 (("kmc_binary = .*")
                  (string-append "kmc_binary = \""
                                 (assoc-ref outputs "out")
                                 "/bin/kmc\";"))
                 (("pigz_binary = .*")
                  (string-append "pigz_binary = \""
                                 (assoc-ref inputs "pigz")
                                 "/bin/pigz\";")))
               #t))
           ;; Copy the two executables into "out"/bin by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           '("bless" "kmc/bin/kmc"))
                 #t)))
           (delete 'configure))))
      (native-inputs
       `(("perl" ,perl)))
      (inputs
       `(("openmpi" ,openmpi)
         ("boost" ,boost)
         ("sparsehash" ,sparsehash)
         ("pigz" ,pigz)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
      (synopsis "Bloom-filter-based error correction tool for NGS reads")
      (description
       "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
      (license license:gpl3+)))
  ;; Bowtie 2 read aligner, built from the GitHub release tarball with its
  ;; Makefile (there is no configure script) and checked with upstream's Perl
  ;; test script.
  (define-public bowtie
    (package
      (name "bowtie")
      (version "2.3.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/BenLangmead/bowtie2/archive/v"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
         (modules '((guix build utils)))
         (snippet
          ;; replace BUILD_HOST and BUILD_TIME for deterministic build
          '(substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags
         (list "allall"
               "WITH_TBB=1"
               (string-append "prefix=" (assoc-ref %outputs "out")))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Run the simple test suite against the freshly built binaries
           ;; in the build directory.
           (replace 'check
             (lambda _
               (let ((test-command
                      (list "perl"
                            "scripts/test/simple_tests.pl"
                            "--bowtie2=./bowtie2"
                            "--bowtie2-build=./bowtie2-build")))
                 (zero? (apply system* test-command))))))))
      (inputs
       (list (list "perl" perl)
             (list "perl-clone" perl-clone)
             (list "perl-test-deep" perl-test-deep)
             (list "perl-test-simple" perl-test-simple)
             (list "python" python-2)
             (list "tbb" tbb)
             (list "zlib" zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
      (synopsis "Fast and sensitive nucleotide sequence read aligner")
      (description
       "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
      (license license:gpl3+)))
  ;; TopHat spliced read mapper.  The bundled SeqAn and samtools copies are
  ;; removed from the source, and the build is patched to use the samtools
  ;; 0.1 and seqan inputs instead.
  (define-public tophat
    (package
      (name "tophat")
      (version "2.1.0")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                      version ".tar.gz"))
                (sha256
                 (base32
                  "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
                (patches (search-patches "tophat-build-with-later-seqan.patch"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Remove bundled SeqAn and samtools
                    (delete-file-recursively "src/SeqAn-1.3")
                    (delete-file-recursively "src/samtools-0.1.18")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       '(#:parallel-build? #f           ; not supported
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-system-samtools
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Drop every Makefile rule that would build the (deleted)
               ;; bundled samtools.
               (substitute* "src/Makefile.in"
                 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
                 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
                 (("SAMPROG = samtools_0\\.1\\.18") "")
                 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
                 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
               ;; Point references to the bundled binary name at the samtools
               ;; found in PATH.  The dots are escaped so that only the
               ;; literal name "samtools_0.1.18" is matched.
               (substitute* '("src/common.cpp"
                              "src/tophat.py")
                 (("samtools_0\\.1\\.18") (which "samtools")))
               ;; Include the samtools headers from their installed location.
               (substitute* '("src/common.h"
                              "src/bam2fastx.cpp")
                 (("#include \"bam.h\"") "#include <samtools/bam.h>")
                 (("#include \"sam.h\"") "#include <samtools/sam.h>"))
               (substitute* '("src/bwt_map.h"
                              "src/map2gtf.h"
                              "src/align_status.h")
                 (("#include <bam.h>") "#include <samtools/bam.h>")
                 (("#include <sam.h>") "#include <samtools/sam.h>"))
               #t)))))
      (inputs
       `(("boost" ,boost)
         ("bowtie" ,bowtie)
         ("samtools" ,samtools-0.1)
         ("ncurses" ,ncurses)
         ("python" ,python-2)
         ("perl" ,perl)
         ("zlib" ,zlib)
         ("seqan" ,seqan)))
      (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
      (synopsis "Spliced read mapper for RNA-Seq data")
      (description
       "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
      ;; TopHat is released under the Boost Software License, Version 1.0
      ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
      (license license:boost1.0)))
  ;; BWA sequence aligner.  Upstream ships a plain Makefile without
  ;; "configure", "check" or "install" targets, so the configure phase is
  ;; removed and the install phase copies the files by hand.
  (define-public bwa
    (package
      (name "bwa")
      (version "0.7.15")
      (source (origin
                (method url-fetch)
                (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                    version ".tar.bz2"))
                (sha256
                 (base32
                  "0585ikg0gv0mpyw9iq0bq9n0hr95867bbv8jbzs9pk4slkpsymig"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ;no "check" target
         #:phases
         ;; Use 'modify-phases', like the other packages in this file, rather
         ;; than the legacy 'alist-replace'/'alist-delete' procedures.
         (modify-phases %standard-phases
           (delete 'configure)          ;no "configure" script
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (doc (string-append out "/share/doc/bwa"))
                      (man (string-append out "/share/man/man1")))
                 (install-file "bwa" bin)
                 (install-file "README.md" doc)
                 (install-file "bwa.1" man)
                 ;; Report success explicitly instead of relying on the
                 ;; return value of 'install-file'.
                 #t))))))
      (inputs `(("zlib" ,zlib)))
      ;; Non-portable SSE instructions are used so building fails on platforms
      ;; other than x86_64.
      (supported-systems '("x86_64-linux"))
      (home-page "http://bio-bwa.sourceforge.net/")
      (synopsis "Burrows-Wheeler sequence aligner")
      (description
       "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
      (license license:gpl3+)))
  ;; BWA-PSSM, a modification of BWA.  The package inherits from 'bwa', so
  ;; every field not overridden here (including the build arguments) is taken
  ;; from that package.
  (define-public bwa-pssm
    (package
      (inherit bwa)
      (name "bwa-pssm")
      (version "0.5.11")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                             "archive/" version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
      (build-system gnu-build-system)
      (inputs
       (list (list "gdsl" gdsl)
             (list "zlib" zlib)
             (list "perl" perl)))
      (home-page "http://bwa-pssm.binf.ku.dk/")
      (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
      (description
       "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptible in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
      (license license:gpl3+)))
  ;; bx-python, built for Python 2 from the PyPI release.
  (define-public python2-bx-python
    (package
      (name "python2-bx-python")
      (version "0.7.3")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "bx-python" version))
         (sha256
          (base32
           "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
         (modules '((guix build utils)))
         (snippet
          ;; remove dependency on outdated "distribute" module
          '(substitute* "setup.py"
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")))))
      (build-system python-build-system)
      (arguments
       (list #:tests? #f        ;tests fail because test data are not included
             #:python python-2))
      (native-inputs
       (list (list "python-nose" python2-nose)))
      (inputs
       (list (list "python-numpy" python2-numpy)
             (list "zlib" zlib)))
      (home-page "http://bitbucket.org/james_taylor/bx-python/")
      (synopsis "Tools for manipulating biological data")
      (description
       "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
      (license license:expat)))
  ;; pysam, built from the GitHub tarball against the external htslib input
  ;; instead of the bundled copy.
  (define-public python-pysam
    (package
      (name "python-pysam")
      (version "0.11.2.2")
      (source
       (origin
         (method url-fetch)
         ;; Test data is missing on PyPi.
         (uri (string-append
               "https://github.com/pysam-developers/pysam/archive/v"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1cfqdxsqs3xhacns9n0271ck6wkc76px66ddjm91wfw2jxxfklvc"))
         (modules '((guix build utils)))
         (snippet
          ;; Drop bundled htslib.  TODO: Also remove samtools and bcftools.
          '(delete-file-recursively "htslib"))))
      (build-system python-build-system)
      (arguments
       '(#:modules ((ice-9 ftw)
                    (srfi srfi-26)
                    (guix build python-build-system)
                    (guix build utils))
         #:phases
         (modify-phases %standard-phases
           ;; Tell pysam's build to use the external htslib and pass the
           ;; ncurses-related compiler and linker flags.
           (add-before 'build 'set-flags
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib (assoc-ref inputs "htslib")))
                 (setenv "HTSLIB_MODE" "external")
                 (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
                 (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include"))
                 (setenv "LDFLAGS" "-lncurses")
                 (setenv "CFLAGS" "-D_CURSES_LIB=1")
                 #t)))
           (replace 'check
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((visible? (lambda (entry)
                                  (not (string-prefix? "." entry))))
                      (build-subdir (car (scandir "build" visible?))))
                 ;; Add first subdirectory of "build" directory to PYTHONPATH.
                 (setenv "PYTHONPATH"
                         (string-append (getenv "PYTHONPATH")
                                        ":" (getcwd) "/build/" build-subdir))
                 ;; Step out of source dir so python does not import from CWD.
                 (with-directory-excursion "tests"
                   (setenv "HOME" "/tmp")
                   (and (zero? (system* "make" "-C" "pysam_data"))
                        (zero? (system* "make" "-C" "cbcf_data"))
                        ;; Running nosetests without explicitly asking for a
                        ;; single process leads to a crash.  Running with
                        ;; multiple processes fails because the tests are not
                        ;; designed to run in parallel.

                        ;; FIXME: tests keep timing out on some systems.
                        ;; (zero? (system* "nosetests" "-v"
                        ;;                 "--processes" "1"))
                        ))))))))
      (propagated-inputs
       (list (list "htslib" htslib)))   ; Included from installed header files.
      (inputs
       (list (list "ncurses" ncurses)
             (list "zlib" zlib)))
      (native-inputs
       (list (list "python-cython" python-cython)
             ;; Dependencies below are for tests only.
             (list "samtools" samtools)
             (list "bcftools" bcftools)
             (list "python-nose" python-nose)))
      (home-page "https://github.com/pysam-developers/pysam")
      (synopsis "Python bindings to the SAMtools C API")
      (description
       "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
      (license license:expat)))
  ;; Python 2 variant of pysam, derived from 'python-pysam' with
  ;; 'package-with-python2'.
  (define-public python2-pysam
    (package-with-python2
     python-pysam))
  ;; twobitreader, built from the PyPI release.
  (define-public python-twobitreader
    (package
      (name "python-twobitreader")
      (version "3.1.4")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "twobitreader" version))
         (sha256
          (base32
           "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
      (build-system python-build-system)
      (arguments
       ;; Tests are not distributed in the PyPi release.
       ;; TODO Try building from the Git repo or asking the upstream maintainer
       ;; to distribute the tests on PyPi.
       (list #:tests? #f))
      (native-inputs
       (list (list "python-sphinx" python-sphinx)))
      (home-page "https://github.com/benjschiller/twobitreader")
      (synopsis "Python library for reading .2bit files")
      (description
       "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
      (license license:artistic2.0)))
  ;; Python 2 variant of twobitreader, derived from 'python-twobitreader' with
  ;; 'package-with-python2'.
  (define-public python2-twobitreader
    (package-with-python2
     python-twobitreader))
  ;; plastid, built from the PyPI release; its Python dependencies are
  ;; propagated.
  (define-public python-plastid
    (package
      (name "python-plastid")
      (version "0.4.8")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "plastid" version))
         (sha256
          (base32
           "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
      (build-system python-build-system)
      (arguments
       ;; Some test files are not included.
       (list #:tests? #f))
      (native-inputs
       (list (list "python-cython" python-cython)
             (list "python-nose" python-nose)))
      (propagated-inputs
       (list (list "python-numpy" python-numpy)
             (list "python-scipy" python-scipy)
             (list "python-pandas" python-pandas)
             (list "python-pysam" python-pysam)
             (list "python-matplotlib" python-matplotlib)
             (list "python-biopython" python-biopython)
             (list "python-twobitreader" python-twobitreader)
             (list "python-termcolor" python-termcolor)))
      (home-page "https://github.com/joshuagryphon/plastid")
      (synopsis "Python library for genomic analysis")
      (description
       "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
      (license license:bsd-3)))
  ;; Python 2 variant of plastid, derived from 'python-plastid' with
  ;; 'package-with-python2'.
  (define-public python2-plastid
    (package-with-python2
     python-plastid))
  ;; CD-HIT sequence clustering tool, built from the upstream release tarball
  ;; with its Makefile (there is no configure script).
  (define-public cd-hit
    (package
      (name "cd-hit")
      (version "4.6.8")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/weizhongli/cdhit"
                             "/releases/download/V" version
                             "/cd-hit-v" version
                             "-2017-0621-source.tar.gz"))
         (sha256
          (base32
           "1386dg2npx8p62wmv08mjzsd2z3waknb9j1gg3gkvblcy57hymnn"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; there are no tests
         #:make-flags
         ;; Executables are copied directly to the PREFIX.
         (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
         #:phases
         (modify-phases %standard-phases
           ;; No "configure" script
           (delete 'configure)
           ;; Remove sources of non-determinism
           (add-after 'unpack 'be-timeless
             (lambda _
               (substitute* "cdhit-utility.c++"
                 ((" \\(built on \" __DATE__ \"\\)") ""))
               (substitute* "cdhit-common.c++"
                 (("__DATE__") "\"0\"")
                 (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
               #t))
           ;; The "install" target does not create the target directory.
           (add-before 'install 'create-target-dir
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin-dir (string-append (assoc-ref outputs "out")
                                             "/bin")))
                 (mkdir-p bin-dir)
                 #t))))))
      (inputs
       (list (list "perl" perl)))
      (home-page "http://weizhongli-lab.org/cd-hit/")
      (synopsis "Cluster and compare protein or nucleotide sequences")
      (description
       "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
      ;; The manual says: "It can be copied under the GNU General Public License
      ;; version 2 (GPLv2)."
      (license license:gpl2)))
  ;; CLIPper: Python 2 only; the source snippet strips pre-built artifacts
  ;; from the tarball before building.
  (define-public clipper
    (package
      (name "clipper")
      (version "1.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/YeoLab/clipper/archive/"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove the unnecessary setup dependency.
             (substitute* "setup.py"
               (("setup_requires = .*") ""))
             ;; Delete the shared objects and the "dist" directory that are
             ;; included in the tarball.
             (delete-file "clipper/src/peaks.so")
             (delete-file "clipper/src/readsToWiggle.so")
             (delete-file-recursively "dist/")
             #t))))
      (build-system python-build-system)
      ;; Only Python 2 is supported.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("htseq" ,python2-htseq)
         ("python-pybedtools" ,python2-pybedtools)
         ("python-cython" ,python2-cython)
         ("python-scikit-learn" ,python2-scikit-learn)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-pysam" ,python2-pysam)
         ("python-numpy" ,python2-numpy)
         ("python-scipy" ,python2-scipy)))
      ;; All of the native inputs are needed for the tests only.
      (native-inputs
       `(("python-mock" ,python2-mock)
         ("python-nose" ,python2-nose)
         ("python-pytz" ,python2-pytz)))
      (synopsis "CLIP peak enrichment recognition")
      (description
       "CLIPper is a tool to define peaks in CLIP-seq datasets.")
      (home-page "https://github.com/YeoLab/clipper")
      (license license:gpl2)))
  ;; CodingQuarry: built with plain "make" (no configure script, no "check"
  ;; target) and installed by hand because the 'install' phase is replaced.
  (define-public codingquarry
    (package
      (name "codingquarry")
      (version "2.0")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "mirror://sourceforge/codingquarry/CodingQuarry_v"
                      version ".tar.gz"))
                (sha256
                 (base32
                  "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (doc (string-append out "/share/doc/codingquarry")))
                 (install-file "INSTRUCTIONS.pdf" doc)
                 ;; "QuarryFiles" goes to the top of the output; this is the
                 ;; directory name that the QUARRY_PATH search path
                 ;; specification below looks for.
                 (copy-recursively "QuarryFiles"
                                   (string-append out "/QuarryFiles"))
                 (install-file "CodingQuarry" bin)
                 (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
                 ;; Return an explicit true value instead of the unspecified
                 ;; result of 'install-file', like the other phases do.
                 #t))))))
      (inputs `(("openmpi" ,openmpi)))
      (native-search-paths
       (list (search-path-specification
              (variable "QUARRY_PATH")
              (files '("QuarryFiles")))))
      (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
      (synopsis "Fungal gene predictor")
      (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
      (home-page "https://sourceforge.net/projects/codingquarry/")
      (license license:gpl3+)))
  ;; COUGER: the sources are only copied into the output and the launcher
  ;; script is wrapped so that it sees the build-time PYTHONPATH.
  (define-public couger
    (package
      (name "couger")
      (version "1.8.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://couger.oit.duke.edu/static/assets/COUGER"
               version ".zip"))
         (sha256
          (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out-dir (assoc-ref outputs "out"))
                      (bin-dir (string-append out-dir "/bin")))
                 (copy-recursively "src" (string-append out-dir "/src"))
                 (mkdir bin-dir)
                 ;; Append the output directory (the parent of "src") to
                 ;; Python's module lookup path inside the launcher script.
                 (substitute* "couger"
                   (("from argparse")
                    (string-append "import sys\nsys.path.append(\""
                                   out-dir "\")\nfrom argparse")))
                 (install-file "couger" bin-dir))
               #t))
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Make sure 'couger' runs with the correct PYTHONPATH.
               (let ((launcher (string-append (assoc-ref outputs "out")
                                              "/bin/couger"))
                     (python-path (getenv "PYTHONPATH")))
                 (wrap-program launcher
                   (list "PYTHONPATH" ":" 'prefix (list python-path))))
               #t)))))
      (inputs
       `(("python" ,python-2)
         ("python2-pillow" ,python2-pillow)
         ("python2-numpy" ,python2-numpy)
         ("python2-scipy" ,python2-scipy)
         ("python2-matplotlib" ,python2-matplotlib)))
      (propagated-inputs
       `(("r-minimal" ,r-minimal)
         ("libsvm" ,libsvm)
         ("randomjungle" ,randomjungle)))
      ;; The source is distributed as a zip archive.
      (native-inputs
       `(("unzip" ,unzip)))
      (synopsis "Identify co-factors in sets of genomic regions")
      (description
       "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.
COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
      (home-page "http://couger.oit.duke.edu")
      (license license:gpl3+)))
  ;; Clustal-Omega: a plain gnu-build-system package with no custom
  ;; arguments; argtable is its only declared input.
  (define-public clustal-omega
    (package
      (name "clustal-omega")
      (version "1.2.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                             version ".tar.gz"))
         (sha256
          (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
      (build-system gnu-build-system)
      (inputs
       `(("argtable" ,argtable)))
      (synopsis "Multiple sequence aligner for protein and DNA/RNA")
      (description
       "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
      (home-page "http://www.clustal.org/omega/")
      (license license:gpl2+)))
  ;; CrossMap: Python 2 package built against the system pysam; the bundled
  ;; copy of pysam is deleted from the source.
  (define-public crossmap
    (package
      (name "crossmap")
      (version "0.2.1")
      (source (origin
                (method url-fetch)
                (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                    version ".tar.gz"))
                (sha256
                 (base32
                  "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
                ;; This patch has been sent upstream already and is available
                ;; for download from Sourceforge, but it has not been merged.
                (patches (search-patches "crossmap-allow-system-pysam.patch"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Remove the bundled copy of pysam.
                    (delete-file-recursively "lib/pysam")
                    ;; Return an explicit true value, like the other
                    ;; snippets in this file.
                    #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         ;; Use 'modify-phases' like the rest of the file rather than the
         ;; legacy 'alist-cons-after' interface.
         (modify-phases %standard-phases
           (add-after 'unpack 'set-env
             (lambda _
               ;; Set the variable that selects the system pysam
               ;; (presumably consulted by the patch above -- confirm).
               (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-pysam" ,python2-pysam)
         ("zlib" ,zlib)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (home-page "http://crossmap.sourceforge.net/")
      (synopsis "Convert genome coordinates between assemblies")
      (description
       "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
      (license license:gpl2+)))
  ;; Cufflinks: built against samtools 0.1, whose headers are found under
  ;; "samtools/" rather than "bam/".
  (define-public cufflinks
    (package
      (name "cufflinks")
      (version "2.2.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://cole-trapnell-lab.github.io/"
               "cufflinks/assets/downloads/cufflinks-" version ".tar.gz"))
         (sha256
          (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags
         (let ((samtools (assoc-ref %build-inputs "samtools")))
           (list (string-append "--with-bam=" samtools)))
         #:make-flags
         (let ((eigen (assoc-ref %build-inputs "eigen")))
           (list
            ;; The includes for "eigen" are located in a subdirectory.
            (string-append "EIGEN_CPPFLAGS=-I" eigen "/include/eigen3/")
            ;; Cufflinks must be linked with various boost libraries.
            "LDFLAGS=-lboost_system -lboost_serialization -lboost_thread"))
         #:phases
         (modify-phases %standard-phases
           ;; Rewrite the "bam/" include paths to their "samtools/"
           ;; counterparts in the build machinery and the sources.
           (add-after 'unpack 'fix-search-for-bam
             (lambda _
               (substitute* '("ax_bam.m4"
                              "configure"
                              "src/hits.h")
                 (("<bam/sam\\.h>") "<samtools/sam.h>")
                 (("<bam/bam\\.h>") "<samtools/bam.h>")
                 (("<bam/version\\.hpp>") "<samtools/version.h>"))
               #t)))))
      (inputs
       `(("eigen" ,eigen)
         ("samtools" ,samtools-0.1)
         ("htslib" ,htslib)
         ("boost" ,boost)
         ("python" ,python-2)
         ("zlib" ,zlib)))
      (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
      (description
       "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
      (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
      (license license:boost1.0)))
  ;; Cutadapt: the test suite is run against the installed package, so the
  ;; standard 'check' phase is moved after 'install'.
  (define-public cutadapt
    (package
      (name "cutadapt")
      (version "1.12")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "https://github.com/marcelm/cutadapt/archive/v"
                      version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "19smhh6444ikn4jlmyhvffw4m5aw7yg07rqsk7arg8dkwyga1i4v"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The tests must be run after installation.
           (delete 'check)
           (add-after 'install 'check
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Put the freshly installed site-packages directory on
               ;; PYTHONPATH.  The build system helper derives the Python
               ;; version from the "python" input, replacing the former
               ;; slicing of the last five characters of its store path,
               ;; which only worked for single-digit version components.
               (add-installed-pythonpath inputs outputs)
               (zero? (system* "nosetests" "-P" "tests")))))))
      (inputs
       `(("python-xopen" ,python-xopen)))
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-nose" ,python-nose)))
      (home-page "https://cutadapt.readthedocs.io/en/stable/")
      (synopsis "Remove adapter sequences from nucleotide sequencing reads")
      (description
       "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
      (license license:expat)))
  ;; libBigWig: Makefile-only build; the test target is "test" and the
  ;; remote test invocation is removed from the Makefile before checking.
  (define-public libbigwig
    (package
      (name "libbigwig")
      (version "0.1.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/dpryan79/libBigWig/archive/"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list "CC=gcc"
                 (string-append "prefix=" out)))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Drop the "testRemote" invocation from the Makefile.
           (add-before 'check 'disable-curl-test
             (lambda _
               (substitute* "Makefile"
                 (("./test/testRemote.*") ""))
               #t))
           ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
           ;; there has not yet been a release containing this change.
           (add-before 'install 'create-target-dirs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix-dir (assoc-ref outputs "out")))
                 (for-each (lambda (sub-dir)
                             (mkdir-p (string-append prefix-dir sub-dir)))
                           '("/lib" "/include"))
                 #t))))))
      (inputs
       `(("zlib" ,zlib)
         ("curl" ,curl)))
      (native-inputs
       `(("doxygen" ,doxygen)))
      (synopsis "C library for handling bigWig files")
      (description
       "This package provides a C library for parsing local and remote BigWig
files.")
      (home-page "https://github.com/dpryan79/libBigWig")
      (license license:expat)))
  ;; pyBigWig: Python bindings built against the separately packaged
  ;; libbigwig instead of the bundled libBigWig sources.
  (define-public python-pybigwig
    (package
      (name "python-pybigwig")
      (version "0.2.5")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "pyBigWig" version))
                (sha256
                 (base32
                  "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Delete bundled libBigWig sources
                    (delete-file-recursively "libBigWig")
                    ;; Return an explicit true value, like the other
                    ;; snippets in this file.
                    #t))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Add "BigWig" to the list of libraries in setup.py so that the
           ;; extension links against the libbigwig input.
           (add-after 'unpack 'link-with-libBigWig
             (lambda _
               (substitute* "setup.py"
                 (("libs=\\[") "libs=[\"BigWig\", "))
               #t)))))
      (inputs
       `(("libbigwig" ,libbigwig)
         ("zlib" ,zlib)
         ("curl" ,curl)))
      (home-page "https://github.com/dpryan79/pyBigWig")
      (synopsis "Access bigWig files in Python using libBigWig")
      (description
       "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
      (license license:expat)))
  ;; Variant of python-pybigwig obtained through 'package-with-python2'.
  (define-public python2-pybigwig
    (package-with-python2
     python-pybigwig))
  ;; DendroPy: the 'python2-variant' property points at the hand-written
  ;; python2-dendropy package defined in this file.
  (define-public python-dendropy
    (package
      (name "python-dendropy")
      (version "4.2.0")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "DendroPy" version))
         (sha256
          (base32 "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))
         (patches (search-patches "python-dendropy-fix-tests.patch"))))
      (build-system python-build-system)
      (synopsis "Library for phylogenetics and phylogenetic computing")
      (description
       "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
      (home-page "http://packages.python.org/DendroPy/")
      (license license:bsd-3)
      ;; The variant is wrapped in a promise because python2-dendropy is
      ;; itself derived from this package.
      (properties
       (list (cons 'python2-variant (delay python2-dendropy))))))
  ;; Python 2 variant of python-dendropy with its own 'check' phase and
  ;; python2-nose added to the native inputs.
  (define-public python2-dendropy
    (let ((base-py2
           (package-with-python2 (strip-python2-variant python-dendropy))))
      (package
        (inherit base-py2)
        (arguments
         `(#:python ,python-2
           #:phases
           (modify-phases %standard-phases
             ;; There is currently a test failure that only happens on some
             ;; systems, and only using "setup.py test", so the tests are
             ;; run through nosetests instead.
             (replace 'check
               (lambda _
                 (zero? (system* "nosetests")))))))
        (native-inputs
         (cons (list "python2-nose" python2-nose)
               (package-native-inputs base-py2))))))
  ;; py2bit: fetched from PyPI and built with the default
  ;; python-build-system settings.
  (define-public python-py2bit
    (package
      (name "python-py2bit")
      (version "0.2.1")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "py2bit" version))
         (sha256
          (base32 "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
      (build-system python-build-system)
      (synopsis "Access 2bit files using lib2bit")
      (description
       "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
      (home-page "https://github.com/dpryan79/py2bit")
      (license license:expat)))
  ;; deepTools: built with the default python-build-system settings; the
  ;; native inputs are needed for the tests only.
  (define-public deeptools
    (package
      (name "deeptools")
      (version "2.5.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/fidelram/deepTools/archive/"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
      (build-system python-build-system)
      (inputs
       `(("python-scipy" ,python-scipy)
         ("python-numpy" ,python-numpy)
         ("python-numpydoc" ,python-numpydoc)
         ("python-matplotlib" ,python-matplotlib)
         ("python-pysam" ,python-pysam)
         ("python-py2bit" ,python-py2bit)
         ("python-pybigwig" ,python-pybigwig)))
      ;; For tests.
      (native-inputs
       `(("python-mock" ,python-mock)
         ("python-nose" ,python-nose)
         ("python-pytz" ,python-pytz)))
      (synopsis "Tools for normalizing and visualizing deep-sequencing data")
      (description
       "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
      (home-page "https://github.com/fidelram/deepTools")
      (license license:gpl3+)))
  ;; DIAMOND: CMake build with the "-march=native" flag stripped from
  ;; CMakeLists.txt.
  (define-public diamond
    (package
      (name "diamond")
      (version "0.9.10")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "13qqzwg54n5dqh8pm5n3v8x6gqbczzakphwwjix63qv60hcd5bqd"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f                    ; no "check" target
         #:phases
         (modify-phases %standard-phases
           ;; Do not tune the binaries for the CPU of the build machine.
           (add-after 'unpack 'remove-native-compilation
             (lambda _
               (substitute* "CMakeLists.txt"
                 (("-march=native") ""))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (synopsis "Accelerated BLAST compatible local sequence aligner")
      (description
       "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
      (home-page "https://github.com/bbuchfink/diamond")
      (license license:agpl3+)))
  ;; Discrover: CMake build; two headers are patched to include <random>.
  (define-public discrover
    (package
      (name "discrover")
      (version "1.6.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/maaskola/discrover/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #f                    ; there are no tests
         #:phases
         (modify-phases %standard-phases
           ;; Insert "#include <random>" right after the include guard of
           ;; each of the two headers.
           (add-after 'unpack 'add-missing-includes
             (lambda _
               (substitute* "src/executioninformation.hpp"
                 (("#define EXECUTIONINFORMATION_HPP" guard)
                  (string-append guard "\n#include <random>")))
               (substitute* "src/plasma/fasta.hpp"
                 (("#define FASTA_HPP" guard)
                  (string-append guard "\n#include <random>")))
               #t)))))
      (inputs
       `(("boost" ,boost)
         ("cairo" ,cairo)))
      (native-inputs
       `(("texlive" ,texlive)
         ("imagemagick" ,imagemagick)))
      (synopsis "Discover discriminative nucleotide sequence motifs")
      (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
      (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
      (license license:gpl3+)))
  ;; EIGENSOFT: built from a pinned git commit; the version string is made
  ;; of the base version, a revision counter and the first nine characters
  ;; of the commit hash.
  (define-public eigensoft
    (let ((commit "b14d1e202e21e532536ff8004f0419cd5e259dc7")
          (revision "1"))
      (package
        (name "eigensoft")
        (version (string-append "6.1.2-" revision "."
                                (string-take commit 9)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/DReichLab/EIG.git")
                 (commit commit)))
           (file-name (string-append "eigensoft-" commit "-checkout"))
           (sha256
            (base32 "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
           (modules '((guix build utils)))
           ;; Remove pre-built binaries, but keep an empty "bin" directory.
           (snippet
            '(begin
               (delete-file-recursively "bin")
               (mkdir "bin")
               #t))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; There are no tests.
           #:make-flags '("CC=gcc")
           #:phases
           (modify-phases %standard-phases
             ;; There is no configure phase, but the Makefile is in a
             ;; sub-directory.
             (replace 'configure
               (lambda _
                 (chdir "src")
                 ;; The link flags are incomplete.
                 (substitute* "Makefile"
                   (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
                 #t))
             ;; The provided install target only copies executables to
             ;; the "bin" directory in the build root.
             (add-after 'install 'actually-install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((target (string-append (assoc-ref outputs "out")
                                              "/bin")))
                   (for-each (lambda (program)
                               (install-file program target))
                             (find-files "../bin" ".*"))
                   #t))))))
        (inputs
         `(("gsl" ,gsl)
           ("lapack" ,lapack)
           ("openblas" ,openblas)
           ("perl" ,perl)
           ("gfortran" ,gfortran "lib")))
        (synopsis "Tools for population genetics")
        (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
        (home-page "https://github.com/DReichLab/EIG")
        ;; The license of the eigensoft tools is Expat, but since it's
        ;; linking with the GNU Scientific Library (GSL) the effective
        ;; license is the GPL.
        (license license:gpl3+))))
  ;; EDirect: only the "edirect.pl" script is installed; it is wrapped so
  ;; that it sees the build-time PERL5LIB when run.
  (define-public edirect
    (package
      (name "edirect")
      (version "4.10")
      (source (origin
                (method url-fetch)
                (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                    "versions/2016-05-03/edirect.tar.gz"))
                (sha256
                 (base32
                  "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
      (build-system perl-build-system)
      (arguments
       `(#:tests? #f ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (mkdir-p target)
                 (install-file "edirect.pl" target)
                 #t)))
           (add-after
            'install 'wrap-program
            (lambda* (#:key inputs outputs #:allow-other-keys)
              ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
              (let* ((out (assoc-ref outputs "out"))
                     (path (getenv "PERL5LIB")))
                (wrap-program (string-append out "/bin/edirect.pl")
                  `("PERL5LIB" ":" prefix (,path)))
                ;; Return an explicit true value instead of whatever
                ;; 'wrap-program' returns, like the other phases do.
                #t))))))
      (inputs
       `(("perl-html-parser" ,perl-html-parser)
         ("perl-encode-locale" ,perl-encode-locale)
         ("perl-file-listing" ,perl-file-listing)
         ("perl-html-tagset" ,perl-html-tagset)
         ("perl-html-tree" ,perl-html-tree)
         ("perl-http-cookies" ,perl-http-cookies)
         ("perl-http-date" ,perl-http-date)
         ("perl-http-message" ,perl-http-message)
         ("perl-http-negotiate" ,perl-http-negotiate)
         ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
         ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
         ("perl-net-http" ,perl-net-http)
         ("perl-uri" ,perl-uri)
         ("perl-www-robotrules" ,perl-www-robotrules)
         ("perl" ,perl)))
      (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
      (synopsis "Tools for accessing the NCBI's set of databases")
      (description
       "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.
EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
      (license license:public-domain)))
  ;; Exonerate: standard GNU build, with parallel builds turned off.
  (define-public exonerate
    (package
      (name "exonerate")
      (version "2.4.0")
      (source
       (origin
         (method url-fetch)
         (uri
          (string-append
           "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
           "exonerate-" version ".tar.gz"))
         (sha256
          (base32
           "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("glib" ,glib)))
      (home-page
       "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
      (synopsis "Generic tool for biological sequence alignment")
      ;; The description used to read "using a many alignment models"; the
      ;; stray article has been dropped.
      (description
       "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
      (license license:gpl3)))
  ;; eXpress: CMake build that links against shared Boost libraries and the
  ;; bamtools input rather than a "bamtools" directory inside the source
  ;; tree.
  (define-public express
    (package
      (name "express")
      (version "1.5.1")
      (source (origin
                (method url-fetch)
                (uri
                 (string-append
                  "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                  version "/express-" version "-src.tgz"))
                (sha256
                 (base32
                  "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #f ;no "check" target
         #:phases
         ;; Use 'modify-phases' like the rest of the file rather than the
         ;; legacy 'alist-cons-after' interface.
         (modify-phases %standard-phases
           (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((bamtools (assoc-ref inputs "bamtools")))
                 ;; Switch Boost to shared libraries and point the include
                 ;; directory at the bamtools input.
                 (substitute* "CMakeLists.txt"
                   (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                    "set(Boost_USE_STATIC_LIBS OFF)")
                   (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                    (string-append bamtools "/include/bamtools")))
                 ;; Likewise for the library directory.
                 (substitute* "src/CMakeLists.txt"
                   (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                    (string-append bamtools "/lib/bamtools")))
                 #t))))))
      (inputs
       `(("boost" ,boost)
         ("bamtools" ,bamtools)
         ("protobuf" ,protobuf)
         ("zlib" ,zlib)))
      (home-page "http://bio.math.berkeley.edu/eXpress")
      (synopsis "Streaming quantification for high-throughput genomic sequencing")
      (description
       "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
      (license license:artistic2.0)))
  ;; Express Beta Diversity: built from the "source" sub-directory; the
  ;; check phase runs the freshly built binary with "-u".
  (define-public express-beta-diversity
    (package
      (name "express-beta-diversity")
      (version "1.0.7")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Build and check from within "source", then return to the top
           ;; of the tree for installation.
           (add-before 'build 'enter-source
             (lambda _
               (chdir "source")
               #t))
           (replace 'check
             (lambda _
               (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
           (add-after 'check 'exit-source
             (lambda _
               (chdir "..")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (install-file "scripts/convertToEBD.py" target)
                 (install-file "bin/ExpressBetaDiversity" target)
                 #t))))))
      (inputs
       `(("python" ,python-2)))
      (synopsis "Taxon- and phylogenetic-based beta diversity measures")
      (description
       "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
      (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
      (license license:gpl3+)))
  ;; FastTree is distributed as a single C file, so there is nothing to
  ;; unpack or configure: the file is compiled twice, once as the serial
  ;; "FastTree" program and once with OpenMP enabled as "FastTreeMP".
  (define-public fasttree
    (package
      (name "fasttree")
      (version "2.1.10")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://www.microbesonline.org/fasttree/FastTree-"
               version ".c"))
         (sha256
          (base32 "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'unpack)
           (delete 'configure)
           (replace 'build
             (lambda* (#:key source #:allow-other-keys)
               ;; COMPILE invokes gcc on SOURCE with the common optimisation
               ;; flags, preceded by EXTRA-FLAGS, and returns #t on success.
               (let ((compile
                      (lambda (extra-flags program)
                        (zero?
                         (apply system* "gcc"
                                (append extra-flags
                                        (list "-O3"
                                              "-finline-functions"
                                              "-funroll-loops"
                                              "-Wall"
                                              "-o"
                                              program
                                              source
                                              "-lm")))))))
                 (and (compile '() "FastTree")
                      (compile '("-DOPENMP" "-fopenmp") "FastTreeMP")))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (for-each (lambda (program)
                             (install-file program target))
                           '("FastTree" "FastTreeMP"))
                 #t))))))
      (home-page "http://www.microbesonline.org/fasttree")
      (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
      (description
       "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
      (license license:gpl2+)))
  ;; FASTX-Toolkit: a standard GNU build that locates libgtextutils through
  ;; pkg-config.
  (define-public fastx-toolkit
    (package
      (name "fastx-toolkit")
      (version "0.0.14")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/agordon/fastx_toolkit/releases/download/"
               version "/fastx_toolkit-" version ".tar.bz2"))
         (sha256
          (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("libgtextutils" ,libgtextutils)))
      (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
      (synopsis "Tools for FASTA/FASTQ file preprocessing")
      (description
       "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.
Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
      (license license:agpl3+)))
  ;; Flexbar: CMake build.  FLEXBAR_BINARY_DIR points at the "bin" directory
  ;; of the output, so the regular install phase is dropped and the test
  ;; suite looks the program up there through PATH.
  (define-public flexbar
    (package
      (name "flexbar")
      (version "2.5")
      (source
       (origin
         (method url-fetch)
         (uri
          (string-append "mirror://sourceforge/flexbar/"
                         version "/flexbar_v" version "_src.tgz"))
         (sha256
          (base32 "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
      (build-system cmake-build-system)
      (arguments
       `(#:configure-flags
         (list (string-append "-DFLEXBAR_BINARY_DIR="
                              (assoc-ref %outputs "out")
                              "/bin/"))
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Put the output's "bin" directory first on PATH for the
               ;; validation script.
               (setenv "PATH" (string-append
                               (assoc-ref outputs "out") "/bin:"
                               (getenv "PATH")))
               ;; The unpacked source directory is named after the version;
               ;; derive it from the 'version' field instead of repeating it
               ;; so that it cannot go stale on an update.
               (chdir (string-append "../flexbar_v" ,version "_src/test"))
               (zero? (system* "bash" "flexbar_validate.sh"))))
           (delete 'install))))
      (inputs
       `(("tbb" ,tbb)
         ("zlib" ,zlib)))
      (native-inputs
       `(("pkg-config" ,pkg-config)
         ("seqan" ,seqan)))
      (home-page "http://flexbar.sourceforge.net")
      (synopsis "Barcode and adapter removal tool for sequencing platforms")
      (description
       "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
      (license license:gpl3)))
  ;; FragGeneScan: Makefile-only build.  The Perl driver and the C program
  ;; are patched to use absolute tool paths and to read the training files
  ;; from share/fraggenescan/train in the output.
  (define-public fraggenescan
    (package
      (name "fraggenescan")
      (version "1.30")
      (source
       (origin
         (method url-fetch)
         (uri
          (string-append "mirror://sourceforge/fraggenescan/"
                         "FragGeneScan" version ".tar.gz"))
         (sha256
          (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'patch-paths
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      ;; SHARE ends with a slash; the replacements below
                      ;; append "train/" directly to it.
                      (share (string-append out "/share/fraggenescan/")))
                 (substitute* "run_FragGeneScan.pl"
                   (("system\\(\"rm")
                    (string-append "system(\"" (which "rm")))
                   (("system\\(\"mv")
                    (string-append "system(\"" (which "mv")))
                   (("\\\"awk") (string-append "\"" (which "awk")))
                   ;; This script and other programs expect the training files
                   ;; to be in the non-standard location bin/train/XXX.  Change
                   ;; this to be share/fraggenescan/train/XXX instead.
                   (("^\\$train.file = \\$dir.*")
                    (string-append "$train_file = \""
                                   share
                                   "train/\".$FGS_train_file;")))
                 (substitute* "run_hmm.c"
                   ;; Match the statement whatever its indentation and keep
                   ;; that indentation in the replacement.
                   (("^([[:blank:]]*)strcat\\(train_dir, \\\"train/\\\"\\);"
                     _ indent)
                    (string-append indent "strcpy(train_dir, \""
                                   share "train/\");"))))
               #t))
           (replace 'build
             (lambda _
               (and (zero? (system* "make" "clean"))
                    (zero? (system* "make" "fgs")))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (share (string-append out "/share/fraggenescan/train")))
                 (install-file "run_FragGeneScan.pl" bin)
                 (install-file "FragGeneScan" bin)
                 (copy-recursively "train" share)
                 #t)))
           (delete 'check)
           (add-after 'install 'post-install-check
             ;; In lieu of 'make check', run two of the examples and check
             ;; that the output files of the first one get created.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (frag (string-append bin "run_FragGeneScan.pl")))
                 (and (zero? (system* frag ; Test complete genome.
                                      "-genome=./example/NC_000913.fna"
                                      "-out=./test2"
                                      "-complete=1"
                                      "-train=complete"))
                      (file-exists? "test2.faa")
                      (file-exists? "test2.ffn")
                      (file-exists? "test2.gff")
                      (file-exists? "test2.out")
                      (zero? (system* ; Test incomplete sequences.
                              frag
                              "-genome=./example/NC_000913-fgs.ffn"
                              "-out=out"
                              "-complete=0"
                              "-train=454_30")))))))))
      (inputs
       `(("perl" ,perl)
         ("python" ,python-2)))           ;not compatible with python 3.
      (home-page "https://sourceforge.net/projects/fraggenescan/")
      (synopsis "Finds potentially fragmented genes in short reads")
      (description
       "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
      ;; GPL3+ according to private correspondence with the authors.
      (license license:gpl3+)))
  ;; fxtract: built with its Makefile.  The "util" directory of the source
  ;; tree is replaced by a pinned checkout of ctSkennerton's util repository
  ;; before the build.
  (define-public fxtract
    (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
      (package
        (name "fxtract")
        (version "2.3")
        (source
         (origin
           (method url-fetch)
           (uri (string-append
                 "https://github.com/ctSkennerton/fxtract/archive/"
                 version ".tar.gz"))
           ;; Name the tarball after this package.  It used to carry the
           ;; name of the util checkout, which was misleading and lacked the
           ;; archive suffix.
           (file-name (string-append name "-" version ".tar.gz"))
           (sha256
            (base32
             "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
        (build-system gnu-build-system)
        (arguments
         `(#:make-flags (list
                         (string-append "PREFIX=" (assoc-ref %outputs "out"))
                         "CC=gcc")
           #:test-target "fxtract_test"
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-before 'build 'copy-util
               (lambda* (#:key inputs #:allow-other-keys)
                 ;; Swap the "util" directory for the pinned checkout.
                 (rmdir "util")
                 (copy-recursively (assoc-ref inputs "ctskennerton-util")
                                   "util")
                 #t))
             ;; Do not use make install as this requires additional
             ;; dependencies.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin")))
                   (install-file "fxtract" bin)
                   #t))))))
        (inputs
         `(("pcre" ,pcre)
           ("zlib" ,zlib)))
        (native-inputs
         ;; ctskennerton-util is licensed under GPL2.
         `(("ctskennerton-util"
            ,(origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/ctSkennerton/util.git")
                     (commit util-commit)))
               (file-name (string-append
                           "ctstennerton-util-" util-commit "-checkout"))
               (sha256
                (base32
                 "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
        (home-page "https://github.com/ctSkennerton/fxtract")
        (synopsis "Extract sequences from FASTA and FASTQ files")
        (description
         "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
        ;; 'util' requires SSE instructions.
        (supported-systems '("x86_64-linux"))
        (license license:expat))))
  ;; GEMMA: Makefile-only build.  The make flags depend on the system being
  ;; built for, and the single resulting binary is installed by hand.
  (define-public gemma
    (package
      (name "gemma")
      (version "0.96")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))
         (patches (search-patches "gemma-intel-compat.patch"))))
      (build-system gnu-build-system)
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f                      ; no tests included yet
         #:make-flags
         ;; FORCE_DYNAMIC=1 is always passed; one more flag is appended on
         ;; systems other than x86_64.
         '("FORCE_DYNAMIC=1"
           ,@(match (%current-system)
               ("x86_64-linux" '())
               ("i686-linux" '("FORCE_32BIT=1"))
               (_ '("NO_INTEL_COMPAT=1"))))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Make sure "bin" exists before the build starts.
           (add-before 'build 'bin-mkdir
             (lambda _
               (mkdir-p "bin")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (install-file "bin/gemma" target))
               #t)))))
      (home-page "https://github.com/xiangzhou/GEMMA")
      (synopsis "Tool for genome-wide efficient mixed model association")
      (description
       "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
      (license license:gpl3)))
  ;; GRIT: Python 2 package.  The generated C files shipped in the tarball
  ;; are removed so that they are regenerated from the Cython sources.
  (define-public grit
    (package
      (name "grit")
      (version "2.0.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/nboley/grit/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'generate-from-cython-sources
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Delete these C files to force fresh generation from pyx
               ;; sources.
               (delete-file "grit/sparsify_support_fns.c")
               (delete-file "grit/call_peaks_support_fns.c")
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Add numpy include path to fix compilation
                 (("pyx\", \\]")
                  (string-append
                   "pyx\", ], include_dirs = ['"
                   (assoc-ref inputs "python-numpy")
                   "/lib/python2.7/site-packages/numpy/core/include/"
                   "']")))
               #t)))))
      (inputs
       `(("python-scipy" ,python2-scipy)
         ("python-numpy" ,python2-numpy)
         ("python-pysam" ,python2-pysam)
         ("python-networkx" ,python2-networkx)))
      (native-inputs
       `(("python-cython" ,python2-cython)))
      (home-page "http://grit-bio.org")
      (synopsis "Tool for integrative analysis of RNA-seq type assays")
      (description
       "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
      (license license:gpl3+)))
  ;; HISAT: Makefile-only build from a zip archive.  The Makefile is patched
  ;; for the compilers and for reproducibility, and the programs are
  ;; installed by hand.
  (define-public hisat
    (package
      (name "hisat")
      (version "0.1.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
               version "-beta-source.zip"))
         (sha256
          (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ;no check target
         #:make-flags '("allall"
                        ;; Disable unsupported `popcnt' instructions on
                        ;; architectures other than x86_64
                        ,@(if (string-prefix? "x86_64"
                                              (or (%current-target-system)
                                                  (%current-system)))
                              '()
                              '("POPCNT_CAPABILITY=0")))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'patch-sources
             (lambda _
               ;; XXX Cannot use snippet because zip files are not supported
               (substitute* "Makefile"
                 (("^CC = .*$") "CC = gcc")
                 (("^CPP = .*$") "CPP = g++")
                 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
               (substitute* '("hisat-build" "hisat-inspect")
                 (("/usr/bin/env") (which "env")))
               ;; Report success explicitly instead of relying on the
               ;; unspecified value of 'substitute*'.
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 ;; Install every file whose name matches the hisat program
                 ;; name pattern.
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files
                            "."
                            "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
                 #t))))))
      (native-inputs
       `(("unzip" ,unzip)))
      (inputs
       `(("perl" ,perl)
         ("python" ,python)
         ("zlib" ,zlib)))
      ;; Non-portable SSE instructions are used so building fails on platforms
      ;; other than x86_64.
      (supported-systems '("x86_64-linux"))
      (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
      (synopsis "Hierarchical indexing for spliced alignment of transcripts")
      (description
       "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
      (license license:gpl3+)))
  ;; HISAT2: Makefile-only build.  The date stamp in the Makefile is
  ;; neutralised, and the programs plus the HTML manual are installed by
  ;; hand.
  (define-public hisat2
    (package
      (name "hisat2")
      (version "2.0.5")
      (source
       (origin
         (method url-fetch)
         ;; FIXME: a better source URL is
         ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
         ;;                "/downloads/hisat2-" version "-source.zip")
         ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
         ;; but it is currently unavailable.
         (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("unzip" ,unzip)                 ; needed for archive from ftp
         ("perl" ,perl)
         ("pandoc" ,ghc-pandoc)))         ; for documentation
      (arguments
       `(#:tests? #f                      ; no check target
         #:make-flags (list "CC=gcc" "CXX=g++" "allall")
         #:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'make-deterministic
             (lambda _
               ;; Replace the `date` invocation with a constant.
               (substitute* "Makefile"
                 (("`date`") "0"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/"))
                      (docdir (string-append out "/share/doc/hisat2/"))
                      (programs
                       (find-files
                        "."
                        "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
                 (for-each (lambda (program)
                             (install-file program bindir))
                           programs)
                 (mkdir-p docdir)
                 (install-file "doc/manual.inc.html" docdir))
               #t)))))
      (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
      (synopsis "Graph-based alignment of genomic sequencing reads")
      (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
      ;; HISAT2 contains files from Bowtie2, which is released under
      ;; GPLv2 or later.  The HISAT2 source files are released under
      ;; GPLv3 or later.
      (license license:gpl3+)))
  ;; HMMER: standard GNU build with a patch named
  ;; "hmmer-remove-cpu-specificity.patch" applied to the source.
  (define-public hmmer
    (package
      (name "hmmer")
      (version "3.1b2")
      (source
       (origin
         (method url-fetch)
         ;; The download directory is derived from a prefix of the version.
         (uri (string-append "http://eddylab.org/software/hmmer"
                             (version-prefix version 1) "/"
                             version "/hmmer-" version ".tar.gz"))
         (sha256
          (base32 "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))
         (patches (search-patches "hmmer-remove-cpu-specificity.patch"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("perl" ,perl)))
      (home-page "http://hmmer.org/")
      (synopsis "Biosequence analysis using profile hidden Markov models")
      (description
       "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
      (license
       (list license:gpl3+
             ;; The bundled library 'easel' is distributed
             ;; under The Janelia Farm Software License.
             (license:non-copyleft
              "file://easel/LICENSE"
              "See easel/LICENSE in the distribution.")))))
  ;; HTSeq, fetched from PyPI and built with the Python build system.
  (define-public htseq
    (package
      (name "htseq")
      (version "0.9.1")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "HTSeq" version))
         (sha256
          (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
      (build-system python-build-system)
      (inputs
       `(("python-pysam" ,python-pysam)
         ("python-matplotlib" ,python-matplotlib)))
      ;; Numpy needs to be propagated when htseq is used as a Python library.
      (propagated-inputs
       `(("python-numpy" ,python-numpy)))
      (native-inputs
       `(("python-cython" ,python-cython)))
      (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
      (synopsis "Analysing high-throughput sequencing data with Python")
      (description
       "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
      (license license:gpl3+)))
  ;; Python 2 variant of 'htseq', derived with 'package-with-python2'.
  (define-public python2-htseq
    (package-with-python2 htseq))
  ;; HTSJDK, built with Ant.  The "all" target is given the output's
  ;; share/java/htsjdk directory as its "dist" property, so no separate
  ;; install phase is needed.
  (define-public java-htsjdk
    (package
      (name "java-htsjdk")
      (version "1.129")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/samtools/htsjdk/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
         (modules '((guix build utils)))
         ;; remove build dependency on git
         (snippet '(substitute* "build.xml"
                     (("failifexecutionfails=\"true\"")
                      "failifexecutionfails=\"false\"")))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                      ; tests require Internet access
         #:make-flags
         (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                              "/share/java/htsjdk/"))
         #:build-target "all"
         #:phases
         (modify-phases %standard-phases
           ;; The build phase also installs the jars
           (delete 'install))))
      (home-page "http://samtools.github.io/htsjdk/")
      (synopsis "Java API for high-throughput sequencing data (HTS) formats")
      (description
       "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
      (license license:expat)))
  ;; HTSlib: standard GNU build; the test driver is patched to use the bash
  ;; found on PATH instead of /bin/bash.
  (define-public htslib
    (package
      (name "htslib")
      (version "1.5")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/samtools/htslib/releases/download/"
               version "/htslib-" version ".tar.bz2"))
         (sha256
          (base32 "0bcjmnbwp2bib1z1bkrp95w9v2syzdwdfqww10mkb1hxlmg52ax0"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("perl" ,perl)))
      (inputs
       `(("openssl" ,openssl)
         ("curl" ,curl)
         ("zlib" ,zlib)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'patch-tests
             (lambda _
               (let ((bash (which "bash")))
                 (substitute* "test/test.pl"
                   (("/bin/bash") bash)))
               #t)))))
      (home-page "http://www.htslib.org")
      (synopsis "C library for reading/writing high-throughput sequencing data")
      (description
       "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
      ;; Files under cram/ are released under the modified BSD license;
      ;; the rest is released under the Expat license
      (license (list license:expat license:bsd-3))))
  ;; Older htslib release, not exported from this module.  It inherits
  ;; everything from 'htslib' except the version and the source.
  ;; This package should be removed once no packages rely upon it.
  (define htslib-1.3
    (package
      (inherit htslib)
      (version "1.3.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/samtools/htslib/releases/download/"
               version "/htslib-" version ".tar.bz2"))
         (sha256
          (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
  ;; IDR, built with the Python build system; its tests are disabled.
  (define-public idr
    (package
      (name "idr")
      (version "2.0.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/nboley/idr/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
      (build-system python-build-system)
      (arguments
       ;; FIXME: "ImportError: No module named 'utility'"
       `(#:tests? #f))
      (native-inputs
       `(("python-cython" ,python-cython)))
      (propagated-inputs
       `(("python-scipy" ,python-scipy)
         ("python-sympy" ,python-sympy)
         ("python-numpy" ,python-numpy)
         ("python-matplotlib" ,python-matplotlib)))
      (home-page "https://github.com/nboley/idr")
      (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
      (description
       "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
      (license license:gpl3+)))
  ;; Jellyfish: standard GNU build with separate outputs for the Ruby and
  ;; Python bindings.
  (define-public jellyfish
    (package
      (name "jellyfish")
      (version "2.2.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                             "releases/download/v" version
                             "/jellyfish-" version ".tar.gz"))
         (sha256
          (base32 "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
      (build-system gnu-build-system)
      (outputs '("out"                    ;for library
                 "ruby"                   ;for Ruby bindings
                 "python"))               ;for Python bindings
      (arguments
       `(#:configure-flags
         ;; One "--enable-LANG-binding=DIR" flag per binding, where DIR is
         ;; the output of the same name.
         (map (lambda (language)
                (string-append "--enable-" language "-binding="
                               (assoc-ref %outputs language)))
              '("ruby" "python"))
         #:phases
         (modify-phases %standard-phases
           (add-before 'check 'set-SHELL-variable
             (lambda _
               ;; generator_manager.hpp either uses /bin/sh or $SHELL
               ;; to run tests.
               (let ((bash (which "bash")))
                 (setenv "SHELL" bash))
               #t)))))
      (native-inputs
       `(("bc" ,bc)
         ("time" ,time)
         ("ruby" ,ruby)
         ("python" ,python-2)))
      (home-page "http://www.genome.umd.edu/jellyfish.html")
      (synopsis "Tool for fast counting of k-mers in DNA")
      (description
       "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
      ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
      (supported-systems '("x86_64-linux"))
      ;; The combined work is published under the GPLv3 or later.  Individual
      ;; files such as lib/jsoncpp.cpp are released under the Expat license.
      (license (list license:gpl3+ license:expat))))
  ;; khmer: Python package.  Bundled zlib, bzip2 and seqan are removed in
  ;; favour of the packaged ones, and the test suite is run after
  ;; installation against the installed files.
  (define-public khmer
    (package
      (name "khmer")
      (version "2.0")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "khmer" version))
         (sha256
          (base32 "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
         (patches (search-patches "khmer-use-libraries.patch"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; NOTE(review): the standard phases appear to contain a phase
           ;; that is also called 'set-paths; consider renaming this one.
           (add-after 'unpack 'set-paths
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Delete bundled libraries.
               (delete-file-recursively "third-party/zlib")
               (delete-file-recursively "third-party/bzip2")
               ;; Replace bundled seqan.
               (let* ((seqan-all "third-party/seqan")
                      (seqan-include (string-append
                                      seqan-all "/core/include")))
                 (delete-file-recursively seqan-all)
                 (copy-recursively (string-append (assoc-ref inputs "seqan")
                                                  "/include/seqan")
                                   (string-append seqan-include "/seqan")))
               ;; We do not replace the bundled MurmurHash as the canonical
               ;; repository for this code 'SMHasher' is unsuitable for
               ;; providing a library.  See
               ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
               #t))
           (add-after 'unpack 'set-cc
             (lambda _
               (setenv "CC" "gcc")
               #t))
           ;; It is simpler to test after installation.
           (delete 'check)
           (add-after 'install 'post-install-check
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      ;; Derive the "MAJOR.MINOR" version from the version
                      ;; suffix of the python store file name by splitting
                      ;; it, so that it does not depend on how many
                      ;; characters the version string has.
                      (python (assoc-ref inputs "python"))
                      (full-version (car (reverse
                                          (string-split python #\-))))
                      (components (string-split full-version #\.))
                      (major+minor (string-append (car components) "."
                                                  (cadr components))))
                 (setenv "PATH"
                         (string-append (getenv "PATH") ":" out "/bin"))
                 (setenv "PYTHONPATH"
                         (string-append (getenv "PYTHONPATH")
                                        ":"
                                        out
                                        "/lib/python"
                                        major+minor
                                        "/site-packages"))
                 (with-directory-excursion "build"
                   (zero? (system* "nosetests" "khmer" "--attr"
                                   "!known_failing")))))))))
      (native-inputs
       `(("seqan" ,seqan)
         ("python-nose" ,python-nose)))
      (inputs
       `(("zlib" ,zlib)
         ("bzip2" ,bzip2)
         ("python-screed" ,python-screed)
         ("python-bz2file" ,python-bz2file)
         ;; Tests fail when gcc-5 is used for compilation.  Use gcc-4.9 at
         ;; least until the next version of khmer (likely 2.1) is released.
         ("gcc" ,gcc-4.9)))
      (home-page "https://khmer.readthedocs.org/")
      (synopsis "K-mer counting, filtering and graph traversal library")
      (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
      ;; When building on i686, armhf and mips64el, we get the following error:
      ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3)))
  (define-public kaiju
    (package
      (name "kaiju")
      (version "1.5.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/bioinformatics-centre/kaiju/archive/v"
               version ".tar.gz"))
         ;; Store the tarball under a "NAME-VERSION" file name.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0afbfalfw9y39bkwnqjrh9bghs118ws1pzj5h8l0nblgn3mbjdks"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; The build is run from the "src" sub-directory.
           (add-before 'build 'move-to-src-dir
             (lambda _
               (chdir "src")
               #t))
           ;; Custom 'install phase: go back to the top-level directory and
           ;; copy the contents of "bin" and "util" into the output's "bin".
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (chdir "..")
                 (for-each (lambda (directory)
                             (copy-recursively directory target))
                           '("bin" "util"))
                 #t))))))
      (inputs
       `(("perl" ,perl)))
      (home-page "http://kaiju.binf.ku.dk/")
      (synopsis "Fast and sensitive taxonomic classification for metagenomics")
      (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
      (license license:gpl3+)))
  ;; MACS2, fetched from PyPI and built with Python 2.  The test suite is
  ;; disabled because there is no test target.
  (define-public macs
    (package
      (name "macs")
      (version "2.1.0.20151222")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "MACS2" version))
                (sha256
                 (base32
                  "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2 ; only compatible with Python 2.7
         #:tests? #f)) ; no test target
      (inputs
       `(("python-numpy" ,python2-numpy)))
      (home-page "https://github.com/taoliu/MACS/")
      (synopsis "Model based analysis for ChIP-Seq data")
      (description
       "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
      (license license:bsd-3)))
  ;; MAFFT, built from the "without extensions" source tarball.  The build
  ;; runs from the "core" sub-directory; the Makefile is patched so that
  ;; mafft-profile, mafft-distance and mafft-homologs.rb are not distributed,
  ;; and the installed programs are wrapped with coreutils on PATH.
  (define-public mafft
    (package
      (name "mafft")
      (version "7.310")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "http://mafft.cbrc.jp/alignment/software/mafft-" version
                      "-without-extensions-src.tgz"))
                (file-name (string-append name "-" version ".tgz"))
                (sha256
                 (base32
                  "0gbsaz6z2qa307kd7wfb06c3y4ikmv1hsdvlns11f6zq4w1z9pwc"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no automated tests, though there are tests in the read me
         #:make-flags (let ((out (assoc-ref %outputs "out")))
                        (list (string-append "PREFIX=" out)
                              (string-append "BINDIR="
                                             (string-append out "/bin"))))
         #:phases
         (modify-phases %standard-phases
           ;; All later phases operate on files relative to "core".
           (add-after 'unpack 'enter-dir
             (lambda _ (chdir "core") #t))
           (add-after 'enter-dir 'patch-makefile
             (lambda _
               ;; on advice from the MAFFT authors, there is no need to
               ;; distribute mafft-profile, mafft-distance, or
               ;; mafft-homologs.rb as they are too "specialised".
               (substitute* "Makefile"
                 ;; remove mafft-homologs.rb from SCRIPTS
                 (("^SCRIPTS = mafft mafft-homologs.rb")
                  "SCRIPTS = mafft")
                 ;; remove mafft-homologs from MANPAGES
                 (("^MANPAGES = mafft.1 mafft-homologs.1")
                  "MANPAGES = mafft.1")
                 ;; remove mafft-distance from PROGS
                 (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                  "PROGS = dvtditr dndfast7 dndblast sextet5")
                 ;; remove mafft-profile from PROGS.  The pattern must spell
                 ;; out "f2cl" exactly like the replacement does, otherwise
                 ;; it never matches and mafft-profile stays in PROGS.
                 (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                  "splittbfast disttbfast tbfast f2cl mccaskillwrap")
                 ;; Comment out the clean-up of the programs removed above.
                 (("^rm -f mafft-profile mafft-profile.exe") "#")
                 (("^rm -f mafft-distance mafft-distance.exe") "#")
                 ;; do not install MAN pages in libexec folder
                 (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
               #t))
           ;; Replace references to perl, awk and grep with the absolute
           ;; file names found on PATH at build time.
           (add-after 'enter-dir 'patch-paths
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* '("pairash.c"
                              "mafft.tmpl")
                 (("perl") (which "perl"))
                 (("([\"`| ])awk" _ prefix)
                  (string-append prefix (which "awk")))
                 (("grep") (which "grep")))
               #t))
           (delete 'configure)
           ;; Wrap every installed file so that coreutils is on PATH.
           (add-after 'install 'wrap-programs
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      ;; No trailing ":" here: 'wrap-program' adds the
                      ;; separator itself, and an empty PATH component would
                      ;; make the wrapped programs search the current
                      ;; directory.
                      (path (string-append
                             (assoc-ref inputs "coreutils") "/bin")))
                 (for-each (lambda (file)
                             (wrap-program file
                               `("PATH" ":" prefix (,path))))
                           (find-files bin)))
               #t)))))
      (inputs
       `(("perl" ,perl)
         ("ruby" ,ruby)
         ("gawk" ,gawk)
         ("grep" ,grep)
         ("coreutils" ,coreutils)))
      (home-page "http://mafft.cbrc.jp/alignment/software/")
      (synopsis "Multiple sequence alignment program")
      (description
       "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
      (license (license:non-copyleft
                "http://mafft.cbrc.jp/alignment/software/license.txt"
                "BSD-3 with different formatting"))))
  (define-public mash
    (package
      (name "mash")
      (version "1.1.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/marbl/mash/archive/v"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
         (modules '((guix build utils)))
         (snippet
          ;; Delete bundled kseq.
          ;; TODO: Also delete bundled murmurhash and open bloom filter.
          '(delete-file "src/mash/kseq.h"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; No tests.
         #:configure-flags
         ;; Point the configure script at the capnproto and gsl inputs.
         (let ((store-dir (lambda (label)
                            (assoc-ref %build-inputs label))))
           (list (string-append "--with-capnp=" (store-dir "capnproto"))
                 (string-append "--with-gsl=" (store-dir "gsl"))))
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; The source snippet deleted the bundled "kseq.h", so include
           ;; "htslib/kseq.h" instead.
           (add-after 'unpack 'fix-includes
             (lambda _
               (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
                 (("^#include \"kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               #t))
           ;; Run "autoconf" once the includes have been fixed; the phase
           ;; succeeds only when it exits with status zero.
           (add-after 'fix-includes 'autoconf
             (lambda _
               (zero? (system* "autoconf")))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ;; Capnproto and htslib are statically embedded in the final
         ;; application. Therefore we also list their licenses, below.
         ("capnproto" ,capnproto)
         ("htslib" ,htslib)))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://mash.readthedocs.io")
      (synopsis "Fast genome and metagenome distance estimation using MinHash")
      (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
      (license (list license:bsd-3          ; Mash
                     license:expat          ; HTSlib and capnproto
                     license:public-domain  ; MurmurHash 3
                     license:cpl1.0))))     ; Open Bloom Filter
  (define-public metabat
    (package
      (name "metabat")
      (version "2.12.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))
         (patches (search-patches "metabat-fix-compilation.patch"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Rewrite the "bam/..." includes to the "samtools/" and
           ;; "htslib/" header locations.
           (add-after 'unpack 'fix-includes
             (lambda _
               (substitute* "src/BamUtils.h"
                 (("^#include \"bam/bam\\.h\"")
                  "#include \"samtools/bam.h\"")
                 (("^#include \"bam/sam\\.h\"")
                  "#include \"samtools/sam.h\""))
               (substitute* "src/KseqReader.h"
                 (("^#include \"bam/kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               #t))
           ;; Make SConstruct use the htslib and samtools inputs.
           (add-after 'unpack 'fix-scons
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib-dir   (assoc-ref inputs "htslib"))
                     (samtools-dir (assoc-ref inputs "samtools")))
                 (substitute* "SConstruct"
                   (("^htslib_dir += 'samtools'")
                    (string-append "htslib_dir = '" htslib-dir "'"))
                   (("^samtools_dir = 'samtools'")
                    (string-append "samtools_dir = '" samtools-dir "'"))
                   (("^findStaticOrShared\\('bam', hts_lib")
                    (string-append "findStaticOrShared('bam', '"
                                   samtools-dir
                                   "/lib'"))
                   ;; Do not distribute README.
                   (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
                 #t)))
           (delete 'configure)
           ;; A single "scons ... install" invocation builds and installs
           ;; into the output directory, which is created first.
           (replace 'build
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((out   (assoc-ref outputs "out"))
                     (boost (assoc-ref inputs "boost")))
                 (mkdir out)
                 (zero? (system* "scons"
                                 (string-append "PREFIX=" out)
                                 (string-append "BOOST_ROOT=" boost)
                                 "install")))))
           ;; Check and install are carried out during build phase.
           (delete 'check)
           (delete 'install))))
      (inputs
       `(("zlib" ,zlib)
         ("perl" ,perl)
         ("samtools" ,samtools)
         ("htslib" ,htslib)
         ("boost" ,boost)))
      (native-inputs
       `(("scons" ,scons)))
      (home-page "https://bitbucket.org/berkeleylab/metabat")
      (synopsis
       "Reconstruction of single genomes from complex microbial communities")
      (description
       "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
      ;; The source code contains inline assembly.
      (supported-systems '("x86_64-linux" "i686-linux"))
      (license (license:non-copyleft "file://license.txt"
                                     "See license.txt in the distribution."))))
  (define-public minced
    (package
      (name "minced")
      (version "0.2.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/minced/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'check 'fix-test
             (lambda _
               ;; Fix test for latest version.
               (substitute* "t/Aquifex_aeolicus_VF5.expected"
                 (("minced:0.1.6") "minced:0.2.0"))
               #t))
           (replace 'install            ; No install target.
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((bin      (string-append (assoc-ref outputs "out")
                                               "/bin"))
                      (launcher (string-append bin "/minced")))
                 ;; Minced comes with a wrapper script that tries to figure
                 ;; out where it is located before running the JAR. Since
                 ;; these paths are known to us, we build our own wrapper to
                 ;; avoid coreutils dependency.
                 (install-file "minced.jar" bin)
                 (call-with-output-file launcher
                   (lambda (port)
                     (display
                      (string-append
                       "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                       (assoc-ref inputs "jre") "/bin/java -jar "
                       bin "/minced.jar \"$@\"\n")
                      port)))
                 ;; Read-only and executable for everyone.
                 (chmod launcher #o555)))))))
      (native-inputs
       `(("jdk" ,icedtea "jdk")))
      (inputs
       `(("bash" ,bash)
         ("jre" ,icedtea "out")))
      (home-page "https://github.com/ctSkennerton/minced")
      (synopsis "Mining CRISPRs in Environmental Datasets")
      (description
       "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
      (license license:gpl3+)))
  (define-public miso
    (package
      (name "miso")
      (version "0.5.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://pypi.python.org/packages/source/m/misopy/misopy-"
               version ".tar.gz"))
         (sha256
          (base32
           "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
         (modules '((guix build utils)))
         ;; Patch "setup.py" before the build: switch to setuptools and
         ;; force "gcc" as the compiler and linker.
         (snippet
          '(substitute* "setup.py"
             ;; Use setuptools, or else the executables are not
             ;; installed.
             (("distutils.core") "setuptools")
             ;; use "gcc" instead of "cc" for compilation
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2             ; only Python 2 is supported
         #:tests? #f))                  ; no "test" target
      (inputs
       `(("samtools" ,samtools)
         ("python-numpy" ,python2-numpy)
         ("python-pysam" ,python2-pysam)
         ("python-scipy" ,python2-scipy)
         ("python-matplotlib" ,python2-matplotlib)))
      (native-inputs
       `(("python-mock" ,python2-mock)  ;for tests
         ("python-pytz" ,python2-pytz))) ;for tests
      (home-page "http://genes.mit.edu/burgelab/miso/index.html")
      (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
      (description
       "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
      (license license:gpl2)))
  (define-public muscle
    (package
      (name "muscle")
      (version "3.8.1551")
      (source
       (origin
         (method url-fetch/tarbomb)
         (uri (string-append
               "http://www.drive5.com/muscle/muscle_src_"
               version ".tar.gz"))
         (sha256
          (base32
           "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list "LDLIBS = -lm")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; There are no tests, so just test if it runs.
           (replace 'check
             (lambda _
               (zero? (system* "./muscle" "-version"))))
           ;; Install the "muscle" executable into the output's "bin".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "muscle"
                             (string-append (assoc-ref outputs "out")
                                            "/bin")))))))
      (home-page "http://www.drive5.com/muscle")
      (synopsis "Multiple sequence alignment program")
      (description
       "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
      ;; License information found in 'muscle -h' and usage.cpp.
      (license license:public-domain)))
  (define-public newick-utils
    ;; There are no recent releases so we package from git.
    (let ((commit-id "da121155a977197cab9fbb15953ca1b40b11eb87"))
      (package
        (name "newick-utils")
        ;; The version string ends with the first 8 characters of the commit.
        (version (string-append "1.6-1." (string-take commit-id 8)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/tjunier/newick_utils.git")
                 (commit commit-id)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
        (build-system gnu-build-system)
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             ;; Run "autoreconf -vif" right after unpacking the checkout.
             (add-after 'unpack 'autoconf
               (lambda _
                 (zero? (system* "autoreconf" "-vif")))))))
        (native-inputs
         `(("autoconf" ,autoconf)
           ("automake" ,automake)
           ("libtool" ,libtool)))
        (inputs
         ;; XXX: TODO: Enable Lua and Guile bindings.
         ;; https://github.com/tjunier/newick_utils/issues/13
         `(("libxml2" ,libxml2)
           ("flex" ,flex)
           ("bison" ,bison)))
        (home-page "https://github.com/tjunier/newick_utils")
        (synopsis "Programs for working with newick format phylogenetic trees")
        (description
         "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
        (license license:bsd-3))))
  (define-public orfm
    (package
      (name "orfm")
      (version "0.7.1")
      (source
       (origin
         (method url-fetch)
         ;; Release tarball attached to the "vVERSION" GitHub release.
         (uri (string-append
               "https://github.com/wwood/OrfM/releases/download/v"
               version "/orfm-" version ".tar.gz"))
         (sha256
          (base32
           "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
         ("ruby-rspec" ,ruby-rspec)
         ("ruby" ,ruby)))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/wwood/OrfM")
      (synopsis "Simple and not slow open reading frame (ORF) caller")
      (description
       "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
      (license license:lgpl3+)))
  ;; Pplacer, built with OCaml 4.01.  The bundled cddlib sources in
  ;; "cdd_src" are overwritten with files from the cddlib package source,
  ;; and calls to 'git' in the build files are replaced by fixed version
  ;; strings derived from VERSION and COMMIT.
  (define-public pplacer
    (let ((commit "g807f6f3"))
      (package
        (name "pplacer")
        ;; The commit should be updated with each version change.
        (version "1.1.alpha19")
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                               version ".tar.gz"))
           (file-name (string-append name "-" version ".tar.gz"))
           (sha256
            (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
        (build-system ocaml-build-system)
        (arguments
         `(#:ocaml ,ocaml-4.01
           #:findlib ,ocaml4.01-findlib
           #:modules ((guix build ocaml-build-system)
                      (guix build utils)
                      (ice-9 ftw))      ; for 'scandir'
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'replace-bundled-cddlib
               (lambda* (#:key inputs #:allow-other-keys)
                 (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                        (local-dir "cddlib_guix"))
                   (mkdir local-dir)
                   ;; Fail right away when the tarball cannot be unpacked
                   ;; instead of ignoring the exit status of 'tar'.
                   (with-directory-excursion local-dir
                     (unless (zero? (system* "tar" "xvf" cddlib-src))
                       (error "failed to unpack cddlib source" cddlib-src)))
                   ;; Pick the first unpacked entry, explicitly skipping "."
                   ;; and ".." rather than relying on their position in the
                   ;; sorted listing.
                   (let* ((unpacked
                           (scandir local-dir
                                    (lambda (entry)
                                      (not (member entry '("." ".."))))))
                          (cddlib-src-folder
                           (string-append local-dir "/"
                                          (car unpacked)
                                          "/lib-src")))
                     ;; Copy the matching source files over "cdd_src".
                     (for-each
                      (lambda (file)
                        (copy-file file
                                   (string-append "cdd_src/" (basename file))))
                      (find-files cddlib-src-folder ".*[ch]$")))
                   #t)))
             (add-after 'unpack 'fix-makefile
               (lambda _
                 ;; Remove system calls to 'git'.
                 (substitute* "Makefile"
                   (("^DESCRIPT:=pplacer-.*")
                    (string-append
                     "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
                 (substitute* "myocamlbuild.ml"
                   (("git describe --tags --long .*\\\" with")
                    (string-append
                     "echo -n v" ,version "-" ,commit "\" with")))
                 #t))
             ;; Install by copying the "bin" directory into the output.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin")))
                   (copy-recursively "bin" bin))
                 #t)))))
        (native-inputs
         `(("zlib" ,zlib)
           ("gsl" ,gsl)
           ("ocaml-ounit" ,ocaml4.01-ounit)
           ("ocaml-batteries" ,ocaml4.01-batteries)
           ("ocaml-camlzip" ,ocaml4.01-camlzip)
           ("ocaml-csv" ,ocaml4.01-csv)
           ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
           ("ocaml-xmlm" ,ocaml4.01-xmlm)
           ("ocaml-mcl" ,ocaml4.01-mcl)
           ("ocaml-gsl" ,ocaml4.01-gsl)
           ("cddlib-src" ,(package-source cddlib))))
        (propagated-inputs
         `(("pplacer-scripts" ,pplacer-scripts)))
        (synopsis "Phylogenetic placement of biological sequences")
        (description
         "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
        (home-page "http://matsen.fhcrc.org/pplacer")
        (license license:gpl3))))
  ;; This package is installed alongside 'pplacer'. It is a separate package so
  ;; that it can use the python-build-system for the scripts that are
  ;; distributed alongside the main OCaml binaries.
  (define pplacer-scripts
    (package
      (inherit pplacer)
      (name "pplacer-scripts")
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; The Python scripts live in the "scripts" sub-directory.  Return
           ;; #t explicitly, like the other phases in this file.
           (add-after 'unpack 'enter-scripts-dir
             (lambda _
               (chdir "scripts")
               #t))
           (replace 'check
             (lambda _
               (zero? (system* "python" "-m" "unittest" "discover" "-v"))))
           ;; Wrap the scripts so that the hmmer (and, for refpkg_align.py,
           ;; infernal) "bin" directories are prepended to PATH.
           (add-after 'install 'wrap-executables
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (let ((path (string-append
                              (assoc-ref inputs "hmmer") "/bin:"
                              (assoc-ref inputs "infernal") "/bin")))
                   (wrap-program (string-append bin "/refpkg_align.py")
                     `("PATH" ":" prefix (,path))))
                 (let ((path (string-append
                              (assoc-ref inputs "hmmer") "/bin")))
                   (wrap-program (string-append bin "/hrefpkg_query.py")
                     `("PATH" ":" prefix (,path)))))
               #t)))))
      (inputs
       `(("infernal" ,infernal)
         ("hmmer" ,hmmer)))
      (propagated-inputs
       `(("python-biopython" ,python2-biopython)
         ("taxtastic" ,taxtastic)))
      (synopsis "Pplacer Python scripts")))
  (define-public python2-pbcore
    (package
      (name "python2-pbcore")
      (version "1.2.10")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "pbcore" version))
         (sha256
          (base32
           "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
      (build-system python-build-system)
      ;; pbcore requires Python 2.7
      (arguments
       `(#:python ,python-2))
      (native-inputs
       `(("python-nose" ,python2-nose)
         ("python-sphinx" ,python2-sphinx)
         ("python-pyxb" ,python2-pyxb)))
      (propagated-inputs
       `(("python-cython" ,python2-cython)
         ("python-numpy" ,python2-numpy)
         ("python-pysam" ,python2-pysam)
         ("python-h5py" ,python2-h5py)))
      (home-page "http://pacificbiosciences.github.io/pbcore/")
      (synopsis "Library for reading and writing PacBio data files")
      (description
       "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
      (license license:bsd-3)))
  (define-public python2-warpedlmm
    (package
      (name "python2-warpedlmm")
      (version "0.21")
      (source
       (origin
         (method url-fetch)
         ;; The source is distributed as a zip archive.
         (uri (string-append
               "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
               version ".zip"))
         (sha256
          (base32
           "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
      (build-system python-build-system)
      ;; requires Python 2.7
      (arguments
       `(#:python ,python-2))
      (native-inputs
       `(("python-mock" ,python2-mock)
         ("python-nose" ,python2-nose)
         ("unzip" ,unzip)))
      (propagated-inputs
       `(("python-scipy" ,python2-scipy)
         ("python-numpy" ,python2-numpy)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-fastlmm" ,python2-fastlmm)
         ("python-pandas" ,python2-pandas)
         ("python-pysnptools" ,python2-pysnptools)))
      (home-page "https://github.com/PMBio/warpedLMM")
      (synopsis "Implementation of warped linear mixed models")
      (description
       "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
      (license license:asl2.0)))
  (define-public pbtranscript-tofu
    (let ((commit-id "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
      (package
        (name "pbtranscript-tofu")
        ;; The version string ends with the first 7 characters of the commit.
        (version (string-append "2.2.3." (string-take commit-id 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                 (commit commit-id)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
           (modules '((guix build utils)))
           (snippet
            '(begin
               ;; remove bundled Cython sources
               (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
               #t))))
        (build-system python-build-system)
        (arguments
         `(#:python ,python-2
           ;; FIXME: Tests fail with "No such file or directory:
           ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
           #:tests? #f
           #:phases
           (modify-phases %standard-phases
             ;; Build from the "pbtranscript-tofu/pbtranscript/" directory.
             (add-after 'unpack 'enter-directory
               (lambda _
                 (chdir "pbtranscript-tofu/pbtranscript/")
                 #t))
             ;; With setuptools version 18.0 and later this setup.py hack
             ;; causes a build error, so we disable it.
             (add-after 'enter-directory 'patch-setuppy
               (lambda _
                 (substitute* "setup.py"
                   (("if 'setuptools.extension' in sys.modules:")
                    "if False:"))
                 #t)))))
        (native-inputs
         `(("python-cython" ,python2-cython)
           ("python-nose" ,python2-nose)))
        (inputs
         `(("python-numpy" ,python2-numpy)
           ("python-bx-python" ,python2-bx-python)
           ("python-networkx" ,python2-networkx)
           ("python-scipy" ,python2-scipy)
           ("python-pbcore" ,python2-pbcore)
           ("python-h5py" ,python2-h5py)))
        (home-page "https://github.com/PacificBiosciences/cDNA_primer")
        (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
        (description
         "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
        (license license:bsd-3))))
  (define-public prank
    (package
      (name "prank")
      (version "150803")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://wasabiapp.org/download/prank/prank.source."
               version ".tgz"))
         (sha256
          (base32
           "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Both phases below are inserted right after 'unpack; the order
           ;; of these two clauses determines the order in which they run,
           ;; so it must be kept.
           (add-after 'unpack 'enter-src-dir
             (lambda _
               (chdir "src")
               #t))
           (add-after 'unpack 'remove-m64-flag
             ;; Prank will build with the correct 'bit-ness' without this
             ;; flag and this allows building on 32-bit machines.
             (lambda _
               (substitute* "src/Makefile"
                 (("-m64") ""))
               #t))
           (delete 'configure)
           ;; Install the binary and its man page by hand, and wrap the
           ;; binary so the mafft, exonerate and bppsuite "bin" directories
           ;; are prepended to PATH.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out         (assoc-ref outputs "out"))
                      (bindir      (string-append out "/bin"))
                      (man1dir     (string-append out "/share/man/man1"))
                      (search-path (string-append
                                    (assoc-ref %build-inputs "mafft") "/bin:"
                                    (assoc-ref %build-inputs "exonerate") "/bin:"
                                    (assoc-ref %build-inputs "bppsuite") "/bin")))
                 (install-file "prank" bindir)
                 (wrap-program (string-append bindir "/prank")
                   `("PATH" ":" prefix (,search-path)))
                 (install-file "prank.1" man1dir))
               #t)))))
      (inputs
       `(("mafft" ,mafft)
         ("exonerate" ,exonerate)
         ("bppsuite" ,bppsuite)))
      (home-page "http://wasabiapp.org/software/prank/")
      (synopsis "Probabilistic multiple sequence alignment program")
      (description
       "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
      (license license:gpl2+)))
  (define-public proteinortho
    (package
      (name "proteinortho")
      (version "5.16b")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
               version "_src.tar.gz"))
         (sha256
          (base32
           "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (replace 'configure
             ;; There is no configure script, so we modify the Makefile
             ;; directly.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (substitute* "Makefile"
                   (("INSTALLDIR=.*")
                    (string-append "INSTALLDIR=" out "/bin\n"))))
               #t))
           (add-before 'install 'make-install-directory
             ;; The install directory is not created during 'make install'.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (mkdir-p (string-append out "/bin")))
               #t))
           ;; Wrap the main script so that the PATH in effect at build time
           ;; is prepended to PATH when it runs.
           (add-after 'install 'wrap-programs
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((build-path (getenv "PATH"))
                      (script     (string-append (assoc-ref outputs "out")
                                                 "/bin/proteinortho5.pl")))
                 (wrap-program script
                   `("PATH" ":" prefix (,build-path))))
               #t)))))
      (inputs
       `(("perl" ,perl)
         ("python" ,python-2)
         ("blast+" ,blast+)))
      (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
      (synopsis "Detect orthologous genes across species")
      (description
       "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
      (license license:gpl2+)))
  4009. (define-public pyicoteo
  4010. (package
  4011. (name "pyicoteo")
  4012. (version "2.0.7")
  4013. (source
  4014. (origin
  4015. (method url-fetch)
  4016. (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
  4017. "pyicoteo/get/v" version ".tar.bz2"))
  4018. (file-name (string-append name "-" version ".tar.bz2"))
  4019. (sha256
  4020. (base32
  4021. "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
  4022. (build-system python-build-system)
  4023. (arguments
  4024. `(#:python ,python-2 ; does not work with Python 3
  4025. #:tests? #f)) ; there are no tests
  4026. (inputs
  4027. `(("python2-matplotlib" ,python2-matplotlib)))
  4028. (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
  4029. (synopsis "Analyze high-throughput genetic sequencing data")
  4030. (description
  4031. "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
  4032. sequencing data. It works with genomic coordinates. There are currently six
  4033. different command-line tools:
  4034. @enumerate
  4035. @item pyicoregion: for generating exploratory regions automatically;
  4036. @item pyicoenrich: for differential enrichment between two conditions;
  4037. @item pyicoclip: for calling CLIP-Seq peaks without a control;
  4038. @item pyicos: for genomic coordinates manipulation;
  4039. @item pyicoller: for peak calling on punctuated ChIP-Seq;
  4040. @item pyicount: to count how many reads from N experiment files overlap in a
  4041. region file;
  4042. @item pyicotrocol: to combine operations from pyicoteo.
  4043. @end enumerate\n")
  4044. (license license:gpl3+)))
  4045. (define-public prodigal
  4046. (package
  4047. (name "prodigal")
  4048. (version "2.6.3")
  4049. (source (origin
  4050. (method url-fetch)
  4051. (uri (string-append
  4052. "https://github.com/hyattpd/Prodigal/archive/v"
  4053. version ".tar.gz"))
  4054. (file-name (string-append name "-" version ".tar.gz"))
  4055. (sha256
  4056. (base32
  4057. "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
  4058. (build-system gnu-build-system)
  4059. (arguments
  4060. `(#:tests? #f ;no check target
  4061. #:make-flags (list (string-append "INSTALLDIR="
  4062. (assoc-ref %outputs "out")
  4063. "/bin"))
  4064. #:phases
  4065. (modify-phases %standard-phases
  4066. (delete 'configure))))
  4067. (home-page "http://prodigal.ornl.gov")
  4068. (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
  4069. (description
  4070. "Prodigal runs smoothly on finished genomes, draft genomes, and
  4071. metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
  4072. format. It runs quickly, in an unsupervised fashion, handles gaps, handles
  4073. partial genes, and identifies translation initiation sites.")
  4074. (license license:gpl3+)))
  4075. (define-public roary
  4076. (package
  4077. (name "roary")
  4078. (version "3.8.2")
  4079. (source
  4080. (origin
  4081. (method url-fetch)
  4082. (uri (string-append
  4083. "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
  4084. version ".tar.gz"))
  4085. (sha256
  4086. (base32
  4087. "03dfr2cd5fp80bcr65923zpdzrasvcxl7c2vgh8373v25a1yfap7"))))
  4088. (build-system perl-build-system)
  4089. (arguments
  4090. `(#:phases
  4091. (modify-phases %standard-phases
  4092. (delete 'configure)
  4093. (delete 'build)
  4094. (replace 'check
  4095. (lambda _
  4096. ;; The tests are not run by default, so we run each test file
  4097. ;; directly.
  4098. (setenv "PATH" (string-append (getcwd) "/bin" ":"
  4099. (getenv "PATH")))
  4100. (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
  4101. (getenv "PERL5LIB")))
  4102. (zero? (length (filter (lambda (file)
  4103. (display file)(display "\n")
  4104. (not (zero? (system* "perl" file))))
  4105. (find-files "t" ".*\\.t$"))))))
  4106. (replace 'install
  4107. ;; There is no 'install' target in the Makefile.
  4108. (lambda* (#:key outputs #:allow-other-keys)
  4109. (let* ((out (assoc-ref outputs "out"))
  4110. (bin (string-append out "/bin"))
  4111. (perl (string-append out "/lib/perl5/site_perl"))
  4112. (roary-plots "contrib/roary_plots"))
  4113. (mkdir-p bin)
  4114. (mkdir-p perl)
  4115. (copy-recursively "bin" bin)
  4116. (copy-recursively "lib" perl)
  4117. #t)))
  4118. (add-after 'install 'wrap-programs
  4119. (lambda* (#:key inputs outputs #:allow-other-keys)
  4120. (let* ((out (assoc-ref outputs "out"))
  4121. (perl5lib (getenv "PERL5LIB"))
  4122. (path (getenv "PATH")))
  4123. (for-each (lambda (prog)
  4124. (let ((binary (string-append out "/" prog)))
  4125. (wrap-program binary
  4126. `("PERL5LIB" ":" prefix
  4127. (,(string-append perl5lib ":" out
  4128. "/lib/perl5/site_perl"))))
  4129. (wrap-program binary
  4130. `("PATH" ":" prefix
  4131. (,(string-append path ":" out "/bin"))))))
  4132. (find-files "bin" ".*[^R]$"))
  4133. (let ((file
  4134. (string-append out "/bin/roary-create_pan_genome_plots.R"))
  4135. (r-site-lib (getenv "R_LIBS_SITE"))
  4136. (coreutils-path
  4137. (string-append (assoc-ref inputs "coreutils") "/bin")))
  4138. (wrap-program file
  4139. `("R_LIBS_SITE" ":" prefix
  4140. (,(string-append r-site-lib ":" out "/site-library/"))))
  4141. (wrap-program file
  4142. `("PATH" ":" prefix
  4143. (,(string-append coreutils-path ":" out "/bin"))))))
  4144. #t)))))
  4145. (native-inputs
  4146. `(("perl-env-path" ,perl-env-path)
  4147. ("perl-test-files" ,perl-test-files)
  4148. ("perl-test-most" ,perl-test-most)
  4149. ("perl-test-output" ,perl-test-output)))
  4150. (inputs
  4151. `(("perl-array-utils" ,perl-array-utils)
  4152. ("bioperl" ,bioperl-minimal)
  4153. ("perl-exception-class" ,perl-exception-class)
  4154. ("perl-file-find-rule" ,perl-file-find-rule)
  4155. ("perl-file-grep" ,perl-file-grep)
  4156. ("perl-file-slurper" ,perl-file-slurper)
  4157. ("perl-file-which" ,perl-file-which)
  4158. ("perl-graph" ,perl-graph)
  4159. ("perl-graph-readwrite" ,perl-graph-readwrite)
  4160. ("perl-log-log4perl" ,perl-log-log4perl)
  4161. ("perl-moose" ,perl-moose)
  4162. ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
  4163. ("perl-text-csv" ,perl-text-csv)
  4164. ("bedtools" ,bedtools)
  4165. ("cd-hit" ,cd-hit)
  4166. ("blast+" ,blast+)
  4167. ("mcl" ,mcl)
  4168. ("parallel" ,parallel)
  4169. ("prank" ,prank)
  4170. ("mafft" ,mafft)
  4171. ("fasttree" ,fasttree)
  4172. ("grep" ,grep)
  4173. ("sed" ,sed)
  4174. ("gawk" ,gawk)
  4175. ("r-minimal" ,r-minimal)
  4176. ("r-ggplot2" ,r-ggplot2)
  4177. ("coreutils" ,coreutils)))
  4178. (home-page "http://sanger-pathogens.github.io/Roary")
  4179. (synopsis "High speed stand-alone pan genome pipeline")
  4180. (description
  4181. "Roary is a high speed stand alone pan genome pipeline, which takes
  4182. annotated assemblies in GFF3 format (produced by the Prokka program) and
  4183. calculates the pan genome. Using a standard desktop PC, it can analyse
  4184. datasets with thousands of samples, without compromising the quality of the
  4185. results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
  4186. single processor. Roary is not intended for metagenomics or for comparing
  4187. extremely diverse sets of genomes.")
  4188. (license license:gpl3)))
  4189. (define-public raxml
  4190. (package
  4191. (name "raxml")
  4192. (version "8.2.10")
  4193. (source
  4194. (origin
  4195. (method url-fetch)
  4196. (uri
  4197. (string-append
  4198. "https://github.com/stamatak/standard-RAxML/archive/v"
  4199. version ".tar.gz"))
  4200. (file-name (string-append name "-" version ".tar.gz"))
  4201. (sha256
  4202. (base32
  4203. "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
  4204. (build-system gnu-build-system)
  4205. (arguments
  4206. `(#:tests? #f ; There are no tests.
  4207. ;; Use 'standard' Makefile rather than SSE or AVX ones.
  4208. #:make-flags (list "-f" "Makefile.HYBRID.gcc")
  4209. #:phases
  4210. (modify-phases %standard-phases
  4211. (delete 'configure)
  4212. (replace 'install
  4213. (lambda* (#:key outputs #:allow-other-keys)
  4214. (let* ((out (assoc-ref outputs "out"))
  4215. (bin (string-append out "/bin"))
  4216. (executable "raxmlHPC-HYBRID"))
  4217. (install-file executable bin)
  4218. (symlink (string-append bin "/" executable) "raxml"))
  4219. #t)))))
  4220. (inputs
  4221. `(("openmpi" ,openmpi)))
  4222. (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
  4223. (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
  4224. (description
  4225. "RAxML is a tool for phylogenetic analysis and post-analysis of large
  4226. phylogenies.")
  4227. ;; The source includes x86 specific code
  4228. (supported-systems '("x86_64-linux" "i686-linux"))
  4229. (license license:gpl2+)))
  4230. (define-public rsem
  4231. (package
  4232. (name "rsem")
  4233. (version "1.2.20")
  4234. (source
  4235. (origin
  4236. (method url-fetch)
  4237. (uri
  4238. (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
  4239. version ".tar.gz"))
  4240. (sha256
  4241. (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
  4242. (patches (search-patches "rsem-makefile.patch"))
  4243. (modules '((guix build utils)))
  4244. (snippet
  4245. '(begin
  4246. ;; remove bundled copy of boost
  4247. (delete-file-recursively "boost")
  4248. #t))))
  4249. (build-system gnu-build-system)
  4250. (arguments
  4251. `(#:tests? #f ;no "check" target
  4252. #:phases
  4253. (modify-phases %standard-phases
  4254. ;; No "configure" script.
  4255. ;; Do not build bundled samtools library.
  4256. (replace 'configure
  4257. (lambda _
  4258. (substitute* "Makefile"
  4259. (("^all : sam/libbam.a") "all : "))
  4260. #t))
  4261. (replace 'install
  4262. (lambda* (#:key outputs #:allow-other-keys)
  4263. (let* ((out (string-append (assoc-ref outputs "out")))
  4264. (bin (string-append out "/bin/"))
  4265. (perl (string-append out "/lib/perl5/site_perl")))
  4266. (mkdir-p bin)
  4267. (mkdir-p perl)
  4268. (for-each (lambda (file)
  4269. (install-file file bin))
  4270. (find-files "." "rsem-.*"))
  4271. (install-file "rsem_perl_utils.pm" perl))
  4272. #t))
  4273. (add-after
  4274. 'install 'wrap-program
  4275. (lambda* (#:key outputs #:allow-other-keys)
  4276. (let ((out (assoc-ref outputs "out")))
  4277. (for-each (lambda (prog)
  4278. (wrap-program (string-append out "/bin/" prog)
  4279. `("PERL5LIB" ":" prefix
  4280. (,(string-append out "/lib/perl5/site_perl")))))
  4281. '("rsem-plot-transcript-wiggles"
  4282. "rsem-calculate-expression"
  4283. "rsem-generate-ngvector"
  4284. "rsem-run-ebseq"
  4285. "rsem-prepare-reference")))
  4286. #t)))))
  4287. (inputs
  4288. `(("boost" ,boost)
  4289. ("ncurses" ,ncurses)
  4290. ("r-minimal" ,r-minimal)
  4291. ("perl" ,perl)
  4292. ("samtools" ,samtools-0.1)
  4293. ("zlib" ,zlib)))
  4294. (home-page "http://deweylab.biostat.wisc.edu/rsem/")
  4295. (synopsis "Estimate gene expression levels from RNA-Seq data")
  4296. (description
  4297. "RSEM is a software package for estimating gene and isoform expression
  4298. levels from RNA-Seq data. The RSEM package provides a user-friendly
  4299. interface, supports threads for parallel computation of the EM algorithm,
  4300. single-end and paired-end read data, quality scores, variable-length reads and
  4301. RSPD estimation. In addition, it provides posterior mean and 95% credibility
  4302. interval estimates for expression levels. For visualization, it can generate
  4303. BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
  4304. (license license:gpl3+)))
  4305. (define-public rseqc
  4306. (package
  4307. (name "rseqc")
  4308. (version "2.6.1")
  4309. (source
  4310. (origin
  4311. (method url-fetch)
  4312. (uri
  4313. (string-append "mirror://sourceforge/rseqc/"
  4314. "RSeQC-" version ".tar.gz"))
  4315. (sha256
  4316. (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
  4317. (modules '((guix build utils)))
  4318. (snippet
  4319. '(begin
  4320. ;; remove bundled copy of pysam
  4321. (delete-file-recursively "lib/pysam")
  4322. (substitute* "setup.py"
  4323. ;; remove dependency on outdated "distribute" module
  4324. (("^from distribute_setup import use_setuptools") "")
  4325. (("^use_setuptools\\(\\)") "")
  4326. ;; do not use bundled copy of pysam
  4327. (("^have_pysam = False") "have_pysam = True"))))))
  4328. (build-system python-build-system)
  4329. (arguments `(#:python ,python-2))
  4330. (inputs
  4331. `(("python-cython" ,python2-cython)
  4332. ("python-pysam" ,python2-pysam)
  4333. ("python-numpy" ,python2-numpy)
  4334. ("zlib" ,zlib)))
  4335. (native-inputs
  4336. `(("python-nose" ,python2-nose)))
  4337. (home-page "http://rseqc.sourceforge.net/")
  4338. (synopsis "RNA-seq quality control package")
  4339. (description
  4340. "RSeQC provides a number of modules that can comprehensively evaluate
  4341. high throughput sequence data, especially RNA-seq data. Some basic modules
  4342. inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
  4343. while RNA-seq specific modules evaluate sequencing saturation, mapped reads
  4344. distribution, coverage uniformity, strand specificity, etc.")
  4345. (license license:gpl3+)))
  4346. (define-public seek
  4347. ;; There are no release tarballs. According to the installation
  4348. ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  4349. ;; stable release is identified by this changeset ID.
  4350. (let ((changeset "2329130")
  4351. (revision "1"))
  4352. (package
  4353. (name "seek")
  4354. (version (string-append "0-" revision "." changeset))
  4355. (source (origin
  4356. (method hg-fetch)
  4357. (uri (hg-reference
  4358. (url "https://bitbucket.org/libsleipnir/sleipnir")
  4359. (changeset changeset)))
  4360. (sha256
  4361. (base32
  4362. "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
  4363. (build-system gnu-build-system)
  4364. (arguments
  4365. `(#:modules ((srfi srfi-1)
  4366. (guix build gnu-build-system)
  4367. (guix build utils))
  4368. #:phases
  4369. (let ((dirs '("SeekMiner"
  4370. "SeekEvaluator"
  4371. "SeekPrep"
  4372. "Distancer"
  4373. "Data2DB"
  4374. "PCL2Bin")))
  4375. (modify-phases %standard-phases
  4376. (add-before 'configure 'bootstrap
  4377. (lambda _
  4378. (zero? (system* "bash" "gen_auto"))))
  4379. (add-after 'build 'build-additional-tools
  4380. (lambda* (#:key make-flags #:allow-other-keys)
  4381. (every (lambda (dir)
  4382. (with-directory-excursion (string-append "tools/" dir)
  4383. (zero? (apply system* "make" make-flags))))
  4384. dirs)))
  4385. (add-after 'install 'install-additional-tools
  4386. (lambda* (#:key make-flags #:allow-other-keys)
  4387. (fold (lambda (dir result)
  4388. (with-directory-excursion (string-append "tools/" dir)
  4389. (and result
  4390. (zero? (apply system*
  4391. `("make" ,@make-flags "install"))))))
  4392. #t dirs)))))))
  4393. (inputs
  4394. `(("gsl" ,gsl)
  4395. ("boost" ,boost)
  4396. ("libsvm" ,libsvm)
  4397. ("readline" ,readline)
  4398. ("gengetopt" ,gengetopt)
  4399. ("log4cpp" ,log4cpp)))
  4400. (native-inputs
  4401. `(("autoconf" ,autoconf)
  4402. ("automake" ,automake)
  4403. ("perl" ,perl)))
  4404. (home-page "http://seek.princeton.edu")
  4405. (synopsis "Gene co-expression search engine")
  4406. (description
  4407. "SEEK is a computational gene co-expression search engine. SEEK provides
  4408. biologists with a way to navigate the massive human expression compendium that
  4409. now contains thousands of expression datasets. SEEK returns a robust ranking
  4410. of co-expressed genes in the biological area of interest defined by the user's
  4411. query genes. It also prioritizes thousands of expression datasets according
  4412. to the user's query of interest.")
  4413. (license license:cc-by3.0))))
  4414. (define-public samtools
  4415. (package
  4416. (name "samtools")
  4417. (version "1.5")
  4418. (source
  4419. (origin
  4420. (method url-fetch)
  4421. (uri
  4422. (string-append "mirror://sourceforge/samtools/samtools/"
  4423. version "/samtools-" version ".tar.bz2"))
  4424. (sha256
  4425. (base32
  4426. "1xidmv0jmfy7l0kb32hdnlshcxgzi1hmygvig0cqrq1fhckdlhl5"))))
  4427. (build-system gnu-build-system)
  4428. (arguments
  4429. `(#:modules ((ice-9 ftw)
  4430. (ice-9 regex)
  4431. (guix build gnu-build-system)
  4432. (guix build utils))
  4433. #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
  4434. #:configure-flags (list "--with-ncurses" "--with-htslib=system")
  4435. #:phases
  4436. (modify-phases %standard-phases
  4437. (add-after 'unpack 'patch-tests
  4438. (lambda _
  4439. (substitute* "test/test.pl"
  4440. ;; The test script calls out to /bin/bash
  4441. (("/bin/bash") (which "bash")))
  4442. #t))
  4443. (add-after 'install 'install-library
  4444. (lambda* (#:key outputs #:allow-other-keys)
  4445. (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
  4446. (install-file "libbam.a" lib)
  4447. #t)))
  4448. (add-after 'install 'install-headers
  4449. (lambda* (#:key outputs #:allow-other-keys)
  4450. (let ((include (string-append (assoc-ref outputs "out")
  4451. "/include/samtools/")))
  4452. (for-each (lambda (file)
  4453. (install-file file include))
  4454. (scandir "." (lambda (name) (string-match "\\.h$" name))))
  4455. #t))))))
  4456. (native-inputs `(("pkg-config" ,pkg-config)))
  4457. (inputs
  4458. `(("htslib" ,htslib)
  4459. ("ncurses" ,ncurses)
  4460. ("perl" ,perl)
  4461. ("python" ,python)
  4462. ("zlib" ,zlib)))
  4463. (home-page "http://samtools.sourceforge.net")
  4464. (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
  4465. (description
  4466. "Samtools implements various utilities for post-processing nucleotide
  4467. sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
  4468. variant calling (in conjunction with bcftools), and a simple alignment
  4469. viewer.")
  4470. (license license:expat)))
  4471. (define-public samtools-0.1
  4472. ;; This is the most recent version of the 0.1 line of samtools. The input
  4473. ;; and output formats differ greatly from that used and produced by samtools
  4474. ;; 1.x and is still used in many bioinformatics pipelines.
  4475. (package (inherit samtools)
  4476. (version "0.1.19")
  4477. (source
  4478. (origin
  4479. (method url-fetch)
  4480. (uri
  4481. (string-append "mirror://sourceforge/samtools/samtools/"
  4482. version "/samtools-" version ".tar.bz2"))
  4483. (sha256
  4484. (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
  4485. (arguments
  4486. `(#:tests? #f ;no "check" target
  4487. ,@(substitute-keyword-arguments (package-arguments samtools)
  4488. ((#:make-flags flags)
  4489. `(cons "LIBCURSES=-lncurses" ,flags))
  4490. ((#:phases phases)
  4491. `(modify-phases ,phases
  4492. (replace 'install
  4493. (lambda* (#:key outputs #:allow-other-keys)
  4494. (let ((bin (string-append
  4495. (assoc-ref outputs "out") "/bin")))
  4496. (mkdir-p bin)
  4497. (install-file "samtools" bin)
  4498. #t)))
  4499. (delete 'patch-tests)
  4500. (delete 'configure))))))))
  4501. (define-public mosaik
  4502. (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
  4503. (package
  4504. (name "mosaik")
  4505. (version "2.2.30")
  4506. (source (origin
  4507. ;; There are no release tarballs nor tags.
  4508. (method git-fetch)
  4509. (uri (git-reference
  4510. (url "https://github.com/wanpinglee/MOSAIK.git")
  4511. (commit commit)))
  4512. (file-name (string-append name "-" version))
  4513. (sha256
  4514. (base32
  4515. "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
  4516. (build-system gnu-build-system)
  4517. (arguments
  4518. `(#:tests? #f ; no tests
  4519. #:make-flags (list "CC=gcc")
  4520. #:phases
  4521. (modify-phases %standard-phases
  4522. (replace 'configure
  4523. (lambda _ (chdir "src") #t))
  4524. (replace 'install
  4525. (lambda* (#:key outputs #:allow-other-keys)
  4526. (let ((bin (string-append (assoc-ref outputs "out")
  4527. "/bin")))
  4528. (mkdir-p bin)
  4529. (copy-recursively "../bin" bin)
  4530. #t))))))
  4531. (inputs
  4532. `(("perl" ,perl)
  4533. ("zlib" ,zlib)))
  4534. (supported-systems '("x86_64-linux"))
  4535. (home-page "https://github.com/wanpinglee/MOSAIK")
  4536. (synopsis "Map nucleotide sequence reads to reference genomes")
  4537. (description
  4538. "MOSAIK is a program for mapping second and third-generation sequencing
  4539. reads to a reference genome. MOSAIK can align reads generated by all the
  4540. major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
  4541. Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
  4542. ;; MOSAIK is released under the GPLv2+ with the exception of third-party
  4543. ;; code released into the public domain:
  4544. ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
  4545. ;; 2. MD5 implementation - RSA Data Security, RFC 1321
  4546. (license (list license:gpl2+ license:public-domain)))))
  4547. (define-public ngs-sdk
  4548. (package
  4549. (name "ngs-sdk")
  4550. (version "1.3.0")
  4551. (source
  4552. (origin
  4553. (method url-fetch)
  4554. (uri
  4555. (string-append "https://github.com/ncbi/ngs/archive/"
  4556. version ".tar.gz"))
  4557. (file-name (string-append name "-" version ".tar.gz"))
  4558. (sha256
  4559. (base32
  4560. "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
  4561. (build-system gnu-build-system)
  4562. (arguments
  4563. `(#:parallel-build? #f ; not supported
  4564. #:tests? #f ; no "check" target
  4565. #:phases
  4566. (alist-replace
  4567. 'configure
  4568. (lambda* (#:key outputs #:allow-other-keys)
  4569. (let ((out (assoc-ref outputs "out")))
  4570. ;; Allow 'konfigure.perl' to find 'package.prl'.
  4571. (setenv "PERL5LIB"
  4572. (string-append ".:" (getenv "PERL5LIB")))
  4573. ;; The 'configure' script doesn't recognize things like
  4574. ;; '--enable-fast-install'.
  4575. (zero? (system* "./configure"
  4576. (string-append "--build-prefix=" (getcwd) "/build")
  4577. (string-append "--prefix=" out)))))
  4578. (alist-cons-after
  4579. 'unpack 'enter-dir
  4580. (lambda _ (chdir "ngs-sdk") #t)
  4581. %standard-phases))))
  4582. (native-inputs `(("perl" ,perl)))
  4583. ;; According to the test
  4584. ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
  4585. ;; in ngs-sdk/setup/konfigure.perl
  4586. (supported-systems '("i686-linux" "x86_64-linux"))
  4587. (home-page "https://github.com/ncbi/ngs")
  4588. (synopsis "API for accessing Next Generation Sequencing data")
  4589. (description
  4590. "NGS is a domain-specific API for accessing reads, alignments and pileups
  4591. produced from Next Generation Sequencing. The API itself is independent from
  4592. any particular back-end implementation, and supports use of multiple back-ends
  4593. simultaneously.")
  4594. (license license:public-domain)))
  4595. (define-public java-ngs
  4596. (package (inherit ngs-sdk)
  4597. (name "java-ngs")
  4598. (arguments
  4599. `(,@(substitute-keyword-arguments
  4600. `(#:modules ((guix build gnu-build-system)
  4601. (guix build utils)
  4602. (srfi srfi-1)
  4603. (srfi srfi-26))
  4604. ,@(package-arguments ngs-sdk))
  4605. ((#:phases phases)
  4606. `(modify-phases ,phases
  4607. (replace 'enter-dir (lambda _ (chdir "ngs-java") #t)))))))
  4608. (inputs
  4609. `(("jdk" ,icedtea "jdk")
  4610. ("ngs-sdk" ,ngs-sdk)))
  4611. (synopsis "Java bindings for NGS SDK")))
  4612. (define-public ncbi-vdb
  4613. (package
  4614. (name "ncbi-vdb")
  4615. (version "2.8.2")
  4616. (source
  4617. (origin
  4618. (method url-fetch)
  4619. (uri
  4620. (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
  4621. version ".tar.gz"))
  4622. (file-name (string-append name "-" version ".tar.gz"))
  4623. (sha256
  4624. (base32
  4625. "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
  4626. (build-system gnu-build-system)
  4627. (arguments
  4628. `(#:parallel-build? #f ; not supported
  4629. #:tests? #f ; no "check" target
  4630. #:phases
  4631. (modify-phases %standard-phases
  4632. (add-before 'configure 'set-perl-search-path
  4633. (lambda _
  4634. ;; Work around "dotless @INC" build failure.
  4635. (setenv "PERL5LIB"
  4636. (string-append (getcwd) "/setup:"
  4637. (getenv "PERL5LIB")))
  4638. #t))
  4639. (replace 'configure
  4640. (lambda* (#:key inputs outputs #:allow-other-keys)
  4641. (let ((out (assoc-ref outputs "out")))
  4642. ;; Override include path for libmagic
  4643. (substitute* "setup/package.prl"
  4644. (("name => 'magic', Include => '/usr/include'")
  4645. (string-append "name=> 'magic', Include => '"
  4646. (assoc-ref inputs "libmagic")
  4647. "/include" "'")))
  4648. ;; Install kdf5 library (needed by sra-tools)
  4649. (substitute* "build/Makefile.install"
  4650. (("LIBRARIES_TO_INSTALL =")
  4651. "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))
  4652. (substitute* "build/Makefile.env"
  4653. (("CFLAGS =" prefix)
  4654. (string-append prefix "-msse2 ")))
  4655. ;; Override search path for ngs-java
  4656. (substitute* "setup/package.prl"
  4657. (("/usr/local/ngs/ngs-java")
  4658. (assoc-ref inputs "java-ngs")))
  4659. ;; The 'configure' script doesn't recognize things like
  4660. ;; '--enable-fast-install'.
  4661. (zero? (system*
  4662. "./configure"
  4663. (string-append "--build-prefix=" (getcwd) "/build")
  4664. (string-append "--prefix=" (assoc-ref outputs "out"))
  4665. (string-append "--debug")
  4666. (string-append "--with-xml2-prefix="
  4667. (assoc-ref inputs "libxml2"))
  4668. (string-append "--with-ngs-sdk-prefix="
  4669. (assoc-ref inputs "ngs-sdk"))
  4670. (string-append "--with-hdf5-prefix="
  4671. (assoc-ref inputs "hdf5")))))))
  4672. (add-after 'install 'install-interfaces
  4673. (lambda* (#:key outputs #:allow-other-keys)
  4674. ;; Install interface libraries. On i686 the interface libraries
  4675. ;; are installed to "linux/gcc/i386", so we need to use the Linux
  4676. ;; architecture name ("i386") instead of the target system prefix
  4677. ;; ("i686").
  4678. (mkdir (string-append (assoc-ref outputs "out") "/ilib"))
  4679. (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
  4680. ,(system->linux-architecture
  4681. (or (%current-target-system)
  4682. (%current-system)))
  4683. "/rel/ilib")
  4684. (string-append (assoc-ref outputs "out")
  4685. "/ilib"))
  4686. ;; Install interface headers
  4687. (copy-recursively "interfaces"
  4688. (string-append (assoc-ref outputs "out")
  4689. "/include"))
  4690. #t))
  4691. ;; These files are needed by sra-tools.
  4692. (add-after 'install 'install-configuration-files
  4693. (lambda* (#:key outputs #:allow-other-keys)
  4694. (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
  4695. (mkdir target)
  4696. (install-file "libs/kfg/default.kfg" target)
  4697. (install-file "libs/kfg/certs.kfg" target))
  4698. #t)))))
  4699. (inputs
  4700. `(("libxml2" ,libxml2)
  4701. ("ngs-sdk" ,ngs-sdk)
  4702. ("java-ngs" ,java-ngs)
  4703. ("libmagic" ,file)
  4704. ("hdf5" ,hdf5)))
  4705. (native-inputs `(("perl" ,perl)))
  4706. ;; NCBI-VDB requires SSE capability.
  4707. (supported-systems '("i686-linux" "x86_64-linux"))
  4708. (home-page "https://github.com/ncbi/ncbi-vdb")
  4709. (synopsis "Database engine for genetic information")
  4710. (description
  4711. "The NCBI-VDB library implements a highly compressed columnar data
  4712. warehousing engine that is most often used to store genetic information.
  4713. Databases are stored in a portable image within the file system, and can be
  4714. accessed/downloaded on demand across HTTP.")
  4715. (license license:public-domain)))
  4716. (define-public plink
  4717. (package
  4718. (name "plink")
  4719. (version "1.07")
  4720. (source
  4721. (origin
  4722. (method url-fetch)
  4723. (uri (string-append
  4724. "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
  4725. version "-src.zip"))
  4726. (sha256
  4727. (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
  4728. (patches (search-patches "plink-1.07-unclobber-i.patch"
  4729. "plink-endian-detection.patch"))))
  4730. (build-system gnu-build-system)
  4731. (arguments
  4732. '(#:tests? #f ;no "check" target
  4733. #:make-flags (list (string-append "LIB_LAPACK="
  4734. (assoc-ref %build-inputs "lapack")
  4735. "/lib/liblapack.so")
  4736. "WITH_LAPACK=1"
  4737. "FORCE_DYNAMIC=1"
  4738. ;; disable phoning home
  4739. "WITH_WEBCHECK=")
  4740. #:phases
  4741. (modify-phases %standard-phases
  4742. ;; no "configure" script
  4743. (delete 'configure)
  4744. (replace 'install
  4745. (lambda* (#:key outputs #:allow-other-keys)
  4746. (let ((bin (string-append (assoc-ref outputs "out")
  4747. "/bin/")))
  4748. (install-file "plink" bin)
  4749. #t))))))
  4750. (inputs
  4751. `(("zlib" ,zlib)
  4752. ("lapack" ,lapack)))
  4753. (native-inputs
  4754. `(("unzip" ,unzip)))
  4755. (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
  4756. (synopsis "Whole genome association analysis toolset")
  4757. (description
  4758. "PLINK is a whole genome association analysis toolset, designed to
  4759. perform a range of basic, large-scale analyses in a computationally efficient
  4760. manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
  4761. so there is no support for steps prior to this (e.g. study design and
  4762. planning, generating genotype or CNV calls from raw data). Through
  4763. integration with gPLINK and Haploview, there is some support for the
  4764. subsequent visualization, annotation and storage of results.")
  4765. ;; Code is released under GPLv2, except for fisher.h, which is under
  4766. ;; LGPLv2.1+
  4767. (license (list license:gpl2 license:lgpl2.1+))))
  ;; Smithlab CPP, packaged from a fixed Git commit.  The version string is
  ;; assembled from REVISION and the first seven characters of COMMIT.
  (define-public smithlab-cpp
    (let ((revision "1")
          (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
      (package
        (name "smithlab-cpp")
        (version (string-append "0." revision "." (string-take commit 7)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/smithlabcode/smithlab_cpp.git")
                        (commit commit)))
                  (file-name (string-append name "-" version "-checkout"))
                  (sha256
                   (base32
                    "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
        (build-system gnu-build-system)
        (arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-26))       ;provides 'cut', used by 'install'
           #:tests? #f                      ;no "check" target
           #:phases
           (modify-phases %standard-phases
             (add-after 'unpack 'use-samtools-headers
               (lambda _
                 ;; Refer to "sam.h" through the "samtools/" prefix.  The dot
                 ;; is escaped so that only the literal file name is rewritten
                 ;; and not any "sam<char>h" sequence.
                 (substitute* '("SAM.cpp"
                                "SAM.hpp")
                   (("sam\\.h") "samtools/sam.h"))
                 #t))
             (replace 'install
               ;; Copy every object file found in the build tree to "lib" and
               ;; every ".hpp" header to "include/smithlab-cpp".
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out     (assoc-ref outputs "out"))
                        (lib     (string-append out "/lib"))
                        (include (string-append out "/include/smithlab-cpp")))
                   (mkdir-p lib)
                   (mkdir-p include)
                   (for-each (cut install-file <> lib)
                             (find-files "." "\\.o$"))
                   (for-each (cut install-file <> include)
                             (find-files "." "\\.hpp$")))
                 #t))
             ;; no "configure" script
             (delete 'configure))))
        (inputs
         `(("samtools" ,samtools-0.1)
           ("zlib" ,zlib)))
        (home-page "https://github.com/smithlabcode/smithlab_cpp")
        (synopsis "C++ helper library for functions used in Smith lab projects")
        (description
         "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
        (license license:gpl3+))))
  ;; preseq, built with plain make against the packaged samtools 0.1 and
  ;; smithlab-cpp instead of the copies expected in the source tree.
  (define-public preseq
    (package
      (name "preseq")
      (version "2.0")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/smithlabcode/"
                                    "preseq/archive/v" version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
                (modules '((guix build utils)))
                (snippet
                 ;; Remove bundled samtools.  Return #t explicitly, like the
                 ;; other snippets in this file.
                 '(begin
                    (delete-file-recursively "samtools")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ;no "check" target
         #:phases
         (modify-phases %standard-phases
           ;; no "configure" script
           (delete 'configure))
         #:make-flags
         (list (string-append "PREFIX="
                              (assoc-ref %outputs "out"))
               ;; Link against the static libbam of the samtools input.
               (string-append "LIBBAM="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/libbam.a")
               ;; smithlab-cpp's object files live in its "lib" directory;
               ;; its headers are reached through INCLUDEDIRS below.
               (string-append "SMITHLAB_CPP="
                              (assoc-ref %build-inputs "smithlab-cpp")
                              "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)")))
      (inputs
       `(("gsl" ,gsl)
         ("samtools" ,samtools-0.1)
         ("smithlab-cpp" ,smithlab-cpp)
         ("zlib" ,zlib)))
      (home-page "http://smithlabresearch.org/software/preseq/")
      (synopsis "Program for analyzing library complexity")
      (description
       "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
      (license license:gpl3+)))
  ;; screed from PyPI; the test suite is driven by nose instead of the
  ;; default 'check' phase.
  (define-public python-screed
    (package
      (name "python-screed")
      (version "0.9")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "screed" version))
         (sha256
          (base32
           "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'check
             (lambda _
               ;; Append the current directory to the module search path,
               ;; then run nose while skipping the tests tagged
               ;; "known_failing".
               (let ((search-path (string-append (getenv "PYTHONPATH") ":.")))
                 (setenv "PYTHONPATH" search-path)
                 (zero? (system* "nosetests" "--attr" "!known_failing"))))))))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (inputs
       `(("python-bz2file" ,python-bz2file)))
      (home-page "https://github.com/dib-lab/screed/")
      (synopsis "Short read sequence database utilities")
      (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
      (license license:bsd-3)))
  ;; Python 2 variant, derived from python-screed with 'package-with-python2'.
  (define-public python2-screed
    (package-with-python2 python-screed))
  ;; SRA Toolkit, built against the installed ncbi-vdb and ngs-sdk packages
  ;; with a hand-written 'configure' phase.
  (define-public sra-tools
    (package
      (name "sra-tools")
      (version "2.8.2-1")
      (source
       (origin
         (method url-fetch)
         (uri
          (string-append "https://github.com/ncbi/sra-tools/archive/"
                         version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
      (build-system gnu-build-system)
      (arguments
       `(#:parallel-build? #f             ; not supported
         #:tests? #f                      ; no "check" target
         #:make-flags
         (list (string-append "DEFAULT_CRT="
                              (assoc-ref %build-inputs "ncbi-vdb")
                              "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG="
                              (assoc-ref %build-inputs "ncbi-vdb")
                              "/kfg/default.kfg")
               ;; The library sub-directory is picked on the host side from
               ;; the target (or current) system string.
               (string-append "VDB_LIBDIR="
                              (assoc-ref %build-inputs "ncbi-vdb")
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32")))
         #:phases
         (modify-phases %standard-phases
           (add-before 'configure 'set-perl-search-path
             (lambda _
               ;; Work around "dotless @INC" build failure.  PERL5LIB may be
               ;; unset, in which case 'getenv' returns #f; only append the
               ;; previous value when there is one.
               (let ((setup    (string-append (getcwd) "/setup"))
                     (previous (getenv "PERL5LIB")))
                 (setenv "PERL5LIB"
                         (if previous
                             (string-append setup ":" previous)
                             setup)))
               #t))
           (replace 'configure
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; The build system expects a directory containing the sources and
               ;; raw build output of ncbi-vdb, including files that are not
               ;; installed.  Since we are building against an installed version of
               ;; ncbi-vdb, the following modifications are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking for the existence of a
                 ;; matching library in its "lib" directory.
                 ;; NOTE(review): this pattern is anchored at the start of the
                 ;; line, so its leading whitespace has to match
                 ;; konfigure.perl exactly -- confirm against the unpacked
                 ;; source.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources="
                                      (assoc-ref inputs "ncbi-vdb"))
                       (string-append "--with-ncbi-vdb-build="
                                      (assoc-ref inputs "ncbi-vdb"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))
           ;; This version of sra-tools fails to build with glibc because of a
           ;; naming conflict.  glibc-2.25/include/bits/mathcalls.h already
           ;; contains a definition of "canonicalize", so we rename it.
           ;;
           ;; See upstream bug report:
           ;; https://github.com/ncbi/sra-tools/issues/67
           (add-after 'unpack 'patch-away-glibc-conflict
             (lambda _
               ;; Prefix every "canonicalize(" in bam.c with "sra_tools_".
               (substitute* "tools/bam-loader/bam.c"
                 (("canonicalize\\(" line)
                  (string-append "sra_tools_" line)))
               #t)))))
      (native-inputs `(("perl" ,perl)))
      (inputs
       `(("ngs-sdk" ,ngs-sdk)
         ("ncbi-vdb" ,ncbi-vdb)
         ("libmagic" ,file)
         ("fuse" ,fuse)
         ("hdf5" ,hdf5)
         ("zlib" ,zlib)))
      (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
      (synopsis "Tools and libraries for reading and writing sequencing data")
      (description
       "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
      (license license:public-domain)))
  ;; SeqAn is installed by unpacking the release tarball and copying its
  ;; "include" and "share" directories; nothing is compiled.
  (define-public seqan
    (package
      (name "seqan")
      (version "1.4.2")
      (source (origin
                (method url-fetch)
                (uri (string-append "http://packages.seqan.de/seqan-library/"
                                    "seqan-library-" version ".tar.bz2"))
                (sha256
                 (base32
                  "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
      ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
      ;; makes sense to split the outputs.
      (outputs '("out" "doc"))
      (build-system trivial-build-system)
      (arguments
       `(#:modules ((guix build utils))
         #:builder
         (begin
           (use-modules (guix build utils))
           (let ((tar  (assoc-ref %build-inputs "tar"))
                 (bzip (assoc-ref %build-inputs "bzip2"))
                 (out  (assoc-ref %outputs "out"))
                 (doc  (assoc-ref %outputs "doc")))
             ;; tar needs bzip2 on PATH to decompress the archive.
             (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
             ;; Do not carry on with a partially extracted tree: fail as soon
             ;; as tar reports an error.
             (unless (zero? (system* "tar" "xvf"
                                     (assoc-ref %build-inputs "source")))
               (error "failed to unpack the seqan source tarball"))
             (chdir (string-append "seqan-library-" ,version))
             ;; Headers go to "out", everything under "share" to "doc".
             (copy-recursively "include" (string-append out "/include"))
             (copy-recursively "share" (string-append doc "/share"))
             #t))))
      (native-inputs
       `(("source" ,source)
         ("tar" ,tar)
         ("bzip2" ,bzip2)))
      (home-page "http://www.seqan.de")
      (synopsis "Library for nucleotide sequence analysis")
      (description
       "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
      (license license:bsd-3)))
  ;; seqmagick, a Python 2 only command-line tool; tests are run with nose.
  (define-public seqmagick
    (package
      (name "seqmagick")
      (version "0.6.1")
      (source
       (origin
         (method url-fetch)
         ;; Use 'pypi-uri' like the other PyPI packages in this file instead
         ;; of spelling out the download URL by hand.
         (uri (pypi-uri "seqmagick" version))
         (sha256
          (base32
           "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
      (build-system python-build-system)
      (arguments
       ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; Current test in setup.py does not work as of 0.6.1,
           ;; so use nose to run tests instead for now.  See
           ;; https://github.com/fhcrc/seqmagick/issues/55
           (replace 'check
             (lambda _
               (zero? (system* "nosetests")))))))
      (inputs
       ;; biopython-1.66 is required due to
       ;; https://github.com/fhcrc/seqmagick/issues/59
       ;; When that issue is resolved the 'python2-biopython-1.66' package
       ;; should be removed.
       `(("python-biopython" ,python2-biopython-1.66)))
      (native-inputs
       `(("python-nose" ,python2-nose)))
      (home-page "https://github.com/fhcrc/seqmagick")
      (synopsis "Tools for converting and modifying sequence files")
      (description
       "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
      (license license:gpl3)))
  ;; seqtk: a single binary built with plain make and installed by hand.
  (define-public seqtk
    (package
      (name "seqtk")
      (version "1.2")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "https://github.com/lh3/seqtk/archive/v"
                      version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Remove extraneous header files, as is done in the seqtk
                    ;; master branch.
                    (for-each delete-file
                              (list "ksort.h" "kstring.h" "kvec.h"))
                    #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; no "configure" script
           (delete 'configure)
           (replace 'check
             ;; There are no tests, so we just run a sanity check.
             (lambda _ (zero? (system* "./seqtk" "seq"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 (install-file "seqtk" bin)
                 ;; Return #t explicitly, as the other phases in this file
                 ;; do, rather than the unspecified value of 'install-file'.
                 #t))))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/lh3/seqtk")
      (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
      (description
       "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
      (license license:expat)))
  ;; SNAP: built with plain make, checked with the "unit_tests" program,
  ;; and installed by copying its two executables.
  (define-public snap-aligner
    (package
      (name "snap-aligner")
      (version "1.0beta.18")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "https://github.com/amplab/snap/archive/v"
                      version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
      (build-system gnu-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; no "configure" script
           (delete 'configure)
           (replace 'check
             (lambda _
               (zero? (system* "./unit_tests"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("snap-aligner" "SNAPCommand"))
                 #t))))))
      (native-inputs
       `(("zlib" ,zlib)))
      (home-page "http://snap.cs.berkeley.edu/")
      (synopsis "Short read DNA sequence aligner")
      (description
       "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
      ;; 32-bit systems are not supported by the unpatched code.
      ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
      ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
      ;; systems without a lot of memory cannot make good use of this program.
      (supported-systems '("x86_64-linux"))
      (license license:asl2.0)))
  ;; SortMeRNA: executables go to "out", the FASTA reference databases to the
  ;; separate "db" output.
  (define-public sortmerna
    (package
      (name "sortmerna")
      (version "2.1b")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/biocore/sortmerna/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
      (build-system gnu-build-system)
      (outputs '("out"                    ;for binaries
                 "db"))                   ;for sequence databases
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                     (databases
                      (string-append (assoc-ref outputs "db")
                                     "/share/sortmerna/rRNA_databases")))
                 ;; The two executables produced by the build.
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("sortmerna" "indexdb_rna"))
                 ;; Every file below "rRNA_databases" whose name matches
                 ;; ".*fasta".
                 (for-each (lambda (fasta)
                             (install-file fasta databases))
                           (find-files "rRNA_databases" ".*fasta"))
                 #t))))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
      (synopsis "Biological sequence analysis tool for NGS reads")
      (description
       "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
      ;; The source includes x86 specific code
      (supported-systems '("x86_64-linux" "i686-linux"))
      (license license:lgpl3)))
  ;; STAR, built from the "source" sub-directory against the packaged htslib
  ;; instead of the bundled copy, which the snippet removes.
  (define-public star
    (package
      (name "star")
      (version "2.5.3a")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                    version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "013wirlz8lllgjyagl48l75n1isxyabqb3sj7qlsl0x1rmvqw99a"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Call "rm" by name rather than by absolute path.
                    (substitute* "source/Makefile"
                      (("/bin/rm") "rm"))
                    ;; Remove pre-built binaries and bundled htslib sources.
                    (delete-file-recursively "bin/MacOSX_x86_64")
                    (delete-file-recursively "bin/Linux_x86_64")
                    (delete-file-recursively "bin/Linux_x86_64_static")
                    (delete-file-recursively "source/htslib")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                      ;no check target
         #:make-flags '("STAR")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-source-dir
             (lambda _ (chdir "source") #t))
           (add-after 'enter-source-dir 'do-not-use-bundled-htslib
             (lambda _
               ;; Drop the "htslib" prerequisite of the "Depend.list" rule.
               ;; Dots that stand for a literal "." are escaped in all
               ;; patterns of this phase.
               (substitute* "Makefile"
                 (("(Depend\\.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                   _ prefix) prefix))
               ;; Turn quoted includes of the bundled headers into
               ;; angle-bracket includes.
               (substitute* '("BAMfunctions.cpp"
                              "signalFromBAM.h"
                              "bam_cat.h"
                              "bam_cat.c"
                              "STAR.cpp"
                              "bamRemoveDuplicates.cpp")
                 (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                  (string-append "#include <" header ">")))
               (substitute* "IncludeDefine.h"
                 (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                  (string-append "<" header ">")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 (install-file "STAR" bin))
               #t))
           ;; no "configure" script
           (delete 'configure))))
      (native-inputs
       `(("xxd" ,xxd)))
      (inputs
       `(("htslib" ,htslib)
         ("zlib" ,zlib)))
      (home-page "https://github.com/alexdobin/STAR")
      (synopsis "Universal RNA-seq aligner")
      (description
       "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
      ;; Only 64-bit systems are supported according to the README.
      (supported-systems '("x86_64-linux" "mips64el-linux"))
      ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
      (license license:gpl3+)))
  ;; Subread, built from its "src" directory with "Makefile.Linux"; the
  ;; programs end up in "../bin", which is copied to the output.
  (define-public subread
    (package
      (name "subread")
      (version "1.5.1")
      (source (origin
                (method url-fetch)
                (uri (string-append "mirror://sourceforge/subread/subread-"
                                    version "/subread-" version "-source.tar.gz"))
                (sha256
                 (base32
                  "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ;no "check" target
         ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
         ;; optimizations by default, so we override these flags such that x86_64
         ;; flags are only added when the build target is an x86_64 system.
         #:make-flags
         (list (let ((target-system ,(or (%current-target-system)
                                         (%current-system)))
                     (flags '("-ggdb" "-fomit-frame-pointer"
                              "-ffast-math" "-funroll-loops"
                              "-fmessage-length=0"
                              "-O9" "-Wall" "-DMAKE_FOR_EXON"
                              "-DMAKE_STANDALONE"
                              "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                     (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
                 (string-append "CCFLAGS="
                                (string-join
                                 (if (string-prefix? "x86_64" target-system)
                                     (append flags flags64)
                                     flags))))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}")
         #:phases
         ;; Written with 'modify-phases', like the other packages in this
         ;; file, instead of nested alist-* calls.
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-dir
             (lambda _ (chdir "src") #t))
           ;; no "configure" script
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs `(("zlib" ,zlib)))
      (home-page "http://bioinf.wehi.edu.au/subread-package/")
      (synopsis "Tool kit for processing next-gen sequencing data")
      (description
       "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
      (license license:gpl3+)))
  ;; StringTie, built against the packaged samtools 0.1 in place of the
  ;; bundled "samtools-0.1.18" directory, which the snippet deletes.
  (define-public stringtie
    (package
      (name "stringtie")
      (version "1.2.1")
      (source (origin
                (method url-fetch)
                (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                    "stringtie-" version ".tar.gz"))
                (sha256
                 (base32
                  "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    (delete-file-recursively "samtools-0.1.18")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ;no test suite
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-duplicate-typedef
             (lambda _
               ;; This typedef conflicts with the typedef in
               ;; glibc-2.25/include/bits/types.h
               (substitute* "gclib/GThreads.h"
                 (("typedef long long __intmax_t;") ""))
               #t))
           ;; no configure script
           (delete 'configure)
           (add-before 'build 'use-system-samtools
             (lambda _
               ;; The "stringtie" target no longer depends on the bundled
               ;; libbam.a ...
               (substitute* "Makefile"
                 (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                  "stringtie: "))
               ;; ... and the samtools headers are included through the
               ;; "samtools/" prefix.
               (substitute* '("gclib/GBam.h"
                              "gclib/GBam.cpp")
                 (("#include \"(bam|sam|kstring).h\"" _ header)
                  (string-append "#include <samtools/" header ".h>")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/")))
                 (install-file "stringtie" bin)
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "http://ccb.jhu.edu/software/stringtie/")
      (synopsis "Transcript assembly and quantification for RNA-Seq data")
      (description
       "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
      (license license:artistic2.0)))
  ;; Taxtastic from PyPI, built with Python 2; the tests are run through
  ;; unittest discovery.
  (define-public taxtastic
    (package
      (name "taxtastic")
      (version "0.6.4")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "taxtastic" version))
                (sha256
                 (base32
                  "0s79z8kfl853x7l4h8ms05k31q87aw62nrchlk20w9n227j35929"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             (lambda _
               (let ((status (system* "python" "-m" "unittest"
                                      "discover" "-v")))
                 (zero? status)))))))
      (propagated-inputs
       `(("python-sqlalchemy" ,python2-sqlalchemy)
         ("python-decorator" ,python2-decorator)
         ("python-biopython" ,python2-biopython)
         ("python-pandas" ,python2-pandas)))
      (home-page "https://github.com/fhcrc/taxtastic")
      (synopsis "Tools for taxonomic naming and annotation")
      (description
       "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
      (license license:gpl3+)))
  ;; VCFtools release tarball, built with the GNU build system and a few
  ;; make variables overridden.
  (define-public vcftools
    (package
      (name "vcftools")
      (version "0.1.15")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "https://github.com/vcftools/vcftools/releases/download/v"
                      version "/vcftools-" version ".tar.gz"))
                (sha256
                 (base32
                  "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; no "check" target
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list "CFLAGS=-O2"             ; override "-m64" flag
                 (string-append "PREFIX=" out)
                 (string-append "MANDIR=" out "/share/man/man1")))))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (home-page "https://vcftools.github.io/")
      (synopsis "Tools for working with VCF files")
      (description
       "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
      ;; The license is declared as LGPLv3 in the README and
      ;; at https://vcftools.github.io/license.html
      (license license:lgpl3)))
  ;; Infernal, built with the default phases of the GNU build system.
  (define-public infernal
    (package
      (name "infernal")
      (version "1.1.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://eddylab.org/software/infernal/"
                             "infernal-" version ".tar.gz"))
         (sha256
          (base32
           "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("perl" ,perl)))                 ; for tests
      (home-page "http://eddylab.org/infernal/")
      (synopsis "Inference of RNA alignments")
      (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
      ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
      (supported-systems '("i686-linux" "x86_64-linux"))
      (license license:bsd-3)))
  ;; CENTIPEDE R package, fetched from R-Forge.
  (define-public r-centipede
    (package
      (name "r-centipede")
      (version "1.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://download.r-forge.r-project.org/"
                             "src/contrib/CENTIPEDE_" version ".tar.gz"))
         (sha256
          (base32
           "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
      (build-system r-build-system)
      (home-page "http://centipede.uchicago.edu/")
      (synopsis "Predict transcription factor binding sites")
      (description
       "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
      ;; Two licenses are listed for this package.
      (license (list license:gpl2+ license:gpl3+))))
  ;; vegan from CRAN; gfortran is a build-time input.
  (define-public r-vegan
    (package
      (name "r-vegan")
      (version "2.4-4")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "vegan" version))
                (sha256
                 (base32
                  "1n57dzv2aid6iqd9fkqik401sidqanhzsawyak94qbiyh6dbd1x9"))))
      (build-system r-build-system)
      (native-inputs
       `(("gfortran" ,gfortran)))
      ;; R packages propagated to users of this package.
      (propagated-inputs
       `(("r-cluster" ,r-cluster)
         ("r-lattice" ,r-lattice)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-permute" ,r-permute)))
      (home-page "https://cran.r-project.org/web/packages/vegan")
      (synopsis "Functions for community ecology")
      (description
       "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
      (license license:gpl2+)))
  5549. (define-public r-annotate
  5550. (package
  5551. (name "r-annotate")
  5552. (version "1.54.0")
  5553. (source
  5554. (origin
  5555. (method url-fetch)
  5556. (uri (bioconductor-uri "annotate" version))
  5557. (sha256
  5558. (base32
  5559. "03hmbvp3i6lvd307fqdg7akxi2qp322rlky3bzw0zccgm0i0221g"))))
  5560. (build-system r-build-system)
  5561. (propagated-inputs
  5562. `(("r-annotationdbi" ,r-annotationdbi)
  5563. ("r-biobase" ,r-biobase)
  5564. ("r-biocgenerics" ,r-biocgenerics)
  5565. ("r-dbi" ,r-dbi)
  5566. ("r-rcurl" ,r-rcurl)
  5567. ("r-xml" ,r-xml)
  5568. ("r-xtable" ,r-xtable)))
  5569. (home-page
  5570. "http://bioconductor.org/packages/annotate")
  5571. (synopsis "Annotation for microarrays")
  5572. (description "This package provides R environments for the annotation of
  5573. microarrays.")
  5574. (license license:artistic2.0)))
  5575. (define-public r-geneplotter
  5576. (package
  5577. (name "r-geneplotter")
  5578. (version "1.54.0")
  5579. (source
  5580. (origin
  5581. (method url-fetch)
  5582. (uri (bioconductor-uri "geneplotter" version))
  5583. (sha256
  5584. (base32
  5585. "0a0ajns21db5rrjl16bq6wawggsnxr00fg184pc38nmfghv4z4b6"))))
  5586. (build-system r-build-system)
  5587. (propagated-inputs
  5588. `(("r-annotate" ,r-annotate)
  5589. ("r-annotationdbi" ,r-annotationdbi)
  5590. ("r-biobase" ,r-biobase)
  5591. ("r-biocgenerics" ,r-biocgenerics)
  5592. ("r-lattice" ,r-lattice)
  5593. ("r-rcolorbrewer" ,r-rcolorbrewer)))
  5594. (home-page "http://bioconductor.org/packages/geneplotter")
  5595. (synopsis "Graphics functions for genomic data")
  5596. (description
  5597. "This package provides functions for plotting genomic data.")
  5598. (license license:artistic2.0)))
  5599. (define-public r-genefilter
  5600. (package
  5601. (name "r-genefilter")
  5602. (version "1.58.0")
  5603. (source
  5604. (origin
  5605. (method url-fetch)
  5606. (uri (bioconductor-uri "genefilter" version))
  5607. (sha256
  5608. (base32
  5609. "0sf2hdi9nv6r83vn1y65m4jiba8pffddpj46d6yjn5rlsixplmqg"))))
  5610. (build-system r-build-system)
  5611. (native-inputs
  5612. `(("gfortran" ,gfortran)))
  5613. (propagated-inputs
  5614. `(("r-annotate" ,r-annotate)
  5615. ("r-annotationdbi" ,r-annotationdbi)
  5616. ("r-biobase" ,r-biobase)
  5617. ("r-s4vectors" ,r-s4vectors)
  5618. ("r-survival" ,r-survival)))
  5619. (home-page "http://bioconductor.org/packages/genefilter")
  5620. (synopsis "Filter genes from high-throughput experiments")
  5621. (description
  5622. "This package provides basic functions for filtering genes from
  5623. high-throughput sequencing experiments.")
  5624. (license license:artistic2.0)))
  5625. (define-public r-deseq2
  5626. (package
  5627. (name "r-deseq2")
  5628. (version "1.16.1")
  5629. (source
  5630. (origin
  5631. (method url-fetch)
  5632. (uri (bioconductor-uri "DESeq2" version))
  5633. (sha256
  5634. (base32
  5635. "01pvyljxkwazxl510v7h0971nx65iqd2bdkbdhw3xzind0n9pdvq"))))
  5636. (properties `((upstream-name . "DESeq2")))
  5637. (build-system r-build-system)
  5638. (propagated-inputs
  5639. `(("r-biobase" ,r-biobase)
  5640. ("r-biocgenerics" ,r-biocgenerics)
  5641. ("r-biocparallel" ,r-biocparallel)
  5642. ("r-genefilter" ,r-genefilter)
  5643. ("r-geneplotter" ,r-geneplotter)
  5644. ("r-genomicranges" ,r-genomicranges)
  5645. ("r-ggplot2" ,r-ggplot2)
  5646. ("r-hmisc" ,r-hmisc)
  5647. ("r-iranges" ,r-iranges)
  5648. ("r-locfit" ,r-locfit)
  5649. ("r-rcpp" ,r-rcpp)
  5650. ("r-rcpparmadillo" ,r-rcpparmadillo)
  5651. ("r-s4vectors" ,r-s4vectors)
  5652. ("r-summarizedexperiment" ,r-summarizedexperiment)))
  5653. (home-page "http://bioconductor.org/packages/DESeq2")
  5654. (synopsis "Differential gene expression analysis")
  5655. (description
  5656. "This package provides functions to estimate variance-mean dependence in
  5657. count data from high-throughput nucleotide sequencing assays and test for
  5658. differential expression based on a model using the negative binomial
  5659. distribution.")
  5660. (license license:lgpl3+)))
  5661. (define-public r-dexseq
  5662. (package
  5663. (name "r-dexseq")
  5664. (version "1.22.0")
  5665. (source
  5666. (origin
  5667. (method url-fetch)
  5668. (uri (bioconductor-uri "DEXSeq" version))
  5669. (sha256
  5670. (base32
  5671. "085aqk1wlzzqcqcqhvz74y099kr2ln5dwdxd3rl6zan806mgwahg"))))
  5672. (properties `((upstream-name . "DEXSeq")))
  5673. (build-system r-build-system)
  5674. (propagated-inputs
  5675. `(("r-annotationdbi" ,r-annotationdbi)
  5676. ("r-biobase" ,r-biobase)
  5677. ("r-biocgenerics" ,r-biocgenerics)
  5678. ("r-biocparallel" ,r-biocparallel)
  5679. ("r-biomart" ,r-biomart)
  5680. ("r-deseq2" ,r-deseq2)
  5681. ("r-genefilter" ,r-genefilter)
  5682. ("r-geneplotter" ,r-geneplotter)
  5683. ("r-genomicranges" ,r-genomicranges)
  5684. ("r-hwriter" ,r-hwriter)
  5685. ("r-iranges" ,r-iranges)
  5686. ("r-rcolorbrewer" ,r-rcolorbrewer)
  5687. ("r-rsamtools" ,r-rsamtools)
  5688. ("r-s4vectors" ,r-s4vectors)
  5689. ("r-statmod" ,r-statmod)
  5690. ("r-stringr" ,r-stringr)
  5691. ("r-summarizedexperiment" ,r-summarizedexperiment)))
  5692. (home-page "http://bioconductor.org/packages/DEXSeq")
  5693. (synopsis "Inference of differential exon usage in RNA-Seq")
  5694. (description
  5695. "This package is focused on finding differential exon usage using RNA-seq
  5696. exon counts between samples with different experimental designs. It provides
  5697. functions that allows the user to make the necessary statistical tests based
  5698. on a model that uses the negative binomial distribution to estimate the
  5699. variance between biological replicates and generalized linear models for
  5700. testing. The package also provides functions for the visualization and
  5701. exploration of the results.")
  5702. (license license:gpl3+)))
  5703. (define-public r-annotationforge
  5704. (package
  5705. (name "r-annotationforge")
  5706. (version "1.18.1")
  5707. (source
  5708. (origin
  5709. (method url-fetch)
  5710. (uri (bioconductor-uri "AnnotationForge" version))
  5711. (sha256
  5712. (base32
  5713. "1366qvykd9cpcvwgc5g9mm9adw9rxw6p4814dd6l5fyb0pwpmysx"))))
  5714. (properties
  5715. `((upstream-name . "AnnotationForge")))
  5716. (build-system r-build-system)
  5717. (propagated-inputs
  5718. `(("r-annotationdbi" ,r-annotationdbi)
  5719. ("r-biobase" ,r-biobase)
  5720. ("r-biocgenerics" ,r-biocgenerics)
  5721. ("r-dbi" ,r-dbi)
  5722. ("r-rcurl" ,r-rcurl)
  5723. ("r-rsqlite" ,r-rsqlite)
  5724. ("r-s4vectors" ,r-s4vectors)
  5725. ("r-xml" ,r-xml)))
  5726. (home-page "http://bioconductor.org/packages/AnnotationForge")
  5727. (synopsis "Code for building annotation database packages")
  5728. (description
  5729. "This package provides code for generating Annotation packages and their
  5730. databases. Packages produced are intended to be used with AnnotationDbi.")
  5731. (license license:artistic2.0)))
  5732. (define-public r-rbgl
  5733. (package
  5734. (name "r-rbgl")
  5735. (version "1.52.0")
  5736. (source
  5737. (origin
  5738. (method url-fetch)
  5739. (uri (bioconductor-uri "RBGL" version))
  5740. (sha256
  5741. (base32
  5742. "11db6kvz453ypj9ds3xpjqzwrrjck84ijn4wlhkfyz2dzdgd5ryv"))))
  5743. (properties `((upstream-name . "RBGL")))
  5744. (build-system r-build-system)
  5745. (propagated-inputs `(("r-graph" ,r-graph)))
  5746. (home-page "http://www.bioconductor.org/packages/RBGL")
  5747. (synopsis "Interface to the Boost graph library")
  5748. (description
  5749. "This package provides a fairly extensive and comprehensive interface to
  5750. the graph algorithms contained in the Boost library.")
  5751. (license license:artistic2.0)))
  5752. (define-public r-gseabase
  5753. (package
  5754. (name "r-gseabase")
  5755. (version "1.38.0")
  5756. (source
  5757. (origin
  5758. (method url-fetch)
  5759. (uri (bioconductor-uri "GSEABase" version))
  5760. (sha256
  5761. (base32
  5762. "1c6i6g4fj3b8wjyxyygr7i3v8sxrq1ffb2bbicya5ah2gdaclfad"))))
  5763. (properties `((upstream-name . "GSEABase")))
  5764. (build-system r-build-system)
  5765. (propagated-inputs
  5766. `(("r-annotate" ,r-annotate)
  5767. ("r-annotationdbi" ,r-annotationdbi)
  5768. ("r-biobase" ,r-biobase)
  5769. ("r-biocgenerics" ,r-biocgenerics)
  5770. ("r-graph" ,r-graph)
  5771. ("r-xml" ,r-xml)))
  5772. (home-page "http://bioconductor.org/packages/GSEABase")
  5773. (synopsis "Gene set enrichment data structures and methods")
  5774. (description
  5775. "This package provides classes and methods to support @dfn{Gene Set
  5776. Enrichment Analysis} (GSEA).")
  5777. (license license:artistic2.0)))
  5778. (define-public r-category
  5779. (package
  5780. (name "r-category")
  5781. (version "2.42.1")
  5782. (source
  5783. (origin
  5784. (method url-fetch)
  5785. (uri (bioconductor-uri "Category" version))
  5786. (sha256
  5787. (base32
  5788. "1w186nhc85bglcgmbcrsdbb8l6rph21pl5kdwjqwkp0jnr9z0ifn"))))
  5789. (properties `((upstream-name . "Category")))
  5790. (build-system r-build-system)
  5791. (propagated-inputs
  5792. `(("r-annotate" ,r-annotate)
  5793. ("r-annotationdbi" ,r-annotationdbi)
  5794. ("r-biobase" ,r-biobase)
  5795. ("r-biocgenerics" ,r-biocgenerics)
  5796. ("r-genefilter" ,r-genefilter)
  5797. ("r-graph" ,r-graph)
  5798. ("r-gseabase" ,r-gseabase)
  5799. ("r-matrix" ,r-matrix)
  5800. ("r-rbgl" ,r-rbgl)
  5801. ("r-rsqlite" ,r-rsqlite)))
  5802. (home-page "http://bioconductor.org/packages/Category")
  5803. (synopsis "Category analysis")
  5804. (description
  5805. "This package provides a collection of tools for performing category
  5806. analysis.")
  5807. (license license:artistic2.0)))
  5808. (define-public r-gostats
  5809. (package
  5810. (name "r-gostats")
  5811. (version "2.42.0")
  5812. (source
  5813. (origin
  5814. (method url-fetch)
  5815. (uri (bioconductor-uri "GOstats" version))
  5816. (sha256
  5817. (base32
  5818. "0qvqjgfnd9ap4rikvyxa9p4dhcnccvkw8phzv88vghh6pq463d62"))))
  5819. (properties `((upstream-name . "GOstats")))
  5820. (build-system r-build-system)
  5821. (propagated-inputs
  5822. `(("r-annotate" ,r-annotate)
  5823. ("r-annotationdbi" ,r-annotationdbi)
  5824. ("r-annotationforge" ,r-annotationforge)
  5825. ("r-biobase" ,r-biobase)
  5826. ("r-category" ,r-category)
  5827. ("r-go-db" ,r-go-db)
  5828. ("r-graph" ,r-graph)
  5829. ("r-rbgl" ,r-rbgl)))
  5830. (home-page "http://bioconductor.org/packages/GOstats")
  5831. (synopsis "Tools for manipulating GO and microarrays")
  5832. (description
  5833. "This package provides a set of tools for interacting with GO and
  5834. microarray data. A variety of basic manipulation tools for graphs, hypothesis
  5835. testing and other simple calculations.")
  5836. (license license:artistic2.0)))
  5837. (define-public r-shortread
  5838. (package
  5839. (name "r-shortread")
  5840. (version "1.34.0")
  5841. (source
  5842. (origin
  5843. (method url-fetch)
  5844. (uri (bioconductor-uri "ShortRead" version))
  5845. (sha256
  5846. (base32
  5847. "0ayk3d5625ymb5g2gycq6banzqmyd642xrwjzhdshz2dwid7kly8"))))
  5848. (properties `((upstream-name . "ShortRead")))
  5849. (build-system r-build-system)
  5850. (inputs
  5851. `(("zlib" ,zlib)))
  5852. (propagated-inputs
  5853. `(("r-biobase" ,r-biobase)
  5854. ("r-biocgenerics" ,r-biocgenerics)
  5855. ("r-biocparallel" ,r-biocparallel)
  5856. ("r-biostrings" ,r-biostrings)
  5857. ("r-genomeinfodb" ,r-genomeinfodb)
  5858. ("r-genomicalignments" ,r-genomicalignments)
  5859. ("r-genomicranges" ,r-genomicranges)
  5860. ("r-hwriter" ,r-hwriter)
  5861. ("r-iranges" ,r-iranges)
  5862. ("r-lattice" ,r-lattice)
  5863. ("r-latticeextra" ,r-latticeextra)
  5864. ("r-rsamtools" ,r-rsamtools)
  5865. ("r-s4vectors" ,r-s4vectors)
  5866. ("r-xvector" ,r-xvector)
  5867. ("r-zlibbioc" ,r-zlibbioc)))
  5868. (home-page "http://bioconductor.org/packages/ShortRead")
  5869. (synopsis "FASTQ input and manipulation tools")
  5870. (description
  5871. "This package implements sampling, iteration, and input of FASTQ files.
  5872. It includes functions for filtering and trimming reads, and for generating a
  5873. quality assessment report. Data are represented as
  5874. @code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
  5875. purposes. The package also contains legacy support for early single-end,
  5876. ungapped alignment formats.")
  5877. (license license:artistic2.0)))
  5878. (define-public r-systempiper
  5879. (package
  5880. (name "r-systempiper")
  5881. (version "1.10.0")
  5882. (source
  5883. (origin
  5884. (method url-fetch)
  5885. (uri (bioconductor-uri "systemPipeR" version))
  5886. (sha256
  5887. (base32
  5888. "0c3m5rq63ypv15yca97yag5d4vgd7xj9by2a4sd8z0pcmpajz0hw"))))
  5889. (properties `((upstream-name . "systemPipeR")))
  5890. (build-system r-build-system)
  5891. (propagated-inputs
  5892. `(("r-annotate" ,r-annotate)
  5893. ("r-batchjobs" ,r-batchjobs)
  5894. ("r-biocgenerics" ,r-biocgenerics)
  5895. ("r-biostrings" ,r-biostrings)
  5896. ("r-deseq2" ,r-deseq2)
  5897. ("r-edger" ,r-edger)
  5898. ("r-genomicfeatures" ,r-genomicfeatures)
  5899. ("r-genomicranges" ,r-genomicranges)
  5900. ("r-ggplot2" ,r-ggplot2)
  5901. ("r-go-db" ,r-go-db)
  5902. ("r-gostats" ,r-gostats)
  5903. ("r-limma" ,r-limma)
  5904. ("r-pheatmap" ,r-pheatmap)
  5905. ("r-rjson" ,r-rjson)
  5906. ("r-rsamtools" ,r-rsamtools)
  5907. ("r-shortread" ,r-shortread)
  5908. ("r-summarizedexperiment" ,r-summarizedexperiment)
  5909. ("r-variantannotation" ,r-variantannotation)))
  5910. (home-page "https://github.com/tgirke/systemPipeR")
  5911. (synopsis "Next generation sequencing workflow and reporting environment")
  5912. (description
  5913. "This R package provides tools for building and running automated
  5914. end-to-end analysis workflows for a wide range of @dfn{next generation
  5915. sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
  5916. Important features include a uniform workflow interface across different NGS
  5917. applications, automated report generation, and support for running both R and
  5918. command-line software, such as NGS aligners or peak/variant callers, on local
  5919. computers or compute clusters. Efficient handling of complex sample sets and
  5920. experimental designs is facilitated by a consistently implemented sample
  5921. annotation infrastructure.")
  5922. (license license:artistic2.0)))
  5923. (define-public r-grohmm
  5924. (package
  5925. (name "r-grohmm")
  5926. (version "1.10.0")
  5927. (source
  5928. (origin
  5929. (method url-fetch)
  5930. (uri (bioconductor-uri "groHMM" version))
  5931. (sha256
  5932. (base32
  5933. "16k1kp4sbhh0vp7dzywafq52csq42ksqfrqfy4bdv1qbd7536dpd"))))
  5934. (properties `((upstream-name . "groHMM")))
  5935. (build-system r-build-system)
  5936. (propagated-inputs
  5937. `(("r-genomeinfodb" ,r-genomeinfodb)
  5938. ("r-genomicalignments" ,r-genomicalignments)
  5939. ("r-genomicranges" ,r-genomicranges)
  5940. ("r-iranges" ,r-iranges)
  5941. ("r-mass" ,r-mass)
  5942. ("r-rtracklayer" ,r-rtracklayer)
  5943. ("r-s4vectors" ,r-s4vectors)))
  5944. (home-page "https://github.com/Kraus-Lab/groHMM")
  5945. (synopsis "GRO-seq analysis pipeline")
  5946. (description
  5947. "This package provides a pipeline for the analysis of GRO-seq data.")
  5948. (license license:gpl3+)))
  5949. (define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  5950. (package
  5951. (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
  5952. (version "3.2.2")
  5953. (source (origin
  5954. (method url-fetch)
  5955. ;; We cannot use bioconductor-uri here because this tarball is
  5956. ;; located under "data/annotation/" instead of "bioc/".
  5957. (uri (string-append "http://bioconductor.org/packages/"
  5958. "release/data/annotation/src/contrib"
  5959. "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
  5960. version ".tar.gz"))
  5961. (sha256
  5962. (base32
  5963. "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
  5964. (properties
  5965. `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
  5966. (build-system r-build-system)
  5967. ;; As this package provides little more than a very large data file it
  5968. ;; doesn't make sense to build substitutes.
  5969. (arguments `(#:substitutable? #f))
  5970. (propagated-inputs
  5971. `(("r-genomicfeatures" ,r-genomicfeatures)))
  5972. (home-page
  5973. "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
  5974. (synopsis "Annotation package for human genome in TxDb format")
  5975. (description
  5976. "This package provides an annotation database of Homo sapiens genome
  5977. data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
  5978. track. The database is exposed as a @code{TxDb} object.")
  5979. (license license:artistic2.0)))
  5980. (define-public r-sparql
  5981. (package
  5982. (name "r-sparql")
  5983. (version "1.16")
  5984. (source (origin
  5985. (method url-fetch)
  5986. (uri (cran-uri "SPARQL" version))
  5987. (sha256
  5988. (base32
  5989. "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
  5990. (properties `((upstream-name . "SPARQL")))
  5991. (build-system r-build-system)
  5992. (propagated-inputs
  5993. `(("r-rcurl" ,r-rcurl)
  5994. ("r-xml" ,r-xml)))
  5995. (home-page "http://cran.r-project.org/web/packages/SPARQL")
  5996. (synopsis "SPARQL client for R")
  5997. (description "This package provides an interface to use SPARQL to pose
  5998. SELECT or UPDATE queries to an end-point.")
  5999. ;; The only license indication is found in the DESCRIPTION file,
  6000. ;; which states GPL-3. So we cannot assume GPLv3+.
  6001. (license license:gpl3)))
  6002. (define-public vsearch
  6003. (package
  6004. (name "vsearch")
  6005. (version "2.5.0")
  6006. (source
  6007. (origin
  6008. (method url-fetch)
  6009. (uri (string-append
  6010. "https://github.com/torognes/vsearch/archive/v"
  6011. version ".tar.gz"))
  6012. (file-name (string-append name "-" version ".tar.gz"))
  6013. (sha256
  6014. (base32
  6015. "1k8wf3qns4mqrsizywbkqcasqjw000m4drxsag3qd7390pwvf9kz"))
  6016. (patches (search-patches "vsearch-unbundle-cityhash.patch"))
  6017. (snippet
  6018. '(begin
  6019. ;; Remove bundled cityhash sources. The vsearch source is adjusted
  6020. ;; for this in the patch.
  6021. (delete-file "src/city.h")
  6022. (delete-file "src/citycrc.h")
  6023. (delete-file "src/city.cc")
  6024. #t))))
  6025. (build-system gnu-build-system)
  6026. (arguments
  6027. `(#:phases
  6028. (modify-phases %standard-phases
  6029. (add-after 'unpack 'autogen
  6030. (lambda _ (zero? (system* "autoreconf" "-vif")))))))
  6031. (inputs
  6032. `(("zlib" ,zlib)
  6033. ("bzip2" ,bzip2)
  6034. ("cityhash" ,cityhash)))
  6035. (native-inputs
  6036. `(("autoconf" ,autoconf)
  6037. ("automake" ,automake)))
  6038. (synopsis "Sequence search tools for metagenomics")
  6039. (description
  6040. "VSEARCH supports DNA sequence searching, clustering, chimera detection,
  6041. dereplication, pairwise alignment, shuffling, subsampling, sorting and
  6042. masking. The tool takes advantage of parallelism in the form of SIMD
  6043. vectorization as well as multiple threads to perform accurate alignments at
  6044. high speed. VSEARCH uses an optimal global aligner (full dynamic programming
  6045. Needleman-Wunsch).")
  6046. (home-page "https://github.com/torognes/vsearch")
  6047. ;; vsearch uses non-portable SSE intrinsics so building fails on other
  6048. ;; platforms.
  6049. (supported-systems '("x86_64-linux"))
  6050. ;; Dual licensed; also includes public domain source.
  6051. (license (list license:gpl3 license:bsd-2))))
  6052. (define-public pardre
  6053. (package
  6054. (name "pardre")
  6055. ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
  6056. (version "1.1.5-1")
  6057. (source
  6058. (origin
  6059. (method url-fetch)
  6060. (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
  6061. "1.1.5" ".tar.gz"))
  6062. (sha256
  6063. (base32
  6064. "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
  6065. (build-system gnu-build-system)
  6066. (arguments
  6067. `(#:tests? #f ; no tests included
  6068. #:phases
  6069. (modify-phases %standard-phases
  6070. (delete 'configure)
  6071. (replace 'install
  6072. (lambda* (#:key outputs #:allow-other-keys)
  6073. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  6074. (install-file "ParDRe" bin)
  6075. #t))))))
  6076. (inputs
  6077. `(("openmpi" ,openmpi)
  6078. ("zlib" ,zlib)))
  6079. (synopsis "Parallel tool to remove duplicate DNA reads")
  6080. (description
  6081. "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
  6082. Duplicate reads can be seen as identical or nearly identical sequences with
  6083. some mismatches. This tool lets users avoid the analysis of unnecessary
  6084. reads, reducing the time of subsequent procedures with the
  6085. dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
  6086. in order to exploit the parallel capabilities of multicore clusters. It is
  6087. faster than multithreaded counterparts (end of 2015) for the same number of
  6088. cores and, thanks to the message-passing technology, it can be executed on
  6089. clusters.")
  6090. (home-page "https://sourceforge.net/projects/pardre/")
  6091. (license license:gpl3+)))
  6092. (define-public ruby-bio-kseq
  6093. (package
  6094. (name "ruby-bio-kseq")
  6095. (version "0.0.2")
  6096. (source
  6097. (origin
  6098. (method url-fetch)
  6099. (uri (rubygems-uri "bio-kseq" version))
  6100. (sha256
  6101. (base32
  6102. "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
  6103. (build-system ruby-build-system)
  6104. (arguments
  6105. `(#:test-target "spec"))
  6106. (native-inputs
  6107. `(("bundler" ,bundler)
  6108. ("ruby-rspec" ,ruby-rspec)
  6109. ("ruby-rake-compiler" ,ruby-rake-compiler)))
  6110. (inputs
  6111. `(("zlib" ,zlib)))
  6112. (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
  6113. (description
  6114. "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
  6115. FASTQ parsing code. It provides a fast iterator over sequences and their
  6116. quality scores.")
  6117. (home-page "https://github.com/gusevfe/bio-kseq")
  6118. (license license:expat)))
  6119. (define-public bio-locus
  6120. (package
  6121. (name "bio-locus")
  6122. (version "0.0.7")
  6123. (source
  6124. (origin
  6125. (method url-fetch)
  6126. (uri (rubygems-uri "bio-locus" version))
  6127. (sha256
  6128. (base32
  6129. "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
  6130. (build-system ruby-build-system)
  6131. (native-inputs
  6132. `(("ruby-rspec" ,ruby-rspec)))
  6133. (synopsis "Tool for fast querying of genome locations")
  6134. (description
  6135. "Bio-locus is a tabix-like tool for fast querying of genome
  6136. locations. Many file formats in bioinformatics contain records that
  6137. start with a chromosome name and a position for a SNP, or a start-end
  6138. position for indels. Bio-locus allows users to store this chr+pos or
  6139. chr+pos+alt information in a database.")
  6140. (home-page "https://github.com/pjotrp/bio-locus")
  6141. (license license:expat)))
  6142. (define-public bio-blastxmlparser
  6143. (package
  6144. (name "bio-blastxmlparser")
  6145. (version "2.0.4")
  6146. (source (origin
  6147. (method url-fetch)
  6148. (uri (rubygems-uri "bio-blastxmlparser" version))
  6149. (sha256
  6150. (base32
  6151. "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
  6152. (build-system ruby-build-system)
  6153. (propagated-inputs
  6154. `(("ruby-bio-logger" ,ruby-bio-logger)
  6155. ("ruby-nokogiri" ,ruby-nokogiri)))
  6156. (inputs
  6157. `(("ruby-rspec" ,ruby-rspec)))
  6158. (synopsis "Fast big data BLAST XML parser and library")
  6159. (description
  6160. "Very fast parallel big-data BLAST XML file parser which can be used as
  6161. command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
  6162. generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
  6163. (home-page "https://github.com/pjotrp/blastxmlparser")
  6164. (license license:expat)))
  6165. (define-public bioruby
  6166. (package
  6167. (name "bioruby")
  6168. (version "1.5.1")
  6169. (source
  6170. (origin
  6171. (method url-fetch)
  6172. (uri (rubygems-uri "bio" version))
  6173. (sha256
  6174. (base32
  6175. "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
  6176. (build-system ruby-build-system)
  6177. (propagated-inputs
  6178. `(("ruby-libxml" ,ruby-libxml)))
  6179. (native-inputs
  6180. `(("which" ,which))) ; required for test phase
  6181. (arguments
  6182. `(#:phases
  6183. (modify-phases %standard-phases
  6184. (add-before 'build 'patch-test-command
  6185. (lambda _
  6186. (substitute* '("test/functional/bio/test_command.rb")
  6187. (("/bin/sh") (which "sh")))
  6188. (substitute* '("test/functional/bio/test_command.rb")
  6189. (("/bin/ls") (which "ls")))
  6190. (substitute* '("test/functional/bio/test_command.rb")
  6191. (("which") (which "which")))
  6192. (substitute* '("test/functional/bio/test_command.rb",
  6193. "test/data/command/echoarg2.sh")
  6194. (("/bin/echo") (which "echo")))
  6195. #t)))))
  6196. (synopsis "Ruby library, shell and utilities for bioinformatics")
  6197. (description "BioRuby comes with a comprehensive set of Ruby development
  6198. tools and libraries for bioinformatics and molecular biology. BioRuby has
  6199. components for sequence analysis, pathway analysis, protein modelling and
  6200. phylogenetic analysis; it supports many widely used data formats and provides
  6201. easy access to databases, external programs and public web services, including
  6202. BLAST, KEGG, GenBank, MEDLINE and GO.")
  6203. (home-page "http://bioruby.org/")
  6204. ;; Code is released under Ruby license, except for setup
  6205. ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
  6206. (license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
  6207. (define-public r-acsnminer
  6208. (package
  6209. (name "r-acsnminer")
  6210. (version "0.16.8.25")
  6211. (source (origin
  6212. (method url-fetch)
  6213. (uri (cran-uri "ACSNMineR" version))
  6214. (sha256
  6215. (base32
  6216. "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
  6217. (properties `((upstream-name . "ACSNMineR")))
  6218. (build-system r-build-system)
  6219. (propagated-inputs
  6220. `(("r-ggplot2" ,r-ggplot2)
  6221. ("r-gridextra" ,r-gridextra)))
  6222. (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
  6223. (synopsis "Gene enrichment analysis")
  6224. (description
  6225. "This package provides tools to compute and represent gene set enrichment
  6226. or depletion from your data based on pre-saved maps from the @dfn{Atlas of
  6227. Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
  6228. enrichment can be run with hypergeometric test or Fisher exact test, and can
  6229. use multiple corrections. Visualization of data can be done either by
  6230. barplots or heatmaps.")
  6231. (license license:gpl2+)))
  6232. (define-public r-biocgenerics
  6233. (package
  6234. (name "r-biocgenerics")
  6235. (version "0.22.0")
  6236. (source (origin
  6237. (method url-fetch)
  6238. (uri (bioconductor-uri "BiocGenerics" version))
  6239. (sha256
  6240. (base32
  6241. "0qbmz2qxwwi30xpxpvp2h1h7l494rbbz5d9pls5cfhqdv3wnpzsv"))))
  6242. (properties
  6243. `((upstream-name . "BiocGenerics")))
  6244. (build-system r-build-system)
  6245. (home-page "http://bioconductor.org/packages/BiocGenerics")
  6246. (synopsis "S4 generic functions for Bioconductor")
  6247. (description
  6248. "This package provides S4 generic functions needed by many Bioconductor
  6249. packages.")
  6250. (license license:artistic2.0)))
  6251. (define-public r-biocinstaller
  6252. (package
  6253. (name "r-biocinstaller")
  6254. (version "1.26.0")
  6255. (source (origin
  6256. (method url-fetch)
  6257. (uri (bioconductor-uri "BiocInstaller" version))
  6258. (sha256
  6259. (base32
  6260. "0njw2q3lq1rrjx8qzw5d2130l72bmd3g2z8qlxqmkdcbmmgliyj2"))))
  6261. (properties
  6262. `((upstream-name . "BiocInstaller")))
  6263. (build-system r-build-system)
  6264. (home-page "http://bioconductor.org/packages/BiocInstaller")
  6265. (synopsis "Install Bioconductor packages")
  6266. (description "This package is used to install and update R packages from
  6267. Bioconductor, CRAN, and Github.")
  6268. (license license:artistic2.0)))
  6269. (define-public r-biocviews
  6270. (package
  6271. (name "r-biocviews")
  6272. (version "1.44.0")
  6273. (source (origin
  6274. (method url-fetch)
  6275. (uri (bioconductor-uri "biocViews" version))
  6276. (sha256
  6277. (base32
  6278. "17hi8w0w63f5yc43kid5pbld3ca78sj6n8x9dmkbl8h48818xbga"))))
  6279. (properties
  6280. `((upstream-name . "biocViews")))
  6281. (build-system r-build-system)
  6282. (propagated-inputs
  6283. `(("r-biobase" ,r-biobase)
  6284. ("r-graph" ,r-graph)
  6285. ("r-rbgl" ,r-rbgl)
  6286. ("r-rcurl" ,r-rcurl)
  6287. ("r-xml" ,r-xml)
  6288. ("r-knitr" ,r-knitr)
  6289. ("r-runit" ,r-runit)))
  6290. (home-page "http://bioconductor.org/packages/biocViews")
  6291. (synopsis "Bioconductor package categorization helper")
  6292. (description "The purpose of biocViews is to create HTML pages that
  6293. categorize packages in a Bioconductor package repository according to keywords,
  6294. also known as views, in a controlled vocabulary.")
  6295. (license license:artistic2.0)))
  6296. (define-public r-bookdown
  6297. (package
  6298. (name "r-bookdown")
  6299. (version "0.5")
  6300. (source (origin
  6301. (method url-fetch)
  6302. (uri (cran-uri "bookdown" version))
  6303. (sha256
  6304. (base32
  6305. "0zm63kr4f4kja4qpwkzl119zzyciqj7ihajfqgfjpgb4dzaiycxp"))))
  6306. (build-system r-build-system)
  6307. (propagated-inputs
  6308. `(("r-htmltools" ,r-htmltools)
  6309. ("r-knitr" ,r-knitr)
  6310. ("r-rmarkdown" ,r-rmarkdown)
  6311. ("r-yaml" ,r-yaml)))
  6312. (home-page "https://github.com/rstudio/bookdown")
  6313. (synopsis "Authoring books and technical documents with R markdown")
  6314. (description "This package provides output formats and utilities for
  6315. authoring books and technical documents with R Markdown.")
  6316. (license license:gpl3)))
  6317. (define-public r-biocstyle
  6318. (package
  6319. (name "r-biocstyle")
  6320. (version "2.4.1")
  6321. (source (origin
  6322. (method url-fetch)
  6323. (uri (bioconductor-uri "BiocStyle" version))
  6324. (sha256
  6325. (base32
  6326. "0bmgmsfll923v573g0kyzlmjd7gly5jwgd8vkrcwvbam1gz75f2c"))))
  6327. (properties
  6328. `((upstream-name . "BiocStyle")))
  6329. (build-system r-build-system)
  6330. (propagated-inputs
  6331. `(("r-bookdown" ,r-bookdown)
  6332. ("r-knitr" ,r-knitr)
  6333. ("r-rmarkdown" ,r-rmarkdown)
  6334. ("r-yaml" ,r-yaml)))
  6335. (home-page "http://bioconductor.org/packages/BiocStyle")
  6336. (synopsis "Bioconductor formatting styles")
  6337. (description "This package provides standard formatting styles for
  6338. Bioconductor PDF and HTML documents. Package vignettes illustrate use and
  6339. functionality.")
  6340. (license license:artistic2.0)))
  ;; BiocCheck (fetched via `bioconductor-uri'): additional quality checks
  ;; for R packages that are to be submitted to Bioconductor.
  (define-public r-bioccheck
    (package
      (name "r-bioccheck")
      (version "1.12.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "BiocCheck" version))
         (sha256
          (base32 "01zkw5hggzvn0wj4msac71k1mknq4h2inn1c2hwqgw4cy1675wl0"))))
      (properties
       `((upstream-name . "BiocCheck")))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; This package can be used by calling BiocCheck(<package>) from
           ;; within R, or by running R CMD BiocCheck <package>.  This phase
           ;; makes sure the latter works.  For this to work, the BiocCheck
           ;; script must be somewhere on the PATH (not the R bin directory).
           (add-after 'install 'install-bioccheck-subcommand
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (dest-dir (string-append out "/bin"))
                      ;; No trailing slash here: the separator is added when
                      ;; the file names are appended below, which avoids
                      ;; "//" in the symlink targets.
                      (script-dir
                       (string-append out "/site-library/BiocCheck/script")))
                 (mkdir-p dest-dir)
                 ;; Link each installed script into OUT/bin under the same
                 ;; name.
                 (for-each
                  (lambda (script)
                    (symlink (string-append script-dir "/" script)
                             (string-append dest-dir "/" script)))
                  '("checkBadDeps.R" "BiocCheck")))
               #t)))))
      (native-inputs
       `(("which" ,which)))
      (propagated-inputs
       `(("r-codetools" ,r-codetools)
         ("r-graph" ,r-graph)
         ("r-httr" ,r-httr)
         ("r-optparse" ,r-optparse)
         ("r-biocinstaller" ,r-biocinstaller)
         ("r-biocviews" ,r-biocviews)))
      (home-page "http://bioconductor.org/packages/BiocCheck")
      (synopsis "Executes Bioconductor-specific package checks")
      (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
      (license license:artistic2.0)))
  ;; getopt (fetched via `cran-uri'): short and long command-line option
  ;; processing for Rscript shebang scripts.
  (define-public r-getopt
    (package
      (name "r-getopt")
      (version "1.20.0")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "getopt" version))
                (sha256
                 (base32
                  "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
      (build-system r-build-system)
      (home-page "https://github.com/trevorld/getopt")
      (synopsis "Command-line option processor for R")
      (description "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
      (license license:gpl2+)))
  ;; optparse (fetched via `cran-uri'): command line parser modelled on
  ;; Python's optparse; depends on r-getopt.
  (define-public r-optparse
    (package
      (name "r-optparse")
      (version "1.4.4")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "optparse" version))
                (sha256
                 (base32
                  "1ff4wmsszrb3spwfp7ynfs8w11qpy1sdzfxm1wk8dqqvdwris7qb"))))
      (build-system r-build-system)
      (propagated-inputs `(("r-getopt" ,r-getopt)))
      (home-page "https://github.com/trevorld/optparse")
      (synopsis "Command line option parser")
      (description "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
      (license license:gpl2+)))
  ;; DNAcopy (fetched via `bioconductor-uri'): circular binary segmentation
  ;; of DNA copy number data.  gfortran is listed as an input.
  (define-public r-dnacopy
    (package
      (name "r-dnacopy")
      (version "1.50.1")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "DNAcopy" version))
         (sha256
          (base32 "0f0x83db7rm5xf9fg5pjhvs4i165qqaf01lbwb8kj13fsqpwx15p"))))
      (properties '((upstream-name . "DNAcopy")))
      (build-system r-build-system)
      (inputs `(("gfortran" ,gfortran)))
      (home-page "https://bioconductor.org/packages/DNAcopy")
      (synopsis "Implementation of a circular binary segmentation algorithm")
      (description
       "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
      (license license:gpl2+)))
  ;; S4Vectors (fetched via `bioconductor-uri'): the Vector and List
  ;; virtual classes plus a few concrete subclasses.
  (define-public r-s4vectors
    (package
      (name "r-s4vectors")
      (version "0.14.3")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "S4Vectors" version))
         (sha256
          (base32 "1r7s4pfw026qazzic090mhk8d9m39j2nwl87dyqcpdylyq7gq5qs"))))
      (properties '((upstream-name . "S4Vectors")))
      (build-system r-build-system)
      (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
      (home-page "http://bioconductor.org/packages/S4Vectors")
      (synopsis "S4 implementation of vectors and lists")
      (description "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
      (license license:artistic2.0)))
  ;; seqinr (fetched via `cran-uri'): exploratory analysis and
  ;; visualization of biological sequence data.  zlib is a plain input.
  (define-public r-seqinr
    (package
      (name "r-seqinr")
      (version "3.4-5")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "seqinr" version))
                (sha256
                 (base32
                  "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
      (build-system r-build-system)
      (inputs `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-ade4" ,r-ade4)
         ("r-segmented" ,r-segmented)))
      (home-page "http://seqinr.r-forge.r-project.org/")
      (synopsis "Biological sequences retrieval and analysis")
      (description "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
      (license license:gpl2+)))
  ;; IRanges (fetched via `bioconductor-uri'): S4 classes for integer
  ;; ranges, RLE vectors and other sequentially organized data.
  (define-public r-iranges
    (package
      (name "r-iranges")
      (version "2.10.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "IRanges" version))
         (sha256
          (base32 "1brmzs3rsf97gymridrh9c9r3vws8b3rpghaanxnniw36lmcajfy"))))
      (properties '((upstream-name . "IRanges")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "http://bioconductor.org/packages/IRanges")
      (synopsis "Infrastructure for manipulating intervals on sequences")
      (description "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
      (license license:artistic2.0)))
  ;; GenomeInfoDbData: lookup tables mapping NCBI taxonomy IDs to species,
  ;; used by the GenomeInfoDb package.
  (define-public r-genomeinfodbdata
    (package
      (name "r-genomeinfodbdata")
      (version "0.99.0")
      (source
       (origin
         (method url-fetch)
         ;; We cannot use bioconductor-uri here because this tarball is
         ;; located under "data/annotation/" instead of "bioc/".
         (uri (string-append
               "https://bioconductor.org/packages/release/"
               "data/annotation/src/contrib/GenomeInfoDbData_"
               version ".tar.gz"))
         (sha256
          (base32 "120qvhb0pvkzd65lsgja62vyrgc37si6fh68q4cg4w5x9f04jw25"))))
      (properties '((upstream-name . "GenomeInfoDbData")))
      (build-system r-build-system)
      (home-page "http://bioconductor.org/packages/GenomeInfoDbData")
      (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
      (description
       "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
      (license license:artistic2.0)))
  ;; GenomeInfoDb (fetched via `bioconductor-uri'): translation between
  ;; chromosome sequence naming conventions.
  (define-public r-genomeinfodb
    (package
      (name "r-genomeinfodb")
      (version "1.12.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "GenomeInfoDb" version))
         (sha256
          (base32 "1hjxgmcnrngp1307ipqaq9hgxz4j0ldn7d46knhzs30k2r4qnrfp"))))
      (properties '((upstream-name . "GenomeInfoDb")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-genomeinfodbdata" ,r-genomeinfodbdata)
         ("r-iranges" ,r-iranges)
         ("r-rcurl" ,r-rcurl)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "http://bioconductor.org/packages/GenomeInfoDb")
      (synopsis "Utilities for manipulating chromosome identifiers")
      (description "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
      (license license:artistic2.0)))
  ;; edgeR (fetched via `bioconductor-uri'): differential expression
  ;; analysis of count data based on negative binomial distributions.
  (define-public r-edger
    (package
      (name "r-edger")
      (version "3.18.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "edgeR" version))
         (sha256
          (base32 "02l17i6xh33dv10swwvyfxrmv5kp23iv278iwvkfq2mnipasfnb9"))))
      (properties `((upstream-name . "edgeR")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-limma" ,r-limma)
         ("r-locfit" ,r-locfit)
         ("r-statmod" ,r-statmod))) ;for estimateDisp
      (home-page "http://bioinf.wehi.edu.au/edgeR")
      (synopsis "EdgeR does empirical analysis of digital gene expression data")
      (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis of
other types of genomic data that produce counts, including ChIP-seq, SAGE and
CAGE.")
      (license license:gpl2+)))
  ;; VariantAnnotation (fetched via `bioconductor-uri'): annotation of
  ;; genetic variants and prediction of coding outcomes.
  (define-public r-variantannotation
    (package
      (name "r-variantannotation")
      (version "1.22.3")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "VariantAnnotation" version))
         (sha256
          (base32 "0sr3vdn85x5zdxh80cfwlpfdpi2hmjy3fwi00ac3jya4v145vawr"))))
      (properties '((upstream-name . "VariantAnnotation")))
      (build-system r-build-system)
      (inputs `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-dbi" ,r-dbi)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicfeatures" ,r-genomicfeatures)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-summarizedexperiment" ,r-summarizedexperiment)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)
         ("r-zlibbioc" ,r-zlibbioc)))
      (home-page "https://bioconductor.org/packages/VariantAnnotation")
      (synopsis "Package for annotation of genetic variants")
      (description
       "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
      (license license:artistic2.0)))
  ;; limma (fetched via `bioconductor-uri'): linear models for the analysis
  ;; of gene expression studies.
  (define-public r-limma
    (package
      (name "r-limma")
      (version "3.32.5")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "limma" version))
         (sha256
          (base32 "0p2ayha9g9w5r8s7pgdf16mkmdbqwh6f35jh07g3b8gyra48gwiw"))))
      (build-system r-build-system)
      (home-page "http://bioinf.wehi.edu.au/limma")
      (synopsis "Package for linear models for microarray and RNA-seq data")
      (description
       "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
      (license license:gpl2+)))
  ;; XVector (fetched via `bioconductor-uri'): memory efficient S4 classes
  ;; for storing sequences "externally".
  (define-public r-xvector
    (package
      (name "r-xvector")
      (version "0.16.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "XVector" version))
         (sha256
          (base32 "01n09f4jdm60684lzikp02zf9gjan8bdrjx740vggr21q9fa69wn"))))
      (properties
       `((upstream-name . "XVector")))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Remove the zlibbioc references from the package metadata;
           ;; presumably so that the "zlib" input below is used instead.
           (add-after 'unpack 'use-system-zlib
             (lambda _
               (substitute* "DESCRIPTION"
                 (("zlibbioc, ") ""))
               (substitute* "NAMESPACE"
                 (("import\\(zlibbioc\\)") ""))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-iranges" ,r-iranges)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "http://bioconductor.org/packages/XVector")
      (synopsis "Representation and manipulation of external sequences")
      (description
       "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
      (license license:artistic2.0)))
  ;; GenomicRanges (fetched via `bioconductor-uri'): containers for storing
  ;; and manipulating genomic intervals.
  (define-public r-genomicranges
    (package
      (name "r-genomicranges")
      (version "1.28.4")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "GenomicRanges" version))
         (sha256
          (base32 "1y15kg1q81h8rmga83ljiwr8whkajcargfjiljr212d6if17ys1z"))))
      (properties
       `((upstream-name . "GenomicRanges")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-iranges" ,r-iranges)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/GenomicRanges")
      (synopsis "Representation and manipulation of genomic intervals")
      (description
       "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, an ability that plays a central role when
it comes to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
      (license license:artistic2.0)))
  ;; Biobase (fetched via `bioconductor-uri'): base functions needed by
  ;; many other Bioconductor packages.
  (define-public r-biobase
    (package
      (name "r-biobase")
      (version "2.36.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "Biobase" version))
         (sha256
          (base32 "0sr48nqx5bqid4g6lr9zr9286xh842w717yvmssvddb5xxynib6f"))))
      (properties '((upstream-name . "Biobase")))
      (build-system r-build-system)
      (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
      (home-page "http://bioconductor.org/packages/Biobase")
      (synopsis "Base functions for Bioconductor")
      (description "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
      (license license:artistic2.0)))
  ;; AnnotationDbi (fetched via `bioconductor-uri'): user interface and
  ;; database connection code for SQLite-backed annotation data packages.
  (define-public r-annotationdbi
    (package
      (name "r-annotationdbi")
      (version "1.38.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "AnnotationDbi" version))
         (sha256
          (base32 "1lsamnbf07zzsy5asy5hn97n2a4layv58w2bzd90ikcdx0gmzarj"))))
      (properties '((upstream-name . "AnnotationDbi")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biobase" ,r-biobase)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-dbi" ,r-dbi)
         ("r-iranges" ,r-iranges)
         ("r-rsqlite" ,r-rsqlite)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "http://bioconductor.org/packages/AnnotationDbi")
      (synopsis "Annotation database interface")
      (description "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
      (license license:artistic2.0)))
  ;; biomaRt (fetched via `bioconductor-uri'): interface to databases that
  ;; implement the BioMart software suite.
  (define-public r-biomart
    (package
      (name "r-biomart")
      (version "2.32.1")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "biomaRt" version))
         (sha256
          (base32 "0fhpbjlsgbqxrpj6nzhhk9q3ph81n5x4p7mmd097xjjn6b05w1d8"))))
      (properties
       `((upstream-name . "biomaRt")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-rcurl" ,r-rcurl)
         ("r-xml" ,r-xml)))
      (home-page "http://bioconductor.org/packages/biomaRt")
      (synopsis "Interface to BioMart databases")
      ;; Texinfo's @url takes the URL first and the displayed text second.
      (description
       "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
      (license license:artistic2.0)))
  ;; BiocParallel (fetched via `bioconductor-uri'): parallel evaluation
  ;; functions tailored to Bioconductor objects.
  (define-public r-biocparallel
    (package
      (name "r-biocparallel")
      (version "1.10.1")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "BiocParallel" version))
         (sha256
          (base32 "08mdfxyk9nwz77v0xhlvs19p2wj0phgm5c5b25vm0xh3749njsp0"))))
      (properties '((upstream-name . "BiocParallel")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-futile-logger" ,r-futile-logger)
         ("r-snow" ,r-snow)))
      (home-page "http://bioconductor.org/packages/BiocParallel")
      (synopsis "Bioconductor facilities for parallel evaluation")
      (description "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
      ;; Two licenses are listed for this package.
      (license (list license:gpl2+ license:gpl3+))))
  ;; Biostrings (fetched via `bioconductor-uri'): string containers and
  ;; matching algorithms for large biological sequences.
  (define-public r-biostrings
    (package
      (name "r-biostrings")
      (version "2.44.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "Biostrings" version))
         (sha256
          (base32 "12c5abgshwq86357jr0r9039y6vl4d6ngysy89rsnr23ldnsirjp"))))
      (properties '((upstream-name . "Biostrings")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-iranges" ,r-iranges)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/Biostrings")
      (synopsis "String objects and algorithms for biological sequences")
      (description "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
      (license license:artistic2.0)))
  ;; Rsamtools (fetched via `bioconductor-uri'): interface to the samtools,
  ;; bcftools and tabix utilities.
  (define-public r-rsamtools
    (package
      (name "r-rsamtools")
      (version "1.28.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "Rsamtools" version))
         (sha256
          (base32 "1zx6vhdz3lksbfy98jj3qzl7cxgspigm2pbsd5835b12r6mc5w6d"))))
      (properties '((upstream-name . "Rsamtools")))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Drop the zlibbioc references from the package metadata right
           ;; after unpacking; "zlib" is provided as an input below.
           (add-after 'unpack 'use-system-zlib
             (lambda _
               (substitute* "DESCRIPTION"
                 (("zlibbioc, ") ""))
               (substitute* "NAMESPACE"
                 (("import\\(zlibbioc\\)") ""))
               #t)))))
      (inputs `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biocparallel" ,r-biocparallel)
         ("r-biostrings" ,r-biostrings)
         ("r-bitops" ,r-bitops)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)))
      (home-page
       "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
      (synopsis "Interface to samtools, bcftools, and tabix")
      (description "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
      (license license:expat)))
  ;; DelayedArray (fetched via `bioconductor-uri'): delayed operations on
  ;; array-like objects.
  (define-public r-delayedarray
    (package
      (name "r-delayedarray")
      (version "0.2.7")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "DelayedArray" version))
         (sha256
          (base32 "02dfqp4md9xaqjj712ijc3jswghmipr5hwkd5hr0x1xi6l2fb69g"))))
      (properties '((upstream-name . "DelayedArray")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-s4vectors" ,r-s4vectors)
         ("r-iranges" ,r-iranges)
         ("r-matrixstats" ,r-matrixstats)))
      (home-page "http://bioconductor.org/packages/DelayedArray")
      (synopsis "Delayed operations on array-like objects")
      (description "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
      (license license:artistic2.0)))
  ;; SummarizedExperiment (fetched via `bioconductor-uri'): container
  ;; holding one or more matrix-like assays.
  (define-public r-summarizedexperiment
    (package
      (name "r-summarizedexperiment")
      (version "1.6.3")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "SummarizedExperiment" version))
         (sha256
          (base32 "0j7xn7pk52d383fb1wplcggacl2586c4zi0alkgfc3wz7qq9w13s"))))
      (properties '((upstream-name . "SummarizedExperiment")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biobase" ,r-biobase)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-delayedarray" ,r-delayedarray)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-matrix" ,r-matrix)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "http://bioconductor.org/packages/SummarizedExperiment")
      (synopsis "Container for representing genomic ranges by sample")
      (description "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
      (license license:artistic2.0)))
  ;; GenomicAlignments (fetched via `bioconductor-uri'): containers for
  ;; storing and manipulating short genomic alignments.
  (define-public r-genomicalignments
    (package
      (name "r-genomicalignments")
      (version "1.12.2")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "GenomicAlignments" version))
         (sha256
          (base32 "03ysxi9fdd3bcfj05iaysya9knn2aa2irwpypb5srg0xwv92bdb9"))))
      (properties '((upstream-name . "GenomicAlignments")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biocparallel" ,r-biocparallel)
         ("r-biostrings" ,r-biostrings)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)
         ("r-summarizedexperiment" ,r-summarizedexperiment)))
      (home-page "http://bioconductor.org/packages/GenomicAlignments")
      (synopsis "Representation and manipulation of short genomic alignments")
      (description "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
      (license license:artistic2.0)))
  ;; rtracklayer (fetched via `bioconductor-uri'): framework for interacting
  ;; with genome browsers and manipulating annotation tracks.
  (define-public r-rtracklayer
    (package
      (name "r-rtracklayer")
      (version "1.36.4")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "rtracklayer" version))
         (sha256
          (base32 "050q1rv04w31168ljr975vxva31n9lqdx84rnmsk6zcr6p640ffp"))))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Drop the zlibbioc references from the package metadata right
           ;; after unpacking; "zlib" is provided as an input below.
           (add-after 'unpack 'use-system-zlib
             (lambda _
               (substitute* "DESCRIPTION"
                 (("zlibbioc, ") ""))
               (substitute* "NAMESPACE"
                 (("import\\(zlibbioc\\)") ""))
               #t)))))
      (inputs `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-rcurl" ,r-rcurl)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xml" ,r-xml)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/rtracklayer")
      (synopsis "R interface to genome browsers and their annotation tracks")
      (description "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
      (license license:artistic2.0)))
  ;; GenomicFeatures (fetched via `bioconductor-uri'): tools for making and
  ;; manipulating transcript centric annotations.
  (define-public r-genomicfeatures
    (package
      (name "r-genomicfeatures")
      (version "1.28.4")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "GenomicFeatures" version))
         (sha256
          (base32 "01ylvg275iy0cvsbxkbfxcf9pi9al597v5wnlqi2xdpmrcxyc3q0"))))
      (properties '((upstream-name . "GenomicFeatures")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-biomart" ,r-biomart)
         ("r-biostrings" ,r-biostrings)
         ("r-dbi" ,r-dbi)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-rcurl" ,r-rcurl)
         ("r-rsqlite" ,r-rsqlite)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/GenomicFeatures")
      (synopsis "Tools for working with transcript centric annotations")
      (description "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
      (license license:artistic2.0)))
  ;; GO.db: annotation maps describing the entire Gene Ontology.
  (define-public r-go-db
    (package
      (name "r-go-db")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; The URL is spelled out rather than built with bioconductor-uri
         ;; because the tarball is located under "data/annotation/".
         ;; Fetch over HTTPS rather than plain HTTP.
         (uri (string-append "https://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/GO.db_"
                             version ".tar.gz"))
         (sha256
          (base32 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
      (properties
       `((upstream-name . "GO.db")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)))
      (home-page "http://bioconductor.org/packages/GO.db")
      (synopsis "Annotation maps describing the entire Gene Ontology")
      (description
       "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
      (license license:artistic2.0)))
  ;; graph (fetched via `bioconductor-uri'): simple graph handling
  ;; capabilities for R.
  (define-public r-graph
    (package
      (name "r-graph")
      (version "1.54.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "graph" version))
         (sha256
          (base32 "0hx9wslnrci7c5i1gd1zlpjmgrkdnx9gabfgmzzwfnykk1mdvvna"))))
      (build-system r-build-system)
      (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
      (home-page "http://bioconductor.org/packages/graph")
      (synopsis "Handle graph data structures in R")
      (description "This package implements some simple graph handling capabilities for R.")
      (license license:artistic2.0)))
  ;; topGO (fetched via `bioconductor-uri'): testing of gene ontology terms
  ;; while accounting for the topology of the GO graph.
  (define-public r-topgo
    (package
      (name "r-topgo")
      (version "2.28.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "topGO" version))
         (sha256
          (base32 "04kvxz9qsxfz0np7wlrzp4r95nykz94x7mqpyyk76f90g6m66vcc"))))
      (properties '((upstream-name . "topGO")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-dbi" ,r-dbi)
         ("r-biobase" ,r-biobase)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-go-db" ,r-go-db)
         ("r-graph" ,r-graph)
         ("r-lattice" ,r-lattice)
         ("r-matrixstats" ,r-matrixstats)
         ("r-sparsem" ,r-sparsem)))
      (home-page "http://bioconductor.org/packages/topGO")
      (synopsis "Enrichment analysis for gene ontology")
      (description "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
      ;; Any version of the LGPL applies.
      (license license:lgpl2.1+)))
  ;; BSgenome: shared infrastructure for the Biostrings-based genome data
  ;; packages.
  (define-public r-bsgenome
    (package
      (name "r-bsgenome")
      (version "1.44.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "BSgenome" version))
         (sha256
          (base32
           "18dlknwk3xvi52hamxf9pl5bjc5806mpw98xwvx5xajn8mrxiy5h"))))
      (build-system r-build-system)
      (properties '((upstream-name . "BSgenome")))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/BSgenome")
      (synopsis "Infrastructure for Biostrings-based genome data packages")
      (description
       "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
      (license license:artistic2.0)))
  ;; Human genome sequence data (1000genomes hs37d5) as a BSgenome package.
  (define-public r-bsgenome-hsapiens-1000genomes-hs37d5
    (package
      (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
      (version "0.99.1")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Hsapiens.1000genomes.hs37d5_"
                             version ".tar.gz"))
         (sha256
          (base32
           "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
      (synopsis "Full genome sequences for Homo sapiens")
      (description
       "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
      (license license:artistic2.0)))
  ;; impute: nearest-neighbour imputation of missing microarray expression
  ;; values (Bioconductor).
  (define-public r-impute
    (package
      (name "r-impute")
      (version "1.50.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "impute" version))
                (sha256
                 (base32
                  "0va45vfixy3np549md87h3b3rbavm54gfnmnjhpr9hf02lll6zxs"))))
      (build-system r-build-system)
      ;; The Fortran compiler is a build-time tool, so it is declared as a
      ;; native input rather than as a regular input.
      (native-inputs
       `(("gfortran" ,gfortran)))
      (home-page "http://bioconductor.org/packages/impute")
      (synopsis "Imputation for microarray data")
      (description
       "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
      (license license:gpl2+)))
  ;; seqPattern: visualisation of oligonucleotide patterns and motif
  ;; occurrences (Bioconductor).
  (define-public r-seqpattern
    (package
      (name "r-seqpattern")
      (version "1.8.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "seqPattern" version))
         (sha256
          (base32
           "0yw5251sirv3283xgmfmfbf900xp8dwvqfk88mr945s3by5hx99v"))))
      (build-system r-build-system)
      (properties '((upstream-name . "seqPattern")))
      (propagated-inputs
       `(("r-biostrings" ,r-biostrings)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-kernsmooth" ,r-kernsmooth)
         ("r-plotrix" ,r-plotrix)))
      (home-page "http://bioconductor.org/packages/seqPattern")
      (synopsis "Visualising oligonucleotide patterns and motif occurrences")
      (description
       "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
      (license license:gpl3+)))
  ;; genomation: summary, annotation and visualisation of genomic intervals
  ;; (Bioconductor).
  (define-public r-genomation
    (package
      (name "r-genomation")
      (version "1.8.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "genomation" version))
         (sha256
          (base32
           "0r71nf8p3aq3yffqxg4yir1zmm7rar5a57nmy1kpqpn8kaf2svjl"))))
      (build-system r-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-data-table" ,r-data-table)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-gridbase" ,r-gridbase)
         ("r-impute" ,r-impute)
         ("r-iranges" ,r-iranges)
         ("r-matrixstats" ,r-matrixstats)
         ("r-plotrix" ,r-plotrix)
         ("r-plyr" ,r-plyr)
         ("r-rcpp" ,r-rcpp)
         ("r-readr" ,r-readr)
         ("r-reshape2" ,r-reshape2)
         ("r-rhtslib" ,r-rhtslib)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-runit" ,r-runit)
         ("r-s4vectors" ,r-s4vectors)
         ("r-seqpattern" ,r-seqpattern)))
      (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
      (synopsis "Summary, annotation and visualization of genomic data")
      (description
       "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
      (license license:artistic2.0)))
  ;; genomationData: experimental data sets that accompany the genomation
  ;; package.
  (define-public r-genomationdata
    (package
      (name "r-genomationdata")
      (version "1.6.0")
      (source (origin
                (method url-fetch)
                ;; We cannot use bioconductor-uri here because this tarball is
                ;; located under "data/experiment/" instead of "bioc/".
                (uri (string-append "https://bioconductor.org/packages/"
                                    "release/data/experiment/src/contrib/"
                                    "genomationData_" version ".tar.gz"))
                (sha256
                 (base32
                  "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
      (build-system r-build-system)
      ;; As this package provides little more than large data files, it doesn't
      ;; make sense to build substitutes.
      (arguments `(#:substitutable? #f))
      (native-inputs
       `(("r-knitr" ,r-knitr)))
      (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
      (synopsis "Experimental data for use with the genomation package")
      (description
       "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
      (license license:gpl3+)))
  ;; org.Hs.eg.db: Entrez-gene based annotation mappings for human.
  (define-public r-org-hs-eg-db
    (package
      (name "r-org-hs-eg-db")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "org.Hs.eg.db_" version ".tar.gz"))
         (sha256
          (base32
           "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
      (build-system r-build-system)
      (properties '((upstream-name . "org.Hs.eg.db")))
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)))
      (home-page "http://www.bioconductor.org/packages/org.Hs.eg.db/")
      (synopsis "Genome wide annotation for Human")
      (description
       "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
      (license license:artistic2.0)))
  ;; org.Ce.eg.db: Entrez-gene based annotation mappings for C. elegans.
  (define-public r-org-ce-eg-db
    (package
      (name "r-org-ce-eg-db")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "org.Ce.eg.db_" version ".tar.gz"))
         (sha256
          (base32
           "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
      (build-system r-build-system)
      (properties '((upstream-name . "org.Ce.eg.db")))
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)))
      (home-page "http://www.bioconductor.org/packages/org.Ce.eg.db/")
      (synopsis "Genome wide annotation for Worm")
      (description
       "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
      (license license:artistic2.0)))
  ;; org.Dm.eg.db: Entrez-gene based annotation mappings for Drosophila
  ;; melanogaster.
  (define-public r-org-dm-eg-db
    (package
      (name "r-org-dm-eg-db")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "org.Dm.eg.db_" version ".tar.gz"))
         (sha256
          (base32
           "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
      (build-system r-build-system)
      (properties '((upstream-name . "org.Dm.eg.db")))
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)))
      (home-page "http://www.bioconductor.org/packages/org.Dm.eg.db/")
      (synopsis "Genome wide annotation for Fly")
      (description
       "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
      (license license:artistic2.0)))
  ;; org.Mm.eg.db: Entrez-gene based annotation mappings for mouse.
  (define-public r-org-mm-eg-db
    (package
      (name "r-org-mm-eg-db")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "org.Mm.eg.db_" version ".tar.gz"))
         (sha256
          (base32
           "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
      (build-system r-build-system)
      (properties '((upstream-name . "org.Mm.eg.db")))
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)))
      (home-page "http://www.bioconductor.org/packages/org.Mm.eg.db/")
      (synopsis "Genome wide annotation for Mouse")
      (description
       "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
      (license license:artistic2.0)))
  ;; seqLogo: sequence logo plots for DNA motifs (Bioconductor).
  (define-public r-seqlogo
    (package
      (name "r-seqlogo")
      (version "1.42.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "seqLogo" version))
                (sha256
                 (base32
                  "19d5zmy7m8svljwgbmrb4vxkq18slq0f3all6k2ayv42b8w44h6q"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "seqLogo")))
      (home-page "http://bioconductor.org/packages/seqLogo")
      (synopsis "Sequence logos for DNA sequence alignments")
      (description
       "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
      (license license:lgpl2.0+)))
  ;; Human genome sequence data (UCSC hg19) as a BSgenome package.
  (define-public r-bsgenome-hsapiens-ucsc-hg19
    (package
      (name "r-bsgenome-hsapiens-ucsc-hg19")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Hsapiens.UCSC.hg19_"
                             version ".tar.gz"))
         (sha256
          (base32
           "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
      (synopsis "Full genome sequences for Homo sapiens")
      (description
       "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
      (license license:artistic2.0)))
  ;; Mouse genome sequence data (UCSC mm9) as a BSgenome package.
  (define-public r-bsgenome-mmusculus-ucsc-mm9
    (package
      (name "r-bsgenome-mmusculus-ucsc-mm9")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Mmusculus.UCSC.mm9_"
                             version ".tar.gz"))
         (sha256
          (base32
           "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
      (synopsis "Full genome sequences for Mouse")
      (description
       "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
      (license license:artistic2.0)))
  ;; Mouse genome sequence data (UCSC mm10) as a BSgenome package.
  (define-public r-bsgenome-mmusculus-ucsc-mm10
    (package
      (name "r-bsgenome-mmusculus-ucsc-mm10")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Mmusculus.UCSC.mm10_"
                             version ".tar.gz"))
         (sha256
          (base32
           "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
      (synopsis "Full genome sequences for Mouse")
      (description
       "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
      (license license:artistic2.0)))
  ;; TxDb annotation object for mouse (UCSC mm10, knownGene track).
  (define-public r-txdb-mmusculus-ucsc-mm10-knowngene
    (package
      (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
      (version "3.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                             version ".tar.gz"))
         (sha256
          (base32
           "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)
         ("r-genomicfeatures" ,r-genomicfeatures)
         ("r-annotationdbi" ,r-annotationdbi)))
      (home-page
       "http://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
      (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
      (description
       "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
      (license license:artistic2.0)))
  ;; C. elegans genome sequence data (UCSC ce6) as a BSgenome package.
  (define-public r-bsgenome-celegans-ucsc-ce6
    (package
      (name "r-bsgenome-celegans-ucsc-ce6")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Celegans.UCSC.ce6_"
                             version ".tar.gz"))
         (sha256
          (base32
           "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
      (synopsis "Full genome sequences for Worm")
      (description
       "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
      (license license:artistic2.0)))
  ;; C. elegans genome sequence data (UCSC ce10) as a BSgenome package.
  (define-public r-bsgenome-celegans-ucsc-ce10
    (package
      (name "r-bsgenome-celegans-ucsc-ce10")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Celegans.UCSC.ce10_"
                             version ".tar.gz"))
         (sha256
          (base32
           "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
      (synopsis "Full genome sequences for Worm")
      (description
       "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
      (license license:artistic2.0)))
  ;; Drosophila melanogaster genome sequence data (UCSC dm3) as a BSgenome
  ;; package.
  (define-public r-bsgenome-dmelanogaster-ucsc-dm3
    (package
      (name "r-bsgenome-dmelanogaster-ucsc-dm3")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         ;; bioconductor-uri is not usable: the tarball lives under
         ;; "data/annotation/" rather than "bioc/".
         (uri (string-append "http://www.bioconductor.org/packages/"
                             "release/data/annotation/src/contrib/"
                             "BSgenome.Dmelanogaster.UCSC.dm3_"
                             version ".tar.gz"))
         (sha256
          (base32
           "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
      ;; The package is little more than one very large data file, so
      ;; building substitutes for it is pointless.
      (arguments '(#:substitutable? #f))
      (propagated-inputs
       `(("r-bsgenome" ,r-bsgenome)))
      (home-page
       "http://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
      (synopsis "Full genome sequences for Fly")
      (description
       "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
      (license license:artistic2.0)))
  ;; motifRG: discriminative motif discovery for high-throughput sequencing
  ;; data (Bioconductor).
  (define-public r-motifrg
    (package
      (name "r-motifrg")
      (version "1.20.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "motifRG" version))
         (sha256
          (base32
           "0lxxpqyljiyk73vyq6ss46q13g81pm24q87mkzdsqlr5zx427ch6"))))
      (properties `((upstream-name . "motifRG")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ;; The label follows the dashed package name, like the label used
         ;; for this input by the other packages in this file.
         ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
         ("r-iranges" ,r-iranges)
         ("r-seqlogo" ,r-seqlogo)
         ("r-xvector" ,r-xvector)))
      (home-page "http://bioconductor.org/packages/motifRG")
      (synopsis "Discover motifs in high throughput sequencing data")
      (description
       "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
      (license license:artistic2.0)))
  ;; R/qtl: analysis of QTL experiments, distributed through CRAN.
  (define-public r-qtl
    (package
      (name "r-qtl")
      (version "1.41-6")
      (source
       (origin
         (method url-fetch)
         ;; Use the R build system's 'cran-uri' helper instead of a
         ;; hand-built mirror URL.  NOTE(review): cran-uri is expected to
         ;; also return the CRAN "Archive" location, so that the tarball
         ;; stays reachable once upstream publishes a newer release --
         ;; confirm against (guix build-system r).
         (uri (cran-uri "qtl" version))
         (sha256
          (base32
           "067az4v432zxp6lxck8d7vlh9w4r13r0mvw5zsglyaqwsh3d9sad"))))
      (build-system r-build-system)
      (home-page "http://rqtl.org/")
      (synopsis "R package for analyzing QTL experiments in genetics")
      (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).
Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
      (license license:gpl3)))
  ;; zlibbioc: zlib sources packaged for use by other R packages
  ;; (Bioconductor).
  (define-public r-zlibbioc
    (package
      (name "r-zlibbioc")
      (version "1.22.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "zlibbioc" version))
         (sha256
          (base32
           "1kdgwwlh39mgwzj3zq71za4iv40sq625ghnyrndv5wrivrcr2igv"))))
      (build-system r-build-system)
      (properties '((upstream-name . "zlibbioc")))
      (home-page "https://bioconductor.org/packages/zlibbioc")
      (synopsis "Provider for zlib-1.2.5 to R packages")
      (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
      (license license:artistic2.0)))
  ;; R4RNA: RNA secondary structure analysis; the tarball is fetched from the
  ;; project's own site.
  (define-public r-r4rna
    (package
      (name "r-r4rna")
      (version "0.1.4")
      (source (origin
                (method url-fetch)
                (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                                    version ".tar.gz"))
                (sha256
                 (base32
                  "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-optparse" ,r-optparse)
         ("r-rcolorbrewer" ,r-rcolorbrewer)))
      (home-page "http://www.e-rna.org/r-chie/index.cgi")
      (synopsis "Analysis framework for RNA secondary structure")
      (description
       "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
      (license license:gpl3+)))
  ;; Rhtslib: the HTSlib C library wrapped as an R package (Bioconductor).
  (define-public r-rhtslib
    (package
      (name "r-rhtslib")
      (version "1.8.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "Rhtslib" version))
                (sha256
                 (base32
                  "0jj5h81z5gyf4j3lp2l3zsm6pgbmalgrngr8qdpygc20phndpp0b"))))
      (build-system r-build-system)
      (properties
       '((upstream-name . "Rhtslib")))
      (native-inputs
       `(("autoconf" ,autoconf)))
      (inputs
       `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-zlibbioc" ,r-zlibbioc)))
      (home-page "https://github.com/nhayden/Rhtslib")
      (synopsis "High-throughput sequencing library as an R package")
      (description
       "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
      (license license:lgpl2.0+)))
  ;; bamsignals: read-count signals from indexed BAM files (Bioconductor).
  (define-public r-bamsignals
    (package
      (name "r-bamsignals")
      (version "1.8.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "bamsignals" version))
                (sha256
                 (base32
                  "0knx69zzdaak2sjim8k9mygmcjxpa705m8013ld5zwpgi8dag9mc"))))
      (build-system r-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-rcpp" ,r-rcpp)
         ("r-rhtslib" ,r-rhtslib)
         ("r-zlibbioc" ,r-zlibbioc)))
      (home-page "http://bioconductor.org/packages/bamsignals")
      (synopsis "Extract read count signals from bam files")
      (description
       "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
      (license license:gpl2+)))
  ;; RCAS: the R library behind the RNA-centric annotation system, built from
  ;; a GitHub release tarball.
  (define-public r-rcas
    (package
      (name "r-rcas")
      (version "1.3.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/archive/v"
                             version ".tar.gz"))
         ;; Store the download under a name built from the package name and
         ;; version.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1qgc7vi6fpzl440yg7jhiycg5q336kd4pxqzx10yx2zcq3bq3msg"))))
      (build-system r-build-system)
      (native-inputs
       `(("r-knitr" ,r-knitr)
         ("r-testthat" ,r-testthat)
         ;; knitr looks for "pandoc-citeproc" in the PATH while the
         ;; vignettes are being built.
         ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
      (propagated-inputs
       `(("r-data-table" ,r-data-table)
         ("r-biomart" ,r-biomart)
         ("r-org-hs-eg-db" ,r-org-hs-eg-db)
         ("r-org-ce-eg-db" ,r-org-ce-eg-db)
         ("r-org-dm-eg-db" ,r-org-dm-eg-db)
         ("r-org-mm-eg-db" ,r-org-mm-eg-db)
         ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
         ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
         ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
         ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
         ("r-topgo" ,r-topgo)
         ("r-dt" ,r-dt)
         ("r-pbapply" ,r-pbapply)
         ("r-plotly" ,r-plotly)
         ("r-plotrix" ,r-plotrix)
         ("r-motifrg" ,r-motifrg)
         ("r-genomation" ,r-genomation)
         ("r-genomicfeatures" ,r-genomicfeatures)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-rmarkdown" ,r-rmarkdown)))
      (home-page "https://github.com/BIMSBbioinfo/RCAS")
      (synopsis "RNA-centric annotation system")
      (description
       "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
      (license license:artistic2.0)))
  ;; rcas-web: web front end for RCAS, built with the GNU build system.
  (define-public rcas-web
    (package
      (name "rcas-web")
      (version "0.0.3")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                    "releases/download/v" version
                                    "/rcas-web-" version ".tar.gz"))
                (sha256
                 (base32
                  "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; After installation, wrap the executable so that it is started
           ;; with the Guile load paths of guile-json and guile-redis and
           ;; with the R_LIBS_SITE value of the build environment.
           (add-after 'install 'wrap-executable
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((program (string-append (assoc-ref outputs "out")
                                              "/bin/rcas-web"))
                      (guile-dirs (string-append
                                   (assoc-ref inputs "guile-json")
                                   "/share/guile/site/2.2:"
                                   (assoc-ref inputs "guile-redis")
                                   "/share/guile/site/2.2")))
                 ;; Each variable is set exactly ("=") to the listed value.
                 (wrap-program program
                   (list "GUILE_LOAD_PATH" ":" '= (list guile-dirs))
                   (list "GUILE_LOAD_COMPILED_PATH" ":" '= (list guile-dirs))
                   (list "R_LIBS_SITE" ":" '=
                         (list (getenv "R_LIBS_SITE")))))
               #t)))))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("r-minimal" ,r-minimal)
         ("r-rcas" ,r-rcas)
         ("guile-next" ,guile-2.2)
         ("guile-json" ,guile-json)
         ("guile-redis" ,guile2.2-redis)))
      (home-page "https://github.com/BIMSBbioinfo/rcas-web")
      (synopsis "Web interface for RNA-centric annotation system (RCAS)")
      (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
      (license license:agpl3+)))
  ;; Bioconductor package MutationalPatterns, fetched via `bioconductor-uri'.
  ;; All of its R dependencies are propagated.
  (define-public r-mutationalpatterns
    (package
      (name "r-mutationalpatterns")
      (version "1.2.1")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "MutationalPatterns" version))
                (sha256
                 (base32
                  "1s50diwh1j6vg3mgahh6bczvq74mfdbmwjrad4d5lh723gnc5pjg"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ;; The label is shorter than the variable name of the package.
         ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
         ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
         ("r-genomicranges" ,r-genomicranges)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-ggplot2" ,r-ggplot2)
         ("r-gridextra" ,r-gridextra)
         ("r-iranges" ,r-iranges)
         ("r-nmf" ,r-nmf)
         ("r-plyr" ,r-plyr)
         ("r-pracma" ,r-pracma)
         ("r-reshape2" ,r-reshape2)
         ("r-summarizedexperiment" ,r-summarizedexperiment)
         ("r-variantannotation" ,r-variantannotation)))
      (home-page "http://bioconductor.org/packages/MutationalPatterns/")
      (synopsis "Extract and visualize mutational patterns in genomic data")
      (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
      (license license:expat)))
  ;; CRAN package WGCNA.  The `upstream-name' property records the
  ;; mixed-case name used upstream.
  (define-public r-wgcna
    (package
      (name "r-wgcna")
      (version "1.61")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "WGCNA" version))
                (sha256
                 (base32
                  "1vrc2k33a196hrrl7k0z534fp96vv0shmigcr65ny1q0v6lq0h6i"))))
      (properties `((upstream-name . "WGCNA")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-doparallel" ,r-doparallel)
         ("r-dynamictreecut" ,r-dynamictreecut)
         ("r-fastcluster" ,r-fastcluster)
         ("r-foreach" ,r-foreach)
         ("r-go-db" ,r-go-db)
         ("r-hmisc" ,r-hmisc)
         ("r-impute" ,r-impute)
         ("r-rcpp" ,r-rcpp)
         ("r-robust" ,r-robust)
         ("r-survival" ,r-survival)
         ("r-matrixstats" ,r-matrixstats)
         ("r-preprocesscore" ,r-preprocesscore)))
      (home-page
       "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
      (synopsis "Weighted correlation network analysis")
      (description
       "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
      (license license:gpl2+)))
  ;; ChIPKernels is built from a pinned Git commit.  The version string is
  ;; "1.1-" followed by the revision and the first nine characters of the
  ;; commit.
  (define-public r-chipkernels
    (let ((revision "1")
          (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
      (package
        (name "r-chipkernels")
        (version (string-append "1.1-" revision "." (string-take commit 9)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/ManuSetty/ChIPKernels.git")
                        (commit commit)))
                  (file-name (string-append name "-" version))
                  (sha256
                   (base32
                    "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-iranges" ,r-iranges)
           ("r-xvector" ,r-xvector)
           ("r-biostrings" ,r-biostrings)
           ("r-bsgenome" ,r-bsgenome)
           ("r-gtools" ,r-gtools)
           ("r-genomicranges" ,r-genomicranges)
           ("r-sfsmisc" ,r-sfsmisc)
           ("r-kernlab" ,r-kernlab)
           ("r-s4vectors" ,r-s4vectors)
           ("r-biocgenerics" ,r-biocgenerics)))
        (home-page "https://github.com/ManuSetty/ChIPKernels")
        (synopsis "Build string kernels for DNA Sequence analysis")
        (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
        (license license:gpl2+))))
  ;; SeqGL, built from the GitHub archive tarball of the tagged version.
  ;; `file-name' gives the download a name that includes the package name.
  (define-public r-seqgl
    (package
      (name "r-seqgl")
      (version "1.1.4")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                    "archive/" version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biostrings" ,r-biostrings)
         ("r-chipkernels" ,r-chipkernels)
         ("r-genomicranges" ,r-genomicranges)
         ("r-spams" ,r-spams)
         ("r-wgcna" ,r-wgcna)
         ("r-fastcluster" ,r-fastcluster)))
      (home-page "https://github.com/ManuSetty/SeqGL")
      (synopsis "Group lasso for Dnase/ChIP-seq data")
      (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
      (license license:gpl2+)))
  ;; CRAN package gkmSVM.  The `upstream-name' property records the
  ;; mixed-case name used upstream.
  (define-public r-gkmsvm
    (package
      (name "r-gkmsvm")
      (version "0.71.0")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "gkmSVM" version))
                (sha256
                 (base32
                  "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
      (properties `((upstream-name . "gkmSVM")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-kernlab" ,r-kernlab)
         ("r-rcpp" ,r-rcpp)
         ("r-rocr" ,r-rocr)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-seqinr" ,r-seqinr)))
      (home-page "http://cran.r-project.org/web/packages/gkmSVM")
      (synopsis "Gapped-kmer support vector machine")
      (description
       "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
      (license license:gpl2+)))
  ;; Bioconductor package tximport.  It declares no inputs.
  (define-public r-tximport
    (package
      (name "r-tximport")
      (version "1.4.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "tximport" version))
         (sha256
          (base32
           "1mklb02bj4gnbjlmb7vv6k4lr3w9fp3pzli9rddbrwd0y5n8fcpx"))))
      (build-system r-build-system)
      (home-page "http://bioconductor.org/packages/tximport")
      (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
      (description
       "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
      (license license:gpl2+)))
  ;; Bioconductor package rhdf5.  The tarball bundles a reduced copy of the
  ;; HDF5 sources (hdf5small.tgz); it is unpacked in a dedicated phase and
  ;; the corresponding Makevars rules are removed.
  (define-public r-rhdf5
    (package
      (name "r-rhdf5")
      (version "2.20.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "rhdf5" version))
                (sha256
                 (base32
                  "1p6f5i6l44phl772a38x9cav2sya37bkqbkjzdc4pmyfzkv1j6hy"))))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'unpack-smallhdf5
             (lambda _
               ;; Extract the bundled HDF5 sources into "src/".  The phase
               ;; returns #f when tar exits with a non-zero status, so a
               ;; failed or partial extraction is reported here instead of
               ;; being silently ignored.
               (and (zero? (system* "tar" "-xzvf"
                                    "src/hdf5source/hdf5small.tgz"
                                    "-C" "src/"))
                    (begin
                      ;; Remove the Makevars lines that would unpack, delete
                      ;; and move the HDF5 tree again.
                      (substitute* "src/Makevars"
                        (("^.*cd hdf5source &&.*$") "")
                        (("^.*gunzip -dc hdf5small.tgz.*$") "")
                        (("^.*rm -rf hdf5.*$") "")
                        (("^.*mv hdf5source/hdf5 ..*$") ""))
                      ;; Replace the absolute "/bin/mv" in the HDF5
                      ;; configure script with plain "mv".
                      (substitute* "src/hdf5/configure"
                        (("/bin/mv") "mv"))
                      #t)))))))
      (propagated-inputs
       `(("r-zlibbioc" ,r-zlibbioc)))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (home-page "http://bioconductor.org/packages/rhdf5")
      (synopsis "HDF5 interface to R")
      (description
       "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
      (license license:artistic2.0)))
  ;; Bioconductor package AnnotationFilter.  The `upstream-name' property
  ;; records the mixed-case name used upstream.
  (define-public r-annotationfilter
    (package
      (name "r-annotationfilter")
      (version "1.0.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "AnnotationFilter" version))
         (sha256
          (base32
           "0pxvswjzwibdfmrkdragxmzcl844z73pmkn82z92wahwa6gjfyi7"))))
      (properties `((upstream-name . "AnnotationFilter")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-genomicranges" ,r-genomicranges)
         ("r-lazyeval" ,r-lazyeval)))
      (home-page "https://github.com/Bioconductor/AnnotationFilter")
      (synopsis "Facilities for filtering Bioconductor annotation resources")
      (description
       "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
      (license license:artistic2.0)))
  ;; EMBOSS, built with the GNU build system.  Three extra phases patch the
  ;; autotools input files and then regenerate the build system with
  ;; autoreconf.
  (define-public emboss
    (package
      (name "emboss")
      (version "6.5.7")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                             (version-major+minor version) ".0/"
                             "EMBOSS-" version ".tar.gz"))
         (sha256
          (base32
           "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags
         ;; Pass the prefix of the "libharu" input to configure.
         (list (string-append "--with-hpdf="
                              (assoc-ref %build-inputs "libharu")))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-checks
             (lambda _
               ;; CHECK_PNGDRIVER looks for libgd, libpng and zlib but
               ;; expects all three under a single prefix.  Replace the
               ;; check with unconditional linker flags and definitions.
               (substitute* "configure.in"
                 (("CHECK_PNGDRIVER")
                  "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
               #t))
           (add-after 'fix-checks 'disable-update-check
             (lambda _
               ;; The build has no Internet access, so checking for updates
               ;; cannot work; remove the embossupdate invocation.
               (substitute* "Makefile.am"
                 (("\\$\\(bindir\\)/embossupdate") ""))
               #t))
           ;; Regenerate the build system after the edits above.
           (add-after 'disable-update-check 'autogen
             (lambda _ (zero? (system* "autoreconf" "-vif")))))))
      (inputs
       `(("perl" ,perl)
         ("libpng" ,libpng)
         ("gd" ,gd)
         ("libx11" ,libx11)
         ("libharu" ,libharu)
         ("zlib" ,zlib)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)
         ("pkg-config" ,pkg-config)))
      (home-page "http://emboss.sourceforge.net")
      (synopsis "Molecular biology analysis suite")
      (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
      (license license:gpl2+)))
  ;; BITS, built from a pinned Git commit.  There is no configure script, and
  ;; the binaries are copied from "bin" into the output by a custom install
  ;; phase.
  (define-public bits
    (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
          (revision "1"))
      (package
        (name "bits")
        ;; Upstream has not published release archives yet; the version is
        ;; nevertheless 2.13.0.
        (version (string-append "2.13.0-" revision "." (string-take commit 9)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/arq5x/bits.git")
                 (commit commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                    ;no tests included
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'remove-cuda
               (lambda _
                 ;; Delete every match of ".*_cuda" from the Makefile and
                 ;; drop the backslash following "bits_test_intersections".
                 (substitute* "Makefile"
                   ((".*_cuda") "")
                   (("(bits_test_intersections) \\\\" _ target) target))
                 #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((destination
                        (string-append (assoc-ref outputs "out") "/bin")))
                   (copy-recursively "bin" destination))
                 #t)))))
        (inputs
         `(("gsl" ,gsl)
           ("zlib" ,zlib)))
        (home-page "https://github.com/arq5x/bits")
        (synopsis "Implementation of binary interval search algorithm")
        (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
        (license license:gpl2))))
  ;; Piranha, built from a pinned Git commit together with a separately
  ;; fetched checkout of smithlab_cpp.
  (define-public piranha
    ;; There is no release tarball for the latest version. The latest commit
    ;; is older than one year at the time of this writing.
    (let ((revision "1")
          (commit "0466d364b71117d01e4471b74c514436cc281233"))
      (package
        (name "piranha")
        (version (string-append "1.2.1-" revision "." (string-take commit 9)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/smithlabcode/piranha.git")
                        (commit commit)))
                  ;; Name the checkout after the package and version, like
                  ;; the other Git checkouts in this file.  The content hash
                  ;; below is unaffected.
                  (file-name (string-append name "-" version "-checkout"))
                  (sha256
                   (base32
                    "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
        (build-system gnu-build-system)
        (arguments
         `(#:test-target "test"
           #:phases
           (modify-phases %standard-phases
             ;; Copy every file of the "smithlab-cpp" input into
             ;; src/smithlab_cpp/ of the unpacked source tree.
             (add-after 'unpack 'copy-smithlab-cpp
               (lambda* (#:key inputs #:allow-other-keys)
                 (for-each (lambda (file)
                             (install-file file "./src/smithlab_cpp/"))
                           (find-files (assoc-ref inputs "smithlab-cpp")))
                 #t))
             ;; After the regular install phase, copy everything found under
             ;; the local "bin" directory into the output's bin directory.
             (add-after 'install 'install-to-store
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin")))
                   (for-each (lambda (file)
                               (install-file file bin))
                             (find-files "bin" ".*")))
                 #t)))
           ;; Tell configure where the "bamtools" input keeps its headers
           ;; and libraries.
           #:configure-flags
           (list (string-append "--with-bam_tools_headers="
                                (assoc-ref %build-inputs "bamtools") "/include/bamtools")
                 (string-append "--with-bam_tools_library="
                                (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
        (inputs
         `(("bamtools" ,bamtools)
           ("samtools" ,samtools-0.1)
           ("gsl" ,gsl)
           ;; Source checkout consumed by the 'copy-smithlab-cpp phase.
           ("smithlab-cpp"
            ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
               (origin
                 (method git-fetch)
                 (uri (git-reference
                       (url "https://github.com/smithlabcode/smithlab_cpp.git")
                       (commit commit)))
                 (file-name (string-append "smithlab_cpp-" commit "-checkout"))
                 (sha256
                  (base32
                   "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
        (native-inputs
         `(("python" ,python-2)))
        (home-page "https://github.com/smithlabcode/piranha")
        (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
        (description
         "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
        (license license:gpl3+))))
  ;; PePr, a Python 2 package fetched from PyPI.
  (define-public pepr
    (package
      (name "pepr")
      (version "1.0.9")
      (source (origin
                (method url-fetch)
                ;; Use the PyPI helper instead of a hand-written URL, as the
                ;; other PyPI package in this file does.
                (uri (pypi-uri "PePr" version))
                (sha256
                 (base32
                  "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2               ; python2 only
         #:tests? #f))                    ; no tests included
      (propagated-inputs
       `(("python2-numpy" ,python2-numpy)
         ("python2-scipy" ,python2-scipy)
         ("python2-pysam" ,python2-pysam)))
      (home-page "https://github.com/shawnzhangyx/PePr")
      (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
      (description
       "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
      (license license:gpl3+)))
  ;; filevercmp, built from the GitHub archive of a pinned commit.  The
  ;; version is "0-1." followed by the first seven characters of the commit.
  (define-public filevercmp
    (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
      (package
        (name "filevercmp")
        (version (string-append "0-1." (string-take commit 7)))
        (source (origin
                  (method url-fetch)
                  (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                      commit ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                    ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)          ; There is no configure phase.
             ;; Copy the "filevercmp" executable into the output's bin
             ;; directory.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                   (install-file "filevercmp" bin)
                   ;; Return an explicit success value, like the other
                   ;; custom phases in this file.
                   #t))))))
        (home-page "https://github.com/ekg/filevercmp")
        (synopsis "This program compares version strings")
        (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
        (license license:gpl3+))))
  ;; MultiQC, fetched from PyPI via `pypi-uri'.  All of its Python
  ;; dependencies are propagated.
  (define-public multiqc
    (package
      (name "multiqc")
      (version "1.2")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "multiqc" version))
                (sha256
                 (base32
                  "032svgym67k2ds7wp0cxzv79gi30yrdl45zbqn74lni3dk04qm33"))))
      (build-system python-build-system)
      (propagated-inputs
       `(("python-jinja2" ,python-jinja2)
         ("python-simplejson" ,python-simplejson)
         ("python-pyyaml" ,python-pyyaml)
         ("python-click" ,python-click)
         ("python-spectra" ,python-spectra)
         ("python-requests" ,python-requests)
         ("python-markdown" ,python-markdown)
         ("python-lzstring" ,python-lzstring)
         ("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ;; MultiQC checks for the presence of nose at runtime.
         ("python-nose" ,python-nose)))
      (home-page "http://multiqc.info")
      (synopsis "Aggregate bioinformatics analysis reports")
      (description
       "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
      (license license:gpl3)))
  ;; Bioconductor package chipseq, fetched via `bioconductor-uri'.
  (define-public r-chipseq
    (package
      (name "r-chipseq")
      (version "1.26.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "chipseq" version))
                (sha256
                 (base32
                  "1hahyqiwb2ch8214xqpw0c3jpiwkmyf3dwz0xc87jx6cdnzipj3i"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicranges" ,r-genomicranges)
         ("r-iranges" ,r-iranges)
         ("r-s4vectors" ,r-s4vectors)
         ("r-shortread" ,r-shortread)))
      (home-page "http://bioconductor.org/packages/chipseq")
      (synopsis "Package for analyzing ChIPseq data")
      (description
       "This package provides tools for processing short read data from ChIPseq
experiments.")
      (license license:artistic2.0)))
  ;; CopyhelpeR, the helper data files for CopywriteR.  The tarball comes
  ;; from the Bioconductor "data/experiment" area, so the URL is written
  ;; out by hand.
  (define-public r-copyhelper
    (package
      (name "r-copyhelper")
      (version "1.6.0")
      (source (origin
                (method url-fetch)
                ;; NOTE(review): the path goes through the "release"
                ;; directory; confirm that it stays valid once upstream
                ;; publishes a newer version.
                (uri (string-append "http://bioconductor.org/packages/release/"
                                    "data/experiment/src/contrib/CopyhelpeR_"
                                    version ".tar.gz"))
                (sha256
                 (base32
                  "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
      (properties `((upstream-name . "CopyhelpeR")))
      (build-system r-build-system)
      (home-page "http://bioconductor.org/packages/CopyhelpeR/")
      (synopsis "Helper files for CopywriteR")
      (description
       "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
      (license license:gpl2)))
  ;; Bioconductor package CopywriteR.  Its propagated inputs include the
  ;; r-copyhelper data package.
  (define-public r-copywriter
    (package
      (name "r-copywriter")
      (version "2.8.1")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "CopywriteR" version))
                (sha256
                 (base32
                  "0xgqnq5v5213b3nzvlmjysjb7w1bc0iblqpmzbjqn7n0ib0qyhbm"))))
      (properties `((upstream-name . "CopywriteR")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocparallel" ,r-biocparallel)
         ("r-chipseq" ,r-chipseq)
         ("r-copyhelper" ,r-copyhelper)
         ("r-data-table" ,r-data-table)
         ("r-dnacopy" ,r-dnacopy)
         ("r-futile-logger" ,r-futile-logger)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicranges" ,r-genomicranges)
         ("r-gtools" ,r-gtools)
         ("r-iranges" ,r-iranges)
         ("r-matrixstats" ,r-matrixstats)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)))
      (home-page "https://github.com/PeeperLab/CopywriteR")
      (synopsis "Copy number information from targeted sequencing")
      (description
       "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
      (license license:gpl2)))
  ;; Bioconductor package methylKit.  zlib is a build input in addition to
  ;; the propagated R dependencies.
  (define-public r-methylkit
    (package
      (name "r-methylkit")
      (version "1.2.0")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "methylKit" version))
                (sha256
                 (base32
                  "02acdjf6jl0c1glymin84pdna4farn4vv0gb6107d9iqz3y3gkmm"))))
      (properties `((upstream-name . "methylKit")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-data-table" ,r-data-table)
         ("r-emdbook" ,r-emdbook)
         ("r-fastseg" ,r-fastseg)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-gtools" ,r-gtools)
         ("r-iranges" ,r-iranges)
         ("r-kernsmooth" ,r-kernsmooth)
         ("r-limma" ,r-limma)
         ("r-mclust" ,r-mclust)
         ("r-qvalue" ,r-qvalue)
         ("r-r-utils" ,r-r-utils)
         ("r-rcpp" ,r-rcpp)
         ("r-rhtslib" ,r-rhtslib)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-zlibbioc" ,r-zlibbioc)))
      (inputs
       `(("zlib" ,zlib)))
      ;; The project used to be hosted on Google Code, which has been shut
      ;; down; point at the GitHub repository instead.
      ;; NOTE(review): confirm this is the canonical upstream location.
      (home-page "https://github.com/al2na/methylKit")
      (synopsis
       "DNA methylation analysis from high-throughput bisulfite sequencing results")
      (description
       "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
      (license license:artistic2.0)))
  ;; Bioconductor package sva, fetched via `bioconductor-uri'.
  (define-public r-sva
    (package
      (name "r-sva")
      (version "3.24.4")
      (source (origin
                (method url-fetch)
                (uri (bioconductor-uri "sva" version))
                (sha256
                 (base32
                  "0wcway4ai9im81xnrzb1vij2iidq5pw24qhjfgacmhxvx3dzhbsc"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-genefilter" ,r-genefilter)
         ("r-mgcv" ,r-mgcv)
         ("r-biocparallel" ,r-biocparallel)
         ("r-matrixstats" ,r-matrixstats)
         ("r-limma" ,r-limma)))
      (home-page "http://bioconductor.org/packages/sva")
      (synopsis "Surrogate variable analysis")
      (description
       "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
      (license license:artistic2.0)))
  ;; CRAN package seqminer.  zlib is its only declared input.
  (define-public r-seqminer
    (package
      (name "r-seqminer")
      (version "6.0")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "seqminer" version))
                (sha256
                 (base32
                  "057j1l6dip35l1aivilapl2zv9db677b3di2pb3sfgq2sxg0ps3l"))))
      (build-system r-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (home-page "http://seqminer.genomic.codes")
      (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
      (description
       "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
      ;; Upstream accepts any version of the GPL.
      (license (list license:gpl2+ license:gpl3+))))
  ;; RareMETALS2, fetched from a fixed URL on the genome.sph.umich.edu wiki.
  (define-public r-raremetals2
    (package
      (name "r-raremetals2")
      (version "0.1")
      (source (origin
                (method url-fetch)
                (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                    "b/b7/RareMETALS2_" version ".tar.gz"))
                (sha256
                 (base32
                  "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
      (properties `((upstream-name . "RareMETALS2")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-seqminer" ,r-seqminer)
         ("r-mvtnorm" ,r-mvtnorm)
         ("r-mass" ,r-mass)
         ("r-compquadform" ,r-compquadform)
         ("r-getopt" ,r-getopt)))
      (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
      (synopsis "Analyze gene-level association tests for binary trait")
      (description
       "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
      (license license:gpl3)))
  ;; CRAN package MALDIquant.  It declares no inputs.
  (define-public r-maldiquant
    (package
      (name "r-maldiquant")
      (version "1.16.4")
      (source (origin
                (method url-fetch)
                (uri (cran-uri "MALDIquant" version))
                (sha256
                 (base32
                  "1pmhsfvd45a44xdiml4zx3zd5fhygqyziqvygahkk9yibnyhv4cv"))))
      (properties `((upstream-name . "MALDIquant")))
      (build-system r-build-system)
      (home-page "http://cran.r-project.org/web/packages/MALDIquant")
      (synopsis "Quantitative analysis of mass spectrometry data")
      (description
       "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
      (license license:gpl3+)))
  8640. (define-public r-protgenerics
  8641. (package
  8642. (name "r-protgenerics")
  8643. (version "1.8.0")
  8644. (source
  8645. (origin
  8646. (method url-fetch)
  8647. (uri (bioconductor-uri "ProtGenerics" version))
  8648. (sha256
  8649. (base32
  8650. "08idb3rvxn4fl7rd66jasyqz47cb76dbc3968r1g26jr2ci3w1pl"))))
  8651. (properties `((upstream-name . "ProtGenerics")))
  8652. (build-system r-build-system)
  8653. (home-page "https://github.com/lgatto/ProtGenerics")
  8654. (synopsis "S4 generic functions for proteomics infrastructure")
  8655. (description
  8656. "This package provides S4 generic functions needed by Bioconductor
  8657. proteomics packages.")
  8658. (license license:artistic2.0)))
  8659. (define-public r-mzr
  8660. (package
  8661. (name "r-mzr")
  8662. (version "2.10.0")
  8663. (source
  8664. (origin
  8665. (method url-fetch)
  8666. (uri (bioconductor-uri "mzR" version))
  8667. (sha256
  8668. (base32
  8669. "1zir46h320n2vbrky6q3m8l221f3wdjlfsnx4ak9xca5min24xm7"))))
  8670. (properties `((upstream-name . "mzR")))
  8671. (build-system r-build-system)
  8672. (inputs
  8673. `(("boost" ,boost)
  8674. ("netcdf" ,netcdf)))
  8675. (propagated-inputs
  8676. `(("r-biobase" ,r-biobase)
  8677. ("r-biocgenerics" ,r-biocgenerics)
  8678. ("r-protgenerics" ,r-protgenerics)
  8679. ("r-rcpp" ,r-rcpp)
  8680. ("r-zlibbioc" ,r-zlibbioc)))
  8681. (home-page "https://github.com/sneumann/mzR/")
  8682. (synopsis "Parser for mass spectrometry data files")
  8683. (description
  8684. "The mzR package provides a unified API to the common file formats and
  8685. parsers available for mass spectrometry data. It comes with a wrapper for the
  8686. ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
  8687. The package contains the original code written by the ISB, and a subset of the
  8688. proteowizard library for mzML and mzIdentML. The netCDF reading code has
  8689. previously been used in XCMS.")
  8690. (license license:artistic2.0)))
  8691. (define-public r-affyio
  8692. (package
  8693. (name "r-affyio")
  8694. (version "1.46.0")
  8695. (source
  8696. (origin
  8697. (method url-fetch)
  8698. (uri (bioconductor-uri "affyio" version))
  8699. (sha256
  8700. (base32
  8701. "13w6al9296q916w0w6ngbsk25b21ahla1b6n40fcqhbvhyfii6sj"))))
  8702. (build-system r-build-system)
  8703. (propagated-inputs
  8704. `(("r-zlibbioc" ,r-zlibbioc)))
  8705. (inputs
  8706. `(("zlib" ,zlib)))
  8707. (home-page "https://github.com/bmbolstad/affyio")
  8708. (synopsis "Tools for parsing Affymetrix data files")
  8709. (description
  8710. "This package provides routines for parsing Affymetrix data files based
  8711. upon file format information. The primary focus is on accessing the CEL and
  8712. CDF file formats.")
  8713. (license license:lgpl2.0+)))
  8714. (define-public r-affy
  8715. (package
  8716. (name "r-affy")
  8717. (version "1.54.0")
  8718. (source
  8719. (origin
  8720. (method url-fetch)
  8721. (uri (bioconductor-uri "affy" version))
  8722. (sha256
  8723. (base32
  8724. "0azwg2qxzgflr1rjvbdln5i5rbcr9gs36kqlacd9cwl1szb9ad3m"))))
  8725. (build-system r-build-system)
  8726. (propagated-inputs
  8727. `(("r-affyio" ,r-affyio)
  8728. ("r-biobase" ,r-biobase)
  8729. ("r-biocgenerics" ,r-biocgenerics)
  8730. ("r-biocinstaller" ,r-biocinstaller)
  8731. ("r-preprocesscore" ,r-preprocesscore)
  8732. ("r-zlibbioc" ,r-zlibbioc)))
  8733. (home-page "http://bioconductor.org/packages/affy")
  8734. (synopsis "Methods for affymetrix oligonucleotide arrays")
  8735. (description
  8736. "This package contains functions for exploratory oligonucleotide array
  8737. analysis.")
  8738. (license license:lgpl2.0+)))
  8739. (define-public r-vsn
  8740. (package
  8741. (name "r-vsn")
  8742. (version "3.44.0")
  8743. (source
  8744. (origin
  8745. (method url-fetch)
  8746. (uri (bioconductor-uri "vsn" version))
  8747. (sha256
  8748. (base32
  8749. "0qhg3a4sc62pfdxcpvmk831rk138xh4zx4f1s39jhxpqqhmr7jvk"))))
  8750. (build-system r-build-system)
  8751. (propagated-inputs
  8752. `(("r-affy" ,r-affy)
  8753. ("r-biobase" ,r-biobase)
  8754. ("r-ggplot2" ,r-ggplot2)
  8755. ("r-hexbin" ,r-hexbin)
  8756. ("r-lattice" ,r-lattice)
  8757. ("r-limma" ,r-limma)))
  8758. (home-page "http://bioconductor.org/packages/release/bioc/html/vsn.html")
  8759. (synopsis "Variance stabilization and calibration for microarray data")
  8760. (description
  8761. "The package implements a method for normalising microarray intensities,
  8762. and works for single- and multiple-color arrays. It can also be used for data
  8763. from other technologies, as long as they have similar format. The method uses
  8764. a robust variant of the maximum-likelihood estimator for an
  8765. additive-multiplicative error model and affine calibration. The model
  8766. incorporates data calibration step (a.k.a. normalization), a model for the
  8767. dependence of the variance on the mean intensity and a variance stabilizing
  8768. data transformation. Differences between transformed intensities are
  8769. analogous to \"normalized log-ratios\". However, in contrast to the latter,
  8770. their variance is independent of the mean, and they are usually more sensitive
  8771. and specific in detecting differential transcription.")
  8772. (license license:artistic2.0)))
  8773. (define-public r-mzid
  8774. (package
  8775. (name "r-mzid")
  8776. (version "1.14.0")
  8777. (source
  8778. (origin
  8779. (method url-fetch)
  8780. (uri (bioconductor-uri "mzID" version))
  8781. (sha256
  8782. (base32
  8783. "11xnild02jz24vbsfy92lb7jlqqwnrswg66a7r4rsw8d2ibrbk33"))))
  8784. (properties `((upstream-name . "mzID")))
  8785. (build-system r-build-system)
  8786. (propagated-inputs
  8787. `(("r-doparallel" ,r-doparallel)
  8788. ("r-foreach" ,r-foreach)
  8789. ("r-iterators" ,r-iterators)
  8790. ("r-plyr" ,r-plyr)
  8791. ("r-protgenerics" ,r-protgenerics)
  8792. ("r-rcpp" ,r-rcpp)
  8793. ("r-xml" ,r-xml)))
  8794. (home-page "http://bioconductor.org/packages/mzID")
  8795. (synopsis "Parser for mzIdentML files")
  8796. (description
  8797. "This package provides a parser for mzIdentML files implemented using the
  8798. XML package. The parser tries to be general and able to handle all types of
  8799. mzIdentML files with the drawback of having less pretty output than a vendor
  8800. specific parser.")
  8801. (license license:gpl2+)))
  8802. (define-public r-pcamethods
  8803. (package
  8804. (name "r-pcamethods")
  8805. (version "1.68.0")
  8806. (source
  8807. (origin
  8808. (method url-fetch)
  8809. (uri (bioconductor-uri "pcaMethods" version))
  8810. (sha256
  8811. (base32
  8812. "0c4lphqyzj577ws4s172391cgv00s5nhy152zp18k2k4diyhq6n0"))))
  8813. (properties `((upstream-name . "pcaMethods")))
  8814. (build-system r-build-system)
  8815. (propagated-inputs
  8816. `(("r-biobase" ,r-biobase)
  8817. ("r-biocgenerics" ,r-biocgenerics)
  8818. ("r-mass" ,r-mass)
  8819. ("r-rcpp" ,r-rcpp)))
  8820. (home-page "https://github.com/hredestig/pcamethods")
  8821. (synopsis "Collection of PCA methods")
  8822. (description
  8823. "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
  8824. Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
  8825. for missing value estimation is included for comparison. BPCA, PPCA and
  8826. NipalsPCA may be used to perform PCA on incomplete data as well as for
  8827. accurate missing value estimation. A set of methods for printing and plotting
  8828. the results is also provided. All PCA methods make use of the same data
  8829. structure (pcaRes) to provide a common interface to the PCA results.")
  8830. (license license:gpl3+)))
  8831. (define-public r-msnbase
  8832. (package
  8833. (name "r-msnbase")
  8834. (version "2.2.0")
  8835. (source
  8836. (origin
  8837. (method url-fetch)
  8838. (uri (bioconductor-uri "MSnbase" version))
  8839. (sha256
  8840. (base32
  8841. "1n9bbdlp8d8bx2mqby0c4yylz1yil42scbkxqgyrfr9s5sn6dqff"))))
  8842. (properties `((upstream-name . "MSnbase")))
  8843. (build-system r-build-system)
  8844. (propagated-inputs
  8845. `(("r-affy" ,r-affy)
  8846. ("r-biobase" ,r-biobase)
  8847. ("r-biocgenerics" ,r-biocgenerics)
  8848. ("r-biocparallel" ,r-biocparallel)
  8849. ("r-digest" ,r-digest)
  8850. ("r-ggplot2" ,r-ggplot2)
  8851. ("r-impute" ,r-impute)
  8852. ("r-iranges" ,r-iranges)
  8853. ("r-maldiquant" ,r-maldiquant)
  8854. ("r-mzid" ,r-mzid)
  8855. ("r-mzr" ,r-mzr)
  8856. ("r-pcamethods" ,r-pcamethods)
  8857. ("r-plyr" ,r-plyr)
  8858. ("r-preprocesscore" ,r-preprocesscore)
  8859. ("r-protgenerics" ,r-protgenerics)
  8860. ("r-rcpp" ,r-rcpp)
  8861. ("r-reshape2" ,r-reshape2)
  8862. ("r-s4vectors" ,r-s4vectors)
  8863. ("r-vsn" ,r-vsn)
  8864. ("r-xml" ,r-xml)))
  8865. (home-page "https://github.com/lgatto/MSnbase")
  8866. (synopsis "Base functions and classes for MS-based proteomics")
  8867. (description
  8868. "This package provides basic plotting, data manipulation and processing
  8869. of mass spectrometry based proteomics data.")
  8870. (license license:artistic2.0)))
  8871. (define-public r-msnid
  8872. (package
  8873. (name "r-msnid")
  8874. (version "1.10.0")
  8875. (source
  8876. (origin
  8877. (method url-fetch)
  8878. (uri (bioconductor-uri "MSnID" version))
  8879. (sha256
  8880. (base32
  8881. "0pjwargi5lif8q53fd43ql67p3yk9w10jychafd9qgbaw5k3f68k"))))
  8882. (properties `((upstream-name . "MSnID")))
  8883. (build-system r-build-system)
  8884. (propagated-inputs
  8885. `(("r-biobase" ,r-biobase)
  8886. ("r-data-table" ,r-data-table)
  8887. ("r-doparallel" ,r-doparallel)
  8888. ("r-dplyr" ,r-dplyr)
  8889. ("r-foreach" ,r-foreach)
  8890. ("r-iterators" ,r-iterators)
  8891. ("r-msnbase" ,r-msnbase)
  8892. ("r-mzid" ,r-mzid)
  8893. ("r-mzr" ,r-mzr)
  8894. ("r-protgenerics" ,r-protgenerics)
  8895. ("r-r-cache" ,r-r-cache)
  8896. ("r-rcpp" ,r-rcpp)
  8897. ("r-reshape2" ,r-reshape2)))
  8898. (home-page "http://bioconductor.org/packages/MSnID")
  8899. (synopsis "Utilities for LC-MSn proteomics identifications")
  8900. (description
  8901. "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
  8902. from mzIdentML (leveraging the mzID package) or text files. After collating
  8903. the search results from multiple datasets it assesses their identification
  8904. quality and optimize filtering criteria to achieve the maximum number of
  8905. identifications while not exceeding a specified false discovery rate. It also
  8906. contains a number of utilities to explore the MS/MS results and assess missed
  8907. and irregular enzymatic cleavages, mass measurement accuracy, etc.")
  8908. (license license:artistic2.0)))
  8909. (define-public r-seurat
  8910. ;; Source releases are only made for new x.0 versions. All newer versions
  8911. ;; are only released as pre-built binaries. At the time of this writing the
  8912. ;; latest binary release is 1.4.0.12, which is equivalent to this commit.
  8913. (let ((commit "fccb77d1452c35ee47e47ebf8e87bddb59f3b08d")
  8914. (revision "1"))
  8915. (package
  8916. (name "r-seurat")
  8917. (version (string-append "1.4.0.12-" revision "." (string-take commit 7)))
  8918. (source (origin
  8919. (method git-fetch)
  8920. (uri (git-reference
  8921. (url "https://github.com/satijalab/seurat")
  8922. (commit commit)))
  8923. (file-name (string-append name "-" version "-checkout"))
  8924. (sha256
  8925. (base32
  8926. "101wq3aqrdmbfi3lqmq4iivk9iwbf10d4z216ss25hf7n9091cyl"))
  8927. ;; Delete pre-built jar.
  8928. (snippet
  8929. '(begin (delete-file "inst/java/ModularityOptimizer.jar")
  8930. #t))))
  8931. (build-system r-build-system)
  8932. (arguments
  8933. `(#:phases
  8934. (modify-phases %standard-phases
  8935. (add-after 'unpack 'build-jar
  8936. (lambda* (#:key inputs #:allow-other-keys)
  8937. (let ((classesdir "tmp-classes"))
  8938. (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
  8939. (mkdir classesdir)
  8940. (and (zero? (apply system* `("javac" "-d" ,classesdir
  8941. ,@(find-files "java" "\\.java$"))))
  8942. (zero? (system* "jar"
  8943. "-cf" "inst/java/ModularityOptimizer.jar"
  8944. "-C" classesdir ".")))))))))
  8945. (native-inputs
  8946. `(("jdk" ,icedtea "jdk")))
  8947. (propagated-inputs
  8948. `(("r-ape" ,r-ape)
  8949. ("r-caret" ,r-caret)
  8950. ("r-cowplot" ,r-cowplot)
  8951. ("r-dplyr" ,r-dplyr)
  8952. ("r-fastica" ,r-fastica)
  8953. ("r-fnn" ,r-fnn)
  8954. ("r-fpc" ,r-fpc)
  8955. ("r-gdata" ,r-gdata)
  8956. ("r-ggplot2" ,r-ggplot2)
  8957. ("r-gplots" ,r-gplots)
  8958. ("r-gridextra" ,r-gridextra)
  8959. ("r-igraph" ,r-igraph)
  8960. ("r-irlba" ,r-irlba)
  8961. ("r-lars" ,r-lars)
  8962. ("r-mixtools" ,r-mixtools)
  8963. ("r-pbapply" ,r-pbapply)
  8964. ("r-plyr" ,r-plyr)
  8965. ("r-ranger" ,r-ranger)
  8966. ("r-rcolorbrewer" ,r-rcolorbrewer)
  8967. ("r-rcpp" ,r-rcpp)
  8968. ("r-rcppeigen" ,r-rcppeigen)
  8969. ("r-rcppprogress" ,r-rcppprogress)
  8970. ("r-reshape2" ,r-reshape2)
  8971. ("r-rocr" ,r-rocr)
  8972. ("r-rtsne" ,r-rtsne)
  8973. ("r-stringr" ,r-stringr)
  8974. ("r-tclust" ,r-tclust)
  8975. ("r-tsne" ,r-tsne)
  8976. ("r-vgam" ,r-vgam)))
  8977. (home-page "http://www.satijalab.org/seurat")
  8978. (synopsis "Seurat is an R toolkit for single cell genomics")
  8979. (description
  8980. "This package is an R package designed for QC, analysis, and
  8981. exploration of single cell RNA-seq data. It easily enables widely-used
  8982. analytical techniques, including the identification of highly variable genes,
  8983. dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
  8984. algorithms; density clustering, hierarchical clustering, k-means, and the
  8985. discovery of differentially expressed genes and markers.")
  8986. (license license:gpl3))))
  8987. (define-public r-aroma-light
  8988. (package
  8989. (name "r-aroma-light")
  8990. (version "3.6.0")
  8991. (source
  8992. (origin
  8993. (method url-fetch)
  8994. (uri (bioconductor-uri "aroma.light" version))
  8995. (sha256
  8996. (base32
  8997. "10snykmmx36qaymyf5s1n1km8hsscyzpykcpf0mzsrcv8ml9rp8a"))))
  8998. (properties `((upstream-name . "aroma.light")))
  8999. (build-system r-build-system)
  9000. (propagated-inputs
  9001. `(("r-matrixstats" ,r-matrixstats)
  9002. ("r-r-methodss3" ,r-r-methodss3)
  9003. ("r-r-oo" ,r-r-oo)
  9004. ("r-r-utils" ,r-r-utils)))
  9005. (home-page "https://github.com/HenrikBengtsson/aroma.light")
  9006. (synopsis "Methods for normalization and visualization of microarray data")
  9007. (description
  9008. "This package provides methods for microarray analysis that take basic
  9009. data types such as matrices and lists of vectors. These methods can be used
  9010. standalone, be utilized in other packages, or be wrapped up in higher-level
  9011. classes.")
  9012. (license license:gpl2+)))
  9013. (define-public r-deseq
  9014. (package
  9015. (name "r-deseq")
  9016. (version "1.28.0")
  9017. (source
  9018. (origin
  9019. (method url-fetch)
  9020. (uri (bioconductor-uri "DESeq" version))
  9021. (sha256
  9022. (base32
  9023. "0j3dgcxd64m9qknmlcbdzvg4xhp981xd6nbwsvnqjfn6yypslgyw"))))
  9024. (properties `((upstream-name . "DESeq")))
  9025. (build-system r-build-system)
  9026. (propagated-inputs
  9027. `(("r-biobase" ,r-biobase)
  9028. ("r-biocgenerics" ,r-biocgenerics)
  9029. ("r-genefilter" ,r-genefilter)
  9030. ("r-geneplotter" ,r-geneplotter)
  9031. ("r-lattice" ,r-lattice)
  9032. ("r-locfit" ,r-locfit)
  9033. ("r-mass" ,r-mass)
  9034. ("r-rcolorbrewer" ,r-rcolorbrewer)))
  9035. (home-page "http://www-huber.embl.de/users/anders/DESeq")
  9036. (synopsis "Differential gene expression analysis")
  9037. (description
  9038. "This package provides tools for estimating variance-mean dependence in
  9039. count data from high-throughput genetic sequencing assays and for testing for
  9040. differential expression based on a model using the negative binomial
  9041. distribution.")
  9042. (license license:gpl3+)))
  9043. (define-public r-edaseq
  9044. (package
  9045. (name "r-edaseq")
  9046. (version "2.10.0")
  9047. (source
  9048. (origin
  9049. (method url-fetch)
  9050. (uri (bioconductor-uri "EDASeq" version))
  9051. (sha256
  9052. (base32
  9053. "0f25dfc8hdii9fjm3bf89vy9jkxv23sa62fkcga5b4gkipwrvm9a"))))
  9054. (properties `((upstream-name . "EDASeq")))
  9055. (build-system r-build-system)
  9056. (propagated-inputs
  9057. `(("r-annotationdbi" ,r-annotationdbi)
  9058. ("r-aroma-light" ,r-aroma-light)
  9059. ("r-biobase" ,r-biobase)
  9060. ("r-biocgenerics" ,r-biocgenerics)
  9061. ("r-biomart" ,r-biomart)
  9062. ("r-biostrings" ,r-biostrings)
  9063. ("r-deseq" ,r-deseq)
  9064. ("r-genomicfeatures" ,r-genomicfeatures)
  9065. ("r-genomicranges" ,r-genomicranges)
  9066. ("r-iranges" ,r-iranges)
  9067. ("r-rsamtools" ,r-rsamtools)
  9068. ("r-shortread" ,r-shortread)))
  9069. (home-page "https://github.com/drisso/EDASeq")
  9070. (synopsis "Exploratory data analysis and normalization for RNA-Seq")
  9071. (description
  9072. "This package provides support for numerical and graphical summaries of
  9073. RNA-Seq genomic read data. Provided within-lane normalization procedures to
  9074. adjust for GC-content effect (or other gene-level effects) on read counts:
  9075. loess robust local regression, global-scaling, and full-quantile
  9076. normalization. Between-lane normalization procedures to adjust for
  9077. distributional differences between lanes (e.g., sequencing depth):
  9078. global-scaling and full-quantile normalization.")
  9079. (license license:artistic2.0)))
  9080. (define-public r-interactivedisplaybase
  9081. (package
  9082. (name "r-interactivedisplaybase")
  9083. (version "1.14.0")
  9084. (source
  9085. (origin
  9086. (method url-fetch)
  9087. (uri (bioconductor-uri "interactiveDisplayBase" version))
  9088. (sha256
  9089. (base32
  9090. "12f6ap4bl3h2iwwhg8i3r9a7yyd28d8i5lb3fj1vnfvjs762r7r7"))))
  9091. (properties
  9092. `((upstream-name . "interactiveDisplayBase")))
  9093. (build-system r-build-system)
  9094. (propagated-inputs
  9095. `(("r-biocgenerics" ,r-biocgenerics)
  9096. ("r-shiny" ,r-shiny)))
  9097. (home-page "http://bioconductor.org/packages/interactiveDisplayBase")
  9098. (synopsis "Base package for web displays of Bioconductor objects")
  9099. (description
  9100. "This package contains the basic methods needed to generate interactive
  9101. Shiny-based display methods for Bioconductor objects.")
  9102. (license license:artistic2.0)))
  9103. (define-public r-annotationhub
  9104. (package
  9105. (name "r-annotationhub")
  9106. (version "2.8.2")
  9107. (source
  9108. (origin
  9109. (method url-fetch)
  9110. (uri (bioconductor-uri "AnnotationHub" version))
  9111. (sha256
  9112. (base32
  9113. "1nh5si3j1nv37jcg4260582ayjg18851np47cskrm54prnvhwd9r"))))
  9114. (properties `((upstream-name . "AnnotationHub")))
  9115. (build-system r-build-system)
  9116. (propagated-inputs
  9117. `(("r-annotationdbi" ,r-annotationdbi)
  9118. ("r-biocgenerics" ,r-biocgenerics)
  9119. ("r-biocinstaller" ,r-biocinstaller)
  9120. ("r-httr" ,r-httr)
  9121. ("r-interactivedisplaybase" ,r-interactivedisplaybase)
  9122. ("r-rsqlite" ,r-rsqlite)
  9123. ("r-s4vectors" ,r-s4vectors)
  9124. ("r-yaml" ,r-yaml)))
  9125. (home-page "http://bioconductor.org/packages/AnnotationHub")
  9126. (synopsis "Client to access AnnotationHub resources")
  9127. (description
  9128. "This package provides a client for the Bioconductor AnnotationHub web
  9129. resource. The AnnotationHub web resource provides a central location where
  9130. genomic files (e.g. VCF, bed, wig) and other resources from standard
  9131. locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
  9132. metadata about each resource, e.g., a textual description, tags, and date of
  9133. modification. The client creates and manages a local cache of files retrieved
  9134. by the user, helping with quick and reproducible access.")
  9135. (license license:artistic2.0)))
  9136. (define-public r-fastseg
  9137. (package
  9138. (name "r-fastseg")
  9139. (version "1.22.0")
  9140. (source
  9141. (origin
  9142. (method url-fetch)
  9143. (uri (bioconductor-uri "fastseg" version))
  9144. (sha256
  9145. (base32
  9146. "083wiz03q9mynwchs9frlpp6c84dncri5ncibx6h82p228cpja6h"))))
  9147. (build-system r-build-system)
  9148. (propagated-inputs
  9149. `(("r-biobase" ,r-biobase)
  9150. ("r-biocgenerics" ,r-biocgenerics)
  9151. ("r-genomicranges" ,r-genomicranges)
  9152. ("r-iranges" ,r-iranges)
  9153. ("r-s4vectors" ,r-s4vectors)))
  9154. (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
  9155. (synopsis "Fast segmentation algorithm for genetic sequencing data")
  9156. (description
  9157. "Fastseg implements a very fast and efficient segmentation algorithm.
  9158. It can segment data from DNA microarrays and data from next generation
  9159. sequencing for example to detect copy number segments. Further it can segment
  9160. data from RNA microarrays like tiling arrays to identify transcripts. Most
  9161. generally, it can segment data given as a matrix or as a vector. Various data
  9162. formats can be used as input to fastseg like expression set objects for
  9163. microarrays or GRanges for sequencing data.")
  9164. (license license:lgpl2.0+)))
  9165. (define-public r-qvalue
  9166. (package
  9167. (name "r-qvalue")
  9168. (version "2.8.0")
  9169. (source
  9170. (origin
  9171. (method url-fetch)
  9172. (uri (bioconductor-uri "qvalue" version))
  9173. (sha256
  9174. (base32
  9175. "1dxdwa767a9r8n61r272ypi09qblcdfpzzwkmri74y5mbp1r3y4i"))))
  9176. (build-system r-build-system)
  9177. (propagated-inputs
  9178. `(("r-ggplot2" ,r-ggplot2)
  9179. ("r-reshape2" ,r-reshape2)))
  9180. (home-page "http://github.com/jdstorey/qvalue")
  9181. (synopsis "Q-value estimation for false discovery rate control")
  9182. (description
  9183. "This package takes a list of p-values resulting from the simultaneous
  9184. testing of many hypotheses and estimates their q-values and local @dfn{false
  9185. discovery rate} (FDR) values. The q-value of a test measures the proportion
  9186. of false positives incurred when that particular test is called significant.
  9187. The local FDR measures the posterior probability the null hypothesis is true
  9188. given the test's p-value. Various plots are automatically generated, allowing
  9189. one to make sensible significance cut-offs. The software can be applied to
  9190. problems in genomics, brain imaging, astrophysics, and data mining.")
  9191. ;; Any version of the LGPL.
  9192. (license license:lgpl3+)))
  9193. (define htslib-for-sambamba
  9194. (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
  9195. (package
  9196. (inherit htslib)
  9197. (name "htslib-for-sambamba")
  9198. (version (string-append "1.3.1-1." (string-take commit 9)))
  9199. (source
  9200. (origin
  9201. (method git-fetch)
  9202. (uri (git-reference
  9203. (url "https://github.com/lomereiter/htslib.git")
  9204. (commit commit)))
  9205. (file-name (string-append "htslib-" version "-checkout"))
  9206. (sha256
  9207. (base32
  9208. "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
  9209. (arguments
  9210. (substitute-keyword-arguments (package-arguments htslib)
  9211. ((#:phases phases)
  9212. `(modify-phases ,phases
  9213. (add-after 'unpack 'bootstrap
  9214. (lambda _
  9215. (zero? (system* "autoreconf" "-vif"))))))))
  9216. (native-inputs
  9217. `(("autoconf" ,autoconf)
  9218. ("automake" ,automake)
  9219. ,@(package-native-inputs htslib))))))
  9220. (define-public sambamba
  9221. (package
  9222. (name "sambamba")
  9223. (version "0.6.5")
  9224. (source
  9225. (origin
  9226. (method url-fetch)
  9227. (uri (string-append "https://github.com/lomereiter/sambamba/"
  9228. "archive/v" version ".tar.gz"))
  9229. (file-name (string-append name "-" version ".tar.gz"))
  9230. (sha256
  9231. (base32
  9232. "17076gijd65a3f07zns2gvbgahiz5lriwsa6dq353ss3jl85d8vy"))))
  9233. (build-system gnu-build-system)
  9234. (arguments
  9235. `(#:tests? #f ; there is no test target
  9236. #:make-flags
  9237. '("D_COMPILER=ldc2"
  9238. ;; Override "--compiler" flag only.
  9239. "D_FLAGS=--compiler=ldc2 -IBioD -g -d"
  9240. "sambamba-ldmd2-64")
  9241. #:phases
  9242. (modify-phases %standard-phases
  9243. (delete 'configure)
  9244. (add-after 'unpack 'place-biod
  9245. (lambda* (#:key inputs #:allow-other-keys)
  9246. (copy-recursively (assoc-ref inputs "biod") "BioD")
  9247. #t))
  9248. (add-after 'unpack 'unbundle-prerequisites
  9249. (lambda _
  9250. (substitute* "Makefile"
  9251. ((" htslib-static lz4-static") ""))
  9252. #t))
  9253. (replace 'install
  9254. (lambda* (#:key outputs #:allow-other-keys)
  9255. (let* ((out (assoc-ref outputs "out"))
  9256. (bin (string-append out "/bin")))
  9257. (mkdir-p bin)
  9258. (install-file "build/sambamba" bin)
  9259. #t))))))
  9260. (native-inputs
  9261. `(("ldc" ,ldc)
  9262. ("rdmd" ,rdmd)
  9263. ("biod"
  9264. ,(let ((commit "1248586b54af4bd4dfb28ebfebfc6bf012e7a587"))
  9265. (origin
  9266. (method git-fetch)
  9267. (uri (git-reference
  9268. (url "https://github.com/biod/BioD.git")
  9269. (commit commit)))
  9270. (file-name (string-append "biod-"
  9271. (string-take commit 9)
  9272. "-checkout"))
  9273. (sha256
  9274. (base32
  9275. "1m8hi1n7x0ri4l6s9i0x6jg4z4v94xrfdzp7mbizdipfag0m17g3")))))))
  9276. (inputs
  9277. `(("lz4" ,lz4)
  9278. ("htslib" ,htslib-for-sambamba)))
  9279. (home-page "http://lomereiter.github.io/sambamba")
  9280. (synopsis "Tools for working with SAM/BAM data")
  9281. (description "Sambamba is a high performance modern robust and
  9282. fast tool (and library), written in the D programming language, for
  9283. working with SAM and BAM files. Current parallelised functionality is
  9284. an important subset of samtools functionality, including view, index,
  9285. sort, markdup, and depth.")
  9286. (license license:gpl2+)))
  9287. (define-public ritornello
  9288. (package
  9289. (name "ritornello")
  9290. (version "1.0.0")
  9291. (source (origin
  9292. (method url-fetch)
  9293. (uri (string-append "https://github.com/KlugerLab/"
  9294. "Ritornello/archive/v"
  9295. version ".tar.gz"))
  9296. (file-name (string-append name "-" version ".tar.gz"))
  9297. (sha256
  9298. (base32
  9299. "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
  9300. (build-system gnu-build-system)
  9301. (arguments
  9302. `(#:tests? #f ; there are no tests
  9303. #:phases
  9304. (modify-phases %standard-phases
  9305. (add-after 'unpack 'patch-samtools-references
  9306. (lambda* (#:key inputs #:allow-other-keys)
  9307. (substitute* '("src/SamStream.h"
  9308. "src/BufferedGenomeReader.h")
  9309. (("<sam.h>") "<samtools/sam.h>"))
  9310. #t))
  9311. (delete 'configure)
  9312. (replace 'install
  9313. (lambda* (#:key inputs outputs #:allow-other-keys)
  9314. (let* ((out (assoc-ref outputs "out"))
  9315. (bin (string-append out "/bin/")))
  9316. (mkdir-p bin)
  9317. (install-file "bin/Ritornello" bin)
  9318. #t))))))
  9319. (inputs
  9320. `(("samtools" ,samtools-0.1)
  9321. ("fftw" ,fftw)
  9322. ("boost" ,boost)
  9323. ("zlib" ,zlib)))
  9324. (home-page "https://github.com/KlugerLab/Ritornello")
  9325. (synopsis "Control-free peak caller for ChIP-seq data")
  9326. (description "Ritornello is a ChIP-seq peak calling algorithm based on
  9327. signal processing that can accurately call binding events without the need to
  9328. do a pair total DNA input or IgG control sample. It has been tested for use
  9329. with narrow binding events such as transcription factor ChIP-seq.")
  9330. (license license:gpl3+)))
  9331. (define-public trim-galore
  9332. (package
  9333. (name "trim-galore")
  9334. (version "0.4.2")
  9335. (source
  9336. (origin
  9337. (method url-fetch)
  9338. (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
  9339. "projects/trim_galore/trim_galore_v"
  9340. version ".zip"))
  9341. (sha256
  9342. (base32
  9343. "0b9qdxi4521gsrjvbhgky8g7kry9b5nx3byzaxkgxz7p4k8bn1mn"))))
  9344. (build-system gnu-build-system)
  9345. (arguments
  9346. `(#:tests? #f ; no tests
  9347. #:phases
  9348. (modify-phases %standard-phases
  9349. ;; The archive contains plain files.
  9350. (replace 'unpack
  9351. (lambda* (#:key source #:allow-other-keys)
  9352. (zero? (system* "unzip" source))))
  9353. (delete 'configure)
  9354. (delete 'build)
  9355. (add-after 'unpack 'hardcode-tool-references
  9356. (lambda* (#:key inputs #:allow-other-keys)
  9357. (substitute* "trim_galore"
  9358. (("\\$path_to_cutadapt = 'cutadapt'")
  9359. (string-append "$path_to_cutadapt = '"
  9360. (assoc-ref inputs "cutadapt")
  9361. "/bin/cutadapt'"))
  9362. (("\\| gzip")
  9363. (string-append "| "
  9364. (assoc-ref inputs "gzip")
  9365. "/bin/gzip"))
  9366. (("\"gunzip")
  9367. (string-append "\""
  9368. (assoc-ref inputs "gzip")
  9369. "/bin/gunzip")))
  9370. #t))
  9371. (replace 'install
  9372. (lambda* (#:key outputs #:allow-other-keys)
  9373. (let ((bin (string-append (assoc-ref outputs "out")
  9374. "/bin")))
  9375. (mkdir-p bin)
  9376. (install-file "trim_galore" bin)
  9377. #t))))))
  9378. (inputs
  9379. `(("gzip" ,gzip)
  9380. ("perl" ,perl)
  9381. ("cutadapt" ,cutadapt)))
  9382. (native-inputs
  9383. `(("unzip" ,unzip)))
  9384. (home-page "http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
  9385. (synopsis "Wrapper around Cutadapt and FastQC")
  9386. (description "Trim Galore! is a wrapper script to automate quality and
  9387. adapter trimming as well as quality control, with some added functionality to
  9388. remove biased methylation positions for RRBS sequence files.")
  9389. (license license:gpl3+)))
  ;; GESS is installed by copying its source tree into the output: the
  ;; 'configure' and 'build' phases are deleted and 'install' is replaced by a
  ;; hand-written phase.
  (define-public gess
    (package
      (name "gess")
      (version "1.0")
      (source (origin
                (method url-fetch)
                (uri (string-append "http://compbio.uthscsa.edu/"
                                    "GESS_Web/files/"
                                    "gess-" version ".src.tar.gz"))
                (sha256
                 (base32
                  "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((python (assoc-ref inputs "python"))
                      (out    (assoc-ref outputs "out"))
                      (bin    (string-append out "/bin/"))
                      (target (string-append
                               out "/lib/python2.7/site-packages/gess/"))
                      (script (string-append target "GESS.py")))
                 ;; Copy the whole unpacked source tree into the output.
                 (mkdir-p target)
                 (copy-recursively "." target)
                 ;; Make GESS.py executable.
                 (chmod script #o555)
                 ;; Add a Python shebang to the top and make Matplotlib
                 ;; usable by selecting the 'Agg' backend.  The interpreter
                 ;; is taken from the declared "python" input rather than
                 ;; from a PATH lookup, so the shebang always refers to the
                 ;; interpreter this package was built against.
                 (substitute* script
                   (("\"\"\"Description:" line)
                    (string-append "#!" python "/bin/python\n"
                                   "import matplotlib\n"
                                   "matplotlib.use('Agg')\n"
                                   line)))
                 ;; Make sure GESS has all modules in its path: its own
                 ;; directory plus the PYTHONPATH of the build environment.
                 (wrap-program script
                   `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
                 ;; Expose the wrapped script under bin/.
                 (mkdir-p bin)
                 (symlink script (string-append bin "GESS.py"))
                 #t))))))
      (inputs
       `(("python" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-scipy" ,python2-scipy)
         ("python2-numpy" ,python2-numpy)
         ("python2-networkx" ,python2-networkx)
         ("python2-biopython" ,python2-biopython)))
      (home-page "http://compbio.uthscsa.edu/GESS_Web/")
      (synopsis "Detect exon-skipping events from raw RNA-seq data")
      (description
       "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
      (license license:bsd-3)))
  ;; PHYLIP is built from its "src" directory with the Unix makefile; the
  ;; resulting files are then picked up from "../exe" and copied to bin/.
  (define-public phylip
    (package
      (name "phylip")
      (version "3.696")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://evolution.gs.washington.edu/phylip/download/phylip-"
               version ".tar.gz"))
         (sha256
          (base32
           "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; no check target
         #:make-flags (list "-f" "Makefile.unx" "install")
         #:parallel-build? #f             ; not supported
         #:phases
         (modify-phases %standard-phases
           ;; The makefile lives in the "src" sub-directory.
           (add-after 'unpack 'enter-dir
             (lambda _
               (chdir "src")
               #t))
           (delete 'configure)
           ;; Copy every file found under "../exe" into the output's bin/.
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "../exe" ".*"))
                 #t))))))
      (home-page "http://evolution.genetics.washington.edu/phylip/")
      (synopsis "Tools for inferring phylogenies")
      (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
      (license license:bsd-2)))
  ;; Integrative Modeling Platform, built with CMake against Python 2.
  (define-public imp
    (package
      (name "imp")
      (version "2.6.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://integrativemodeling.org/"
                             version "/download/imp-" version ".tar.gz"))
         (sha256
          (base32
           "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
      (build-system cmake-build-system)
      (arguments
       `(;; FIXME: Some tests fail because they produce warnings, others fail
         ;; because the PYTHONPATH does not include the modeller's directory.
         #:tests? #f
         ;; Do not place libraries in an architecture-specific directory.
         #:configure-flags
         (list "-DCMAKE_INSTALL_LIBDIR=lib")))
      ;; SWIG is a code generator that is only run while building, so it is a
      ;; native input: when cross-compiling it must be built for the build
      ;; machine, not for the target.
      (native-inputs
       `(("swig" ,swig)))
      (inputs
       `(("boost" ,boost)
         ("gsl" ,gsl)
         ("hdf5" ,hdf5)
         ("fftw" ,fftw)
         ("python" ,python-2)))
      (propagated-inputs
       `(("python2-numpy" ,python2-numpy)
         ("python2-scipy" ,python2-scipy)
         ("python2-pandas" ,python2-pandas)
         ("python2-scikit-learn" ,python2-scikit-learn)
         ("python2-networkx" ,python2-networkx)))
      (home-page "https://integrativemodeling.org")
      (synopsis "Integrative modeling platform")
      (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
      ;; IMP is largely available under the GNU Lesser GPL; see the file
      ;; COPYING.LGPL for the full text of this license. Some IMP modules are
      ;; available under the GNU GPL (see the file COPYING.GPL).
      (license (list license:lgpl2.1+
                     license:gpl3+))))
  ;; TADbit, built with the Python build system against Python 2.  A single
  ;; extra phase patches setup.py before the standard phases run.
  (define-public tadbit
    (package
      (name "tadbit")
      (version "0.2")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "https://github.com/3DGenomes/TADbit/archive/v"
                      version ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32
                  "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
      (build-system python-build-system)
      (arguments
       `(;; Tests are included and must be run after installation, but
         ;; they are incomplete and thus cannot be run.
         #:tests? #f
         #:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-problems-with-setup.py
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((completions (string-append (assoc-ref outputs "out")
                                                 "/etc/bash_completion.d")))
                 ;; setup.py opens these files for writing, so they have to
                 ;; be writable.
                 (for-each (lambda (file)
                             (chmod file #o664))
                           '("_pytadbit/_version.py"
                             "README.rst"))
                 ;; Don't attempt to install the bash completions to the
                 ;; home directory: rename the file and point setup.py at
                 ;; the completion directory inside the output instead.
                 (rename-file "extras/.bash_completion"
                              "extras/tadbit")
                 (substitute* "setup.py"
                   (("\\(path.expanduser\\('~'\\)")
                    (string-append "(\"" completions "\""))
                   (("extras/\\.bash_completion")
                    "extras/tadbit"))
                 #t))))))
      (inputs
       ;; TODO: add Chimera for visualization
       `(("imp" ,imp)
         ("mcl" ,mcl)
         ("python2-scipy" ,python2-scipy)
         ("python2-numpy" ,python2-numpy)
         ("python2-matplotlib" ,python2-matplotlib)
         ("python2-pysam" ,python2-pysam)))
      (home-page "http://3dgenomes.github.io/TADbit/")
      (synopsis "Analyze, model, and explore 3C-based data")
      (description
       "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
      (license license:gpl3+)))
  ;; kentUtils with everything outside a whitelist of source directories
  ;; stripped from the tarball by the origin snippet.
  (define-public kentutils
    (package
      (name "kentutils")
      ;; 302.1.0 is out, but the only difference is the inclusion of
      ;; pre-built binaries.
      (version "302.0.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ENCODE-DCC/kentUtils/archive/v"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
         (modules '((guix build utils)
                    (srfi srfi-26)
                    (ice-9 ftw)))
         (snippet
          '(begin
             ;; Only the contents of the specified directories are free
             ;; for all uses, so we remove the rest. "hg/autoSql" and
             ;; "hg/autoXml" are nominally free, but they depend on a
             ;; library that is built from the sources in "hg/lib",
             ;; which is nonfree.
             (let ((keep '("." ".."
                           "utils" "lib" "inc" "tagStorm"
                           "parasol" "htslib"))
                   (dir? (lambda (path)
                           (eq? 'directory (stat:type (stat path))))))
               ;; Regular files directly under "src" are left alone; only
               ;; directories that are not on the keep list are removed.
               (for-each (lambda (path)
                           (when (dir? path)
                             (delete-file-recursively path)))
                         (map (cut string-append "src/" <>)
                              (scandir "src"
                                       (lambda (entry)
                                         (not (member entry keep)))))))
             ;; Only make the utils target, not the userApps target,
             ;; because that requires libraries we won't build.
             (substitute* "Makefile"
               ((" userApps") " utils"))
             ;; Only build libraries that are free.
             (substitute* "src/makefile"
               (("DIRS =.*") "DIRS =\n")
               (("cd jkOwnLib.*") "")
               ((" hgLib") "")
               (("cd hg.*") ""))
             (substitute* "src/utils/makefile"
               ;; These tools depend on "jkhgap.a", which is part of the
               ;; nonfree "src/hg/lib" directory.
               (("raSqlQuery") "")
               (("pslLiftSubrangeBlat") "")
               ;; Do not build UCSC tools, which may require nonfree
               ;; components.
               (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(;; There is no global test target and the test target for
         ;; individual tools depends on input files that are not
         ;; included.
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; Use the "echo" found in PATH instead of /bin/echo.
           (add-after 'unpack 'fix-paths
             (lambda _
               (substitute* "Makefile"
                 (("/bin/echo") (which "echo")))
               #t))
           ;; Copy the samtabix sources into the build tree.
           (add-after 'unpack 'prepare-samtabix
             (lambda* (#:key inputs #:allow-other-keys)
               (copy-recursively (assoc-ref inputs "samtabix")
                                 "samtabix")
               #t))
           (delete 'configure)
           ;; The build leaves its results in "bin"; copy that directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively "bin"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))
               #t)))))
      (native-inputs
       `(("samtabix"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                   (commit "10fd107909c1ac4d679299908be4262a012965ba")))
             (sha256
              (base32
               "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
      (inputs
       `(("zlib" ,zlib)
         ("tcsh" ,tcsh)
         ("perl" ,perl)
         ("libpng" ,libpng)
         ("mysql" ,mysql)
         ("openssl" ,openssl)))
      (home-page "http://genome.cse.ucsc.edu/index.html")
      (synopsis "Assorted bioinformatics utilities")
      (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
      ;; Only a subset of the sources are released under a non-copyleft
      ;; free software license. All other sources are removed in a
      ;; snippet. See this bug report for an explanation of how the
      ;; license statements apply:
      ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
      (license (license:non-copyleft
                "http://genome.ucsc.edu/license/"
                "The contents of this package are free for all uses."))))
  ;; F-Seq, packaged from a Git snapshot and built with the Ant build system.
  (define-public f-seq
    (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
          (revision "1"))
      (package
        (name "f-seq")
        ;; Abbreviate the commit identifier to 7 characters in the version
        ;; string, as is conventional for snapshot versions; the full hash
        ;; is still what the git reference below checks out.
        (version (string-append "1.1-" revision "." (string-take commit 7)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/aboyle/F-seq.git")
                        (commit commit)))
                  (file-name (string-append name "-" version))
                  (sha256
                   (base32
                    "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                  (modules '((guix build utils)))
                  ;; Remove bundled Java library archives.
                  (snippet
                   '(begin
                      (for-each delete-file (find-files "lib" ".*"))
                      #t))))
        (build-system ant-build-system)
        (arguments
         `(#:tests? #f                    ; no tests included
           #:phases
           (modify-phases %standard-phases
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((target (assoc-ref outputs "out"))
                        (doc (string-append target "/share/doc/f-seq/")))
                   (mkdir-p target)
                   (mkdir-p doc)
                   ;; Point the launcher script at the "java" executable
                   ;; found in PATH at build time.
                   (substitute* "bin/linux/fseq"
                     (("java") (which "java")))
                   (install-file "README.txt" doc)
                   (install-file "bin/linux/fseq" (string-append target "/bin"))
                   (install-file "build~/fseq.jar" (string-append target "/lib"))
                   (copy-recursively "lib" (string-append target "/lib"))
                   #t))))))
        (inputs
         `(("perl" ,perl)
           ("java-commons-cli" ,java-commons-cli)))
        (home-page "http://fureylab.web.unc.edu/software/fseq/")
        (synopsis "Feature density estimator for high-throughput sequence tags")
        (description
         "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
        (license license:gpl3+))))
  ;; Bismark needs no configure or build step: both phases are deleted and
  ;; the install phase copies a fixed list of scripts and documents.
  (define-public bismark
    (package
      (name "bismark")
      (version "0.16.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/FelixKrueger/Bismark/archive/"
               version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "1204i0pa02ll2jn5pnxypkclnskvv7a2nwh5nxhagmhxk9wfv9sq"))))
      (build-system perl-build-system)
      (arguments
       `(#:tests? #f                      ; there are no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bin    (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/bismark")))
                 (mkdir-p docdir)
                 (mkdir-p bin)
                 ;; Everything in this list goes to bin/, including the
                 ;; "bismark_sitrep.tpl" file.
                 (for-each (lambda (script)
                             (install-file script bin))
                           '("bismark"
                             "bismark_genome_preparation"
                             "bismark_methylation_extractor"
                             "bismark2bedGraph"
                             "bismark2report"
                             "coverage2cytosine"
                             "deduplicate_bismark"
                             "bismark_sitrep.tpl"
                             "bam2nuc"
                             "bismark2summary"))
                 ;; Documentation goes to share/doc/bismark.
                 (for-each (lambda (document)
                             (install-file document docdir))
                           '("Bismark_User_Guide.pdf"
                             "RELEASE_NOTES.txt"))
                 #t))))))
      (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
      (synopsis "Map bisulfite treated sequence reads and analyze methylation")
      (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:
@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
      (license license:gpl3+)))