12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091 |
- ;;; GNU Guix --- Functional package management for GNU
- ;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
- ;;; Copyright © 2015, 2016, 2017 Ben Woodcroft <donttrustben@gmail.com>
- ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
- ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
- ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
- ;;; Copyright © 2016, 2017 Efraim Flashner <efraim@flashner.co.il>
- ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
- ;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
- ;;;
- ;;; This file is part of GNU Guix.
- ;;;
- ;;; GNU Guix is free software; you can redistribute it and/or modify it
- ;;; under the terms of the GNU General Public License as published by
- ;;; the Free Software Foundation; either version 3 of the License, or (at
- ;;; your option) any later version.
- ;;;
- ;;; GNU Guix is distributed in the hope that it will be useful, but
- ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;;; GNU General Public License for more details.
- ;;;
- ;;; You should have received a copy of the GNU General Public License
- ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
- (define-module (gnu packages bioinformatics)
- #:use-module ((guix licenses) #:prefix license:)
- #:use-module (guix packages)
- #:use-module (guix utils)
- #:use-module (guix download)
- #:use-module (guix git-download)
- #:use-module (guix hg-download)
- #:use-module (guix build-system ant)
- #:use-module (guix build-system gnu)
- #:use-module (guix build-system cmake)
- #:use-module (guix build-system ocaml)
- #:use-module (guix build-system perl)
- #:use-module (guix build-system python)
- #:use-module (guix build-system r)
- #:use-module (guix build-system ruby)
- #:use-module (guix build-system trivial)
- #:use-module (gnu packages)
- #:use-module (gnu packages autotools)
- #:use-module (gnu packages algebra)
- #:use-module (gnu packages base)
- #:use-module (gnu packages bash)
- #:use-module (gnu packages bison)
- #:use-module (gnu packages boost)
- #:use-module (gnu packages compression)
- #:use-module (gnu packages cpio)
- #:use-module (gnu packages cran)
- #:use-module (gnu packages curl)
- #:use-module (gnu packages documentation)
- #:use-module (gnu packages databases)
- #:use-module (gnu packages datastructures)
- #:use-module (gnu packages file)
- #:use-module (gnu packages flex)
- #:use-module (gnu packages gawk)
- #:use-module (gnu packages gcc)
- #:use-module (gnu packages gd)
- #:use-module (gnu packages gtk)
- #:use-module (gnu packages glib)
- #:use-module (gnu packages graph)
- #:use-module (gnu packages groff)
- #:use-module (gnu packages guile)
- #:use-module (gnu packages haskell)
- #:use-module (gnu packages image)
- #:use-module (gnu packages imagemagick)
- #:use-module (gnu packages java)
- #:use-module (gnu packages ldc)
- #:use-module (gnu packages linux)
- #:use-module (gnu packages logging)
- #:use-module (gnu packages machine-learning)
- #:use-module (gnu packages man)
- #:use-module (gnu packages maths)
- #:use-module (gnu packages mpi)
- #:use-module (gnu packages ncurses)
- #:use-module (gnu packages ocaml)
- #:use-module (gnu packages pcre)
- #:use-module (gnu packages parallel)
- #:use-module (gnu packages pdf)
- #:use-module (gnu packages perl)
- #:use-module (gnu packages pkg-config)
- #:use-module (gnu packages popt)
- #:use-module (gnu packages protobuf)
- #:use-module (gnu packages python)
- #:use-module (gnu packages readline)
- #:use-module (gnu packages ruby)
- #:use-module (gnu packages serialization)
- #:use-module (gnu packages shells)
- #:use-module (gnu packages statistics)
- #:use-module (gnu packages swig)
- #:use-module (gnu packages tbb)
- #:use-module (gnu packages tex)
- #:use-module (gnu packages texinfo)
- #:use-module (gnu packages textutils)
- #:use-module (gnu packages time)
- #:use-module (gnu packages tls)
- #:use-module (gnu packages vim)
- #:use-module (gnu packages web)
- #:use-module (gnu packages xml)
- #:use-module (gnu packages xorg)
- #:use-module (srfi srfi-1)
- #:use-module (ice-9 match))
;; The "ape" R package, fetched via `cran-uri' and built with
;; `r-build-system'.  r-lattice and r-nlme are propagated inputs.
(define-public r-ape
  (package
    (name "r-ape")
    (version "4.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ape" version))
       (sha256
        (base32 "0959fiiy11rzfzrzaknmgrx64bhszj02l0ycz79k5a6bmpfzanlk"))))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-lattice" r-lattice)
           (list "r-nlme" r-nlme)))
    (synopsis "Analyses of phylogenetics and evolution")
    (description
     "This package provides functions for reading, writing, plotting, and
manipulating phylogenetic trees, analyses of comparative data in a
phylogenetic framework, ancestral character analyses, analyses of
diversification and macroevolution, computing distances from DNA sequences,
and several other tools.")
    (home-page "http://ape-package.ird.fr/")
    (license license:gpl2+)))
;; ARAGORN is distributed as a single C file plus a man page.  The configure
;; phase is removed, and the build and install phases are replaced by a direct
;; compiler call and two file installations.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the package version.
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc" "-O3" "-ffast-math"
                               "-finline-functions"
                               "-o" "aragorn" c-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bindir  (string-append prefix "/bin"))
                    (man1dir (string-append prefix "/share/man/man1")))
               (mkdir-p bindir)
               (install-file "aragorn" bindir)
               (mkdir-p man1dir)
               (install-file "aragorn.1" man1dir))
             #t)))))
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (license license:gpl2)))
;; BamM: a Python 2 package wrapping a C library.  The bundled htslib is
;; deleted from the source and the "htslib" input is used instead.  The
;; regular build and check phases are removed; the test suite runs once,
;; after installation and wrapping.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Prefix the wrapper's PATH with the build-time PATH.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; The site-packages directory is named after the Python
             ;; major+minor version.  Compute it on the host side from the
             ;; package version instead of slicing a fixed number of
             ;; characters off the store file name, which yields a wrong
             ;; directory for versions such as "2.7.13".
             (setenv "PYTHONPATH"
                     (string-append
                      (assoc-ref outputs "out")
                      "/lib/python"
                      ,(version-major+minor (package-version python-2))
                      "/site-packages:"
                      (getenv "PYTHONPATH")))
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (zero? (system* "nosetests")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
;; BamTools, built with CMake.  LDFLAGS is set before the configure phase so
;; that the output's lib/bamtools directory is added as an rpath.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0jr024kcrhjb82cm69i7p5fcg5375zlc1h3qh2n1v368hcd0qflk"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
;; BCFtools, built against the "htslib" input: the bundled htslib-1.5
;; directory is removed from the source and the make flags point at the
;; input's headers, shared library, bgzip and tabix.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/bcftools/releases/download/"
             version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "0093hkkvxmbwfaa7905s6185jymynvg42kq6sxv7fili11l5mxwz"))
       (patches (search-patches "bcftools-regidx-unsigned-char.patch"))
       (modules '((guix build utils)))
       ;; Delete bundled htslib.
       (snippet '(delete-file-recursively "htslib-1.5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--with-htslib=system")
       #:make-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "USE_GPL=1"
               "LIBS=-lgsl -lgslcblas"
               (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "HTSDIR=" htslib "/include")
               (string-append "HTSLIB=" htslib "/lib/libhts.so")
               (string-append "BGZIP=" htslib "/bin/bgzip")
               (string-append "TABIX=" htslib "/bin/tabix")
               (string-append "PACKAGE_VERSION=" ,version)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; Point the test driver at the bash found in PATH.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    (home-page "https://samtools.github.io/bcftools/")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
;; BEDOPS.  There is no configure phase; the third-party tarballs shipped in
;; the source are unpacked right after 'unpack and the Makefile is patched so
;; that it does not unpack them again.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.
             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124
             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails as soon as one extraction fails instead of
             ;; carrying on with the Makefile patching.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    ;; 'substitute*' returns an unspecified value; report
                    ;; success explicitly.
                    #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.
BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
;; bedtools.  The configure phase is removed and the install phase is
;; replaced by one that copies every file found under "bin" into the
;; output's bin directory.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/bin/")))
               (for-each (lambda (program)
                           (install-file program destination))
                         (find-files "bin" ".*")))
             #t)))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (home-page "https://github.com/arq5x/bedtools2")
    (license license:gpl2)))
;; Ribotaper expects fewer columns than the files produced by later bedtools
;; releases contain, so this older release is kept alongside `bedtools'.
;; Everything except the version and source is inherited.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
;; The RiboTaper pipeline.  It uses the pinned `bedtools-2.18' and
;; `samtools-0.1' variants together with a set of R packages.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ohlerlab.mdc-berlin.de/files/RiboTaper/RiboTaper_Version_"
             version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "bedtools" bedtools-2.18)
           (list "samtools" samtools-0.1)
           (list "r-minimal" r-minimal)
           (list "r-foreach" r-foreach)
           (list "r-xnomial" r-xnomial)
           (list "r-domc" r-domc)
           (list "r-multitaper" r-multitaper)
           (list "r-seqinr" r-seqinr)))
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (license license:gpl3+)))
;; RiboDiff, a Python 2 package.  setup.py is patched after unpacking so
;; that scripts/TE.py is declared as an installable script.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ratschlab/RiboDiff/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Re-emit the matched "packages=" line, then add a "scripts="
             ;; line carrying the same leading text.
             (substitute* "setup.py"
               (("^(.*)packages=.*" matched leading)
                (string-append matched "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (license license:gpl3+)))
;; Bioawk.  The configure phase is removed and the install phase is replaced
;; by one that installs the "bioawk" binary and copies awk.1 as bioawk.1.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (mkdir-p man)
               ;; The manual page is installed under the program's name.
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin))
             ;; 'install-file' returns an unspecified value; report success
             ;; explicitly.
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
;; pybedtools for Python 2.  bedtools and samtools are propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use 'pypi-uri' like the other PyPI-hosted packages in this
              ;; file rather than spelling out the download URL by hand.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
;; BIOM format utilities.  The `python2-variant' property points at
;; `python2-biom-format', which is defined separately.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.6")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
    (build-system python-build-system)
    (native-inputs
     (list (list "python-nose" python-nose)))
    (propagated-inputs
     (list (list "python-numpy" python-numpy)
           (list "python-scipy" python-scipy)
           (list "python-future" python-future)
           (list "python-click" python-click)
           (list "python-h5py" python-h5py)
           (list "python-pandas" python-pandas)))
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (home-page "http://www.biom-format.org")
    (license license:bsd-3)
    ;; The variant is delayed because it is defined after this package.
    (properties
     (list (cons 'python2-variant (delay python2-biom-format))))))
;; Python 2 variant of `python-biom-format'.  It adds a phase that edits
;; setup.py and otherwise keeps the arguments of the converted base package.
(define-public python2-biom-format
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments py2-base))))))
;; BioPerl with a reduced set of inputs.  The names of the transitive target
;; inputs are computed on the host side and spliced into the build code,
;; where they are looked up in %build-inputs to build PERL5LIB for the
;; wrapped scripts.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         (transitive-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      perl-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; Release archives are named with dashes in place of the dots of
         ;; the version string.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-join (string-split version #\.) "-")
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((prefix-dir (assoc-ref outputs "out"))
                      (bindir (string-append prefix-dir "/bin/"))
                      (input-dirs (map (lambda (label)
                                         (assoc-ref %build-inputs label))
                                       ',transitive-names))
                      (perl5lib (string-join
                                 (cons (string-append prefix-dir
                                                      "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               (list "PERL5LIB" ":" 'prefix (list perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
- (define-public python-biopython ; Biopython, fetched from PyPI and built with the Python build system.
- (package
- (name "python-biopython")
- (version "1.68")
- (source (origin
- (method url-fetch)
- ;; use PyPi rather than biopython.org to ease updating
- (uri (pypi-uri "biopython" version))
- (sha256
- (base32
- "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
- (build-system python-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (add-before 'check 'set-home
- ;; Some tests require a home directory to be set.
- (lambda _ (setenv "HOME" "/tmp") #t))))) ; HOME is pointed at /tmp before the check phase
- (propagated-inputs
- `(("python-numpy" ,python-numpy)))
- (home-page "http://biopython.org/")
- (synopsis "Tools for biological computation in Python")
- (description
- "Biopython is a set of tools for biological computation including parsers
- for bioinformatics files into Python data structures; interfaces to common
- bioinformatics programs; a standard sequence class and tools for performing
- common operations on them; code to perform data classification; code for
- dealing with alignments; code making it easy to split up parallelizable tasks
- into separate processes; and more.")
- (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
- (define-public python2-biopython (package-with-python2 python-biopython)) ; Python 2 variant of python-biopython
- ;; An outdated version of biopython is required for seqmagick, see
- ;; https://github.com/fhcrc/seqmagick/issues/59
- ;; When that issue has been resolved this package should be removed.
- (define python2-biopython-1.66 ; plain define, so unlike its neighbours this variable is not public
- (package
- (inherit python2-biopython)
- (version "1.66") ; only the version and the source differ from python2-biopython
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "biopython" version))
- (sha256
- (base32
- "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
- (define-public bpp-core ; Bio++ core library, built with CMake from a pinned git commit.
- ;; The last release was in 2014 and the recommended way to install from source
- ;; is to clone the git repository, so we do this.
- ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
- (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
- (package
- (name "bpp-core")
- (version (string-append "2.2.0-1." (string-take commit 7))) ; version ends with the first 7 characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "http://biopp.univ-montp2.fr/git/bpp-core")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
- (build-system cmake-build-system)
- (arguments
- `(#:parallel-build? #f)) ; parallel builds are switched off
- (inputs
- `(("gcc" ,gcc-5))) ; Compilation of bpp-phyl fails with GCC 4.9 so we
- ; compile all of the bpp packages with GCC 5.
- (home-page "http://biopp.univ-montp2.fr")
- (synopsis "C++ libraries for Bioinformatics")
- (description
- "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
- analysis, phylogenetics, molecular evolution and population genetics. It is
- Object Oriented and is designed to be both easy to use and computer efficient.
- Bio++ intends to help programmers to write computer expensive programs, by
- providing them a set of re-usable tools.")
- (license license:cecill-c))))
- (define-public bpp-phyl ; Bio++ phylogenetics library; takes bpp-core and bpp-seq as inputs.
- ;; The last release was in 2014 and the recommended way to install from source
- ;; is to clone the git repository, so we do this.
- ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
- (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
- (package
- (name "bpp-phyl")
- (version (string-append "2.2.0-1." (string-take commit 7))) ; version ends with the first 7 characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
- (build-system cmake-build-system)
- (arguments
- `(#:parallel-build? #f
- ;; If out-of-source, test data is not copied into the build directory
- ;; so the tests fail.
- #:out-of-source? #f))
- (inputs
- `(("bpp-core" ,bpp-core)
- ("bpp-seq" ,bpp-seq)
- ;; GCC 4.8 fails due to an 'internal compiler error', so we use a more
- ;; modern GCC.
- ("gcc" ,gcc-5)))
- (home-page "http://biopp.univ-montp2.fr")
- (synopsis "Bio++ phylogenetic Library")
- (description
- "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
- analysis, phylogenetics, molecular evolution and population genetics. This
- library provides phylogenetics-related modules.")
- (license license:cecill-c))))
- (define-public bpp-popgen ; Bio++ population genetics library; takes bpp-core and bpp-seq as inputs.
- ;; The last release was in 2014 and the recommended way to install from source
- ;; is to clone the git repository, so we do this.
- ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
- (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
- (package
- (name "bpp-popgen")
- (version (string-append "2.2.0-1." (string-take commit 7))) ; version ends with the first 7 characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
- (build-system cmake-build-system)
- (arguments
- `(#:parallel-build? #f
- #:tests? #f)) ; There are no tests.
- (inputs
- `(("bpp-core" ,bpp-core)
- ("bpp-seq" ,bpp-seq)
- ("gcc" ,gcc-5))) ; same compiler input as the other bpp packages in this file
- (home-page "http://biopp.univ-montp2.fr")
- (synopsis "Bio++ population genetics library")
- (description
- "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
- analysis, phylogenetics, molecular evolution and population genetics. This
- library provides population genetics-related modules.")
- (license license:cecill-c))))
- (define-public bpp-seq ; Bio++ sequence library; takes bpp-core as an input.
- ;; The last release was in 2014 and the recommended way to install from source
- ;; is to clone the git repository, so we do this.
- ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
- (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
- (package
- (name "bpp-seq")
- (version (string-append "2.2.0-1." (string-take commit 7))) ; version ends with the first 7 characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "http://biopp.univ-montp2.fr/git/bpp-seq")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
- (build-system cmake-build-system)
- (arguments
- `(#:parallel-build? #f
- ;; If out-of-source, test data is not copied into the build directory
- ;; so the tests fail.
- #:out-of-source? #f))
- (inputs
- `(("bpp-core" ,bpp-core)
- ("gcc" ,gcc-5))) ; Use GCC 5 as per 'bpp-core'.
- (home-page "http://biopp.univ-montp2.fr")
- (synopsis "Bio++ sequence library")
- (description
- "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
- analysis, phylogenetics, molecular evolution and population genetics. This
- library provides sequence-related modules.")
- (license license:cecill-c))))
- (define-public bppsuite ; Command line tools built on the Bio++ libraries, from a pinned git commit.
- ;; The last release was in 2014 and the recommended way to install from source
- ;; is to clone the git repository, so we do this.
- ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
- (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
- (package
- (name "bppsuite")
- (version (string-append "2.2.0-1." (string-take commit 7))) ; version ends with the first 7 characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "http://biopp.univ-montp2.fr/git/bppsuite")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
- (build-system cmake-build-system)
- (arguments
- `(#:parallel-build? #f
- #:tests? #f)) ; There are no tests.
- (native-inputs
- `(("groff" ,groff)
- ("man-db" ,man-db)
- ("texinfo" ,texinfo)))
- (inputs
- `(("bpp-core" ,bpp-core)
- ("bpp-seq" ,bpp-seq)
- ("bpp-phyl" ,bpp-phyl)
- ("bpp-popgen" ,bpp-popgen) ; every input needs its own label so a lookup by label finds it
- ("gcc" ,gcc-5)))
- (home-page "http://biopp.univ-montp2.fr")
- (synopsis "Bioinformatics tools written with the Bio++ libraries")
- (description
- "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
- analysis, phylogenetics, molecular evolution and population genetics. This
- package provides command line tools using the Bio++ library.")
- (license license:cecill-c))))
- (define-public blast+ ; NCBI BLAST+ built from the ncbi-blast source tarball, split into three outputs.
- (package
- (name "blast+")
- (version "2.6.0")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
- version "/ncbi-blast-" version "+-src.tar.gz"))
- (sha256
- (base32
- "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
- (patches (search-patches "blast+-fix-makefile.patch"))
- (modules '((guix build utils)))
- (snippet
- '(begin
- ;; Remove bundled bzip2, zlib and pcre.
- (delete-file-recursively "c++/src/util/compress/bzip2")
- (delete-file-recursively "c++/src/util/compress/zlib")
- (delete-file-recursively "c++/src/util/regexp")
- (substitute* "c++/src/util/compress/Makefile.in" ; stop the Makefile from listing the deleted bzip2 and zlib directories
- (("bzip2 zlib api") "api"))
- ;; Remove useless msbuild directory
- (delete-file-recursively
- "c++/src/build-system/project_tree_builder/msbuild")
- #t))))
- (build-system gnu-build-system)
- (arguments
- `(;; There are two(!) tests for this massive library, and both fail with
- ;; "unparsable timing stats".
- ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
- ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
- #:tests? #f
- #:out-of-source? #t
- #:parallel-build? #f ; not supported
- #:phases
- (modify-phases %standard-phases
- (add-before
- 'configure 'set-HOME
- ;; $HOME needs to be set at some point during the configure phase
- (lambda _ (setenv "HOME" "/tmp") #t))
- (add-after
- 'unpack 'enter-dir
- (lambda _ (chdir "c++") #t)) ; later phases run from inside the c++ subdirectory
- (add-after
- 'enter-dir 'fix-build-system
- (lambda _
- (define (which* cmd) ; replacement text for a tool name, or #f after a warning when the tool is not found
- (cond ((string=? cmd "date")
- ;; make call to "date" deterministic
- "date -d @0")
- ((which cmd)
- => identity)
- (else
- (format (current-error-port)
- "WARNING: Unable to find absolute path for ~s~%"
- cmd)
- #f)))
- ;; Rewrite hardcoded paths to various tools
- (substitute* (append '("src/build-system/configure.ac"
- "src/build-system/configure"
- "src/build-system/helpers/run_with_lock.c"
- "scripts/common/impl/if_diff.sh"
- "scripts/common/impl/run_with_lock.sh"
- "src/build-system/Makefile.configurables.real"
- "src/build-system/Makefile.in.top"
- "src/build-system/Makefile.meta.gmake=no"
- "src/build-system/Makefile.meta.in"
- "src/build-system/Makefile.meta_l"
- "src/build-system/Makefile.meta_p"
- "src/build-system/Makefile.meta_r"
- "src/build-system/Makefile.mk.in"
- "src/build-system/Makefile.requirements"
- "src/build-system/Makefile.rules_with_autodep.in")
- (find-files "scripts/common/check" "\\.sh$"))
- (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
- (or (which* cmd) all))) ; keep the original text when no replacement is found
- (substitute* (find-files "src/build-system" "^config.*")
- (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
- (("^PATH=.*") "")) ; drop lines that assign PATH
- ;; rewrite "/var/tmp" in check script
- (substitute* "scripts/common/check/check_make_unix.sh"
- (("/var/tmp") "/tmp"))
- ;; do not reset PATH
- (substitute* (find-files "scripts/common/impl/" "\\.sh$")
- (("^ *PATH=.*") "")
- (("action=/bin/") "action=")
- (("export PATH") ":"))
- #t))
- (replace
- 'configure
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let ((out (assoc-ref outputs "out"))
- (lib (string-append (assoc-ref outputs "lib") "/lib"))
- (include (string-append (assoc-ref outputs "include")
- "/include/ncbi-tools++")))
- ;; The 'configure' script doesn't recognize things like
- ;; '--enable-fast-install'.
- (zero? (system* "./configure.orig" ; the phase succeeds only when configure.orig exits with status 0
- (string-append "--with-build-root=" (getcwd) "/build")
- (string-append "--prefix=" out)
- (string-append "--libdir=" lib)
- (string-append "--includedir=" include)
- (string-append "--with-bz2="
- (assoc-ref inputs "bzip2"))
- (string-append "--with-z="
- (assoc-ref inputs "zlib"))
- (string-append "--with-pcre="
- (assoc-ref inputs "pcre"))
- ;; Each library is built twice by default, once
- ;; with "-static" in its name, and again
- ;; without.
- "--without-static"
- "--with-dll"))))))))
- (outputs '("out" ; 21 MB
- "lib" ; 226 MB
- "include")) ; 33 MB
- (inputs
- `(("bzip2" ,bzip2)
- ("zlib" ,zlib)
- ("pcre" ,pcre)
- ("perl" ,perl)
- ("python" ,python-wrapper)))
- (native-inputs
- `(("cpio" ,cpio)))
- (home-page "http://blast.ncbi.nlm.nih.gov")
- (synopsis "Basic local alignment search tool")
- (description
- "BLAST is a popular method of performing a DNA or protein sequence
- similarity search, using heuristics to produce results quickly. It also
- calculates an “expect value” that estimates how many matches would have
- occurred at a given score by chance, which can aid a user in judging how much
- confidence to have in an alignment.")
- ;; Most of the sources are in the public domain, with the following
- ;; exceptions:
- ;; * Expat:
- ;; * ./c++/include/util/bitset/
- ;; * ./c++/src/html/ncbi_menu*.js
- ;; * Boost license:
- ;; * ./c++/include/util/impl/floating_point_comparison.hpp
- ;; * LGPL 2+:
- ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
- ;; * ASL 2.0:
- ;; * ./c++/src/corelib/teamcity_*
- (license (list license:public-domain
- license:expat
- license:boost1.0
- license:lgpl2.0+
- license:asl2.0))))
- (define-public bless ; BLESS read error corrector; installs the bless and kmc executables.
- (package
- (name "bless")
- (version "1p02")
- (source (origin
- (method url-fetch)
- (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
- version ".tgz"))
- (sha256
- (base32
- "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
- (modules '((guix build utils)))
- (snippet
- `(begin
- ;; Remove bundled boost, pigz, google-sparsehash, zlib, and the
- ;; .git directory. FIXME: also remove bundled sources for
- ;; murmurhash3 and kmc once packaged.
- (delete-file-recursively "boost")
- (delete-file-recursively "pigz")
- (delete-file-recursively "google-sparsehash")
- (delete-file-recursively "zlib")
- (delete-file-recursively ".git")
- #t))))
- (build-system gnu-build-system)
- (arguments
- '(#:tests? #f ;no "check" target
- #:make-flags
- (list (string-append "ZLIB=" ; ZLIB is set to the libz.a file of the zlib input
- (assoc-ref %build-inputs "zlib")
- "/lib/libz.a")
- (string-append "LDFLAGS="
- (string-join '("-lboost_filesystem"
- "-lboost_system"
- "-lboost_iostreams"
- "-lz"
- "-fopenmp"
- "-std=c++11"))))
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'do-not-build-bundled-pigz
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (substitute* "Makefile"
- (("cd pigz/pigz-2.3.3; make") "")) ; the pigz directory was deleted by the source snippet
- #t))
- (add-after 'unpack 'patch-paths-to-executables ; kmc is looked up in this output, pigz in the pigz input
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (substitute* "parse_args.cpp"
- (("kmc_binary = .*")
- (string-append "kmc_binary = \""
- (assoc-ref outputs "out")
- "/bin/kmc\";"))
- (("pigz_binary = .*")
- (string-append "pigz_binary = \""
- (assoc-ref inputs "pigz")
- "/bin/pigz\";")))
- #t))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
- (for-each (lambda (file)
- (install-file file bin))
- '("bless" "kmc/bin/kmc")) ; both executables are copied into bin
- #t)))
- (delete 'configure))))
- (native-inputs
- `(("perl" ,perl)))
- (inputs
- `(("openmpi" ,openmpi)
- ("boost" ,boost)
- ("sparsehash" ,sparsehash)
- ("pigz" ,pigz)
- ("zlib" ,zlib)))
- (supported-systems '("x86_64-linux"))
- (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
- (synopsis "Bloom-filter-based error correction tool for NGS reads")
- (description
- "@dfn{Bloom-filter-based error correction solution for high-throughput
- sequencing reads} (BLESS) uses a single minimum-sized bloom filter is a
- correction tool for genomic reads produced by @dfn{Next-generation
- sequencing} (NGS). BLESS produces accurate correction results with much less
- memory compared with previous solutions and is also able to tolerate a higher
- false-positive rate. BLESS can extend reads like DNA assemblers to correct
- errors at the end of reads.")
- (license license:gpl3+)))
- (define-public bowtie ; Bowtie 2, fetched from the BenLangmead/bowtie2 archive on GitHub.
- (package
- (name "bowtie")
- (version "2.3.2")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
- (modules '((guix build utils)))
- (snippet
- '(substitute* "Makefile"
- ;; replace BUILD_HOST and BUILD_TIME for deterministic build
- (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
- (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
- (build-system gnu-build-system)
- (inputs
- `(("perl" ,perl)
- ("perl-clone" ,perl-clone)
- ("perl-test-deep" ,perl-test-deep)
- ("perl-test-simple" ,perl-test-simple)
- ("python" ,python-2)
- ("tbb" ,tbb)
- ("zlib" ,zlib)))
- (arguments
- '(#:make-flags
- (list "allall"
- "WITH_TBB=1"
- (string-append "prefix=" (assoc-ref %outputs "out"))) ; install prefix is the out output
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (replace 'check ; runs simple_tests.pl against the binaries in the build directory
- (lambda* (#:key outputs #:allow-other-keys)
- (zero? (system* "perl"
- "scripts/test/simple_tests.pl"
- "--bowtie2=./bowtie2"
- "--bowtie2-build=./bowtie2-build")))))))
- (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
- (synopsis "Fast and sensitive nucleotide sequence read aligner")
- (description
- "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
- reads to long reference sequences. It is particularly good at aligning reads
- of about 50 up to 100s or 1,000s of characters, and particularly good at
- aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
- genome with an FM Index to keep its memory footprint small: for the human
- genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
- gapped, local, and paired-end alignment modes.")
- (supported-systems '("x86_64-linux"))
- (license license:gpl3+)))
- (define-public tophat ; TopHat with the bundled SeqAn and samtools removed in favour of package inputs.
- (package
- (name "tophat")
- (version "2.1.0")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
- version ".tar.gz"))
- (sha256
- (base32
- "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
- (patches (search-patches "tophat-build-with-later-seqan.patch"))
- (modules '((guix build utils)))
- (snippet
- '(begin
- ;; Remove bundled SeqAn and samtools
- (delete-file-recursively "src/SeqAn-1.3")
- (delete-file-recursively "src/samtools-0.1.18")
- #t))))
- (build-system gnu-build-system)
- (arguments
- '(#:parallel-build? #f ; not supported
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'use-system-samtools
- (lambda* (#:key inputs #:allow-other-keys)
- (substitute* "src/Makefile.in" ; strip the rules and variables that refer to the bundled samtools
- (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
- (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
- (("SAMPROG = samtools_0\\.1\\.18") "")
- (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
- (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
- (substitute* '("src/common.cpp"
- "src/tophat.py")
- (("samtools_0.1.18") (which "samtools"))) ; replaced by the path that which returns for samtools
- (substitute* '("src/common.h"
- "src/bam2fastx.cpp")
- (("#include \"bam.h\"") "#include <samtools/bam.h>")
- (("#include \"sam.h\"") "#include <samtools/sam.h>"))
- (substitute* '("src/bwt_map.h"
- "src/map2gtf.h"
- "src/align_status.h")
- (("#include <bam.h>") "#include <samtools/bam.h>")
- (("#include <sam.h>") "#include <samtools/sam.h>"))
- #t)))))
- (inputs
- `(("boost" ,boost)
- ("bowtie" ,bowtie)
- ("samtools" ,samtools-0.1)
- ("ncurses" ,ncurses)
- ("python" ,python-2)
- ("perl" ,perl)
- ("zlib" ,zlib)
- ("seqan" ,seqan)))
- (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
- (synopsis "Spliced read mapper for RNA-Seq data")
- (description
- "TopHat is a fast splice junction mapper for nucleotide sequence
- reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
- mammalian-sized genomes using the ultra high-throughput short read
- aligner Bowtie, and then analyzes the mapping results to identify
- splice junctions between exons.")
- ;; TopHat is released under the Boost Software License, Version 1.0
- ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
- (license license:boost1.0)))
- (define-public bwa ; BWA aligner; there is no configure script and no install target, so files are installed by hand.
- (package
- (name "bwa")
- (version "0.7.15")
- (source (origin
- (method url-fetch)
- (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
- version ".tar.bz2"))
- (sha256
- (base32
- "0585ikg0gv0mpyw9iq0bq9n0hr95867bbv8jbzs9pk4slkpsymig"))))
- (build-system gnu-build-system)
- (arguments
- '(#:tests? #f ;no "check" target
- #:phases
- (modify-phases %standard-phases ; same phase-editing form as the other packages in this file
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin"))
- (doc (string-append out "/share/doc/bwa"))
- (man (string-append out "/share/man/man1")))
- (install-file "bwa" bin)
- (install-file "README.md" doc)
- (install-file "bwa.1" man)
- #t))) ; report success explicitly, as the other phases in this file do
- ;; no "configure" script
- (delete 'configure))))
- (inputs `(("zlib" ,zlib)))
- ;; Non-portable SSE instructions are used so building fails on platforms
- ;; other than x86_64.
- (supported-systems '("x86_64-linux"))
- (home-page "http://bio-bwa.sourceforge.net/")
- (synopsis "Burrows-Wheeler sequence aligner")
- (description
- "BWA is a software package for mapping low-divergent sequences against a
- large reference genome, such as the human genome. It consists of three
- algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
- designed for Illumina sequence reads up to 100bp, while the rest two for
- longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
- features such as long-read support and split alignment, but BWA-MEM, which is
- the latest, is generally recommended for high-quality queries as it is faster
- and more accurate. BWA-MEM also has better performance than BWA-backtrack for
- 70-100bp Illumina reads.")
- (license license:gpl3+)))
- (define-public bwa-pssm ; BWA-PSSM; fields not listed here, such as arguments, are inherited from bwa.
- (package (inherit bwa)
- (name "bwa-pssm")
- (version "0.5.11")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
- "archive/" version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
- (build-system gnu-build-system)
- (inputs
- `(("gdsl" ,gdsl)
- ("zlib" ,zlib)
- ("perl" ,perl)))
- (home-page "http://bwa-pssm.binf.ku.dk/")
- (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
- (description
- "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
- the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
- existing aligners it is fast and sensitive. Unlike most other aligners,
- however, it is also adaptable in the sense that one can direct the alignment
- based on known biases within the data set. It is coded as a modification of
- the original BWA alignment program and shares the genome index structure as
- well as many of the command line options.")
- (license license:gpl3+)))
- (define-public python2-bx-python ; bx-python from PyPI, built with Python 2 and with tests disabled.
- (package
- (name "python2-bx-python")
- (version "0.7.3")
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "bx-python" version))
- (sha256
- (base32
- "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
- (modules '((guix build utils)))
- (snippet
- '(substitute* "setup.py"
- ;; remove dependency on outdated "distribute" module
- (("^from distribute_setup import use_setuptools") "")
- (("^use_setuptools\\(\\)") "")))))
- (build-system python-build-system)
- (arguments
- `(#:tests? #f ;tests fail because test data are not included
- #:python ,python-2)) ; build with the python-2 package
- (inputs
- `(("python-numpy" ,python2-numpy)
- ("zlib" ,zlib)))
- (native-inputs
- `(("python-nose" ,python2-nose)))
- (home-page "http://bitbucket.org/james_taylor/bx-python/")
- (synopsis "Tools for manipulating biological data")
- (description
- "bx-python provides tools for manipulating biological data, particularly
- multiple sequence alignments.")
- (license license:expat)))
- (define-public python-pysam ; Pysam built against the htslib package instead of the bundled copy.
- (package
- (name "python-pysam")
- (version "0.11.2.2")
- (source (origin
- (method url-fetch)
- ;; Test data is missing on PyPi.
- (uri (string-append
- "https://github.com/pysam-developers/pysam/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1cfqdxsqs3xhacns9n0271ck6wkc76px66ddjm91wfw2jxxfklvc"))
- (modules '((guix build utils)))
- (snippet
- ;; Drop bundled htslib. TODO: Also remove samtools and bcftools.
- '(delete-file-recursively "htslib"))))
- (build-system python-build-system)
- (arguments
- `(#:modules ((ice-9 ftw)
- (srfi srfi-26)
- (guix build python-build-system)
- (guix build utils))
- #:phases
- (modify-phases %standard-phases
- (add-before 'build 'set-flags ; point the build at the external htslib through environment variables
- (lambda* (#:key inputs #:allow-other-keys)
- (setenv "HTSLIB_MODE" "external")
- (setenv "HTSLIB_LIBRARY_DIR"
- (string-append (assoc-ref inputs "htslib") "/lib"))
- (setenv "HTSLIB_INCLUDE_DIR"
- (string-append (assoc-ref inputs "htslib") "/include"))
- (setenv "LDFLAGS" "-lncurses")
- (setenv "CFLAGS" "-D_CURSES_LIB=1")
- #t))
- (replace 'check
- (lambda* (#:key inputs outputs #:allow-other-keys)
- ;; Add first subdirectory of "build" directory to PYTHONPATH.
- (setenv "PYTHONPATH"
- (string-append
- (getenv "PYTHONPATH")
- ":" (getcwd) "/build/"
- (car (scandir "build"
- (negate (cut string-prefix? "." <>)))))) ; entries whose name starts with a dot are skipped
- ;; Step out of source dir so python does not import from CWD.
- (with-directory-excursion "tests"
- (setenv "HOME" "/tmp")
- (and (zero? (system* "make" "-C" "pysam_data"))
- (zero? (system* "make" "-C" "cbcf_data"))
- ;; Running nosetests without explicitly asking for a
- ;; single process leads to a crash. Running with multiple
- ;; processes fails because the tests are not designed to
- ;; run in parallel.
- ;; FIXME: tests keep timing out on some systems.
- ;; (zero? (system* "nosetests" "-v"
- ;; "--processes" "1"))
- )))))))
- (propagated-inputs
- `(("htslib" ,htslib))) ; Included from installed header files.
- (inputs
- `(("ncurses" ,ncurses)
- ("zlib" ,zlib)))
- (native-inputs
- `(("python-cython" ,python-cython)
- ;; Dependencies below are for tests only.
- ("samtools" ,samtools)
- ("bcftools" ,bcftools)
- ("python-nose" ,python-nose)))
- (home-page "https://github.com/pysam-developers/pysam")
- (synopsis "Python bindings to the SAMtools C API")
- (description
- "Pysam is a Python module for reading and manipulating files in the
- SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
- also includes an interface for tabix.")
- (license license:expat)))
- (define-public python2-pysam (package-with-python2 python-pysam)) ; Python 2 variant of python-pysam
- (define-public python-twobitreader ; twobitreader from PyPI, built with tests disabled.
- (package
- (name "python-twobitreader")
- (version "3.1.4")
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "twobitreader" version))
- (sha256
- (base32
- "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
- (build-system python-build-system)
- (arguments
- '(;; Tests are not distributed in the PyPi release.
- ;; TODO Try building from the Git repo or asking the upstream maintainer
- ;; to distribute the tests on PyPi.
- #:tests? #f))
- (native-inputs
- `(("python-sphinx" ,python-sphinx)))
- (home-page "https://github.com/benjschiller/twobitreader")
- (synopsis "Python library for reading .2bit files")
- (description
- "twobitreader is a Python library for reading .2bit files as used by the
- UCSC genome browser.")
- (license license:artistic2.0)))
- (define-public python2-twobitreader (package-with-python2 python-twobitreader)) ; Python 2 variant of python-twobitreader
- (define-public python-plastid ; plastid from PyPI, built with tests disabled.
- (package
- (name "python-plastid")
- (version "0.4.8")
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "plastid" version))
- (sha256
- (base32
- "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
- (build-system python-build-system)
- (arguments
- ;; Some test files are not included.
- `(#:tests? #f))
- (propagated-inputs ; the Python libraries below are propagated to users of this package
- `(("python-numpy" ,python-numpy)
- ("python-scipy" ,python-scipy)
- ("python-pandas" ,python-pandas)
- ("python-pysam" ,python-pysam)
- ("python-matplotlib" ,python-matplotlib)
- ("python-biopython" ,python-biopython)
- ("python-twobitreader" ,python-twobitreader)
- ("python-termcolor" ,python-termcolor)))
- (native-inputs
- `(("python-cython" ,python-cython)
- ("python-nose" ,python-nose)))
- (home-page "https://github.com/joshuagryphon/plastid")
- (synopsis "Python library for genomic analysis")
- (description
- "plastid is a Python library for genomic analysis – in particular,
- high-throughput sequencing data – with an emphasis on simplicity.")
- (license license:bsd-3)))
- (define-public python2-plastid (package-with-python2 python-plastid)) ; Python 2 variant of python-plastid
- (define-public cd-hit ; CD-HIT built from the upstream release tarball, with build timestamps removed.
- (package
- (name "cd-hit")
- (version "4.6.8")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/weizhongli/cdhit"
- "/releases/download/V" version
- "/cd-hit-v" version
- "-2017-0621-source.tar.gz")) ; the date part of the file name is hard-coded, not derived from version
- (sha256
- (base32
- "1386dg2npx8p62wmv08mjzsd2z3waknb9j1gg3gkvblcy57hymnn"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; there are no tests
- #:make-flags
- ;; Executables are copied directly to the PREFIX.
- (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
- #:phases
- (modify-phases %standard-phases
- ;; No "configure" script
- (delete 'configure)
- ;; Remove sources of non-determinism
- (add-after 'unpack 'be-timeless
- (lambda _
- (substitute* "cdhit-utility.c++"
- ((" \\(built on \" __DATE__ \"\\)") ""))
- (substitute* "cdhit-common.c++"
- (("__DATE__") "\"0\"")
- (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
- #t))
- ;; The "install" target does not create the target directory.
- (add-before 'install 'create-target-dir
- (lambda* (#:key outputs #:allow-other-keys)
- (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
- #t)))))
- (inputs
- `(("perl" ,perl)))
- (home-page "http://weizhongli-lab.org/cd-hit/")
- (synopsis "Cluster and compare protein or nucleotide sequences")
- (description
- "CD-HIT is a program for clustering and comparing protein or nucleotide
- sequences. CD-HIT is designed to be fast and handle extremely large
- databases.")
- ;; The manual says: "It can be copied under the GNU General Public License
- ;; version 2 (GPLv2)."
- (license license:gpl2)))
- (define-public clipper ; CLIPper, built with Python 2 after pre-built files are removed from the source.
- (package
- (name "clipper")
- (version "1.1")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/YeoLab/clipper/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
- (modules '((guix build utils)))
- (snippet
- '(begin
- ;; remove unnecessary setup dependency
- (substitute* "setup.py"
- (("setup_requires = .*") ""))
- (for-each delete-file ; delete the two shared objects shipped in the source tree
- '("clipper/src/peaks.so"
- "clipper/src/readsToWiggle.so"))
- (delete-file-recursively "dist/")
- #t))))
- (build-system python-build-system)
- (arguments `(#:python ,python-2)) ; only Python 2 is supported
- (inputs
- `(("htseq" ,python2-htseq)
- ("python-pybedtools" ,python2-pybedtools)
- ("python-cython" ,python2-cython)
- ("python-scikit-learn" ,python2-scikit-learn)
- ("python-matplotlib" ,python2-matplotlib)
- ("python-pandas" ,python2-pandas)
- ("python-pysam" ,python2-pysam)
- ("python-numpy" ,python2-numpy)
- ("python-scipy" ,python2-scipy)))
- (native-inputs
- `(("python-mock" ,python2-mock) ; for tests
- ("python-nose" ,python2-nose) ; for tests
- ("python-pytz" ,python2-pytz))) ; for tests
- (home-page "https://github.com/YeoLab/clipper")
- (synopsis "CLIP peak enrichment recognition")
- (description
- "CLIPper is a tool to define peaks in CLIP-seq datasets.")
- (license license:gpl2)))
;; CodingQuarry gene predictor.  The upstream Makefile offers neither a
;; "configure" script nor a "check" target, and installation is done by
;; hand in a replacement 'install phase.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               ;; The "QuarryFiles" directory is what QUARRY_PATH (see
               ;; native-search-paths below) points to.
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Return an explicit true value instead of relying on the
               ;; unspecified result of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
;; COUGER is installed by copying files: there is nothing to configure or
;; build, and the launcher script is patched and wrapped so that it can
;; find its modules at run time.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((destination (assoc-ref outputs "out"))
                    (bindir (string-append destination "/bin")))
               (copy-recursively "src" (string-append destination "/src"))
               (mkdir bindir)
               ;; Append the output directory (the parent of "src") to the
               ;; module lookup path of the launcher script.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 destination "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Wrap 'couger' so that it runs with the PYTHONPATH that is
             ;; set in the build environment.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    ;; The source is distributed as a zip archive.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.
COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
;; Clustal-Omega: built with the default GNU build system phases; its only
;; declared input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
;; CrossMap is built with Python 2 against the system pysam: the bundled
;; copy is deleted in the source snippet and an environment variable tells
;; the (patched) build to use the packaged library instead.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; This patch has been sent upstream already and is available
       ;; for download from Sourceforge, but it has not been merged.
       (patches (search-patches "crossmap-allow-system-pysam.patch"))
       (modules '((guix build utils)))
       ;; remove bundled copy of pysam
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the legacy 'alist-cons-after' procedure.
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly.
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
;; Cufflinks: needs extra make flags for Eigen and Boost, and its sources
;; are patched to include the BAM headers from a "samtools" directory.
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (let ((eigen (assoc-ref %build-inputs "eigen")))
         (list
          ;; The Eigen headers live in the "include/eigen3" subdirectory.
          (string-append "EIGEN_CPPFLAGS="
                         "-I" eigen
                         "/include/eigen3/")
          ;; Several Boost libraries have to be named on the link line.
          (string-append "LDFLAGS="
                         (string-join '("-lboost_system"
                                        "-lboost_serialization"
                                        "-lboost_thread")))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             ;; Rewrite the <bam/...> include paths to <samtools/...>.
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))
       #:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
;; Cutadapt: the test suite is run against the installed package, so the
;; 'check phase is moved after 'install.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/marcelm/cutadapt/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "19smhh6444ikn4jlmyhvffw4m5aw7yg07rqsk7arg8dkwyga1i4v"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after installation.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Put the freshly installed site-packages directory on
             ;; PYTHONPATH.  'add-installed-pythonpath' (from the Python
             ;; build system's build-side module) derives the Python
             ;; version itself; slicing fixed-width substrings out of the
             ;; interpreter's store path breaks as soon as the version
             ;; string is not exactly five characters long.
             (add-installed-pythonpath inputs outputs)
             (zero? (system* "nosetests" "-P" "tests")))))))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
;; libBigWig: plain Makefile build (no "configure" script) whose tests are
;; run through the "test" target.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'disable-curl-test
           (lambda _
             ;; Drop the "testRemote" invocation from the Makefile.
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdir)
                           (mkdir-p (string-append prefix subdir)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
;; pyBigWig: the bundled libBigWig sources are removed and setup.py is
;; patched to link against a library named "BigWig" instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Prepend "BigWig" to the list of libraries in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
;; Python 2 variant, derived from python-pybigwig.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
;; DendroPy.  The Python 2 variant is defined separately (python2-dendropy)
;; and registered through the 'python2-variant' property.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "DendroPy" version))
       (sha256
        (base32 "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))
       (patches (search-patches "python-dendropy-fix-tests.patch"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)
    ;; The variant is wrapped in 'delay' because python2-dendropy is
    ;; defined after this package.
    (properties `((python2-variant . ,(delay python2-dendropy))))))
;; Python 2 variant of python-dendropy, with the test suite run through
;; "nosetests" and python2-nose added to the native inputs.
(define-public python2-dendropy
  (let ((py2-variant
         (package-with-python2 (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-variant)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; There is currently a test failure that only happens on some
             ;; systems, and only using "setup.py test"
             (lambda _
               (zero? (system* "nosetests")))))))
      (native-inputs
       `(("python2-nose" ,python2-nose)
         ,@(package-native-inputs py2-variant))))))
;; py2bit, fetched from PyPI and built with the default Python build
;; system phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32 "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
;; deepTools, built with the default Python build system phases.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
    (build-system python-build-system)
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    ;; All native inputs are needed for the tests only.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
;; DIAMOND: CMake build with the "-march=native" compiler flag stripped
;; from CMakeLists.txt.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "13qqzwg54n5dqh8pm5n3v8x6gqbczzakphwwjix63qv60hcd5bqd"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
;; Discrover: CMake build; two headers are patched to include <random>.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Insert "#include <random>" right after each include guard.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
;; EIGENSOFT, built from a pinned git commit.  The Makefile lives in "src"
;; and its install target only fills the "bin" directory of the build
;; tree, so the executables are copied to the output in an extra phase.
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32 "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (executable)
                             (install-file executable target))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
;; Entrez Direct: a single Perl script that is copied to "bin" and wrapped
;; so that it finds its Perl module inputs at run time.  Nothing is
;; configured or built.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                           "versions/2016-05-03/edirect.tar.gz"))
       (sha256
        (base32
         "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path))))
             ;; Report success explicitly, as the equivalent 'wrap-program
             ;; phase of the couger package does.
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.
EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
;; Exonerate: standard GNU build, with parallel builds disabled.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description previously read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
;; eXpress: CMake build, patched to link against shared Boost libraries
;; and to take the bamtools headers and libraries from the "bamtools"
;; input rather than from a "bamtools" directory in the source tree.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the legacy 'alist-cons-after' procedure.
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
;; Express Beta Diversity: built from the "source" sub-directory; the
;; check phase runs the freshly built executable with "-u", and
;; installation copies the binary and one script by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build and check from within "source" ...
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         ;; ... and return to the top-level directory afterwards.
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "scripts/convertToEBD.py" target)
               (install-file "bin/ExpressBetaDiversity" target)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
;; FastTree: the source is a single C file, so there is nothing to unpack
;; or configure.  It is compiled twice with gcc: once as "FastTree" and
;; once with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32 "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             (let ((compile
                    ;; Compile SOURCE into the executable TARGET, placing
                    ;; EXTRA-FLAGS before the flags common to both builds.
                    (lambda (target . extra-flags)
                      (zero? (apply system* "gcc"
                                    (append extra-flags
                                            (list "-O3"
                                                  "-finline-functions"
                                                  "-funroll-loops"
                                                  "-Wall"
                                                  "-o"
                                                  target
                                                  source
                                                  "-lm")))))))
               (and (compile "FastTree")
                    (compile "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "FastTree" target)
               (install-file "FastTreeMP" target)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
;; FASTX-Toolkit: built with the default GNU build system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.
Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
;; Flexbar: CMake build.  The binary directory is passed to CMake through
;; FLEXBAR_BINARY_DIR and the standard 'install phase is dropped; the
;; validation script is then run with the output's "bin" directory on
;; PATH.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/flexbar/"
                       version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the legacy 'alist-replace' and 'alist-delete' procedures.
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The source directory name embeds the version; derive it
             ;; from the package version so the two cannot drift apart.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
;; FragGeneScan: Makefile build without a "configure" script.  The Perl
;; driver and the C sources are patched to call absolute tool paths and to
;; look for the training files under share/fraggenescan/train.  Instead of
;; 'make check', two example runs are executed after installation.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Note the trailing slash: paths below are appended
                    ;; without a leading separator.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  ;; SHARE already ends with "/", so no extra separator is
                  ;; added in front of "train/".
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share)
               ;; Return an explicit true value instead of relying on the
               ;; unspecified result of 'copy-recursively'.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check the
           ;; output files gets created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               (and (zero? (system* frag ; Test complete genome.
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out")
                    (zero? (system* ; Test incomplete sequences.
                            frag
                            "-genome=./example/NC_000913-fgs.ffn"
                            "-out=out"
                            "-complete=0"
                            "-train=454_30")))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
- (define-public fxtract
- ;; UTIL-COMMIT pins the revision of the separate 'util' repository whose
- ;; checkout is copied into the source tree before building.
- (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
- (package
- (name "fxtract")
- (version "2.3")
- (source
- (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/ctSkennerton/fxtract/archive/"
- version ".tar.gz"))
- ;; Name the tarball after this package; it used to carry the name of
- ;; the util checkout, which is a different origin (see native-inputs).
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
- (build-system gnu-build-system)
- (arguments
- `(#:make-flags (list
- (string-append "PREFIX=" (assoc-ref %outputs "out"))
- "CC=gcc")
- #:test-target "fxtract_test"
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- ;; Replace the "util" directory of the unpacked source with the
- ;; contents of the ctskennerton-util input.
- (add-before 'build 'copy-util
- (lambda* (#:key inputs #:allow-other-keys)
- (rmdir "util")
- (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
- #t))
- ;; Do not use make install as this requires additional dependencies.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin")))
- (install-file "fxtract" bin)
- #t))))))
- (inputs
- `(("pcre" ,pcre)
- ("zlib" ,zlib)))
- (native-inputs
- ;; ctskennerton-util is licensed under GPL2.
- `(("ctskennerton-util"
- ,(origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/ctSkennerton/util.git")
- (commit util-commit)))
- (file-name (string-append
- "ctstennerton-util-" util-commit "-checkout"))
- (sha256
- (base32
- "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
- (home-page "https://github.com/ctSkennerton/fxtract")
- (synopsis "Extract sequences from FASTA and FASTQ files")
- (description
- "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
- or FASTQ) file given a subsequence. It uses a simple substring search for
- basic tasks but can change to using POSIX regular expressions, PCRE, hash
- lookups or multi-pattern searching as required. By default fxtract looks in
- the sequence of each record but can also be told to look in the header,
- comment or quality sections.")
- ;; 'util' requires SSE instructions.
- (supported-systems '("x86_64-linux"))
- (license license:expat))))
- (define-public gemma
- (package
- (name "gemma")
- (version "0.96")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))
- (patches (search-patches "gemma-intel-compat.patch"))))
- (build-system gnu-build-system)
- (arguments
- `(#:make-flags
- ;; FORCE_DYNAMIC=1 is passed on every system; the second flag, if any,
- ;; depends on the system being built for.
- '("FORCE_DYNAMIC=1"
- ,@(match (%current-system)
- ("x86_64-linux" '())
- ("i686-linux" '("FORCE_32BIT=1"))
- (_ '("NO_INTEL_COMPAT=1"))))
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- ;; Create the "bin" directory before the build; the install phase
- ;; below picks the executable up from there.
- (add-before 'build 'bin-mkdir
- (lambda _
- (mkdir-p "bin")
- #t))
- ;; Copy the single executable into the output by hand.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (install-file "bin/gemma"
- (string-append (assoc-ref outputs "out") "/bin"))
- #t)))
- #:tests? #f)) ; no tests included yet
- (inputs
- `(("gsl" ,gsl)
- ("lapack" ,lapack)
- ("zlib" ,zlib)))
- (home-page "https://github.com/xiangzhou/GEMMA")
- (synopsis "Tool for genome-wide efficient mixed model association")
- (description
- "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
- standard linear mixed model resolver with application in genome-wide
- association studies (GWAS).")
- (license license:gpl3)))
- (define-public grit
- (package
- (name "grit")
- (version "2.0.2")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/nboley/grit/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
- (build-system python-build-system)
- (arguments
- `(#:python ,python-2
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'generate-from-cython-sources
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let ((numpy (assoc-ref inputs "python-numpy")))
- ;; Remove the pre-generated C files so that they are regenerated
- ;; from the pyx sources.
- (for-each delete-file
- '("grit/sparsify_support_fns.c"
- "grit/call_peaks_support_fns.c"))
- (substitute* "setup.py"
- (("Cython.Setup") "Cython.Build")
- ;; Compilation needs the numpy headers on the include path.
- (("pyx\", \\]")
- (string-append "pyx\", ], include_dirs = ['"
- numpy
- "/lib/python2.7/site-packages/numpy/core/include/"
- "']")))
- #t))))))
- (native-inputs
- `(("python-cython" ,python2-cython)))
- (inputs
- `(("python-scipy" ,python2-scipy)
- ("python-numpy" ,python2-numpy)
- ("python-pysam" ,python2-pysam)
- ("python-networkx" ,python2-networkx)))
- (home-page "http://grit-bio.org")
- (synopsis "Tool for integrative analysis of RNA-seq type assays")
- (description
- "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
- full length transcript models. When none of these data sources are available,
- GRIT can be run by providing a candidate set of TES or TSS sites. In
- addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
- also be run in quantification mode, where it uses a provided GTF file and just
- estimates transcript expression.")
- (license license:gpl3+)))
- (define-public hisat
- (package
- (name "hisat")
- (version "0.1.4")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
- version "-beta-source.zip"))
- (sha256
- (base32
- "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ;no check target
- #:make-flags '("allall"
- ;; `popcnt' instructions are only kept enabled when the
- ;; system being built for is x86_64.
- ,@(if (string-prefix? "x86_64"
- (or (%current-target-system)
- (%current-system)))
- '()
- '("POPCNT_CAPABILITY=0")))
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- ;; Copy every file matching the hisat executable naming scheme
- ;; into bin/.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((target (string-append (assoc-ref outputs "out") "/bin/")))
- (for-each (lambda (program)
- (install-file program target))
- (find-files
- "."
- "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))))
- (add-after 'unpack 'patch-sources
- (lambda _
- ;; XXX A snippet cannot be used here because zip files are not
- ;; supported.
- (substitute* "Makefile"
- (("^CC = .*$") "CC = gcc")
- (("^CPP = .*$") "CPP = g++")
- ;; Fixed BUILD_HOST and BUILD_TIME values keep the build
- ;; deterministic.
- (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
- (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
- (substitute* '("hisat-build" "hisat-inspect")
- (("/usr/bin/env") (which "env"))))))))
- (native-inputs
- `(("unzip" ,unzip)))
- (inputs
- `(("perl" ,perl)
- ("python" ,python)
- ("zlib" ,zlib)))
- ;; Non-portable SSE instructions are used so building fails on platforms
- ;; other than x86_64.
- (supported-systems '("x86_64-linux"))
- (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
- (synopsis "Hierarchical indexing for spliced alignment of transcripts")
- (description
- "HISAT is a fast and sensitive spliced alignment program for mapping
- RNA-seq reads. In addition to one global FM index that represents a whole
- genome, HISAT uses a large set of small FM indexes that collectively cover the
- whole genome. These small indexes (called local indexes) combined with
- several alignment strategies enable effective alignment of RNA-seq reads, in
- particular, reads spanning multiple exons.")
- (license license:gpl3+)))
- (define-public hisat2
- (package
- (name "hisat2")
- (version "2.0.5")
- (source
- (origin
- (method url-fetch)
- ;; FIXME: a better source URL is
- ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
- ;; "/downloads/hisat2-" version "-source.zip")
- ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
- ;; but it is currently unavailable.
- (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; no check target
- #:make-flags (list "CC=gcc" "CXX=g++" "allall")
- #:modules ((guix build gnu-build-system)
- (guix build utils)
- (srfi srfi-26))
- #:phases
- (modify-phases %standard-phases
- ;; Replace the `date` invocation in the Makefile with a constant.
- (add-after 'unpack 'make-deterministic
- (lambda _
- (substitute* "Makefile"
- (("`date`") "0"))
- #t))
- (delete 'configure)
- ;; Install the matching executables into bin/ and the HTML manual
- ;; into share/doc/hisat2/.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((prefix (assoc-ref outputs "out"))
- (bindir (string-append prefix "/bin/"))
- (docdir (string-append prefix "/share/doc/hisat2/")))
- (for-each
- (lambda (program)
- (install-file program bindir))
- (find-files "."
- "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
- (mkdir-p docdir)
- (install-file "doc/manual.inc.html" docdir))
- #t)))))
- (native-inputs
- `(("unzip" ,unzip) ; needed for archive from ftp
- ("perl" ,perl)
- ("pandoc" ,ghc-pandoc))) ; for documentation
- (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
- (synopsis "Graph-based alignment of genomic sequencing reads")
- (description "HISAT2 is a fast and sensitive alignment program for mapping
- next-generation sequencing reads (both DNA and RNA) to a population of human
- genomes (as well as to a single reference genome). In addition to using one
- global @dfn{graph FM} (GFM) index that represents a population of human
- genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
- the whole genome. These small indexes, combined with several alignment
- strategies, enable rapid and accurate alignment of sequencing reads. This new
- indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
- ;; HISAT2 contains files from Bowtie2, which is released under
- ;; GPLv2 or later. The HISAT2 source files are released under
- ;; GPLv3 or later.
- (license license:gpl3+)))
- (define-public hmmer
- (package
- (name "hmmer")
- (version "3.1b2")
- (source
- (origin
- (method url-fetch)
- ;; The download directory is built from the version prefix followed by
- ;; the full version.
- (uri (string-append
- "http://eddylab.org/software/hmmer"
- (version-prefix version 1) "/"
- version "/hmmer-" version ".tar.gz"))
- (sha256
- (base32
- "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))
- (patches (search-patches "hmmer-remove-cpu-specificity.patch"))))
- (native-inputs `(("perl" ,perl)))
- (build-system gnu-build-system)
- (synopsis "Biosequence analysis using profile hidden Markov models")
- (description
- "HMMER is used for searching sequence databases for homologs of protein
- sequences, and for making protein sequence alignments. It implements methods
- using probabilistic models called profile hidden Markov models (profile
- HMMs).")
- (home-page "http://hmmer.org/")
- (license (list license:gpl3+
- ;; The Janelia Farm Software License covers the bundled
- ;; 'easel' library.
- (license:non-copyleft
- "file://easel/LICENSE"
- "See easel/LICENSE in the distribution.")))))
- (define-public htseq
- (package
- (name "htseq")
- (version "0.9.1")
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "HTSeq" version))
- (sha256
- (base32
- "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
- (build-system python-build-system)
- (inputs
- `(("python-pysam" ,python-pysam)
- ("python-matplotlib" ,python-matplotlib)))
- ;; Using htseq as a Python library requires numpy, hence the propagation.
- (propagated-inputs
- `(("python-numpy" ,python-numpy)))
- (native-inputs
- `(("python-cython" ,python-cython)))
- (synopsis "Analysing high-throughput sequencing data with Python")
- (description
- "HTSeq is a Python package that provides infrastructure to process data
- from high-throughput sequencing assays.")
- (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
- (license license:gpl3+)))
- ;; Variant of the htseq package obtained by passing it to
- ;; 'package-with-python2'.
- (define-public python2-htseq
- (package-with-python2 htseq))
- (define-public java-htsjdk
- (package
- (name "java-htsjdk")
- (version "1.129")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/samtools/htsjdk/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
- (modules '((guix build utils)))
- ;; Remove the build dependency on git by letting the build continue
- ;; when an external command cannot be executed.
- (snippet '(substitute* "build.xml"
- (("failifexecutionfails=\"true\"")
- "failifexecutionfails=\"false\"")))))
- (build-system ant-build-system)
- (arguments
- `(#:tests? #f ; tests require Internet access
- ;; Point the "dist" property at the output so that the jars end up
- ;; under share/java/htsjdk/.
- #:make-flags
- (list (string-append "-Ddist=" (assoc-ref %outputs "out")
- "/share/java/htsjdk/"))
- #:build-target "all"
- #:phases
- (modify-phases %standard-phases
- ;; The build phase also installs the jars
- (delete 'install))))
- (home-page "http://samtools.github.io/htsjdk/")
- (synopsis "Java API for high-throughput sequencing data (HTS) formats")
- (description
- "HTSJDK is an implementation of a unified Java library for accessing
- common file formats, such as SAM and VCF, used for high-throughput
- sequencing (HTS) data. There are also a number of useful utilities for
- manipulating HTS data.")
- (license license:expat)))
- (define-public htslib
- (package
- (name "htslib")
- (version "1.5")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/samtools/htslib/releases/download/"
- version "/htslib-" version ".tar.bz2"))
- (sha256
- (base32
- "0bcjmnbwp2bib1z1bkrp95w9v2syzdwdfqww10mkb1hxlmg52ax0"))))
- (build-system gnu-build-system)
- (native-inputs
- `(("perl" ,perl)))
- (inputs
- `(("openssl" ,openssl)
- ("curl" ,curl)
- ("zlib" ,zlib)))
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; The test driver refers to /bin/bash; substitute the bash found
- ;; on PATH.
- (add-after 'unpack 'patch-tests
- (lambda _
- (substitute* "test/test.pl"
- (("/bin/bash") (which "bash")))
- #t)))))
- (home-page "http://www.htslib.org")
- (synopsis "C library for reading/writing high-throughput sequencing data")
- (description
- "HTSlib is a C library for reading/writing high-throughput sequencing
- data. It also provides the bgzip, htsfile, and tabix utilities.")
- ;; The modified BSD license applies to the files under cram/; everything
- ;; else is under the Expat license.
- (license (list license:expat license:bsd-3))))
- ;; Older htslib release: inherits every field from 'htslib' and overrides
- ;; only the version and the source. It is bound with plain 'define', not
- ;; 'define-public'.
- ;; This package should be removed once no packages rely upon it.
- (define htslib-1.3
- (package
- (inherit htslib)
- (version "1.3.1")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/samtools/htslib/releases/download/"
- version "/htslib-" version ".tar.bz2"))
- (sha256
- (base32
- "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
- (define-public idr
- (package
- (name "idr")
- (version "2.0.0")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/nboley/idr/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
- (build-system python-build-system)
- ;; FIXME: the tests are disabled because they fail with
- ;; "ImportError: No module named 'utility'".
- (arguments
- `(#:tests? #f))
- (native-inputs
- `(("python-cython" ,python-cython)))
- (propagated-inputs
- `(("python-scipy" ,python-scipy)
- ("python-sympy" ,python-sympy)
- ("python-numpy" ,python-numpy)
- ("python-matplotlib" ,python-matplotlib)))
- (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
- (description
- "The IDR (Irreproducible Discovery Rate) framework is a unified approach
- to measure the reproducibility of findings identified from replicate
- experiments and provide highly stable thresholds based on reproducibility.")
- (home-page "https://github.com/nboley/idr")
- (license license:gpl3+)))
- (define-public jellyfish
- (package
- (name "jellyfish")
- (version "2.2.4")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/gmarcais/Jellyfish/"
- "releases/download/v" version
- "/jellyfish-" version ".tar.gz"))
- (sha256
- (base32
- "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
- (build-system gnu-build-system)
- (outputs '("out" ;for library
- "ruby" ;for Ruby bindings
- "python")) ;for Python bindings
- (arguments
- `(#:configure-flags
- ;; One "--enable-LANG-binding=DIR" flag per binding, each pointing at
- ;; the output of the same name.
- (map (lambda (language)
- (string-append "--enable-" language "-binding="
- (assoc-ref %outputs language)))
- '("ruby" "python"))
- #:phases
- (modify-phases %standard-phases
- (add-before 'check 'set-SHELL-variable
- (lambda _
- ;; The tests are run through /bin/sh or $SHELL by
- ;; generator_manager.hpp, so provide the latter.
- (setenv "SHELL" (which "bash"))
- #t)))))
- (native-inputs
- `(("bc" ,bc)
- ("time" ,time)
- ("ruby" ,ruby)
- ("python" ,python-2)))
- (home-page "http://www.genome.umd.edu/jellyfish.html")
- (synopsis "Tool for fast counting of k-mers in DNA")
- (description
- "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
- DNA. A k-mer is a substring of length k, and counting the occurrences of all
- such substrings is a central step in many analyses of DNA sequence. Jellyfish
- is a command-line program that reads FASTA and multi-FASTA files containing
- DNA sequences. It outputs its k-mer counts in a binary format, which can be
- translated into a human-readable text format using the @code{jellyfish dump}
- command, or queried for specific k-mers with @code{jellyfish query}.")
- ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
- (supported-systems '("x86_64-linux"))
- ;; The combined work is published under the GPLv3 or later. Individual
- ;; files such as lib/jsoncpp.cpp are released under the Expat license.
- (license (list license:gpl3+ license:expat))))
- (define-public khmer
- (package
- (name "khmer")
- (version "2.0")
- (source
- (origin
- (method url-fetch)
- (uri (pypi-uri "khmer" version))
- (sha256
- (base32
- "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
- (patches (search-patches "khmer-use-libraries.patch"))))
- (build-system python-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'set-paths
- (lambda* (#:key inputs outputs #:allow-other-keys)
- ;; Drop the bundled copies of zlib and bzip2.
- (for-each delete-file-recursively
- '("third-party/zlib" "third-party/bzip2"))
- ;; Swap the bundled seqan for the headers of the seqan input.
- (let* ((bundled-seqan "third-party/seqan")
- (header-dir (string-append
- bundled-seqan "/core/include")))
- (delete-file-recursively bundled-seqan)
- (copy-recursively (string-append (assoc-ref inputs "seqan")
- "/include/seqan")
- (string-append header-dir "/seqan")))
- ;; We do not replace the bundled MurmurHash as the canonical
- ;; repository for this code 'SMHasher' is unsuitable for
- ;; providing a library. See
- ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
- #t))
- (add-after 'unpack 'set-cc
- (lambda _
- (setenv "CC" "gcc")
- #t))
- ;; It is simpler to test after installation.
- (delete 'check)
- (add-after 'install 'post-install-check
- (lambda* (#:key inputs outputs #:allow-other-keys)
- ;; PYTHON-VERSION is cut out of the tail of the python input's
- ;; file name: the last five characters, of which the first three
- ;; are kept.
- (let* ((out (assoc-ref outputs "out"))
- (python-version (string-take (string-take-right
- (assoc-ref inputs "python") 5) 3)))
- (setenv "PATH"
- (string-append (getenv "PATH") ":" out "/bin"))
- (setenv "PYTHONPATH"
- (string-append (getenv "PYTHONPATH")
- ":"
- out
- "/lib/python"
- python-version
- "/site-packages"))
- (with-directory-excursion "build"
- (zero? (system* "nosetests" "khmer" "--attr"
- "!known_failing")))))))))
- (native-inputs
- `(("seqan" ,seqan)
- ("python-nose" ,python-nose)))
- (inputs
- `(("zlib" ,zlib)
- ("bzip2" ,bzip2)
- ("python-screed" ,python-screed)
- ("python-bz2file" ,python-bz2file)
- ;; Tests fail when gcc-5 is used for compilation. Use gcc-4.9 at least
- ;; until the next version of khmer (likely 2.1) is released.
- ("gcc" ,gcc-4.9)))
- (home-page "https://khmer.readthedocs.org/")
- (synopsis "K-mer counting, filtering and graph traversal library")
- (description "The khmer software is a set of command-line tools for
- working with DNA shotgun sequencing data from genomes, transcriptomes,
- metagenomes and single cells. Khmer can make de novo assemblies faster, and
- sometimes better. Khmer can also identify and fix problems with shotgun
- data.")
- ;; When building on i686, armhf and mips64el, we get the following error:
- ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
- (supported-systems '("x86_64-linux"))
- (license license:bsd-3)))
- (define-public kaiju
- (package
- (name "kaiju")
- (version "1.5.0")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/bioinformatics-centre/kaiju/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0afbfalfw9y39bkwnqjrh9bghs118ws1pzj5h8l0nblgn3mbjdks"))))
- (build-system gnu-build-system)
- (inputs
- `(("perl" ,perl)))
- (arguments
- `(#:tests? #f ; There are no tests.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- ;; The build runs from the "src" subdirectory.
- (add-before 'build 'move-to-src-dir
- (lambda _ (chdir "src") #t))
- ;; Go back up one directory and copy the contents of both "bin"
- ;; and "util" into the output's bin directory.
- (replace 'install
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let ((target (string-append (assoc-ref outputs "out") "/bin")))
- (mkdir-p target)
- (chdir "..")
- (for-each (lambda (directory)
- (copy-recursively directory target))
- '("bin" "util")))
- #t)))))
- (home-page "http://kaiju.binf.ku.dk/")
- (synopsis "Fast and sensitive taxonomic classification for metagenomics")
- (description "Kaiju is a program for sensitive taxonomic classification
- of high-throughput sequencing reads from metagenomic whole genome sequencing
- experiments.")
- (license license:gpl3+)))
- (define-public macs
- (package
- (name "macs")
- (version "2.1.0.20151222")
- ;; The PyPI project is called "MACS2" although the package is named "macs".
- (source (origin
- (method url-fetch)
- (uri (pypi-uri "MACS2" version))
- (sha256
- (base32
- "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
- (build-system python-build-system)
- (arguments
- `(#:python ,python-2 ; only compatible with Python 2.7
- #:tests? #f)) ; no test target
- (inputs
- `(("python-numpy" ,python2-numpy)))
- (home-page "https://github.com/taoliu/MACS/")
- (synopsis "Model based analysis for ChIP-Seq data")
- (description
- "MACS is an implementation of a ChIP-Seq analysis algorithm for
- identifying transcription factor binding sites named Model-based Analysis of
- ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
- the significance of enriched ChIP regions and it improves the spatial
- resolution of binding sites through combining the information of both
- sequencing tag position and orientation.")
- (license license:bsd-3)))
- (define-public mafft
- (package
- (name "mafft")
- (version "7.310")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "http://mafft.cbrc.jp/alignment/software/mafft-" version
- "-without-extensions-src.tgz"))
- (file-name (string-append name "-" version ".tgz"))
- (sha256
- (base32
- "0gbsaz6z2qa307kd7wfb06c3y4ikmv1hsdvlns11f6zq4w1z9pwc"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; no automated tests, though there are tests in the read me
- #:make-flags (let ((out (assoc-ref %outputs "out")))
- (list (string-append "PREFIX=" out)
- (string-append "BINDIR="
- (string-append out "/bin"))))
- #:phases
- (modify-phases %standard-phases
- ;; Everything below operates inside the "core" subdirectory.
- (add-after 'unpack 'enter-dir
- (lambda _ (chdir "core") #t))
- (add-after 'enter-dir 'patch-makefile
- (lambda _
- ;; on advice from the MAFFT authors, there is no need to
- ;; distribute mafft-profile, mafft-distance, or
- ;; mafft-homologs.rb as they are too "specialised".
- (substitute* "Makefile"
- ;; remove mafft-homologs.rb from SCRIPTS
- (("^SCRIPTS = mafft mafft-homologs.rb")
- "SCRIPTS = mafft")
- ;; remove mafft-homologs from MANPAGES
- (("^MANPAGES = mafft.1 mafft-homologs.1")
- "MANPAGES = mafft.1")
- ;; remove mafft-distance from PROGS
- (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
- "PROGS = dvtditr dndfast7 dndblast sextet5")
- ;; remove mafft-profile from PROGS; the pattern must name
- ;; "f2cl" exactly as the replacement does, otherwise it never
- ;; matches and mafft-profile stays in PROGS.
- (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
- "splittbfast disttbfast tbfast f2cl mccaskillwrap")
- ;; Comment out the clean-up of the programs removed above.
- (("^rm -f mafft-profile mafft-profile.exe") "#")
- (("^rm -f mafft-distance mafft-distance.exe") "#")
- ;; do not install MAN pages in libexec folder
- (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
- \\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
- #t))
- ;; Replace references to perl, awk and grep with the programs found
- ;; on PATH at build time.
- (add-after 'enter-dir 'patch-paths
- (lambda* (#:key inputs #:allow-other-keys)
- (substitute* '("pairash.c"
- "mafft.tmpl")
- (("perl") (which "perl"))
- (("([\"`| ])awk" _ prefix)
- (string-append prefix (which "awk")))
- (("grep") (which "grep")))
- #t))
- (delete 'configure)
- (add-after 'install 'wrap-programs
- (lambda* (#:key outputs #:allow-other-keys)
- ;; PATH carries no trailing colon: wrap-program inserts the
- ;; separator itself, and a trailing one would add an empty PATH
- ;; entry, i.e. the current directory.
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin"))
- (path (string-append
- (assoc-ref %build-inputs "coreutils") "/bin")))
- (for-each (lambda (file)
- (wrap-program file
- `("PATH" ":" prefix (,path))))
- (find-files bin)))
- #t)))))
- (inputs
- `(("perl" ,perl)
- ("ruby" ,ruby)
- ("gawk" ,gawk)
- ("grep" ,grep)
- ("coreutils" ,coreutils)))
- (home-page "http://mafft.cbrc.jp/alignment/software/")
- (synopsis "Multiple sequence alignment program")
- (description
- "MAFFT offers a range of multiple alignment methods for nucleotide and
- protein sequences. For instance, it offers L-INS-i (accurate; for alignment
- of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
- sequences).")
- (license (license:non-copyleft
- "http://mafft.cbrc.jp/alignment/software/license.txt"
- "BSD-3 with different formatting"))))
- (define-public mash
- (package
- (name "mash")
- (version "1.1.1")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/marbl/mash/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
- (modules '((guix build utils)))
- (snippet
- ;; Delete bundled kseq.
- ;; TODO: Also delete bundled murmurhash and open bloom filter.
- '(delete-file "src/mash/kseq.h"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; No tests.
- #:configure-flags
- ;; Each pair maps a "--with-NAME=" option to the input whose
- ;; directory it receives.
- (map (lambda (option+input)
- (string-append "--with-" (car option+input) "="
- (assoc-ref %build-inputs (cdr option+input))))
- '(("capnp" . "capnproto")
- ("gsl" . "gsl")))
- #:make-flags (list "CC=gcc")
- #:phases
- (modify-phases %standard-phases
- ;; The snippet removed the bundled kseq.h, so include the header
- ;; from htslib instead.
- (add-after 'unpack 'fix-includes
- (lambda _
- (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
- (("^#include \"kseq\\.h\"")
- "#include \"htslib/kseq.h\""))
- #t))
- (add-after 'fix-includes 'autoconf
- (lambda _ (zero? (system* "autoconf")))))))
- (inputs
- `(("gsl" ,gsl)
- ("zlib" ,zlib)))
- (native-inputs
- `(("autoconf" ,autoconf)
- ;; Capnproto and htslib are statically embedded in the final
- ;; application. Therefore we also list their licenses, below.
- ("capnproto" ,capnproto)
- ("htslib" ,htslib)))
- (supported-systems '("x86_64-linux"))
- (home-page "https://mash.readthedocs.io")
- (synopsis "Fast genome and metagenome distance estimation using MinHash")
- (description "Mash is a fast sequence distance estimator that uses the
- MinHash algorithm and is designed to work with genomes and metagenomes in the
- form of assemblies or reads.")
- (license (list license:bsd-3 ; Mash
- license:expat ; HTSlib and capnproto
- license:public-domain ; MurmurHash 3
- license:cpl1.0)))) ; Open Bloom Filter
- (define-public metabat
- (package
- (name "metabat")
- (version "2.12.1")
- (source
- (origin
- (method url-fetch)
- (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))
- (patches (search-patches "metabat-fix-compilation.patch"))))
- (build-system gnu-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; Rewrite the "bam/..." includes to the samtools/ and htslib/
- ;; header directories.
- (add-after 'unpack 'fix-includes
- (lambda _
- (substitute* "src/BamUtils.h"
- (("^#include \"bam/bam\\.h\"")
- "#include \"samtools/bam.h\"")
- (("^#include \"bam/sam\\.h\"")
- "#include \"samtools/sam.h\""))
- (substitute* "src/KseqReader.h"
- (("^#include \"bam/kseq\\.h\"")
- "#include \"htslib/kseq.h\""))
- #t))
- ;; Point the SCons script at the htslib and samtools inputs.
- (add-after 'unpack 'fix-scons
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((htslib-dir (assoc-ref inputs "htslib"))
- (samtools-dir (assoc-ref inputs "samtools")))
- (substitute* "SConstruct"
- (("^htslib_dir += 'samtools'")
- (string-append "htslib_dir = '" htslib-dir "'"))
- (("^samtools_dir = 'samtools'")
- (string-append "samtools_dir = '" samtools-dir "'"))
- (("^findStaticOrShared\\('bam', hts_lib")
- (string-append "findStaticOrShared('bam', '"
- samtools-dir
- "/lib'"))
- ;; Do not distribute README.
- (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
- #t))
- (delete 'configure)
- ;; Create the output directory, then run "scons ... install".
- (replace 'build
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let ((prefix (assoc-ref outputs "out")))
- (mkdir prefix)
- (zero? (system* "scons"
- (string-append "PREFIX=" prefix)
- (string-append "BOOST_ROOT="
- (assoc-ref inputs "boost"))
- "install")))))
- ;; Check and install are carried out during build phase.
- (delete 'check)
- (delete 'install))))
- (native-inputs
- `(("scons" ,scons)))
- (inputs
- `(("zlib" ,zlib)
- ("perl" ,perl)
- ("samtools" ,samtools)
- ("htslib" ,htslib)
- ("boost" ,boost)))
- (home-page "https://bitbucket.org/berkeleylab/metabat")
- (synopsis
- "Reconstruction of single genomes from complex microbial communities")
- (description
- "Grouping large genomic fragments assembled from shotgun metagenomic
- sequences to deconvolute complex microbial communities, or metagenome binning,
- enables the study of individual organisms and their interactions. MetaBAT is
- an automated metagenome binning software, which integrates empirical
- probabilistic distances of genome abundance and tetranucleotide frequency.")
- ;; The source code contains inline assembly.
- (supported-systems '("x86_64-linux" "i686-linux"))
- (license (license:non-copyleft "file://license.txt"
- "See license.txt in the distribution."))))
- (define-public minced
- (package
- (name "minced")
- (version "0.2.0")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/ctSkennerton/minced/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
- (build-system gnu-build-system)
- (arguments
- `(#:test-target "test"
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (add-before 'check 'fix-test
- (lambda _
- ;; The expected output still names version 0.1.6; update it to
- ;; the version being built.
- (substitute* "t/Aquifex_aeolicus_VF5.expected"
- (("minced:0.1.6") "minced:0.2.0"))
- #t))
- (replace 'install ; No install target.
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
- (launcher (string-append bin "/minced")))
- ;; Upstream ships a wrapper script that works out its own location
- ;; before running the JAR. All paths are known here, so a wrapper
- ;; is written by hand, which avoids a dependency on coreutils.
- (install-file "minced.jar" bin)
- (call-with-output-file launcher
- (lambda (port)
- (display
- (string-append
- "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
- (assoc-ref inputs "jre") "/bin/java -jar "
- bin "/minced.jar \"$@\"\n")
- port)))
- (chmod launcher #o555)))))))
- (native-inputs
- `(("jdk" ,icedtea "jdk")))
- (inputs
- `(("bash" ,bash)
- ("jre" ,icedtea "out")))
- (home-page "https://github.com/ctSkennerton/minced")
- (synopsis "Mining CRISPRs in Environmental Datasets")
- (description
- "MinCED is a program to find Clustered Regularly Interspaced Short
- Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
- unassembled metagenomic reads, but is mainly designed for full genomes and
- assembled metagenomic sequence.")
- (license license:gpl3+)))
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32 "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; The executables are only installed when setuptools is used.
           (("distutils.core") "setuptools")
           ;; Compile and link with "gcc" instead of "cc".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;only Python 2 is supported
       #:tests? #f))                    ;no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    ;; Both native inputs are meant for the test suite.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-pytz" ,python2-pytz)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; No test suite is available; the check is merely that the
         ;; freshly built binary starts and reports its version.
         (replace 'check
           (lambda _
             (zero? (system* "./muscle" "-version"))))
         ;; Install the single executable into OUT/bin.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; The license statement is in the output of 'muscle -h' and in
    ;; usage.cpp.
    (license license:public-domain)))
(define-public newick-utils
  ;; Packaged from a git commit because there are no recent releases.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      ;; The version embeds the first eight characters of the commit ID.
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Generate the build system right after unpacking.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       ;; The version occurs twice in the release URL: once in the tag
       ;; directory and once in the tarball name.
       (uri (string-append "https://github.com/wwood/OrfM/releases/download/v"
                           version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
(define-public pplacer
  ;; COMMIT is spliced into myocamlbuild.ml by the 'fix-makefile phase in
  ;; place of the output of 'git describe'.
  (let ((commit "g807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Overwrite the files under cdd_src/ with the C sources and
           ;; headers from the "cddlib-src" input.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 ;; Propagate a failed extraction as a phase failure
                 ;; instead of discarding tar's exit status.
                 (and (with-directory-excursion local-dir
                        (zero? (system* "tar" "xvf" cddlib-src)))
                      (let ((cddlib-src-folder
                             (string-append
                              local-dir "/"
                              ;; Index 2 skips the "." and ".." entries
                              ;; that 'scandir' lists first, leaving the
                              ;; unpacked top-level directory.
                              (list-ref (scandir local-dir) 2)
                              "/lib-src")))
                        (for-each
                         (lambda (file)
                           (copy-file file
                                      (string-append "cdd_src/"
                                                     (basename file))))
                         (find-files cddlib-src-folder ".*[ch]$"))
                        #t)))))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Install by copying the contents of "bin" to OUT/bin.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "http://matsen.fhcrc.org/pplacer")
      (license license:gpl3))))
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-scripts-dir
           (lambda _
             (chdir "scripts")
             ;; 'chdir' has no useful return value; report success
             ;; explicitly.
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "python" "-m" "unittest" "discover" "-v"))))
         ;; Prefix PATH for the two scripts wrapped below so that they see
         ;; the executables of the inputs named in each wrapper.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py gets hmmer only.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7.
    (arguments
     `(#:python ,python-2))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    ;; WarpedLMM requires Python 2.7.
    (arguments
     `(#:python ,python-2))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ;; The source is distributed as a ".zip" archive.
       ("unzip" ,unzip)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      ;; The version ends in the first seven characters of the commit ID.
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the bundled Cython source tarball.
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; Later phases operate from this sub-directory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack causes a build error with setuptools 18.0
           ;; and later, so the branch is disabled.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://wasabiapp.org/download/prank/prank.source."
                           version ".tgz"))
       (sha256
        (base32 "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Prank builds with the correct 'bit-ness' without "-m64", and
         ;; dropping the flag allows building on 32-bit machines.  The
         ;; Makefile is addressed as "src/Makefile", i.e. relative to the
         ;; unpacked top-level directory.
         (add-after 'unpack 'remove-m64-flag
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Return the "bin" directory of the build input LABEL.
             (define (input-bin label)
               (string-append (assoc-ref %build-inputs label) "/bin"))
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bindir  (string-append prefix "/bin"))
                    (man1dir (string-append prefix "/share/man/man1"))
                    (tools   (string-join
                              (map input-bin
                                   '("mafft" "exonerate" "bppsuite"))
                              ":")))
               (install-file "prank" bindir)
               ;; Make the helper programs visible to prank at run time.
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tools)))
               (install-file "prank.1" man1dir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32 "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; In the absence of a configure script, the install location is
         ;; written straight into the Makefile.
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         ;; 'make install' does not create its target directory.
         (add-before 'install 'make-install-directory
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         ;; Prefix the script's PATH with the PATH of the build
         ;; environment.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((build-path (getenv "PATH"))
                   (script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,build-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the first paragraph must match the number of
    ;; @item entries in the list below it.
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:
@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/hyattpd/Prodigal/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Point the Makefile's INSTALLDIR at the "bin" directory of the
       ;; output.
       #:make-flags
       (list (string-append "INSTALLDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
(define-public roary
  (package
    (name "roary")
    (version "3.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "03dfr2cd5fp80bcr65923zpdzrasvcxl7c2vgh8373v25a1yfap7"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly, with the source tree's "bin" and "lib"
             ;; directories on PATH and PERL5LIB.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Run every test file, even after a failure, and succeed
             ;; only when the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)
                              (newline)
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every program whose name does not end in "R".
               ;; PROG is relative to the source tree ("bin/..."), which
               ;; mirrors the layout under OUT.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl"))))
                             (wrap-program binary
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script is wrapped separately, with
               ;; R_LIBS_SITE and a PATH that includes coreutils.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/"))))
                 (wrap-program file
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Build with the HYBRID Makefile rather than the SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable by hand and expose it under the
         ;; shorter name "raxml" as well.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; The link itself must live in BIN; naming it by the bare
               ;; file name "raxml" would create it in the build
               ;; directory, where it is lost.  The target is relative, so
               ;; the link resolves to its sibling in BIN.
               (symlink executable (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled copy of boost.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  This phase is used to keep
         ;; the bundled samtools library from being built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         ;; Install every file matching "rsem-.*" into OUT/bin and the
         ;; Perl module into OUT/lib/perl5/site_perl.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bindir  (string-append prefix "/bin/"))
                    (perldir (string-append prefix "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         ;; Let the programs listed below find the installed Perl module.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (perldir (string-append prefix "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append prefix "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled copy of pysam.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; Drop the dependency on the outdated "distribute" module.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Keep setup.py from using its bundled pysam.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation instructions
  ;; at http://seek.princeton.edu/installation.jsp identify the latest
  ;; stable release by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (sha256
          (base32 "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to the main tree.
         (let ((tool-dirs '("SeekMiner"
                            "SeekEvaluator"
                            "SeekPrep"
                            "Distancer"
                            "Data2DB"
                            "PCL2Bin")))
           (modify-phases %standard-phases
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             ;; Run "make" in each tool directory, stopping at the first
             ;; failure.
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* "make" make-flags))))
                        tool-dirs)))
             ;; Run "make install" in each tool directory; once one
             ;; invocation has failed, the remaining ones are not run and
             ;; the phase fails.
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (fold (lambda (tool ok?)
                         (with-directory-excursion
                             (string-append "tools/" tool)
                           (and ok?
                                (zero? (apply system* "make"
                                              (append make-flags
                                                      '("install")))))))
                       #t
                       tool-dirs)))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
(define-public samtools
  (package
    (name "samtools")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1xidmv0jmfy7l0kb32hdnlshcxgzi1hmygvig0cqrq1fhckdlhl5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags
       (list "--with-ncurses" "--with-htslib=system")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; test/test.pl hard-codes /bin/bash; use the bash found on
             ;; PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         ;; Also install the static library libbam.a.
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "libbam.a"
                           (string-append (assoc-ref outputs "out") "/lib"))
             #t))
         ;; Install every top-level header under include/samtools/.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((incdir (string-append (assoc-ref outputs "out")
                                          "/include/samtools/"))
                   (headers (scandir "."
                                     (lambda (entry)
                                       (string-match "\\.h$" entry)))))
               (for-each (lambda (header)
                           (install-file header incdir))
                         headers)
               #t))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
- (define-public samtools-0.1 ; legacy 0.1.x samtools, derived from the samtools package
- ;; This is the most recent version of the 0.1 line of samtools. The input
- ;; and output formats differ greatly from that used and produced by samtools
- ;; 1.x and is still used in many bioinformatics pipelines.
- (package (inherit samtools) ; every field not overridden below comes from samtools
- (version "0.1.19")
- (source
- (origin
- (method url-fetch)
- (uri
- (string-append "mirror://sourceforge/samtools/samtools/"
- version "/samtools-" version ".tar.bz2"))
- (sha256
- (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
- (arguments
- `(#:tests? #f ;no "check" target
- ,@(substitute-keyword-arguments (package-arguments samtools) ; start from the samtools arguments and adjust two keys
- ((#:make-flags flags)
- `(cons "LIBCURSES=-lncurses" ,flags)) ; prepend the curses linker flag to the inherited make flags
- ((#:phases phases)
- `(modify-phases ,phases
- (replace 'install ; install only the samtools executable into bin/
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append
- (assoc-ref outputs "out") "/bin")))
- (mkdir-p bin)
- (install-file "samtools" bin)
- #t)))
- (delete 'patch-tests) ; the inherited test-script patch is dropped since tests are disabled
- (delete 'configure))))))))
- (define-public mosaik ; MOSAIK read mapper, built from a pinned git commit
- (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67")) ; upstream commit fetched for this version
- (package
- (name "mosaik")
- (version "2.2.30")
- (source (origin
- ;; There are no release tarballs nor tags.
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/wanpinglee/MOSAIK.git")
- (commit commit)))
- (file-name (string-append name "-" version))
- (sha256
- (base32
- "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; no tests
- #:make-flags (list "CC=gcc")
- #:phases
- (modify-phases %standard-phases
- (replace 'configure ; no configure script is run, the phase only enters src/
- (lambda _ (chdir "src") #t))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out")
- "/bin")))
- (mkdir-p bin)
- (copy-recursively "../bin" bin) ; copy the contents of the top-level bin/ directory, one level above src/
- #t))))))
- (inputs
- `(("perl" ,perl)
- ("zlib" ,zlib)))
- (supported-systems '("x86_64-linux"))
- (home-page "https://github.com/wanpinglee/MOSAIK")
- (synopsis "Map nucleotide sequence reads to reference genomes")
- (description
- "MOSAIK is a program for mapping second and third-generation sequencing
- reads to a reference genome. MOSAIK can align reads generated by all the
- major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
- Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
- ;; MOSAIK is released under the GPLv2+ with the exception of third-party
- ;; code released into the public domain:
- ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
- ;; 2. MD5 implementation - RSA Data Security, RFC 1321
- (license (list license:gpl2+ license:public-domain)))))
- (define-public ngs-sdk
-   ;; NCBI NGS SDK, built from the "ngs-sdk" subdirectory of the ngs source
-   ;; archive.  Packages inheriting from this one (e.g. by replacing the
-   ;; 'enter-dir phase) can build a different subdirectory of the same source.
-   (package
-     (name "ngs-sdk")
-     (version "1.3.0")
-     (source
-      (origin
-        (method url-fetch)
-        (uri
-         (string-append "https://github.com/ncbi/ngs/archive/"
-                        version ".tar.gz"))
-        (file-name (string-append name "-" version ".tar.gz"))
-        (sha256
-         (base32
-          "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
-     (build-system gnu-build-system)
-     (arguments
-      `(#:parallel-build? #f ; not supported
-        #:tests? #f ; no "check" target
-        #:phases
-        (modify-phases %standard-phases
-          ;; The SDK sources live in a subdirectory of the archive.
-          (add-after 'unpack 'enter-dir
-            (lambda _ (chdir "ngs-sdk") #t))
-          (replace 'configure
-            (lambda* (#:key outputs #:allow-other-keys)
-              (let ((out (assoc-ref outputs "out"))
-                    (perl5lib (getenv "PERL5LIB")))
-                ;; Allow 'konfigure.perl' to find 'package.prl'.  PERL5LIB
-                ;; may be unset, in which case 'getenv' returns #f and must
-                ;; not be passed to 'string-append'.
-                (setenv "PERL5LIB"
-                        (if perl5lib
-                            (string-append ".:" perl5lib)
-                            "."))
-                ;; The 'configure' script doesn't recognize things like
-                ;; '--enable-fast-install'.
-                (zero? (system* "./configure"
-                                (string-append "--build-prefix=" (getcwd) "/build")
-                                (string-append "--prefix=" out)))))))))
-     (native-inputs `(("perl" ,perl)))
-     ;; According to the test
-     ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
-     ;; in ngs-sdk/setup/konfigure.perl
-     (supported-systems '("i686-linux" "x86_64-linux"))
-     (home-page "https://github.com/ncbi/ngs")
-     (synopsis "API for accessing Next Generation Sequencing data")
-     (description
-      "NGS is a domain-specific API for accessing reads, alignments and pileups
- produced from Next Generation Sequencing. The API itself is independent from
- any particular back-end implementation, and supports use of multiple back-ends
- simultaneously.")
-     (license license:public-domain)))
- (define-public java-ngs ; Java bindings for NGS, reusing the ngs-sdk source and build recipe
- (package (inherit ngs-sdk)
- (name "java-ngs")
- (arguments
- `(,@(substitute-keyword-arguments
- `(#:modules ((guix build gnu-build-system)
- (guix build utils)
- (srfi srfi-1)
- (srfi srfi-26))
- ,@(package-arguments ngs-sdk)) ; the ngs-sdk arguments follow the added #:modules entry
- ((#:phases phases)
- `(modify-phases ,phases
- (replace 'enter-dir (lambda _ (chdir "ngs-java") #t))))))) ; build from ngs-java/ instead of ngs-sdk/
- (inputs
- `(("jdk" ,icedtea "jdk") ; the jdk output of icedtea
- ("ngs-sdk" ,ngs-sdk)))
- (synopsis "Java bindings for NGS SDK")))
- (define-public ncbi-vdb ; NCBI VDB engine, configured against libxml2, ngs-sdk, java-ngs, libmagic and hdf5
- (package
- (name "ncbi-vdb")
- (version "2.8.2")
- (source
- (origin
- (method url-fetch)
- (uri
- (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
- (build-system gnu-build-system)
- (arguments
- `(#:parallel-build? #f ; not supported
- #:tests? #f ; no "check" target
- #:phases
- (modify-phases %standard-phases
- (add-before 'configure 'set-perl-search-path
- (lambda _
- ;; Work around "dotless @INC" build failure.
- (setenv "PERL5LIB"
- (string-append (getcwd) "/setup:" ; prepend the setup/ directory of the source tree
- (getenv "PERL5LIB"))) ; NOTE(review): assumes PERL5LIB is already set, getenv returns #f otherwise
- #t))
- (replace 'configure
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let ((out (assoc-ref outputs "out"))) ; NOTE(review): out is bound here but not referenced below
- ;; Override include path for libmagic
- (substitute* "setup/package.prl"
- (("name => 'magic', Include => '/usr/include'")
- (string-append "name=> 'magic', Include => '"
- (assoc-ref inputs "libmagic")
- "/include" "'")))
- ;; Install kdf5 library (needed by sra-tools)
- (substitute* "build/Makefile.install"
- (("LIBRARIES_TO_INSTALL =")
- "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))
- (substitute* "build/Makefile.env"
- (("CFLAGS =" prefix)
- (string-append prefix "-msse2 "))) ; insert -msse2 right after the matched CFLAGS assignment
- ;; Override search path for ngs-java
- (substitute* "setup/package.prl"
- (("/usr/local/ngs/ngs-java")
- (assoc-ref inputs "java-ngs")))
- ;; The 'configure' script doesn't recognize things like
- ;; '--enable-fast-install'.
- (zero? (system* ; the phase succeeds only if configure exits with status 0
- "./configure"
- (string-append "--build-prefix=" (getcwd) "/build")
- (string-append "--prefix=" (assoc-ref outputs "out"))
- (string-append "--debug")
- (string-append "--with-xml2-prefix="
- (assoc-ref inputs "libxml2"))
- (string-append "--with-ngs-sdk-prefix="
- (assoc-ref inputs "ngs-sdk"))
- (string-append "--with-hdf5-prefix="
- (assoc-ref inputs "hdf5")))))))
- (add-after 'install 'install-interfaces
- (lambda* (#:key outputs #:allow-other-keys)
- ;; Install interface libraries. On i686 the interface libraries
- ;; are installed to "linux/gcc/i386", so we need to use the Linux
- ;; architecture name ("i386") instead of the target system prefix
- ;; ("i686").
- (mkdir (string-append (assoc-ref outputs "out") "/ilib"))
- (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
- ,(system->linux-architecture ; unquoted, so the architecture name is spliced in as a literal string
- (or (%current-target-system)
- (%current-system)))
- "/rel/ilib")
- (string-append (assoc-ref outputs "out")
- "/ilib"))
- ;; Install interface headers
- (copy-recursively "interfaces"
- (string-append (assoc-ref outputs "out")
- "/include"))
- #t))
- ;; These files are needed by sra-tools.
- (add-after 'install 'install-configuration-files
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
- (mkdir target)
- (install-file "libs/kfg/default.kfg" target)
- (install-file "libs/kfg/certs.kfg" target))
- #t)))))
- (inputs
- `(("libxml2" ,libxml2)
- ("ngs-sdk" ,ngs-sdk)
- ("java-ngs" ,java-ngs)
- ("libmagic" ,file) ; the file package, looked up as libmagic in the configure phase
- ("hdf5" ,hdf5)))
- (native-inputs `(("perl" ,perl)))
- ;; NCBI-VDB requires SSE capability.
- (supported-systems '("i686-linux" "x86_64-linux"))
- (home-page "https://github.com/ncbi/ncbi-vdb")
- (synopsis "Database engine for genetic information")
- (description
- "The NCBI-VDB library implements a highly compressed columnar data
- warehousing engine that is most often used to store genetic information.
- Databases are stored in a portable image within the file system, and can be
- accessed/downloaded on demand across HTTP.")
- (license license:public-domain)))
- (define-public plink ; PLINK 1.07, built with LAPACK support and without the web check
- (package
- (name "plink")
- (version "1.07")
- (source
- (origin
- (method url-fetch)
- (uri (string-append
- "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
- version "-src.zip"))
- (sha256
- (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
- (patches (search-patches "plink-1.07-unclobber-i.patch"
- "plink-endian-detection.patch"))))
- (build-system gnu-build-system)
- (arguments
- '(#:tests? #f ;no "check" target
- #:make-flags (list (string-append "LIB_LAPACK=" ; full path to liblapack.so from the lapack input
- (assoc-ref %build-inputs "lapack")
- "/lib/liblapack.so")
- "WITH_LAPACK=1"
- "FORCE_DYNAMIC=1"
- ;; disable phoning home
- "WITH_WEBCHECK=")
- #:phases
- (modify-phases %standard-phases
- ;; no "configure" script
- (delete 'configure)
- (replace 'install ; install only the plink executable into bin/
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out")
- "/bin/")))
- (install-file "plink" bin)
- #t))))))
- (inputs
- `(("zlib" ,zlib)
- ("lapack" ,lapack)))
- (native-inputs
- `(("unzip" ,unzip))) ; the source is distributed as a .zip archive
- (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
- (synopsis "Whole genome association analysis toolset")
- (description
- "PLINK is a whole genome association analysis toolset, designed to
- perform a range of basic, large-scale analyses in a computationally efficient
- manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
- so there is no support for steps prior to this (e.g. study design and
- planning, generating genotype or CNV calls from raw data). Through
- integration with gPLINK and Haploview, there is some support for the
- subsequent visualization, annotation and storage of results.")
- ;; Code is released under GPLv2, except for fisher.h, which is under
- ;; LGPLv2.1+
- (license (list license:gpl2 license:lgpl2.1+))))
- (define-public smithlab-cpp ; Smith lab C++ helper code, installed as object files plus headers
- (let ((revision "1")
- (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
- (package
- (name "smithlab-cpp")
- (version (string-append "0." revision "." (string-take commit 7))) ; 0.REVISION.first seven characters of the commit
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/smithlabcode/smithlab_cpp.git")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32
- "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
- (build-system gnu-build-system)
- (arguments
- `(#:modules ((guix build gnu-build-system)
- (guix build utils)
- (srfi srfi-26)) ; provides cut, used in the install phase
- #:tests? #f ;no "check" target
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'use-samtools-headers ; rewrite references to sam.h so they point at samtools/sam.h
- (lambda _
- (substitute* '("SAM.cpp"
- "SAM.hpp")
- (("sam.h") "samtools/sam.h")) ; NOTE(review): the dot is an unescaped regex wildcard here
- #t))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (lib (string-append out "/lib"))
- (include (string-append out "/include/smithlab-cpp")))
- (mkdir-p lib)
- (mkdir-p include)
- (for-each (cut install-file <> lib) ; every .o file found under the build tree goes to lib/
- (find-files "." "\\.o$"))
- (for-each (cut install-file <> include) ; every .hpp file goes to include/smithlab-cpp/
- (find-files "." "\\.hpp$")))
- #t))
- (delete 'configure))))
- (inputs
- `(("samtools" ,samtools-0.1)
- ("zlib" ,zlib)))
- (home-page "https://github.com/smithlabcode/smithlab_cpp")
- (synopsis "C++ helper library for functions used in Smith lab projects")
- (description
- "Smithlab CPP is a C++ library that includes functions used in many of
- the Smith lab bioinformatics projects, such as a wrapper around Samtools data
- structures, classes for genomic regions, mapped sequencing reads, etc.")
- (license license:gpl3+))))
- (define-public preseq ; preseq, built against the samtools and smithlab-cpp inputs instead of bundled copies
- (package
- (name "preseq")
- (version "2.0")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/smithlabcode/"
- "preseq/archive/v" version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
- (modules '((guix build utils)))
- (snippet
- ;; Remove bundled samtools.
- '(delete-file-recursively "samtools"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ;no "check" target
- #:phases
- (modify-phases %standard-phases
- (delete 'configure))
- #:make-flags
- (list (string-append "PREFIX=" ; installation prefix is the out output
- (assoc-ref %outputs "out"))
- (string-append "LIBBAM=" ; path to libbam.a inside the samtools input
- (assoc-ref %build-inputs "samtools")
- "/lib/libbam.a")
- (string-append "SMITHLAB_CPP=" ; lib/ directory of the smithlab-cpp input
- (assoc-ref %build-inputs "smithlab-cpp")
- "/lib")
- "PROGS=preseq"
- "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))) ; the make variables are expanded by make, not by Scheme
- (inputs
- `(("gsl" ,gsl)
- ("samtools" ,samtools-0.1)
- ("smithlab-cpp" ,smithlab-cpp)
- ("zlib" ,zlib)))
- (home-page "http://smithlabresearch.org/software/preseq/")
- (synopsis "Program for analyzing library complexity")
- (description
- "The preseq package is aimed at predicting and estimating the complexity
- of a genomic sequencing library, equivalent to predicting and estimating the
- number of redundant reads from a given sequencing depth and how many will be
- expected from additional sequencing using an initial sequencing experiment.
- The estimates can then be used to examine the utility of further sequencing,
- optimize the sequencing depth, or to screen multiple libraries to avoid low
- complexity samples.")
- (license license:gpl3+)))
- (define-public python-screed ; screed from PyPI, with the test suite run through nosetests
- (package
- (name "python-screed")
- (version "0.9")
- (source
- (origin
- (method url-fetch)
- (uri (pypi-uri "screed" version))
- (sha256
- (base32
- "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
- (build-system python-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (replace 'check
- (lambda _
- (setenv "PYTHONPATH" ; append the current directory to the existing PYTHONPATH
- (string-append (getenv "PYTHONPATH") ":.")) ; NOTE(review): assumes PYTHONPATH is already set
- (zero? (system* "nosetests" "--attr" "!known_failing"))))))) ; select tests without the known_failing attribute
- (native-inputs
- `(("python-nose" ,python-nose)))
- (inputs
- `(("python-bz2file" ,python-bz2file)))
- (home-page "https://github.com/dib-lab/screed/")
- (synopsis "Short read sequence database utilities")
- (description "Screed parses FASTA and FASTQ files and generates databases.
- Values such as sequence name, sequence description, sequence quality and the
- sequence itself can be retrieved from these databases.")
- (license license:bsd-3)))
- (define-public python2-screed ; Python 2 variant derived from python-screed
- (package-with-python2 python-screed))
- (define-public sra-tools ; NCBI SRA Toolkit, built against an installed ncbi-vdb rather than its build tree
- (package
- (name "sra-tools")
- (version "2.8.2-1")
- (source
- (origin
- (method url-fetch)
- (uri
- (string-append "https://github.com/ncbi/sra-tools/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
- (build-system gnu-build-system)
- (arguments
- `(#:parallel-build? #f ; not supported
- #:tests? #f ; no "check" target
- #:make-flags
- (list (string-append "DEFAULT_CRT=" ; certs.kfg file inside the ncbi-vdb input
- (assoc-ref %build-inputs "ncbi-vdb")
- "/kfg/certs.kfg")
- (string-append "DEFAULT_KFG=" ; default.kfg file inside the ncbi-vdb input
- (assoc-ref %build-inputs "ncbi-vdb")
- "/kfg/default.kfg")
- (string-append "VDB_LIBDIR=" ; lib64 on x86_64 systems, lib32 otherwise
- (assoc-ref %build-inputs "ncbi-vdb")
- ,(if (string-prefix? "x86_64"
- (or (%current-target-system)
- (%current-system)))
- "/lib64"
- "/lib32")))
- #:phases
- (modify-phases %standard-phases
- (add-before 'configure 'set-perl-search-path
- (lambda _
- ;; Work around "dotless @INC" build failure.
- (setenv "PERL5LIB"
- (string-append (getcwd) "/setup:" ; prepend the setup/ directory of the source tree
- (getenv "PERL5LIB"))) ; NOTE(review): assumes PERL5LIB is already set, getenv returns #f otherwise
- #t))
- (replace 'configure
- (lambda* (#:key inputs outputs #:allow-other-keys)
- ;; The build system expects a directory containing the sources and
- ;; raw build output of ncbi-vdb, including files that are not
- ;; installed. Since we are building against an installed version of
- ;; ncbi-vdb, the following modifications are needed.
- (substitute* "setup/konfigure.perl"
- ;; Make the configure script look for the "ilib" directory of
- ;; "ncbi-vdb" without first checking for the existence of a
- ;; matching library in its "lib" directory.
- (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
- "my $f = File::Spec->catdir($ilibdir, $ilib);")
- ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
- (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
- "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
- ;; Dynamic linking
- (substitute* "tools/copycat/Makefile"
- (("smagic-static") "lmagic"))
- ;; The 'configure' script doesn't recognize things like
- ;; '--enable-fast-install'.
- (zero? (system* ; the phase succeeds only if configure exits with status 0
- "./configure"
- (string-append "--build-prefix=" (getcwd) "/build")
- (string-append "--prefix=" (assoc-ref outputs "out"))
- (string-append "--debug")
- (string-append "--with-fuse-prefix="
- (assoc-ref inputs "fuse"))
- (string-append "--with-magic-prefix="
- (assoc-ref inputs "libmagic"))
- ;; TODO: building with libxml2 fails with linker errors
- ;; (string-append "--with-xml2-prefix="
- ;; (assoc-ref inputs "libxml2"))
- (string-append "--with-ncbi-vdb-sources=" ; both the sources and build options point at the installed ncbi-vdb
- (assoc-ref inputs "ncbi-vdb"))
- (string-append "--with-ncbi-vdb-build="
- (assoc-ref inputs "ncbi-vdb"))
- (string-append "--with-ngs-sdk-prefix="
- (assoc-ref inputs "ngs-sdk"))
- (string-append "--with-hdf5-prefix="
- (assoc-ref inputs "hdf5"))))))
- ;; This version of sra-tools fails to build with glibc because of a
- ;; naming conflict. glibc-2.25/include/bits/mathcalls.h already
- ;; contains a definition of "canonicalize", so we rename it.
- ;;
- ;; See upstream bug report:
- ;; https://github.com/ncbi/sra-tools/issues/67
- (add-after 'unpack 'patch-away-glibc-conflict
- (lambda _
- (substitute* "tools/bam-loader/bam.c"
- (("canonicalize\\(" line)
- (string-append "sra_tools_" line))) ; prefix each matched call or definition with sra_tools_
- #t)))))
- (native-inputs `(("perl" ,perl)))
- (inputs
- `(("ngs-sdk" ,ngs-sdk)
- ("ncbi-vdb" ,ncbi-vdb)
- ("libmagic" ,file) ; the file package, looked up as libmagic in the configure phase
- ("fuse" ,fuse)
- ("hdf5" ,hdf5)
- ("zlib" ,zlib)))
- (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
- (synopsis "Tools and libraries for reading and writing sequencing data")
- (description
- "The SRA Toolkit from NCBI is a collection of tools and libraries for
- reading of sequencing files from the Sequence Read Archive (SRA) database and
- writing files into the .sra format.")
- (license license:public-domain)))
- (define-public seqan
-   ;; Header-only SeqAn library.  Nothing is compiled: the builder unpacks the
-   ;; release tarball and copies "include" to the "out" output and "share" to
-   ;; the "doc" output.
-   (package
-     (name "seqan")
-     (version "1.4.2")
-     (source (origin
-               (method url-fetch)
-               (uri (string-append "http://packages.seqan.de/seqan-library/"
-                                   "seqan-library-" version ".tar.bz2"))
-               (sha256
-                (base32
-                 "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
-     ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
-     ;; makes sense to split the outputs.
-     (outputs '("out" "doc"))
-     (build-system trivial-build-system)
-     (arguments
-      `(#:modules ((guix build utils))
-        #:builder
-        (begin
-          (use-modules (guix build utils))
-          (let ((tar (assoc-ref %build-inputs "tar"))
-                (bzip (assoc-ref %build-inputs "bzip2"))
-                (out (assoc-ref %outputs "out"))
-                (doc (assoc-ref %outputs "doc")))
-            ;; The trivial build system sets up no environment, so make tar
-            ;; and bzip2 reachable explicitly.
-            (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
-            ;; Stop right away if the archive cannot be unpacked instead of
-            ;; ignoring tar's exit status, and finish with an explicit true
-            ;; value on success.
-            (and (zero? (system* "tar" "xvf" (assoc-ref %build-inputs "source")))
-                 (begin
-                   (chdir (string-append "seqan-library-" ,version))
-                   (copy-recursively "include" (string-append out "/include"))
-                   (copy-recursively "share" (string-append doc "/share"))
-                   #t))))))
-     (native-inputs
-      `(("source" ,source)
-        ("tar" ,tar)
-        ("bzip2" ,bzip2)))
-     (home-page "http://www.seqan.de")
-     (synopsis "Library for nucleotide sequence analysis")
-     (description
-      "SeqAn is a C++ library of efficient algorithms and data structures for
- the analysis of sequences with the focus on biological data. It contains
- algorithms and data structures for string representation and their
- manipulation, online and indexed string search, efficient I/O of
- bioinformatics file formats, sequence alignment, and more.")
-     (license license:bsd-3)))
- (define-public seqmagick
-   ;; Command-line sequence file converter.  Built with Python 2 and tested
-   ;; with nose (see the comments on the arguments below).
-   (package
-     (name "seqmagick")
-     (version "0.6.1")
-     (source
-      (origin
-        (method url-fetch)
-        ;; Use the 'pypi-uri' helper, like the other PyPI-hosted packages in
-        ;; this file, instead of spelling out the URL by hand.
-        (uri (pypi-uri "seqmagick" version))
-        (sha256
-         (base32
-          "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
-     (build-system python-build-system)
-     (arguments
-      ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
-      `(#:python ,python-2
-        #:phases
-        (modify-phases %standard-phases
-          ;; Current test in setup.py does not work as of 0.6.1,
-          ;; so use nose to run tests instead for now. See
-          ;; https://github.com/fhcrc/seqmagick/issues/55
-          (replace 'check (lambda _ (zero? (system* "nosetests")))))))
-     (inputs
-      ;; biopython-1.66 is required due to
-      ;; https://github.com/fhcrc/seqmagick/issues/59
-      ;; When that issue is resolved the 'python2-biopython-1.66' package
-      ;; should be removed.
-      `(("python-biopython" ,python2-biopython-1.66)))
-     (native-inputs
-      `(("python-nose" ,python2-nose)))
-     (home-page "https://github.com/fhcrc/seqmagick")
-     (synopsis "Tools for converting and modifying sequence files")
-     (description
-      "Bioinformaticians often have to convert sequence files between formats
- and do little manipulations on them, and it's not worth writing scripts for
- that. Seqmagick is a utility to expose the file format conversion in
- BioPython in a convenient way. Instead of having a big mess of scripts, there
- is one that takes arguments.")
-     (license license:gpl3)))
- (define-public seqtk
-   ;; FASTA/FASTQ toolkit.  Upstream has no configure script, no test suite and
-   ;; no install target, so those phases are deleted or replaced below.
-   (package
-     (name "seqtk")
-     (version "1.2")
-     (source (origin
-               (method url-fetch)
-               (uri (string-append
-                     "https://github.com/lh3/seqtk/archive/v"
-                     version ".tar.gz"))
-               (file-name (string-append name "-" version ".tar.gz"))
-               (sha256
-                (base32
-                 "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
-               (modules '((guix build utils)))
-               (snippet
-                '(begin
-                   ;; Remove extraneous header files, as is done in the seqtk
-                   ;; master branch.
-                   (for-each delete-file
-                             (list "ksort.h" "kstring.h" "kvec.h"))
-                   #t))))
-     (build-system gnu-build-system)
-     (arguments
-      `(#:phases
-        (modify-phases %standard-phases
-          (delete 'configure)
-          (replace 'check
-            ;; There are no tests, so we just run a sanity check.
-            (lambda _ (zero? (system* "./seqtk" "seq"))))
-          (replace 'install
-            (lambda* (#:key outputs #:allow-other-keys)
-              (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-                (install-file "seqtk" bin)
-                ;; Report success explicitly, like the other phases in this
-                ;; file, rather than relying on the value of 'install-file'.
-                #t))))))
-     (inputs
-      `(("zlib" ,zlib)))
-     (home-page "https://github.com/lh3/seqtk")
-     (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
-     (description
-      "Seqtk is a fast and lightweight tool for processing sequences in the
- FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
- optionally compressed by gzip.")
-     (license license:expat)))
- (define-public snap-aligner ; SNAP short-read aligner, checked by running its unit_tests binary
- (package
- (name "snap-aligner")
- (version "1.0beta.18")
- (source (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/amplab/snap/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
- (build-system gnu-build-system)
- (arguments
- '(#:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (replace 'check (lambda _ (zero? (system* "./unit_tests")))) ; the check passes only if unit_tests exits with status 0
- (replace 'install ; install the two executables into bin/
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin")))
- (install-file "snap-aligner" bin)
- (install-file "SNAPCommand" bin)
- #t))))))
- (native-inputs
- `(("zlib" ,zlib))) ; NOTE(review): zlib is declared as a native input here, confirm it should not be a regular input
- (home-page "http://snap.cs.berkeley.edu/")
- (synopsis "Short read DNA sequence aligner")
- (description
- "SNAP is a fast and accurate aligner for short DNA reads. It is
- optimized for modern read lengths of 100 bases or higher, and takes advantage
- of these reads to align data quickly through a hash-based indexing scheme.")
- ;; 32-bit systems are not supported by the unpatched code.
- ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
- ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
- ;; systems without a lot of memory cannot make good use of this program.
- (supported-systems '("x86_64-linux"))
- (license license:asl2.0)))
- (define-public sortmerna ; SortMeRNA, with executables and rRNA databases split into separate outputs
- (package
- (name "sortmerna")
- (version "2.1b")
- (source
- (origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/biocore/sortmerna/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
- (build-system gnu-build-system)
- (outputs '("out" ;for binaries
- "db")) ;for sequence databases
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin"))
- (db (assoc-ref outputs "db"))
- (share
- (string-append db "/share/sortmerna/rRNA_databases")))
- (install-file "sortmerna" bin) ; the two executables go to bin/ of the out output
- (install-file "indexdb_rna" bin)
- (for-each (lambda (file) ; matching database files go to the db output
- (install-file file share))
- (find-files "rRNA_databases" ".*fasta")) ; the pattern is a regex matched against file names
- #t))))))
- (inputs
- `(("zlib" ,zlib)))
- (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
- (synopsis "Biological sequence analysis tool for NGS reads")
- (description
- "SortMeRNA is a biological sequence analysis tool for filtering, mapping
- and operational taxonomic unit (OTU) picking of next generation
- sequencing (NGS) reads. The core algorithm is based on approximate seeds and
- allows for fast and sensitive analyses of nucleotide sequences. The main
- application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
- ;; The source includes x86 specific code
- (supported-systems '("x86_64-linux" "i686-linux"))
- (license license:lgpl3)))
- (define-public star ; STAR RNA-seq aligner, built against the htslib input instead of the bundled copy
- (package
- (name "star")
- (version "2.5.3a")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/alexdobin/STAR/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32
- "013wirlz8lllgjyagl48l75n1isxyabqb3sj7qlsl0x1rmvqw99a"))
- (modules '((guix build utils)))
- (snippet
- '(begin
- (substitute* "source/Makefile"
- (("/bin/rm") "rm")) ; use rm from PATH rather than the absolute /bin/rm
- ;; Remove pre-built binaries and bundled htslib sources.
- (delete-file-recursively "bin/MacOSX_x86_64")
- (delete-file-recursively "bin/Linux_x86_64")
- (delete-file-recursively "bin/Linux_x86_64_static")
- (delete-file-recursively "source/htslib")
- #t))))
- (build-system gnu-build-system)
- (arguments
- '(#:tests? #f ;no check target
- #:make-flags '("STAR") ; passed to make as the target to build
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'enter-source-dir
- (lambda _ (chdir "source") #t))
- (add-after 'enter-source-dir 'do-not-use-bundled-htslib
- (lambda _
- (substitute* "Makefile"
- (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
- _ prefix) prefix)) ; drop the htslib prerequisite from the Depend.list rule
- (substitute* '("BAMfunctions.cpp"
- "signalFromBAM.h"
- "bam_cat.h"
- "bam_cat.c"
- "STAR.cpp"
- "bamRemoveDuplicates.cpp")
- (("#include \"htslib/([^\"]+\\.h)\"" _ header)
- (string-append "#include <" header ">"))) ; turn quoted htslib/ includes into angle-bracket includes without the leading htslib/
- (substitute* "IncludeDefine.h"
- (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
- (string-append "<" header ">"))) ; same rewrite, keeping the inner htslib/ path component
- #t))
- (replace 'install ; install only the STAR executable into bin/
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
- (install-file "STAR" bin))
- #t))
- (delete 'configure))))
- (native-inputs
- `(("xxd" ,xxd)))
- (inputs
- `(("htslib" ,htslib)
- ("zlib" ,zlib)))
- (home-page "https://github.com/alexdobin/STAR")
- (synopsis "Universal RNA-seq aligner")
- (description
- "The Spliced Transcripts Alignment to a Reference (STAR) software is
- based on a previously undescribed RNA-seq alignment algorithm that uses
- sequential maximum mappable seed search in uncompressed suffix arrays followed
- by seed clustering and stitching procedure. In addition to unbiased de novo
- detection of canonical junctions, STAR can discover non-canonical splices and
- chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
- sequences.")
- ;; Only 64-bit systems are supported according to the README.
- (supported-systems '("x86_64-linux" "mips64el-linux"))
- ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
- (license license:gpl3+)))
- (define-public subread
-   ;; Subread read aligner suite.  The sources are built from the "src"
-   ;; subdirectory with Makefile.Linux, and the resulting programs are copied
-   ;; from the top-level "bin" directory.
-   (package
-     (name "subread")
-     (version "1.5.1")
-     (source (origin
-               (method url-fetch)
-               (uri (string-append "mirror://sourceforge/subread/subread-"
-                                   version "/subread-" version "-source.tar.gz"))
-               (sha256
-                (base32
-                 "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
-     (build-system gnu-build-system)
-     (arguments
-      `(#:tests? #f ;no "check" target
-        ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
-        ;; optimizations by default, so we override these flags such that x86_64
-        ;; flags are only added when the build target is an x86_64 system.
-        #:make-flags
-        (list (let ((system ,(or (%current-target-system)
-                                 (%current-system)))
-                    (flags '("-ggdb" "-fomit-frame-pointer"
-                             "-ffast-math" "-funroll-loops"
-                             "-fmessage-length=0"
-                             "-O9" "-Wall" "-DMAKE_FOR_EXON"
-                             "-DMAKE_STANDALONE"
-                             "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
-                    (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
-                (if (string-prefix? "x86_64" system)
-                    (string-append "CCFLAGS=" (string-join (append flags flags64)))
-                    (string-append "CCFLAGS=" (string-join flags))))
-              "-f" "Makefile.Linux"
-              "CC=gcc ${CCFLAGS}")
-        #:phases
-        (modify-phases %standard-phases
-          (add-after 'unpack 'enter-dir
-            (lambda _ (chdir "src") #t))
-          ;; no "configure" script
-          (delete 'configure)
-          (replace 'install
-            (lambda* (#:key outputs #:allow-other-keys)
-              (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-                (mkdir-p bin)
-                ;; The build places its programs in the top-level "bin"
-                ;; directory, one level above "src".
-                (copy-recursively "../bin" bin)
-                #t))))))
-     (inputs `(("zlib" ,zlib)))
-     (home-page "http://bioinf.wehi.edu.au/subread-package/")
-     (synopsis "Tool kit for processing next-gen sequencing data")
-     (description
-      "The subread package contains the following tools: subread aligner, a
- general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
- and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
- features; exactSNP: a SNP caller that discovers SNPs by testing signals
- against local background noises.")
-     (license license:gpl3+)))
;; StringTie 1.2.1.  The bundled samtools-0.1.18 directory is removed by the
;; source snippet; the 'use-system-samtools' phase rewrites the Makefile and
;; the gclib includes to refer to <samtools/...> headers instead.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32 "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the dependency of the "stringtie" target on the
             ;; bundled libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Turn the quoted bam/sam/kstring includes into
             ;; <samtools/...> includes.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ hdr)
                (string-append "#include <samtools/" hdr ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "stringtie"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (license license:artistic2.0)))
;; Taxtastic 0.6.4 from PyPI, built with Python 2.  The 'check' phase is
;; replaced by a "python -m unittest discover -v" run.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.6.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "taxtastic" version))
       (sha256
        (base32 "0s79z8kfl853x7l4h8ms05k31q87aw62nrchlk20w9n227j35929"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-sqlalchemy" ,python2-sqlalchemy)
       ("python-decorator" ,python2-decorator)
       ("python-biopython" ,python2-biopython)
       ("python-pandas" ,python2-pandas)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; The phase succeeds only when the test run exits with 0.
             (zero? (system* "python" "-m" "unittest" "discover" "-v")))))))
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (home-page "https://github.com/fhcrc/taxtastic")
    (license license:gpl3+)))
;; VCFtools 0.1.15 from the GitHub release tarball.  There is no "check"
;; target; installation locations are passed as make flags.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32 "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    (home-page "https://vcftools.github.io/")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
;; Infernal 1.1.2, built with the default gnu-build-system phases.  Restricted
;; to i686 and x86_64 (see the comment on 'supported-systems').
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32 "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (home-page "http://eddylab.org/infernal/")
    (license license:bsd-3)))
;; CENTIPEDE 1.2; the tarball is fetched from R-Forge rather than through a
;; CRAN or Bioconductor URI helper.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (home-page "http://centipede.uchicago.edu/")
    (license (list license:gpl2+ license:gpl3+))))
;; CRAN package "vegan" 2.4-4; gfortran is a build-time (native) input.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "vegan" version))
              (sha256
               (base32
                "1n57dzv2aid6iqd9fkqik401sidqanhzsawyak94qbiyh6dbd1x9"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (license license:gpl2+)))
;; Bioconductor package "annotate" 1.54.0.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "annotate" version))
              (sha256
               (base32
                "03hmbvp3i6lvd307fqdg7akxi2qp322rlky3bzw0zccgm0i0221g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (synopsis "Annotation for microarrays")
    (description "This package provides R environments for the annotation of
microarrays.")
    (home-page "http://bioconductor.org/packages/annotate")
    (license license:artistic2.0)))
;; Bioconductor package "geneplotter" 1.54.0.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "0a0ajns21db5rrjl16bq6wawggsnxr00fg184pc38nmfghv4z4b6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (home-page "http://bioconductor.org/packages/geneplotter")
    (license license:artistic2.0)))
;; Bioconductor package "genefilter" 1.58.0; gfortran is a build-time
;; (native) input.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "0sf2hdi9nv6r83vn1y65m4jiba8pffddpj46d6yjn5rlsixplmqg"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (home-page "http://bioconductor.org/packages/genefilter")
    (license license:artistic2.0)))
;; Bioconductor package "DESeq2" 1.16.1.  The upstream name differs in case
;; from the Guix package name, hence the 'upstream-name' property.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.16.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "01pvyljxkwazxl510v7h0971nx65iqd2bdkbdhw3xzind0n9pdvq"))))
    (build-system r-build-system)
    (properties `((upstream-name . "DESeq2")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (home-page "http://bioconductor.org/packages/DESeq2")
    (license license:lgpl3+)))
;; Bioconductor package "DEXSeq" 1.22.0.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DEXSeq" version))
              (sha256
               (base32
                "085aqk1wlzzqcqcqhvz74y099kr2ln5dwdxd3rl6zan806mgwahg"))))
    (build-system r-build-system)
    (properties `((upstream-name . "DEXSeq")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (home-page "http://bioconductor.org/packages/DEXSeq")
    (license license:gpl3+)))
;; Bioconductor package "AnnotationForge" 1.18.1.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.18.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "1366qvykd9cpcvwgc5g9mm9adw9rxw6p4814dd6l5fyb0pwpmysx"))))
    (build-system r-build-system)
    (properties `((upstream-name . "AnnotationForge")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (home-page "http://bioconductor.org/packages/AnnotationForge")
    (license license:artistic2.0)))
;; Bioconductor package "RBGL" 1.52.0; its only propagated input is r-graph.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "11db6kvz453ypj9ds3xpjqzwrrjck84ijn4wlhkfyz2dzdgd5ryv"))))
    (build-system r-build-system)
    (properties `((upstream-name . "RBGL")))
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (home-page "http://www.bioconductor.org/packages/RBGL")
    (license license:artistic2.0)))
;; Bioconductor package "GSEABase" 1.38.0.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "1c6i6g4fj3b8wjyxyygr7i3v8sxrq1ffb2bbicya5ah2gdaclfad"))))
    (build-system r-build-system)
    (properties `((upstream-name . "GSEABase")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (home-page "http://bioconductor.org/packages/GSEABase")
    (license license:artistic2.0)))
;; Bioconductor package "Category" 2.42.1.
(define-public r-category
  (package
    (name "r-category")
    (version "2.42.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "1w186nhc85bglcgmbcrsdbb8l6rph21pl5kdwjqwkp0jnr9z0ifn"))))
    (build-system r-build-system)
    (properties `((upstream-name . "Category")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-rsqlite" ,r-rsqlite)))
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (home-page "http://bioconductor.org/packages/Category")
    (license license:artistic2.0)))
;; Bioconductor package "GOstats" 2.42.0.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "0qvqjgfnd9ap4rikvyxa9p4dhcnccvkw8phzv88vghh6pq463d62"))))
    (build-system r-build-system)
    (properties `((upstream-name . "GOstats")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)))
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (home-page "http://bioconductor.org/packages/GOstats")
    (license license:artistic2.0)))
;; Bioconductor package "ShortRead" 1.34.0; zlib is a regular input in
;; addition to the propagated R packages.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0ayk3d5625ymb5g2gycq6banzqmyd642xrwjzhdshz2dwid7kly8"))))
    (build-system r-build-system)
    (properties `((upstream-name . "ShortRead")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (home-page "http://bioconductor.org/packages/ShortRead")
    (license license:artistic2.0)))
;; Bioconductor package "systemPipeR" 1.10.0.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "0c3m5rq63ypv15yca97yag5d4vgd7xj9by2a4sd8z0pcmpajz0hw"))))
    (build-system r-build-system)
    (properties `((upstream-name . "systemPipeR")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (home-page "https://github.com/tgirke/systemPipeR")
    (license license:artistic2.0)))
;; Bioconductor package "groHMM" 1.10.0.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "16k1kp4sbhh0vp7dzywafq52csq42ksqfrqfy4bdv1qbd7536dpd"))))
    (build-system r-build-system)
    (properties `((upstream-name . "groHMM")))
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
;; Bioconductor annotation data package "TxDb.Hsapiens.UCSC.hg19.knownGene"
;; 3.2.2.  Substitutes are disabled for it (see the comment on 'arguments').
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "http://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (build-system r-build-system)
    (properties
     `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (home-page
     "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (license license:artistic2.0)))
;; CRAN package "SPARQL" 1.16.
(define-public r-sparql
  (package
    (name "r-sparql")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "SPARQL" version))
       (sha256
        (base32 "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
    (build-system r-build-system)
    (properties `((upstream-name . "SPARQL")))
    (propagated-inputs
     `(("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (synopsis "SPARQL client for R")
    (description "This package provides an interface to use SPARQL to pose
SELECT or UPDATE queries to an end-point.")
    (home-page "http://cran.r-project.org/web/packages/SPARQL")
    ;; The only license indication is found in the DESCRIPTION file,
    ;; which states GPL-3. So we cannot assume GPLv3+.
    (license license:gpl3)))
;; VSEARCH 2.5.0 from the GitHub archive tarball.  The bundled cityhash
;; sources are deleted in the snippet and the cityhash package is used as an
;; input; the build system is regenerated with "autoreconf -vif".
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1k8wf3qns4mqrsizywbkqcasqjw000m4drxsag3qd7390pwvf9kz"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources. The vsearch source is adjusted
           ;; for this in the patch.
           (delete-file "src/city.h")
           (delete-file "src/citycrc.h")
           (delete-file "src/city.cc")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             ;; The phase succeeds only when autoreconf exits with 0.
             (zero? (system* "autoreconf" "-vif")))))))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
;; ParDRe from SourceForge.  There is no configure phase and no test suite;
;; the install phase copies the "ParDRe" program into the output's bin
;; directory.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       ;; The upstream version is spelled out here because the package
       ;; version above carries the extra "-1" suffix.
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
;; RubyGems package "bio-kseq" 0.0.2; tests are run through the "spec"
;; target.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     `(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
;; RubyGems package "bio-locus" 0.0.7; ruby-rspec is a build-time (native)
;; input.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
;; RubyGems package "bio-blastxmlparser" 2.0.4.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; ruby-rspec is declared as a native input, matching how the other Ruby
    ;; packages in this file (bio-locus, ruby-bio-kseq) declare it, so that
    ;; it is taken from the build machine rather than the target.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
;; BioRuby 1.5.1, distributed on RubyGems as "bio".  Before the build, the
;; 'patch-test-command' phase replaces hard-coded program names in the test
;; files with the results of 'which' lookups.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied to each line in the order written,
             ;; which is the same order as the former separate passes over
             ;; this file.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; A plain list of two file names; the previous form had a
             ;; stray comma between the strings, which the reader treats as
             ;; 'unquote'.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
;; CRAN package "ACSNMineR" 0.16.8.25.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32 "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    (build-system r-build-system)
    (properties `((upstream-name . "ACSNMineR")))
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
    (license license:gpl2+)))
;; Bioconductor package "BiocGenerics" 0.22.0; it declares no inputs.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32 "0qbmz2qxwwi30xpxpvp2h1h7l494rbbz5d9pls5cfhqdv3wnpzsv"))))
    (build-system r-build-system)
    (properties `((upstream-name . "BiocGenerics")))
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
;; Bioconductor package "BiocInstaller" 1.26.0; it declares no inputs.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32 "0njw2q3lq1rrjx8qzw5d2130l72bmd3g2z8qlxqmkdcbmmgliyj2"))))
    (build-system r-build-system)
    (properties `((upstream-name . "BiocInstaller")))
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (home-page "http://bioconductor.org/packages/BiocInstaller")
    (license license:artistic2.0)))
;; biocViews (Bioconductor): generates HTML pages that categorize the
;; packages of a Bioconductor repository by keyword ("view").
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.44.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "17hi8w0w63f5yc43kid5pbld3ca78sj6n8x9dmkbl8h48818xbga"))))
    (properties `((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-knitr" ,r-knitr)
       ("r-runit" ,r-runit)))
    (home-page "http://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
;; bookdown (CRAN): output formats and utilities for writing books and
;; technical documents in R Markdown.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.5")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32 "0zm63kr4f4kja4qpwkzl119zzyciqj7ihajfqgfjpgb4dzaiycxp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://github.com/rstudio/bookdown")
    (synopsis "Authoring books and technical documents with R markdown")
    (description
     "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (license license:gpl3)))
;; BiocStyle (Bioconductor): standard formatting styles for Bioconductor
;; PDF and HTML documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "0bmgmsfll923v573g0kyzlmjd7gly5jwgd8vkrcwvbam1gz75f2c"))))
    (properties `((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
;; BiocCheck (Bioconductor): extra quality checks for R packages that are to
;; be submitted to Bioconductor.  Besides the R library, the package output
;; gets a "bin" directory with links to the bundled command-line scripts.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32 "01zkw5hggzvn0wj4msac71k1mknq4h2inn1c2hwqgw4cy1675wl0"))))
    (properties `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the file name is joined with
                    ;; an explicit "/" below, so a trailing slash would put
                    ;; "//" into the symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each bundled script into "bin" under the same name.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             ;; Phases report success by returning #t.
             #t)))))
    (native-inputs
     `(("which" ,which)))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)))
    (home-page "http://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description
     "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
;; getopt (CRAN): short/long command-line option processing for Rscript
;; shebang scripts.
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
    (build-system r-build-system)
    (home-page "https://github.com/trevorld/getopt")
    (synopsis "Command-line option processor for R")
    (description
     "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (license license:gpl2+)))
;; optparse (CRAN): command-line parser modelled on Python's optparse; it
;; propagates r-getopt.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.4.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1ff4wmsszrb3spwfp7ynfs8w11qpy1sdzfxm1wk8dqqvdwris7qb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
;; DNAcopy (Bioconductor): circular binary segmentation (CBS) of DNA copy
;; number data.  gfortran is listed as an input.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.50.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DNAcopy" version))
       (sha256
        (base32 "0f0x83db7rm5xf9fg5pjhvs4i165qqaf01lbwb8kj13fsqpwx15p"))))
    (properties `((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description
     "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
;; S4Vectors (Bioconductor): the Vector and List virtual classes plus a few
;; concrete subclasses such as DataFrame, Rle and Hits.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.14.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "1r7s4pfw026qazzic090mhk8d9m39j2nwl87dyqcpdylyq7gq5qs"))))
    (properties `((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
;; seqinr (CRAN): exploratory analysis and visualization of biological
;; sequence data.  zlib is listed as an input.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
;; IRanges (Bioconductor): S4 classes for integer ranges, RLE vectors and
;; views on Vector objects.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.10.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "1brmzs3rsf97gymridrh9c9r3vws8b3rpghaanxnniw36lmcajfy"))))
    (properties `((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
;; GenomeInfoDbData (Bioconductor annotation data): lookup tables mapping
;; NCBI taxonomy IDs to species, used by GenomeInfoDb.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32 "120qvhb0pvkzd65lsgja62vyrgc37si6fh68q4cg4w5x9f04jw25"))))
    (properties `((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
;; GenomeInfoDb (Bioconductor): translation between chromosome sequence
;; naming conventions.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.12.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "1hjxgmcnrngp1307ipqaq9hgxz4j0ldn7d46knhzs30k2r4qnrfp"))))
    (properties `((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
;; edgeR (Bioconductor): differential expression analysis of count data
;; based on negative binomial models.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "edgeR" version))
       (sha256
        (base32 "02l17i6xh33dv10swwvyfxrmv5kp23iv278iwvkfq2mnipasfnb9"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The fifth line below used to read "It be applied"; the missing "can"
    ;; has been restored.
    (description
     "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
;; VariantAnnotation (Bioconductor): annotation of genetic variants and
;; prediction of coding changes.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.22.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "0sr3vdn85x5zdxh80cfwlpfdpi2hmjy3fwi00ac3jya4v145vawr"))))
    (properties `((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
;; limma (Bioconductor): linear models for microarray and RNA-seq gene
;; expression data.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.32.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "0p2ayha9g9w5r8s7pgdf16mkmdbqwh6f35jh07g3b8gyra48gwiw"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
;; XVector (Bioconductor): S4 classes for sequences stored "externally"
;; (behind an R external pointer, or on disk).
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "XVector" version))
       (sha256
        (base32 "01n09f4jdm60684lzikp02zf9gjan8bdrjx740vggr21q9fa69wn"))))
    (properties `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc declarations from the package metadata; zlib
         ;; is supplied through the `inputs' field instead.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    ;; Spelling fixed: this used to read "manpulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
;; GenomicRanges (Bioconductor): containers for genomic intervals and for
;; variables defined along a genome.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.28.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicRanges" version))
       (sha256
        (base32 "1y15kg1q81h8rmga83ljiwr8whkajcargfjiljr212d6if17ys1z"))))
    (properties `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The opening sentence used to read "This package provides tools to
    ;; ... is playing a central role", which does not parse; it has been
    ;; reworded so that the subject and verb agree.
    (description
     "The ability to efficiently represent and manipulate genomic annotations
and alignments is playing a central role when it comes to analyzing
high-throughput sequencing data (a.k.a. NGS data). The GenomicRanges package
defines general purpose containers for storing and manipulating genomic
intervals and variables defined along a genome.")
    (license license:artistic2.0)))
;; Biobase (Bioconductor): base functions needed by many other Bioconductor
;; packages.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.36.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "0sr48nqx5bqid4g6lr9zr9286xh842w717yvmssvddb5xxynib6f"))))
    (properties `((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
;; AnnotationDbi (Bioconductor): user interface and database connection code
;; for SQLite-backed annotation data packages.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.38.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "1lsamnbf07zzsy5asy5hn97n2a4layv58w2bzd90ikcdx0gmzarj"))))
    (properties `((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
;; biomaRt (Bioconductor): uniform interface to databases implementing the
;; BioMart software suite.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.32.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biomaRt" version))
       (sha256
        (base32 "0fhpbjlsgbqxrpj6nzhhk9q3ph81n5x4p7mmd097xjjn6b05w1d8"))))
    (properties `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as its second; the two were previously given in reverse order.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
;; BiocParallel (Bioconductor): parallel evaluation functions tailored to
;; Bioconductor objects.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "08mdfxyk9nwz77v0xhlvs19p2wj0phgm5c5b25vm0xh3749njsp0"))))
    (properties `((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    ;; Two licenses are listed for this package.
    (license (list license:gpl2+ license:gpl3+))))
;; Biostrings (Bioconductor): string containers and matching algorithms for
;; large biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.44.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "12c5abgshwq86357jr0r9039y6vl4d6ngysy89rsnr23ldnsirjp"))))
    (properties `((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
;; Rsamtools (Bioconductor): R interface to the samtools, bcftools and tabix
;; utilities.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "1zx6vhdz3lksbfy98jj3qzl7cxgspigm2pbsd5835b12r6mc5w6d"))))
    (properties `((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc declarations from the package metadata; zlib
         ;; is supplied through the `inputs' field instead.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page
     "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
;; DelayedArray (Bioconductor): delayed or block-processed operations on
;; array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.2.7")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "02dfqp4md9xaqjj712ijc3jswghmipr5hwkd5hr0x1xi6l2fb69g"))))
    (properties `((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "http://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
;; SummarizedExperiment (Bioconductor): container holding one or more assay
;; matrices, with rows as genomic ranges and columns as samples.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "0j7xn7pk52d383fb1wplcggacl2586c4zi0alkgfc3wz7qq9w13s"))))
    (properties `((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
;; GenomicAlignments (Bioconductor): containers for storing and manipulating
;; short genomic alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.12.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "03ysxi9fdd3bcfj05iaysya9knn2aa2irwpypb5srg0xwv92bdb9"))))
    (properties `((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
;; rtracklayer (Bioconductor): interaction with genome browsers and
;; import/export of annotation tracks.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.36.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "050q1rv04w31168ljr975vxva31n9lqdx84rnmsk6zcr6p640ffp"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc declarations from the package metadata; zlib
         ;; is supplied through the `inputs' field instead.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
;; GenomicFeatures (Bioconductor): tools for making and manipulating
;; transcript centric annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.28.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "01ylvg275iy0cvsbxkbfxcf9pi9al597v5wnlqi2xdpmrcxyc3q0"))))
    (properties `((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
;; GO.db (Bioconductor annotation data): annotation maps describing the Gene
;; Ontology.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The URI is spelled out by hand and points below
       ;; "data/annotation/" on the Bioconductor site.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
    (properties `((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
;; graph (Bioconductor): simple graph handling capabilities for R.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "0hx9wslnrci7c5i1gd1zlpjmgrkdnx9gabfgmzzwfnykk1mdvvna"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
;; topGO (Bioconductor): gene ontology term testing that accounts for the
;; topology of the GO graph.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "04kvxz9qsxfz0np7wlrzp4r95nykz94x7mqpyyk76f90g6m66vcc"))))
    (properties `((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "http://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
;; BSgenome (Bioconductor): infrastructure shared by Biostrings-based genome
;; data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.44.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "18dlknwk3xvi52hamxf9pl5bjc5806mpw98xwvx5xajn8mrxiy5h"))))
    (properties `((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
;; BSgenome.Hsapiens.1000genomes.hs37d5 (Bioconductor annotation data): full
;; Homo sapiens genome sequences from the 1000genomes hs37d5 reference.
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (properties
     `((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
;; impute from Bioconductor.  gfortran is the only declared input.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "impute" version))
              (sha256
               (base32
                "0va45vfixy3np549md87h3b3rbavm54gfnmnjhpr9hf02lll6zxs"))))
    (inputs
     `(("gfortran" ,gfortran)))
    (build-system r-build-system)
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
;; seqPattern from Bioconductor; the mixed-case upstream name is recorded in
;; `properties'.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqPattern" version))
              (sha256
               (base32
                "0yw5251sirv3283xgmfmfbf900xp8dwvqfk88mr945s3by5hx99v"))))
    (properties
     `((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
;; genomation from Bioconductor.  Besides its propagated R dependencies it
;; takes zlib as a regular input.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "0r71nf8p3aq3yffqxg4yir1zmm7rar5a57nmy1kpqpn8kaf2svjl"))))
    (build-system r-build-system)
    ;; Each entry pairs a label with the package it refers to; the order of
    ;; the entries is kept as is.
    (propagated-inputs
     (list (list "r-biostrings" r-biostrings)
           (list "r-bsgenome" r-bsgenome)
           (list "r-data-table" r-data-table)
           (list "r-genomeinfodb" r-genomeinfodb)
           (list "r-genomicalignments" r-genomicalignments)
           (list "r-genomicranges" r-genomicranges)
           (list "r-ggplot2" r-ggplot2)
           (list "r-gridbase" r-gridbase)
           (list "r-impute" r-impute)
           (list "r-iranges" r-iranges)
           (list "r-matrixstats" r-matrixstats)
           (list "r-plotrix" r-plotrix)
           (list "r-plyr" r-plyr)
           (list "r-rcpp" r-rcpp)
           (list "r-readr" r-readr)
           (list "r-reshape2" r-reshape2)
           (list "r-rhtslib" r-rhtslib)
           (list "r-rsamtools" r-rsamtools)
           (list "r-rtracklayer" r-rtracklayer)
           (list "r-runit" r-runit)
           (list "r-s4vectors" r-s4vectors)
           (list "r-seqpattern" r-seqpattern)))
    (inputs
     (list (list "zlib" zlib)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
;; Experimental data accompanying the genomation package.  It is marked
;; non-substitutable and needs r-knitr as a native input.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
;; Annotation data package org.Hs.eg.db (human); it only propagates
;; r-annotationdbi.
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Hs.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    (properties
     `((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (license license:artistic2.0)))
;; Annotation data package org.Ce.eg.db (worm); it only propagates
;; r-annotationdbi.
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Ce.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    (properties
     `((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
;; Annotation data package org.Dm.eg.db (fly); it only propagates
;; r-annotationdbi.
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Dm.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    (properties
     `((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
;; Annotation data package org.Mm.eg.db (mouse); it only propagates
;; r-annotationdbi.
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Mm.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    (properties
     `((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
;; seqLogo from Bioconductor; no inputs are declared explicitly.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqLogo" version))
       (sha256
        (base32
         "19d5zmy7m8svljwgbmrb4vxkq18slq0f3all6k2ayv42b8w44h6q"))))
    (properties `((upstream-name . "seqLogo")))
    (build-system r-build-system)
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
;; Genome data package for Homo sapiens (UCSC hg19).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Hsapiens.UCSC.hg19_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (properties
     `((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
;; Genome data package for Mus musculus (UCSC mm9).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Mmusculus.UCSC.mm9_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (properties
     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
;; Genome data package for Mus musculus (UCSC mm10).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Mmusculus.UCSC.mm10_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (properties
     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
;; TxDb annotation package for Mouse (UCSC mm10, knownGene track).  It is
;; marked non-substitutable.
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    (properties
     `((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
;; Genome data package for Caenorhabditis elegans (UCSC ce6).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Celegans.UCSC.ce6_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
;; Genome data package for Caenorhabditis elegans (UCSC ce10).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Celegans.UCSC.ce10_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
;; Genome data package for Drosophila melanogaster (UCSC dm3).  It is marked
;; non-substitutable and only propagates r-bsgenome.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; The tarball is fetched over HTTPS.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Dmelanogaster.UCSC.dm3_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (properties
     `((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
;; motifRG from Bioconductor; the mixed-case upstream name is recorded in
;; `properties'.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32
         "0lxxpqyljiyk73vyq6ss46q13g81pm24q87mkzdsqlr5zx427ch6"))))
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; The label uses dashes, like every other label here and like the
       ;; variable it refers to.
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
;; R/qtl from CRAN; no inputs are declared explicitly.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.41-6")
    (source
     (origin
       (method url-fetch)
       ;; Use the same CRAN URI helper as the other CRAN packages in this
       ;; file instead of spelling out the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "067az4v432zxp6lxck8d7vlh9w4r13r0mvw5zsglyaqwsh3d9sad"))))
    (build-system r-build-system)
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).
Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
;; zlibbioc from Bioconductor; no inputs are declared explicitly.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "1kdgwwlh39mgwzj3zq71za4iv40sq625ghnyrndv5wrivrcr2igv"))))
    ;; Upstream name as passed to bioconductor-uri above.
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
;; R4RNA, fetched from the e-rna.org site rather than from CRAN or
;; Bioconductor.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    ;; Label/package pairs, in the original order.
    (propagated-inputs
     (list (list "r-optparse" r-optparse)
           (list "r-rcolorbrewer" r-rcolorbrewer)))
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
;; Rhtslib from Bioconductor.  It propagates r-zlibbioc, takes zlib as a
;; regular input and autoconf as a native input.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "0jj5h81z5gyf4j3lp2l3zsm6pgbmalgrngr8qdpygc20phndpp0b"))))
    (properties '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-zlibbioc" r-zlibbioc)))
    (inputs
     (list (list "zlib" zlib)))
    (native-inputs
     (list (list "autoconf" autoconf)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
;; bamsignals from Bioconductor.  Besides its propagated R dependencies it
;; takes zlib as a regular input.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "bamsignals" version))
       (sha256
        (base32
         "0knx69zzdaak2sjim8k9mygmcjxpa705m8013ld5zwpgi8dag9mc"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
;; RCAS R library, built from a GitHub release tarball.  The tarball is given
;; an explicit file name made of the package name and version.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.3.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1qgc7vi6fpzl440yg7jhiycg5q336kd4pxqzx10yx2zcq3bq3msg"))))
    (build-system r-build-system)
    (native-inputs
     (list (list "r-knitr" r-knitr)
           (list "r-testthat" r-testthat)
           ;; knitr checks for "pandoc-citeproc" in the PATH while it
           ;; builds the vignettes.
           (list "ghc-pandoc-citeproc" ghc-pandoc-citeproc)))
    ;; Label/package pairs, in the original order.
    (propagated-inputs
     (list (list "r-data-table" r-data-table)
           (list "r-biomart" r-biomart)
           (list "r-org-hs-eg-db" r-org-hs-eg-db)
           (list "r-org-ce-eg-db" r-org-ce-eg-db)
           (list "r-org-dm-eg-db" r-org-dm-eg-db)
           (list "r-org-mm-eg-db" r-org-mm-eg-db)
           (list "r-bsgenome-hsapiens-ucsc-hg19" r-bsgenome-hsapiens-ucsc-hg19)
           (list "r-bsgenome-mmusculus-ucsc-mm9" r-bsgenome-mmusculus-ucsc-mm9)
           (list "r-bsgenome-celegans-ucsc-ce10" r-bsgenome-celegans-ucsc-ce10)
           (list "r-bsgenome-dmelanogaster-ucsc-dm3"
                 r-bsgenome-dmelanogaster-ucsc-dm3)
           (list "r-topgo" r-topgo)
           (list "r-dt" r-dt)
           (list "r-pbapply" r-pbapply)
           (list "r-plotly" r-plotly)
           (list "r-plotrix" r-plotrix)
           (list "r-motifrg" r-motifrg)
           (list "r-genomation" r-genomation)
           (list "r-genomicfeatures" r-genomicfeatures)
           (list "r-rtracklayer" r-rtracklayer)
           (list "r-rmarkdown" r-rmarkdown)))
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (license license:artistic2.0)))
;; Web front end for RCAS, built with the GNU build system.  After the
;; install phase the "rcas-web" executable is wrapped so that it finds the
;; Guile modules of guile-json and guile-redis and the R site library.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((launcher (string-append (assoc-ref outputs "out")
                                             "/bin/rcas-web"))
                    ;; One search path shared by the source and compiled
                    ;; module variables.
                    (modules (string-append
                              (assoc-ref inputs "guile-json")
                              "/share/guile/site/2.2:"
                              (assoc-ref inputs "guile-redis")
                              "/share/guile/site/2.2")))
               ;; "=" makes the wrapper set each variable to exactly the
               ;; given value.
               (wrap-program launcher
                 (list "GUILE_LOAD_PATH" ":" '= (list modules))
                 (list "GUILE_LOAD_COMPILED_PATH" ":" '= (list modules))
                 ;; R_LIBS_SITE is taken from the build environment.
                 (list "R_LIBS_SITE" ":" '= (list (getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     (list (list "r-minimal" r-minimal)
           (list "r-rcas" r-rcas)
           (list "guile-next" guile-2.2)
           (list "guile-json" guile-json)
           (list "guile-redis" guile2.2-redis)))
    (native-inputs
     (list (list "pkg-config" pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
;; MutationalPatterns from Bioconductor.  Its propagated inputs include two
;; Homo sapiens BSgenome data packages.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32
         "1s50diwh1j6vg3mgahh6bczvq74mfdbmwjrad4d5lh723gnc5pjg"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
;; WGCNA from CRAN; the upper-case upstream name is recorded in
;; `properties'.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.61")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "1vrc2k33a196hrrl7k0z534fp96vv0shmigcr65ny1q0v6lq0h6i"))))
    (properties '((upstream-name . "WGCNA")))
    (build-system r-build-system)
    ;; Label/package pairs, in the original order.
    (propagated-inputs
     (list (list "r-annotationdbi" r-annotationdbi)
           (list "r-doparallel" r-doparallel)
           (list "r-dynamictreecut" r-dynamictreecut)
           (list "r-fastcluster" r-fastcluster)
           (list "r-foreach" r-foreach)
           (list "r-go-db" r-go-db)
           (list "r-hmisc" r-hmisc)
           (list "r-impute" r-impute)
           (list "r-rcpp" r-rcpp)
           (list "r-robust" r-robust)
           (list "r-survival" r-survival)
           (list "r-matrixstats" r-matrixstats)
           (list "r-preprocesscore" r-preprocesscore)))
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (license license:gpl2+)))
;; ChIPKernels, built from a pinned Git commit.  The version string is
;; "1.1-", the revision, a dot and the first nine characters of the commit.
(define-public r-chipkernels
  (let ((upstream-commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (guix-revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" guix-revision "."
                              (string-take upstream-commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ManuSetty/ChIPKernels.git")
                      (commit upstream-commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      ;; Label/package pairs, in the original order.
      (propagated-inputs
       (list (list "r-iranges" r-iranges)
             (list "r-xvector" r-xvector)
             (list "r-biostrings" r-biostrings)
             (list "r-bsgenome" r-bsgenome)
             (list "r-gtools" r-gtools)
             (list "r-genomicranges" r-genomicranges)
             (list "r-sfsmisc" r-sfsmisc)
             (list "r-kernlab" r-kernlab)
             (list "r-s4vectors" r-s4vectors)
             (list "r-biocgenerics" r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
;; SeqGL, built from a GitHub archive tarball.  The tarball is given an
;; explicit file name made of the package name and version.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    ;; Label/package pairs, in the original order.
    (propagated-inputs
     (list (list "r-biostrings" r-biostrings)
           (list "r-chipkernels" r-chipkernels)
           (list "r-genomicranges" r-genomicranges)
           (list "r-spams" r-spams)
           (list "r-wgcna" r-wgcna)
           (list "r-fastcluster" r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
;; gkmSVM from CRAN; the mixed-case upstream name is recorded in
;; `properties'.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.71.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "gkmSVM" version))
       (sha256
        (base32
         "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
    (properties `((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
;; tximport from Bioconductor; no inputs are declared explicitly.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "tximport" version))
              (sha256
               (base32
                "1mklb02bj4gnbjlmb7vv6k4lr3w9fp3pzli9rddbrwd0y5n8fcpx"))))
    (build-system r-build-system)
    ;; Link to the HTTPS site rather than plain HTTP.
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
- (define-public r-rhdf5 ; public package variable for the Bioconductor rhdf5 package
- (package
- (name "r-rhdf5")
- (version "2.20.0")
- (source (origin
- (method url-fetch)
- (uri (bioconductor-uri "rhdf5" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "1p6f5i6l44phl772a38x9cav2sya37bkqbkjzdc4pmyfzkv1j6hy"))))
- (build-system r-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'unpack-smallhdf5
- (lambda _
- ;; Unpack the bundled HDF5 sources into src/ ourselves. The phase
- ;; fails when tar exits with a non-zero status, so that a missing
- ;; or partially extracted source tree is reported here rather than
- ;; in a later phase.
- (and (zero? (system* "tar" "-xzvf"
- "src/hdf5source/hdf5small.tgz" "-C" "src/"))
- (begin
- ;; The sources are unpacked already, so blank out the Makevars
- ;; lines that would unpack, remove and move them again.
- (substitute* "src/Makevars"
- (("^.*cd hdf5source &&.*$") "")
- (("^.*gunzip -dc hdf5small.tgz.*$") "")
- (("^.*rm -rf hdf5.*$") "")
- (("^.*mv hdf5source/hdf5 ..*$") ""))
- ;; Refer to "mv" by name instead of the absolute /bin/mv path.
- (substitute* "src/hdf5/configure"
- (("/bin/mv") "mv"))
- #t)))))))
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-zlibbioc" ,r-zlibbioc)))
- (inputs ; non-R inputs
- `(("perl" ,perl)
- ("zlib" ,zlib)))
- (home-page "http://bioconductor.org/packages/rhdf5")
- (synopsis "HDF5 interface to R")
- (description
- "This R/Bioconductor package provides an interface between HDF5 and R.
- HDF5's main features are the ability to store and access very large and/or
- complex datasets and a wide variety of metadata on mass storage (disk) through
- a completely portable file format. The rhdf5 package is thus suited for the
- exchange of large and/or complex datasets between R and other software
- package, and for letting R applications work on datasets that are larger than
- the available RAM.")
- (license license:artistic2.0)))
- (define-public r-annotationfilter ; public package variable for Bioconductor's AnnotationFilter
- (package
- (name "r-annotationfilter")
- (version "1.0.0")
- (source (origin
- (method url-fetch)
- (uri (bioconductor-uri "AnnotationFilter" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0pxvswjzwibdfmrkdragxmzcl844z73pmkn82z92wahwa6gjfyi7"))))
- (properties
- `((upstream-name . "AnnotationFilter"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-genomicranges" ,r-genomicranges)
- ("r-lazyeval" ,r-lazyeval)))
- (home-page "https://github.com/Bioconductor/AnnotationFilter")
- (synopsis "Facilities for filtering Bioconductor annotation resources")
- (description
- "This package provides classes and other infrastructure to implement
- filters for manipulating Bioconductor annotation resources. The filters are
- used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
- (license license:artistic2.0)))
- (define-public emboss ; public package variable for the EMBOSS analysis suite
- (package
- (name "emboss")
- (version "6.5.7")
- (source (origin
- (method url-fetch)
- (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/" ; directory component is <major.minor>.0, file name uses the full version
- (version-major+minor version) ".0/"
- "EMBOSS-" version ".tar.gz"))
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
- (build-system gnu-build-system)
- (arguments
- `(#:configure-flags
- (list (string-append "--with-hpdf=" ; pass the location of the "libharu" input to configure
- (assoc-ref %build-inputs "libharu")))
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'fix-checks
- (lambda _
- ;; The PNGDRIVER tests check for the presence of libgd, libpng
- ;; and zlib, but assume that they are all found at the same
- ;; prefix.
- (substitute* "configure.in" ; replace the CHECK_PNGDRIVER call with fixed LIBS and definitions
- (("CHECK_PNGDRIVER")
- "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
- AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
- AM_CONDITIONAL(AMPNG, true)"))
- #t))
- (add-after 'fix-checks 'disable-update-check
- (lambda _
- ;; At build time there is no connection to the Internet, so
- ;; looking for updates will not work.
- (substitute* "Makefile.am" ; drop the reference to $(bindir)/embossupdate
- (("\\$\\(bindir\\)/embossupdate") ""))
- #t))
- (add-after 'disable-update-check 'autogen ; runs after configure.in and Makefile.am were edited above
- (lambda _ (zero? (system* "autoreconf" "-vif"))))))) ; phase result is whether autoreconf exited with status 0
- (inputs
- `(("perl" ,perl)
- ("libpng" ,libpng)
- ("gd" ,gd)
- ("libx11" ,libx11)
- ("libharu" ,libharu) ; looked up by label in #:configure-flags
- ("zlib" ,zlib)))
- (native-inputs ; includes the autotools invoked by the 'autogen phase
- `(("autoconf" ,autoconf)
- ("automake" ,automake)
- ("libtool" ,libtool)
- ("pkg-config" ,pkg-config)))
- (home-page "http://emboss.sourceforge.net")
- (synopsis "Molecular biology analysis suite")
- (description "EMBOSS is the \"European Molecular Biology Open Software
- Suite\". EMBOSS is an analysis package specially developed for the needs of
- the molecular biology (e.g. EMBnet) user community. The software
- automatically copes with data in a variety of formats and even allows
- transparent retrieval of sequence data from the web. It also provides a
- number of libraries for the development of software in the field of molecular
- biology. EMBOSS also integrates a range of currently available packages and
- tools for sequence analysis into a seamless whole.")
- (license license:gpl2+)))
- (define-public bits ; public package variable for BITS, built from a pinned git commit
- (let ((revision "1") ; revision string that is spliced into the version below
- (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
- (package
- (name "bits")
- ;; The version is 2.13.0 even though no release archives have been
- ;; published as yet.
- (version (string-append "2.13.0-" revision "." (string-take commit 9))) ; "2.13.0-<revision>.<first 9 characters of commit>"
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/arq5x/bits.git")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout")) ; names the checkout after the package name and version
- (sha256 ; expected sha256 of the checkout, written in base32
- (base32
- "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ;no tests included
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; the configure phase is removed entirely
- (add-after 'unpack 'remove-cuda
- (lambda _
- (substitute* "Makefile"
- ((".*_cuda") "") ; erase each match of ".*_cuda"
- (("(bits_test_intersections) \\\\" _ match) match)) ; keep the target name, drop the trailing backslash
- #t))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (copy-recursively ; install by copying the "bin" directory into <out>/bin
- "bin" (string-append (assoc-ref outputs "out") "/bin"))
- #t)))))
- (inputs
- `(("gsl" ,gsl)
- ("zlib" ,zlib)))
- (home-page "https://github.com/arq5x/bits")
- (synopsis "Implementation of binary interval search algorithm")
- (description "This package provides an implementation of the
- BITS (Binary Interval Search) algorithm, an approach to interval set
- intersection. It is especially suited for the comparison of diverse genomic
- datasets and the exploration of large datasets of genome
- intervals (e.g. genes, sequence alignments).")
- (license license:gpl2))))
- (define-public piranha ; public package variable for the Piranha peak-caller, built from a pinned git commit
- ;; There is no release tarball for the latest version. The latest commit is
- ;; older than one year at the time of this writing.
- (let ((revision "1")
- (commit "0466d364b71117d01e4471b74c514436cc281233"))
- (package
- (name "piranha")
- (version (string-append "1.2.1-" revision "." (string-take commit 9))) ; "1.2.1-<revision>.<first 9 characters of commit>"
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/smithlabcode/piranha.git")
- (commit commit)))
- ;; Name the checkout after the package and its version, like the
- ;; other git-fetch origins in this file do.
- (file-name (string-append name "-" version "-checkout"))
- (sha256 ; expected sha256 of the checkout, written in base32
- (base32
- "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
- (build-system gnu-build-system)
- (arguments
- `(#:test-target "test"
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'copy-smithlab-cpp
- (lambda* (#:key inputs #:allow-other-keys)
- ;; Copy every file of the "smithlab-cpp" input into
- ;; ./src/smithlab_cpp/ of the unpacked source tree.
- (for-each (lambda (file)
- (install-file file "./src/smithlab_cpp/"))
- (find-files (assoc-ref inputs "smithlab-cpp")))
- #t))
- (add-after 'install 'install-to-store
- (lambda* (#:key outputs #:allow-other-keys)
- ;; Copy everything found under the local "bin" directory into
- ;; the bin directory of the "out" output.
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin")))
- (for-each (lambda (file)
- (install-file file bin))
- (find-files "bin" ".*")))
- #t)))
- #:configure-flags
- ;; Tell configure where the headers and libraries of the "bamtools"
- ;; input are located.
- (list (string-append "--with-bam_tools_headers="
- (assoc-ref %build-inputs "bamtools") "/include/bamtools")
- (string-append "--with-bam_tools_library="
- (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
- (inputs
- `(("bamtools" ,bamtools)
- ("samtools" ,samtools-0.1)
- ("gsl" ,gsl)
- ;; Source checkout only; it is consumed by the 'copy-smithlab-cpp phase.
- ("smithlab-cpp"
- ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/smithlabcode/smithlab_cpp.git")
- (commit commit)))
- (file-name (string-append "smithlab_cpp-" commit "-checkout"))
- (sha256
- (base32
- "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
- (native-inputs
- `(("python" ,python-2)))
- (home-page "https://github.com/smithlabcode/piranha")
- (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
- (description
- "Piranha is a peak-caller for genomic data produced by CLIP-seq and
- RIP-seq experiments. It takes input in BED or BAM format and identifies
- regions of statistically significant read enrichment. Additional covariates
- may optionally be provided to further inform the peak-calling process.")
- (license license:gpl3+))))
- (define-public pepr ; public package variable for the PePr ChIP-Seq peak caller
- (package
- (name "pepr")
- (version "1.0.9")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://pypi.python.org/packages/source/P" ; URL is spelled out by hand; only the version is interpolated
- "/PePr/PePr-" version ".tar.gz"))
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
- (build-system python-build-system)
- (arguments
- `(#:python ,python-2 ; python2 only
- #:tests? #f)) ; no tests included
- (propagated-inputs ; Python 2 variants of the libraries, matching #:python above
- `(("python2-numpy" ,python2-numpy)
- ("python2-scipy" ,python2-scipy)
- ("python2-pysam" ,python2-pysam)))
- (home-page "https://github.com/shawnzhangyx/PePr")
- (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
- (description
- "PePr is a ChIP-Seq peak calling or differential binding analysis tool
- that is primarily designed for data with biological replicates. It uses a
- negative binomial distribution to model the read counts among the samples in
- the same group, and look for consistent differences between ChIP and control
- group or two ChIP groups run under different conditions.")
- (license license:gpl3+)))
- (define-public filevercmp ; public package variable for filevercmp, built from a pinned commit archive
- (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
- (package
- (name "filevercmp")
- (version (string-append "0-1." (string-take commit 7))) ; "0-1.<first 7 characters of commit>"
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/filevercmp/archive/"
- commit ".tar.gz"))
- ;; The archive is named after the commit upstream; store it under
- ;; a name made of the package name and version instead.
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- ;; Copy the "filevercmp" executable into <out>/bin.
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "filevercmp" bin)
- ;; Report success explicitly, as the other custom phases in
- ;; this file do, instead of returning whatever install-file
- ;; happens to return.
- #t))))))
- (home-page "https://github.com/ekg/filevercmp")
- (synopsis "This program compares version strings")
- (description "This program compares version strings. It intends to be a
- replacement for strverscmp.")
- (license license:gpl3+))))
- (define-public multiqc ; public package variable for the MultiQC report aggregator
- (package
- (name "multiqc")
- (version "1.2")
- (source
- (origin
- (method url-fetch)
- (uri (pypi-uri "multiqc" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "032svgym67k2ds7wp0cxzv79gi30yrdl45zbqn74lni3dk04qm33"))))
- (build-system python-build-system)
- (propagated-inputs ; Python library dependencies, as (label package) pairs
- `(("python-jinja2" ,python-jinja2)
- ("python-simplejson" ,python-simplejson)
- ("python-pyyaml" ,python-pyyaml)
- ("python-click" ,python-click)
- ("python-spectra" ,python-spectra)
- ("python-requests" ,python-requests)
- ("python-markdown" ,python-markdown)
- ("python-lzstring" ,python-lzstring)
- ("python-matplotlib" ,python-matplotlib)
- ("python-numpy" ,python-numpy)
- ;; MultiQC checks for the presence of nose at runtime.
- ("python-nose" ,python-nose)))
- (home-page "http://multiqc.info")
- (synopsis "Aggregate bioinformatics analysis reports")
- (description
- "MultiQC is a tool to aggregate bioinformatics results across many
- samples into a single report. It contains modules for a large number of
- common bioinformatics tools.")
- (license license:gpl3)))
- (define-public r-chipseq ; public package variable for the Bioconductor chipseq package
- (package
- (name "r-chipseq")
- (version "1.26.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "chipseq" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "1hahyqiwb2ch8214xqpw0c3jpiwkmyf3dwz0xc87jx6cdnzipj3i"))))
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-biocgenerics" ,r-biocgenerics)
- ("r-genomicranges" ,r-genomicranges)
- ("r-iranges" ,r-iranges)
- ("r-s4vectors" ,r-s4vectors)
- ("r-shortread" ,r-shortread)))
- (home-page "http://bioconductor.org/packages/chipseq")
- (synopsis "Package for analyzing ChIPseq data")
- (description
- "This package provides tools for processing short read data from ChIPseq
- experiments.")
- (license license:artistic2.0)))
- (define-public r-copyhelper ; public package variable for CopyhelpeR, the data package used by CopywriteR
- (package
- (name "r-copyhelper")
- (version "1.6.0")
- (source
- (origin
- (method url-fetch)
- (uri (string-append "http://bioconductor.org/packages/release/" ; URL is spelled out by hand and points below data/experiment
- "data/experiment/src/contrib/CopyhelpeR_"
- version ".tar.gz"))
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
- (properties `((upstream-name . "CopyhelpeR"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (home-page "http://bioconductor.org/packages/CopyhelpeR/")
- (synopsis "Helper files for CopywriteR")
- (description
- "This package contains the helper files that are required to run the
- Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
- and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
- mm10. In addition, it contains a blacklist filter to remove regions that
- display copy number variation. Files are stored as GRanges objects from the
- GenomicRanges Bioconductor package.")
- (license license:gpl2)))
- (define-public r-copywriter ; public package variable for the Bioconductor CopywriteR package
- (package
- (name "r-copywriter")
- (version "2.8.1")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "CopywriteR" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0xgqnq5v5213b3nzvlmjysjb7w1bc0iblqpmzbjqn7n0ib0qyhbm"))))
- (properties `((upstream-name . "CopywriteR"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-biocparallel" ,r-biocparallel)
- ("r-chipseq" ,r-chipseq)
- ("r-copyhelper" ,r-copyhelper) ; the helper-file package defined in this file
- ("r-data-table" ,r-data-table)
- ("r-dnacopy" ,r-dnacopy)
- ("r-futile-logger" ,r-futile-logger)
- ("r-genomeinfodb" ,r-genomeinfodb)
- ("r-genomicalignments" ,r-genomicalignments)
- ("r-genomicranges" ,r-genomicranges)
- ("r-gtools" ,r-gtools)
- ("r-iranges" ,r-iranges)
- ("r-matrixstats" ,r-matrixstats)
- ("r-rsamtools" ,r-rsamtools)
- ("r-s4vectors" ,r-s4vectors)))
- (home-page "https://github.com/PeeperLab/CopywriteR")
- (synopsis "Copy number information from targeted sequencing")
- (description
- "CopywriteR extracts DNA copy number information from targeted sequencing
- by utilizing off-target reads. It allows for extracting uniformly distributed
- copy number information, can be used without reference, and can be applied to
- sequencing data obtained from various techniques including chromatin
- immunoprecipitation and target enrichment on small gene panels. Thereby,
- CopywriteR constitutes a widely applicable alternative to available copy
- number detection tools.")
- (license license:gpl2)))
- (define-public r-methylkit ; public package variable for the Bioconductor methylKit package
- (package
- (name "r-methylkit")
- (version "1.2.0")
- (source (origin
- (method url-fetch)
- (uri (bioconductor-uri "methylKit" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "02acdjf6jl0c1glymin84pdna4farn4vv0gb6107d9iqz3y3gkmm"))))
- (properties `((upstream-name . "methylKit"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-data-table" ,r-data-table)
- ("r-emdbook" ,r-emdbook)
- ("r-fastseg" ,r-fastseg)
- ("r-genomeinfodb" ,r-genomeinfodb)
- ("r-genomicranges" ,r-genomicranges)
- ("r-gtools" ,r-gtools)
- ("r-iranges" ,r-iranges)
- ("r-kernsmooth" ,r-kernsmooth)
- ("r-limma" ,r-limma)
- ("r-mclust" ,r-mclust)
- ("r-qvalue" ,r-qvalue)
- ("r-r-utils" ,r-r-utils)
- ("r-rcpp" ,r-rcpp)
- ("r-rhtslib" ,r-rhtslib)
- ("r-rsamtools" ,r-rsamtools)
- ("r-rtracklayer" ,r-rtracklayer)
- ("r-s4vectors" ,r-s4vectors)
- ("r-zlibbioc" ,r-zlibbioc)))
- (inputs ; non-R input
- `(("zlib" ,zlib)))
- (home-page "http://code.google.com/p/methylkit/")
- (synopsis
- "DNA methylation analysis from high-throughput bisulfite sequencing results")
- (description
- "MethylKit is an R package for DNA methylation analysis and annotation
- from high-throughput bisulfite sequencing. The package is designed to deal
- with sequencing data from @dfn{Reduced representation bisulfite
- sequencing} (RRBS) and its variants, but also target-capture methods and whole
- genome bisulfite sequencing. It also has functions to analyze base-pair
- resolution 5hmC data from experimental protocols such as oxBS-Seq and
- TAB-Seq.")
- (license license:artistic2.0)))
- (define-public r-sva ; public package variable for the Bioconductor sva package
- (package
- (name "r-sva")
- (version "3.24.4")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "sva" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0wcway4ai9im81xnrzb1vij2iidq5pw24qhjfgacmhxvx3dzhbsc"))))
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-genefilter" ,r-genefilter)
- ("r-mgcv" ,r-mgcv)
- ("r-biocparallel" ,r-biocparallel)
- ("r-matrixstats" ,r-matrixstats)
- ("r-limma" ,r-limma)))
- (home-page "http://bioconductor.org/packages/sva")
- (synopsis "Surrogate variable analysis")
- (description
- "This package contains functions for removing batch effects and other
- unwanted variation in high-throughput experiment. It also contains functions
- for identifying and building surrogate variables for high-dimensional data
- sets. Surrogate variables are covariates constructed directly from
- high-dimensional data like gene expression/RNA sequencing/methylation/brain
- imaging data that can be used in subsequent analyses to adjust for unknown,
- unmodeled, or latent sources of noise.")
- (license license:artistic2.0)))
- (define-public r-seqminer ; public package variable for the CRAN seqminer package
- (package
- (name "r-seqminer")
- (version "6.0")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "seqminer" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "057j1l6dip35l1aivilapl2zv9db677b3di2pb3sfgq2sxg0ps3l"))))
- (build-system r-build-system)
- (inputs ; non-R input
- `(("zlib" ,zlib)))
- (home-page "http://seqminer.genomic.codes")
- (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
- (description
- "This package provides tools to integrate nucleotide sequencing
- data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
- ;; Any version of the GPL is acceptable
- (license (list license:gpl2+ license:gpl3+)))) ; the license field holds a list of two licenses here
- (define-public r-raremetals2 ; public package variable for the RareMETALS2 R package
- (package
- (name "r-raremetals2")
- (version "0.1")
- (source
- (origin
- (method url-fetch)
- (uri (string-append "http://genome.sph.umich.edu/w/images/" ; URL is spelled out by hand; only the version is interpolated
- "b/b7/RareMETALS2_" version ".tar.gz"))
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
- (properties `((upstream-name . "RareMETALS2"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-seqminer" ,r-seqminer)
- ("r-mvtnorm" ,r-mvtnorm)
- ("r-mass" ,r-mass)
- ("r-compquadform" ,r-compquadform)
- ("r-getopt" ,r-getopt)))
- (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
- (synopsis "Analyze gene-level association tests for binary trait")
- (description
- "The R package rareMETALS2 is an extension of the R package rareMETALS.
- It was designed to meta-analyze gene-level association tests for binary trait.
- While rareMETALS offers a near-complete solution for meta-analysis of
- gene-level tests for quantitative trait, it does not offer the optimal
- solution for binary trait. The package rareMETALS2 offers improved features
- for analyzing gene-level association tests in meta-analyses for binary
- trait.")
- (license license:gpl3)))
- (define-public r-maldiquant ; public package variable for the CRAN MALDIquant package
- (package
- (name "r-maldiquant")
- (version "1.16.4")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "MALDIquant" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "1pmhsfvd45a44xdiml4zx3zd5fhygqyziqvygahkk9yibnyhv4cv"))))
- (properties `((upstream-name . "MALDIquant"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system) ; no inputs are declared for this package
- (home-page "http://cran.r-project.org/web/packages/MALDIquant")
- (synopsis "Quantitative analysis of mass spectrometry data")
- (description
- "This package provides a complete analysis pipeline for matrix-assisted
- laser desorption/ionization-time-of-flight (MALDI-TOF) and other
- two-dimensional mass spectrometry data. In addition to commonly used plotting
- and processing methods it includes distinctive features, namely baseline
- subtraction methods such as morphological filters (TopHat) or the
- statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
- alignment using warping functions, handling of replicated measurements as well
- as allowing spectra with different resolutions.")
- (license license:gpl3+)))
- (define-public r-protgenerics ; public package variable for the Bioconductor ProtGenerics package
- (package
- (name "r-protgenerics")
- (version "1.8.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "ProtGenerics" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "08idb3rvxn4fl7rd66jasyqz47cb76dbc3968r1g26jr2ci3w1pl"))))
- (properties `((upstream-name . "ProtGenerics"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system) ; no inputs are declared for this package
- (home-page "https://github.com/lgatto/ProtGenerics")
- (synopsis "S4 generic functions for proteomics infrastructure")
- (description
- "This package provides S4 generic functions needed by Bioconductor
- proteomics packages.")
- (license license:artistic2.0)))
- (define-public r-mzr ; public package variable for the Bioconductor mzR package
- (package
- (name "r-mzr")
- (version "2.10.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "mzR" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "1zir46h320n2vbrky6q3m8l221f3wdjlfsnx4ak9xca5min24xm7"))))
- (properties `((upstream-name . "mzR"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (inputs ; non-R inputs
- `(("boost" ,boost)
- ("netcdf" ,netcdf)))
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-biobase" ,r-biobase)
- ("r-biocgenerics" ,r-biocgenerics)
- ("r-protgenerics" ,r-protgenerics)
- ("r-rcpp" ,r-rcpp)
- ("r-zlibbioc" ,r-zlibbioc)))
- (home-page "https://github.com/sneumann/mzR/")
- (synopsis "Parser for mass spectrometry data files")
- (description
- "The mzR package provides a unified API to the common file formats and
- parsers available for mass spectrometry data. It comes with a wrapper for the
- ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
- The package contains the original code written by the ISB, and a subset of the
- proteowizard library for mzML and mzIdentML. The netCDF reading code has
- previously been used in XCMS.")
- (license license:artistic2.0)))
- (define-public r-affyio ; public package variable for the Bioconductor affyio package
- (package
- (name "r-affyio")
- (version "1.46.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "affyio" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "13w6al9296q916w0w6ngbsk25b21ahla1b6n40fcqhbvhyfii6sj"))))
- (build-system r-build-system)
- (propagated-inputs ; R package dependency
- `(("r-zlibbioc" ,r-zlibbioc)))
- (inputs ; non-R input
- `(("zlib" ,zlib)))
- (home-page "https://github.com/bmbolstad/affyio")
- (synopsis "Tools for parsing Affymetrix data files")
- (description
- "This package provides routines for parsing Affymetrix data files based
- upon file format information. The primary focus is on accessing the CEL and
- CDF file formats.")
- (license license:lgpl2.0+)))
- (define-public r-affy ; public package variable for the Bioconductor affy package
- (package
- (name "r-affy")
- (version "1.54.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "affy" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0azwg2qxzgflr1rjvbdln5i5rbcr9gs36kqlacd9cwl1szb9ad3m"))))
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-affyio" ,r-affyio)
- ("r-biobase" ,r-biobase)
- ("r-biocgenerics" ,r-biocgenerics)
- ("r-biocinstaller" ,r-biocinstaller)
- ("r-preprocesscore" ,r-preprocesscore)
- ("r-zlibbioc" ,r-zlibbioc)))
- (home-page "http://bioconductor.org/packages/affy")
- (synopsis "Methods for affymetrix oligonucleotide arrays")
- (description
- "This package contains functions for exploratory oligonucleotide array
- analysis.")
- (license license:lgpl2.0+)))
- (define-public r-vsn ; public package variable for the Bioconductor vsn package
- (package
- (name "r-vsn")
- (version "3.44.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "vsn" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0qhg3a4sc62pfdxcpvmk831rk138xh4zx4f1s39jhxpqqhmr7jvk"))))
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-affy" ,r-affy)
- ("r-biobase" ,r-biobase)
- ("r-ggplot2" ,r-ggplot2)
- ("r-hexbin" ,r-hexbin)
- ("r-lattice" ,r-lattice)
- ("r-limma" ,r-limma)))
- (home-page "http://bioconductor.org/packages/release/bioc/html/vsn.html")
- (synopsis "Variance stabilization and calibration for microarray data")
- (description
- "The package implements a method for normalising microarray intensities,
- and works for single- and multiple-color arrays. It can also be used for data
- from other technologies, as long as they have similar format. The method uses
- a robust variant of the maximum-likelihood estimator for an
- additive-multiplicative error model and affine calibration. The model
- incorporates data calibration step (a.k.a. normalization), a model for the
- dependence of the variance on the mean intensity and a variance stabilizing
- data transformation. Differences between transformed intensities are
- analogous to \"normalized log-ratios\". However, in contrast to the latter,
- their variance is independent of the mean, and they are usually more sensitive
- and specific in detecting differential transcription.")
- (license license:artistic2.0)))
- (define-public r-mzid ; public package variable for the Bioconductor mzID package
- (package
- (name "r-mzid")
- (version "1.14.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "mzID" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "11xnild02jz24vbsfy92lb7jlqqwnrswg66a7r4rsw8d2ibrbk33"))))
- (properties `((upstream-name . "mzID"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-doparallel" ,r-doparallel)
- ("r-foreach" ,r-foreach)
- ("r-iterators" ,r-iterators)
- ("r-plyr" ,r-plyr)
- ("r-protgenerics" ,r-protgenerics)
- ("r-rcpp" ,r-rcpp)
- ("r-xml" ,r-xml)))
- (home-page "http://bioconductor.org/packages/mzID")
- (synopsis "Parser for mzIdentML files")
- (description
- "This package provides a parser for mzIdentML files implemented using the
- XML package. The parser tries to be general and able to handle all types of
- mzIdentML files with the drawback of having less pretty output than a vendor
- specific parser.")
- (license license:gpl2+)))
- (define-public r-pcamethods ; public package variable for the Bioconductor pcaMethods package
- (package
- (name "r-pcamethods")
- (version "1.68.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "pcaMethods" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0c4lphqyzj577ws4s172391cgv00s5nhy152zp18k2k4diyhq6n0"))))
- (properties `((upstream-name . "pcaMethods"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-biobase" ,r-biobase)
- ("r-biocgenerics" ,r-biocgenerics)
- ("r-mass" ,r-mass)
- ("r-rcpp" ,r-rcpp)))
- (home-page "https://github.com/hredestig/pcamethods")
- (synopsis "Collection of PCA methods")
- (description
- "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
- Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
- for missing value estimation is included for comparison. BPCA, PPCA and
- NipalsPCA may be used to perform PCA on incomplete data as well as for
- accurate missing value estimation. A set of methods for printing and plotting
- the results is also provided. All PCA methods make use of the same data
- structure (pcaRes) to provide a common interface to the PCA results.")
- (license license:gpl3+)))
- (define-public r-msnbase ; public package variable for the Bioconductor MSnbase package
- (package
- (name "r-msnbase")
- (version "2.2.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "MSnbase" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "1n9bbdlp8d8bx2mqby0c4yylz1yil42scbkxqgyrfr9s5sn6dqff"))))
- (properties `((upstream-name . "MSnbase"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-affy" ,r-affy)
- ("r-biobase" ,r-biobase)
- ("r-biocgenerics" ,r-biocgenerics)
- ("r-biocparallel" ,r-biocparallel)
- ("r-digest" ,r-digest)
- ("r-ggplot2" ,r-ggplot2)
- ("r-impute" ,r-impute)
- ("r-iranges" ,r-iranges)
- ("r-maldiquant" ,r-maldiquant)
- ("r-mzid" ,r-mzid)
- ("r-mzr" ,r-mzr)
- ("r-pcamethods" ,r-pcamethods)
- ("r-plyr" ,r-plyr)
- ("r-preprocesscore" ,r-preprocesscore)
- ("r-protgenerics" ,r-protgenerics)
- ("r-rcpp" ,r-rcpp)
- ("r-reshape2" ,r-reshape2)
- ("r-s4vectors" ,r-s4vectors)
- ("r-vsn" ,r-vsn)
- ("r-xml" ,r-xml)))
- (home-page "https://github.com/lgatto/MSnbase")
- (synopsis "Base functions and classes for MS-based proteomics")
- (description
- "This package provides basic plotting, data manipulation and processing
- of mass spectrometry based proteomics data.")
- (license license:artistic2.0)))
- (define-public r-msnid ; public package variable for the Bioconductor MSnID package
- (package
- (name "r-msnid")
- (version "1.10.0")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "MSnID" version)) ; URI is derived from the upstream name and the version field above
- (sha256 ; expected sha256 of the download, written in base32
- (base32
- "0pjwargi5lif8q53fd43ql67p3yk9w10jychafd9qgbaw5k3f68k"))))
- (properties `((upstream-name . "MSnID"))) ; upstream's mixed-case name differs from the lowercase package name
- (build-system r-build-system)
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-biobase" ,r-biobase)
- ("r-data-table" ,r-data-table)
- ("r-doparallel" ,r-doparallel)
- ("r-dplyr" ,r-dplyr)
- ("r-foreach" ,r-foreach)
- ("r-iterators" ,r-iterators)
- ("r-msnbase" ,r-msnbase)
- ("r-mzid" ,r-mzid)
- ("r-mzr" ,r-mzr)
- ("r-protgenerics" ,r-protgenerics)
- ("r-r-cache" ,r-r-cache)
- ("r-rcpp" ,r-rcpp)
- ("r-reshape2" ,r-reshape2)))
- (home-page "http://bioconductor.org/packages/MSnID")
- (synopsis "Utilities for LC-MSn proteomics identifications")
- (description
- "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
- from mzIdentML (leveraging the mzID package) or text files. After collating
- the search results from multiple datasets it assesses their identification
- quality and optimize filtering criteria to achieve the maximum number of
- identifications while not exceeding a specified false discovery rate. It also
- contains a number of utilities to explore the MS/MS results and assess missed
- and irregular enzymatic cleavages, mass measurement accuracy, etc.")
- (license license:artistic2.0)))
- (define-public r-seurat ; public package variable for Seurat, built from a pinned git commit
- ;; Source releases are only made for new x.0 versions. All newer versions
- ;; are only released as pre-built binaries. At the time of this writing the
- ;; latest binary release is 1.4.0.12, which is equivalent to this commit.
- (let ((commit "fccb77d1452c35ee47e47ebf8e87bddb59f3b08d")
- (revision "1"))
- (package
- (name "r-seurat")
- (version (string-append "1.4.0.12-" revision "." (string-take commit 7))) ; "1.4.0.12-<revision>.<first 7 characters of commit>"
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/satijalab/seurat")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout")) ; names the checkout after the package name and version
- (sha256 ; expected sha256 of the checkout, written in base32
- (base32
- "101wq3aqrdmbfi3lqmq4iivk9iwbf10d4z216ss25hf7n9091cyl"))
- ;; Delete pre-built jar.
- (snippet
- '(begin (delete-file "inst/java/ModularityOptimizer.jar")
- #t))))
- (build-system r-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'build-jar ; rebuilds the jar that the source snippet deleted
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((classesdir "tmp-classes")) ; scratch directory that receives the compiled classes
- (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
- (mkdir classesdir)
- (and (zero? (apply system* `("javac" "-d" ,classesdir ; compile every .java file found under "java"
- ,@(find-files "java" "\\.java$"))))
- (zero? (system* "jar" ; pack the classes; the phase succeeds only if both commands exit with 0
- "-cf" "inst/java/ModularityOptimizer.jar"
- "-C" classesdir ".")))))))))
- (native-inputs
- `(("jdk" ,icedtea "jdk"))) ; the "jdk" output of icedtea, looked up by label in 'build-jar
- (propagated-inputs ; R package dependencies, as (label package) pairs
- `(("r-ape" ,r-ape)
- ("r-caret" ,r-caret)
- ("r-cowplot" ,r-cowplot)
- ("r-dplyr" ,r-dplyr)
- ("r-fastica" ,r-fastica)
- ("r-fnn" ,r-fnn)
- ("r-fpc" ,r-fpc)
- ("r-gdata" ,r-gdata)
- ("r-ggplot2" ,r-ggplot2)
- ("r-gplots" ,r-gplots)
- ("r-gridextra" ,r-gridextra)
- ("r-igraph" ,r-igraph)
- ("r-irlba" ,r-irlba)
- ("r-lars" ,r-lars)
- ("r-mixtools" ,r-mixtools)
- ("r-pbapply" ,r-pbapply)
- ("r-plyr" ,r-plyr)
- ("r-ranger" ,r-ranger)
- ("r-rcolorbrewer" ,r-rcolorbrewer)
- ("r-rcpp" ,r-rcpp)
- ("r-rcppeigen" ,r-rcppeigen)
- ("r-rcppprogress" ,r-rcppprogress)
- ("r-reshape2" ,r-reshape2)
- ("r-rocr" ,r-rocr)
- ("r-rtsne" ,r-rtsne)
- ("r-stringr" ,r-stringr)
- ("r-tclust" ,r-tclust)
- ("r-tsne" ,r-tsne)
- ("r-vgam" ,r-vgam)))
- (home-page "http://www.satijalab.org/seurat")
- (synopsis "Seurat is an R toolkit for single cell genomics")
- (description
- "This package is an R package designed for QC, analysis, and
- exploration of single cell RNA-seq data. It easily enables widely-used
- analytical techniques, including the identification of highly variable genes,
- dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
- algorithms; density clustering, hierarchical clustering, k-means, and the
- discovery of differentially expressed genes and markers.")
- (license license:gpl3))))
;; Bioconductor package "aroma.light"; the upstream name is recorded in the
;; package properties because it differs from the Guix package name.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "10snykmmx36qaymyf5s1n1km8hsscyzpykcpf0mzsrcv8ml9rp8a"))))
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
;; Bioconductor package "DESeq", fetched through `bioconductor-uri'.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "0j3dgcxd64m9qknmlcbdzvg4xhp981xd6nbwsvnqjfn6yypslgyw"))))
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
;; Bioconductor package "EDASeq"; it propagates, among others, the
;; r-aroma-light and r-deseq packages.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "0f25dfc8hdii9fjm3bf89vy9jkxv23sa62fkcga5b4gkipwrvm9a"))))
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
;; Bioconductor package "interactiveDisplayBase", fetched through
;; `bioconductor-uri'.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "12f6ap4bl3h2iwwhg8i3r9a7yyd28d8i5lb3fj1vnfvjs762r7r7"))))
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "http://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
;; Bioconductor package "AnnotationHub"; it propagates
;; r-interactivedisplaybase among its inputs.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.8.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "1nh5si3j1nv37jcg4260582ayjg18851np47cskrm54prnvhwd9r"))))
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
;; Bioconductor package "fastseg", fetched through `bioconductor-uri'.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "083wiz03q9mynwchs9frlpp6c84dncri5ncibx6h82p228cpja6h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
;; Bioconductor package "qvalue", fetched through `bioconductor-uri'.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "qvalue" version))
       (sha256
        (base32
         "1dxdwa767a9r8n61r272ypi09qblcdfpzzwkmri74y5mbp1r3y4i"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Use the HTTPS address directly: the plain-HTTP GitHub URL only answers
    ;; with a permanent redirect to this one.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
;; Private (non-exported) htslib variant built from a pinned commit of the
;; lomereiter fork; it is the "htslib" input of the sambamba package.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (arguments
       ;; Keep all of htslib's arguments and only extend its phases with an
       ;; "autoreconf" run right after unpacking.
       (substitute-keyword-arguments (package-arguments htslib)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             (add-after 'unpack 'bootstrap
               (lambda _
                 (zero? (system* "autoreconf" "-vif"))))))))
      (native-inputs
       ;; The autotools are needed by the 'bootstrap phase.
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
;; Sambamba, built with ldc.  The BioD sources are provided as a separate
;; origin and copied into the build tree.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lomereiter/sambamba/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "17076gijd65a3f07zns2gvbgahiz5lriwsa6dq353ss3jl85d8vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:make-flags
       '("D_COMPILER=ldc2"
         ;; Override "--compiler" flag only.
         "D_FLAGS=--compiler=ldc2 -IBioD -g -d"
         "sambamba-ldmd2-64")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the "biod" input into the "BioD" directory named by the
         ;; "-IBioD" flag above.
         (add-after 'unpack 'place-biod
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         ;; Drop the htslib-static and lz4-static entries from the Makefile.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               ((" htslib-static lz4-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (mkdir-p bindir)
               (install-file "build/sambamba" bindir)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("biod"
        ,(let ((commit "1248586b54af4bd4dfb28ebfebfc6bf012e7a587"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1m8hi1n7x0ri4l6s9i0x6jg4z4v94xrfdzp7mbizdipfag0m17g3")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "http://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
;; Ritornello: no configure script and no tests; the single "Ritornello"
;; executable is installed by a custom phase.
(define-public ritornello
  (package
    (name "ritornello")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/KlugerLab/"
                           "Ritornello/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the includes of <sam.h> to <samtools/sam.h>.
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             (substitute* '("src/SamStream.h"
                            "src/BufferedGenomeReader.h")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
;; Trim Galore!: a single script shipped in a zip archive.  Nothing is
;; compiled; the script is patched to use absolute tool paths and installed.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/trim_galore/trim_galore_v"
                           version ".zip"))
       (sha256
        (base32
         "0b9qdxi4521gsrjvbhgky8g7kry9b5nx3byzaxkgxz7p4k8bn1mn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; The archive contains plain files.
         (replace 'unpack
           (lambda* (#:key source #:allow-other-keys)
             (zero? (system* "unzip" source))))
         (delete 'configure)
         (delete 'build)
         ;; Point the script at the cutadapt, gzip and gunzip executables of
         ;; the corresponding inputs.
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((cutadapt-exe (string-append (assoc-ref inputs "cutadapt")
                                                "/bin/cutadapt"))
                   (gzip-bindir (string-append (assoc-ref inputs "gzip")
                                               "/bin/")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '" cutadapt-exe "'"))
                 (("\\| gzip")
                  (string-append "| " gzip-bindir "gzip"))
                 (("\"gunzip")
                  (string-append "\"" gzip-bindir "gunzip")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (install-file "trim_galore" bindir)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
;; GESS: a set of Python 2 scripts.  Nothing is built; the source tree is
;; copied into site-packages and GESS.py is wrapped and linked into bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    (out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    (target (string-append
                             out "/lib/python2.7/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.  The interpreter is taken from the "python" input
               ;; itself rather than from a PATH lookup, so the shebang
               ;; always names the interpreter this package declares.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" python "/bin/python" "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program (string-append target "GESS.py")
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
;; PHYLIP: built from the "src" subdirectory with Makefile.unx; every file
;; found under "../exe" afterwards is installed into bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                                  "download/phylip-" version ".tar.gz"))
              (sha256
               (base32
                "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*")))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
;; IMP, built with CMake; tests are disabled (see the FIXME below).
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://integrativemodeling.org/"
                                  version "/download/imp-" version ".tar.gz"))
              (sha256
               (base32
                "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f
       ;; Do not place libraries in an architecture-specific directory.
       #:configure-flags
       (list "-DCMAKE_INSTALL_LIBDIR=lib")))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
;; TADbit, built with the Python build system using Python 2; setup.py is
;; patched before the build.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/3DGenomes/TADbit/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir (string-append (assoc-ref outputs "out")
                                                  "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (chmod "_pytadbit/_version.py" #o664)
               (chmod "README.rst" #o664)
               ;; Don't attempt to install the bash completions to
               ;; the home directory.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "http://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
;; kentUtils.  The source snippet removes every directory under "src" that is
;; not on the allow-list and trims the makefiles to match.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ENCODE-DCC/kentUtils/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((keep (list "." ".."
                             "utils" "lib" "inc" "tagStorm"
                             "parasol" "htslib"))
                 (directory? (lambda (file)
                               (eq? 'directory (stat:type (stat file))))))
             ;; Visit each entry of "src" that is not on the keep list and
             ;; delete it when it is a directory.
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           (and (directory? path)
                                (delete-file-recursively path))))
                       (scandir "src"
                                (lambda (entry)
                                  (not (member entry keep))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")
             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Copy the "samtabix" input into the build tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "samtabix")
                               "samtabix")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (copy-recursively "bin" bindir))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mysql" ,mysql)
       ("openssl" ,openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
;; F-Seq, built with Ant from a pinned upstream git commit.  All files under
;; "lib" are removed from the source by the snippet.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; Abbreviate the commit to seven characters, as the other git-based
      ;; packages in this file do, instead of embedding the full hash in the
      ;; version string.
      (version (string-append "1.1-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                ;; The "-checkout" suffix marks this as a directory checkout
                ;; rather than an archive.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (doc (string-append target "/share/doc/f-seq/")))
                 (mkdir-p target)
                 (mkdir-p doc)
                 ;; Replace "java" in the launcher script with the absolute
                 ;; file name found on PATH.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" (string-append target "/bin"))
                 (install-file "build~/fseq.jar" (string-append target "/lib"))
                 (copy-recursively "lib" (string-append target "/lib"))
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
;; Bismark: nothing is configured or built; the listed scripts and documents
;; are copied straight from the source tree.
(define-public bismark
  (package
    (name "bismark")
    (version "0.16.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/FelixKrueger/Bismark/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1204i0pa02ll2jn5pnxypkclnskvv7a2nwh5nxhagmhxk9wfv9sq"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/bismark"))
                    ;; Files that go to DOCDIR.
                    (documents '("Bismark_User_Guide.pdf"
                                 "RELEASE_NOTES.txt"))
                    ;; Files that go to BINDIR.
                    (programs '("bismark"
                                "bismark_genome_preparation"
                                "bismark_methylation_extractor"
                                "bismark2bedGraph"
                                "bismark2report"
                                "coverage2cytosine"
                                "deduplicate_bismark"
                                "bismark_sitrep.tpl"
                                "bam2nuc"
                                "bismark2summary")))
               (mkdir-p docdir)
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (for-each (lambda (document)
                           (install-file document docdir))
                         documents)
               #t))))))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:
@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
|