pax_global_header00006660000000000000000000000064143621743250014521gustar00rootroot0000000000000052 comment=ae5cdf846af85bd1d0e310c05e5c67b037f51a25 tabixpp-1.1.2/000077500000000000000000000000001436217432500131715ustar00rootroot00000000000000tabixpp-1.1.2/.gitignore000066400000000000000000000000511436217432500151550ustar00rootroot00000000000000*.a *.o libtabix.so* tabix tabix++ bgzip tabixpp-1.1.2/.gitmodules000066400000000000000000000001221436217432500153410ustar00rootroot00000000000000[submodule "htslib"] path = htslib url = https://github.com/samtools/htslib.git tabixpp-1.1.2/LICENSE000066400000000000000000000020701436217432500141750ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015 Erik Garrison Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. tabixpp-1.1.2/Makefile000066400000000000000000000043121436217432500146310ustar00rootroot00000000000000 # Use ?= to allow overriding from the env or command-line, e.g. # # make CXXFLAGS="-O3 -fPIC" install # # Package managers will override many of these variables automatically, so # this is aimed at making it easy to create packages (Debian packages, # FreeBSD ports, MacPorts, pkgsrc, etc.) CC ?= cc CXX ?= c++ CXXFLAGS ?= -g -Wall -O2 #-m64 #-arch ppc CXXFLAGS += -fPIC INCLUDES ?= -Ihtslib HTS_HEADERS ?= htslib/htslib/bgzf.h htslib/htslib/tbx.h HTS_LIB ?= htslib/libhts.a LIBPATH ?= -L. -Lhtslib DESTDIR ?= stage PREFIX ?= /usr/local STRIP ?= strip INSTALL ?= install -c MKDIR ?= mkdir -p AR ?= ar DFLAGS = -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE BIN = tabix++ LIB = libtabix.a SOVERSION = 1 SLIB = libtabix.so.$(SOVERSION) OBJS = tabix.o SUBDIRS = . .SUFFIXES:.c .o .c.o: $(CC) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ all-recur lib-recur clean-recur cleanlocal-recur install-recur: @target=`echo $@ | sed s/-recur//`; \ wdir=`pwd`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ cd $$subdir; \ $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CXXFLAGS="$(CXXFLAGS)" \ INCLUDES="$(INCLUDES)" LIBPATH="$(LIBPATH)" $$target \ || exit 1; \ cd $$wdir; \ done; all: $(BIN) $(LIB) $(SLIB) tabix.o: $(HTS_HEADERS) tabix.cpp tabix.hpp $(CXX) $(CXXFLAGS) -c tabix.cpp $(INCLUDES) htslib/libhts.a: cd htslib && $(MAKE) lib-static $(LIB): $(OBJS) $(AR) rs $(LIB) $(OBJS) $(SLIB): $(OBJS) $(CXX) -shared -Wl,-soname,$(SLIB) -o $(SLIB) $(OBJS) tabix++: $(OBJS) main.cpp $(HTS_LIB) $(CXX) $(CXXFLAGS) -o $@ main.cpp $(OBJS) $(INCLUDES) $(LIBPATH) \ -lhts -lpthread -lm -lz -lcurl -llzma -lbz2 test: all ./tabix++ test/vcf_file.vcf.gz install: all $(MKDIR) $(DESTDIR)$(PREFIX)/bin $(MKDIR) $(DESTDIR)$(PREFIX)/include $(MKDIR) $(DESTDIR)$(PREFIX)/lib $(INSTALL) $(BIN) $(DESTDIR)$(PREFIX)/bin $(INSTALL) *.hpp $(DESTDIR)$(PREFIX)/include $(INSTALL) $(LIB) $(SLIB) $(DESTDIR)$(PREFIX)/lib install-strip: install $(STRIP) $(DESTDIR)$(PREFIX)/bin/$(BIN) $(DESTDIR)$(PREFIX)/lib/$(SLIB) cleanlocal: rm -rf $(BIN) $(LIB) $(SLIB) $(OBJS) $(DESTDIR) rm -fr gmon.out *.o a.out *.dSYM $(BIN) *~ *.a tabix.aux tabix.log \ tabix.pdf *.class libtabix.*.dylib cd htslib && $(MAKE) clean clean: cleanlocal-recur tabixpp-1.1.2/README.md000066400000000000000000000014641436217432500144550ustar00rootroot00000000000000This is a C++ wrapper around [tabix project](http://samtools.sourceforge.net/tabix.shtml) which abstracts some of the details of opening and jumping in tabix-indexed files. # Build ```sh git submodule update --init --recursive make CC=gcc -j 16 make test ``` See also [guix.scm](./guix.scm) for the build environment we test with. # Dependencies tabixpp has htslib as a dependency. If you want to build from the included submodule make sure that the following dependencies are available: ``` libcurl libcurl - Library to transfer files with ftp, http, etc. zlib zlib - zlib compression library liblzma liblzma - General purpose data compression library ``` It is also possible to disable these inside htslib/config.h --- generated after the first build. Author: Erik Garrison tabixpp-1.1.2/guix.scm000066400000000000000000000040571436217432500146570ustar00rootroot00000000000000;; To use this file to build HEAD of tabixpp: ;; ;; guix build -f guix.scm ;; ;; To get a development container (emacs shell will work) ;; ;; guix shell -C -D -f guix.scm ;; ;; For the tests you need /usr/bin/env. In a container create it with ;; ;; mkdir -p /usr/bin ; ln -s $GUIX_ENVIRONMENT/bin/env /usr/bin/env ;; ;; or in one go ;; ;; guix shell -C -D -f guix.scm -- bash --init-file <(echo "mkdir -p /usr/bin && ln -s \$GUIX_ENVIRONMENT/bin/env /usr/bin/env") ;; ;; make CC=gcc -j 16 (use-modules ((guix licenses) #:prefix license:) (guix gexp) (guix packages) (guix git-download) (guix build-system cmake) (gnu packages algebra) (gnu packages base) (gnu packages compression) (gnu packages bioinformatics) (gnu packages build-tools) (gnu packages curl) (gnu packages gcc) (gnu packages gdb) (gnu packages haskell-xyz) ; pandoc for help files (gnu packages llvm) (gnu packages parallel) (gnu packages perl) (gnu packages perl6) (gnu packages pkg-config) (gnu packages python) (gnu packages python-xyz) ; for pybind11 (gnu packages ruby) (gnu packages version-control) (srfi srfi-1) (ice-9 popen) (ice-9 rdelim)) (define %source-dir (dirname (current-filename))) (define %git-commit (read-string (open-pipe "git show HEAD | head -1 | cut -d ' ' -f 2" OPEN_READ))) (define-public tabixpp-git (package (name "tabixpp-git") (version (git-version "1.0.0" "HEAD" %git-commit)) (source (local-file %source-dir #:recursive? #t)) (build-system cmake-build-system) (inputs `(("curl" ,curl) ("gcc" ,gcc-11) ;; test ("gdb" ,gdb) ;; ("htslib" ,htslib) ;; ("tabixpp" ,tabixpp) ("xz" ,xz) ("zlib" ,zlib))) (native-inputs `(("pkg-config" ,pkg-config) ("git" ,git))) (home-page "https://github.com/tabixpp/tabixpp/") (synopsis "C++ wrapper library for tabix") (description " C++ wrapper around tabix project which abstracts some of the details of opening and jumping in tabix-indexed files.") (license license:expat))) tabixpp-git tabixpp-1.1.2/htslib/000077500000000000000000000000001436217432500144565ustar00rootroot00000000000000tabixpp-1.1.2/main.cpp000066400000000000000000000025241436217432500146240ustar00rootroot00000000000000#include "tabix.hpp" #include using namespace std; const string VERSION = "1.1.1"; int main(int argc, char** argv) { if (argc < 2) { cout << argv[0] << " [file] [ [region] ... ]" << endl << "Writes out regions from bgzf-compressed, tabix-indexed file." << endl << "Supply 'header' to print out the header, and no regions to" << endl << "print the contents of the entire file." << endl << endl << "Version " << VERSION << endl; return 1; } string filename = string(argv[1]); vector regions; for (int i = 2; i < argc; ++i) { regions.push_back(string(argv[i])); } Tabix file(filename); if (!regions.empty()) { for (vector::iterator r = regions.begin(); r != regions.end(); ++r) { string& region = *r; if (region == "header") { string header; file.getHeader(header); cout << header; } else { string line; file.setRegion(region); while (file.getNextLine(line)) { cout << line << endl; } } } } else { string line; while (file.getNextLine(line)) { cout << line << endl; } } return 0; } tabixpp-1.1.2/tabix.cpp000066400000000000000000000107641436217432500150140ustar00rootroot00000000000000#include "tabix.hpp" Tabix::Tabix(void) { } Tabix::Tabix(string& file) { has_jumped = false; filename = file; str.l = 0; str.m = 0; str.s = NULL; const char* cfilename = file.c_str(); struct stat stat_tbi,stat_vcf; char *fnidx = (char*) calloc(strlen(cfilename) + 5, 1); strcat(strcpy(fnidx, cfilename), ".tbi"); hFILE *fp; if (!(fp = hopen(cfilename, "r"))) { cerr << "can't open " << cfilename; return; } htsFormat fmt; if ( hts_detect_format(fp,&fmt) < 0 ) { cerr << "[tabix++] was bgzip used to compress this file? " << file << endl; free(fnidx); exit(1); } if (hclose(fp) != 0) { cerr << "can't close " << cfilename; return; } // Common source of errors: new VCF is used with an old index stat(fnidx, &stat_tbi); stat(cfilename, &stat_vcf); if ( stat_vcf.st_mtime > stat_tbi.st_mtime ) { cerr << "[tabix++] the index file is older than the vcf file. Please use '-f' to overwrite or reindex." << endl; free(fnidx); exit(1); } free(fnidx); if ((fn = hts_open(cfilename, "r")) == 0) { cerr << "[tabix++] fail to open the data file." << endl; exit(1); } if ((tbx = tbx_index_load(cfilename)) == NULL) { cerr << "[tabix++] failed to load the index file." << endl; exit(1); } int nseq; const char** seq = tbx_seqnames(tbx, &nseq); for (int i=0; ic_str() : "")); } Tabix::~Tabix(void) { tbx_itr_destroy(iter); tbx_destroy(tbx); free(str.s); } const kstring_t * Tabix::getKstringPtr(){ return &str; } void Tabix::getHeader(string& header) { header.clear(); while ( hts_getline(fn, KS_SEP_LINE, &str) >= 0 ) { if ( !str.l || str.s[0]!=tbx->conf.meta_char ) { break; } else { header += string(str.s); header += "\n"; } } // set back to start current_chrom = chroms.begin(); if (iter) tbx_itr_destroy(iter); iter = tbx_itr_querys(tbx, (current_chrom != chroms.end() ? current_chrom->c_str() : "")); } bool Tabix::setRegion(string& region) { tbx_itr_destroy(iter); iter = tbx_itr_querys(tbx, region.c_str()); has_jumped = true; return true; } bool Tabix::getNextLine(string& line) { if (has_jumped) { if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { line = string(str.s); return true; } else return false; } else { // step through all sequences in the file // we've never jumped, so read everything if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { line = string(str.s); return true; } else { // While we aren't at the end, advance. While we're still not at the end... while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) { tbx_itr_destroy(iter); iter = tbx_itr_querys(tbx, current_chrom->c_str()); if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { line = string(str.s); return true; } else { ++current_chrom; } } return false; } } } bool Tabix::getNextLineKS() { if (has_jumped) { if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { //line = &str; return true; } else return false; } else { // step through all sequences in the file // we've never jumped, so read everything if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { //line = &str; return true; } else { // While we aren't at the end, advance. While we're still not at the end... while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) { tbx_itr_destroy(iter); iter = tbx_itr_querys(tbx, current_chrom->c_str()); if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) { //line = &str; return true; } else { ++current_chrom; } } return false; } } } tabixpp-1.1.2/tabix.hpp000066400000000000000000000030611436217432500150110ustar00rootroot00000000000000#pragma once #include #include #include #include "htslib/bgzf.h" #include "htslib/tbx.h" #include "htslib/kseq.h" #include "htslib/hfile.h" #include #include #include using namespace std; class Tabix { htsFile* fn; tbx_t* tbx; kstring_t str; hts_itr_t* iter; const tbx_conf_t *idxconf; int tid, beg, end; string firstline; bool has_jumped; vector::iterator current_chrom; /* uncompressed file pos off_t hts_utell1(htsFile *fp) { if (fp->is_bgzf) { return bgzf_htell(fp->fp.bgzf); } else return htell(fp->fp.hfile); } */ // Get file position in compressed file - really on disk off_t bgzf_htell1(BGZF *fp) { if (fp->mt) { return -1; // skip if multithreading //pthread_mutex_lock(&fp->mt->job_pool_m); //off_t pos = fp->block_address + fp->block_clength; //pthread_mutex_unlock(&fp->mt->job_pool_m); //return pos; } else { return htell(fp->fp); } } public: string filename; vector chroms; Tabix(void); Tabix(string& file); ~Tabix(void); const kstring_t * getKstringPtr(); void getHeader(string& header); bool setRegion(string& region); bool getNextLine(string& line); bool getNextLineKS(); // Specialised function gets actual file position when using bgzf long file_pos() { return bgzf_htell1(fn->fp.bgzf); }; }; tabixpp-1.1.2/test/000077500000000000000000000000001436217432500141505ustar00rootroot00000000000000tabixpp-1.1.2/test/vcf_file.vcf.gz000066400000000000000000000017731436217432500170540ustar00rootroot00000000000000BCms:?_ѯT*rg8XxUL'jTA?P|zБ>ٗXHt˥{Ra5co:5"[NIV(`%E2 6QQ[swREz D&xր `Pn"nyyij*fU?L^hlV}'U=QwlXWMܽywdK< *9a`úP=,iD_ĉOOԤO&ʔD%|bXs^pE M vҔ/|"J0%e(hv\l(4_,dX_ 0hly?ml[?٪x&V?u:˸¹!"%csv>l#<±V G3](y~0>MKS4C`~~iU|S?GO gz3yƏS4Cy= QH@!PdBMP(@SgHֽ N 0.Kzݲ;.MvEh2(\]Gഄ34W0u!EzഗF[v6vh ~WHs՟kߗb0U ԠmPL ~@e %MCIHi]+ I<!ȿR eQ }RH*E(i@Eh DԐVZj'P=5tw;&Ab P GQVmq,5Ԥr(;M(9J$iSC7jz b"CJsP ǡFK (v۲zg~ׯl:&=+ԉjg BCtabixpp-1.1.2/test/vcf_file.vcf.gz.gzi000066400000000000000000000000101436217432500176230ustar00rootroot00000000000000tabixpp-1.1.2/test/vcf_file.vcf.gz.tbi000066400000000000000000000004011436217432500176140ustar00rootroot00000000000000BCս P `'0в++CD B^Hg)䋋yH"_bYe-e%kH݇w[1+˱Jud9Y[}^ǫA^Kw ]Ex}נKB_/>O;RJ) =2AL}4 Rg DIOL+(_|QdBC