Needle
An application for fast and efficient searches of NGS data.
shared.h
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/needle/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <robin_hood.h>
11 
12 #include <seqan3/alphabet/nucleotide/dna4.hpp>
13 #include <seqan3/io/sequence_file/input.hpp>
14 #include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
15 #include <seqan3/search/kmer_index/shape.hpp>
16 #include <seqan3/search/views/minimiser_hash.hpp>
17 
/*!\brief Reduces a 64-bit seed to its `2 * kmer_size` most significant bits, moved to the low-order end.
 * \param kmer_size The k-mer size; `2 * kmer_size` bits of the seed are kept.
 * \param seed      The seed to adjust.
 * \returns `seed >> (64 - 2 * kmer_size)` for `kmer_size` in [1, 32]; `0` for `kmer_size == 0` (no bits kept);
 *          the unmodified `seed` for `kmer_size > 32` (all 64 bits kept).
 *
 * \details Shifting a 64-bit value by 64 or more bits is undefined behaviour, so the two boundary cases
 *          (`kmer_size == 0` and `kmer_size > 32`) are handled explicitly instead of being passed to `>>`.
 */
inline constexpr static uint64_t adjust_seed(uint8_t const kmer_size, uint64_t const seed = 0x8F3F73B5CF1C9ADEULL) noexcept
{
    unsigned const kept_bits = 2u * kmer_size;

    if (kept_bits == 0u) // A shift by 64 would be undefined; keeping zero bits yields 0.
        return 0u;

    if (kept_bits >= 64u) // `64u - kept_bits` would wrap around; nothing needs to be shifted out.
        return seed;

    return seed >> (64u - kept_bits);
}
22 
25 {
26  std::filesystem::path path_out{"./"};
27  uint8_t threads{1};
28 };
29 
32 {
33  uint8_t k{20};
34  seqan3::seed s{0x8F3F73B5CF1C9ADEULL};
35  seqan3::shape shape = seqan3::ungapped{k};
36  seqan3::window_size w_size{60};
37 };
38 
41 {
42  bool compressed = false;
43  std::vector<uint16_t> expression_thresholds{}; // Expression levels which should be created
44  uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
45  bool samplewise{false};
46 
47  template<class Archive>
48  void save(Archive & archive) const
49  {
50  archive(k);
51  archive(w_size.get());
52  archive(s.get());
53  archive(shape);
54  archive(compressed);
56  archive(expression_thresholds);
57  archive(samplewise);
58  }
59 
60  template<class Archive>
61  void load(Archive & archive)
62  {
63  archive(k);
64  archive(w_size.get());
65  archive(s.get());
66  archive(shape);
67  archive(compressed);
69  archive(expression_thresholds);
70  archive(samplewise);
71  }
72 };
73 
78 static void load_args(estimate_ibf_arguments & args, std::filesystem::path ipath)
79 {
80  std::ifstream is{ipath, std::ios::binary};
81  cereal::BinaryInputArchive iarchive{is};
82  iarchive(args);
83 }
84 
89 static void store_args(estimate_ibf_arguments const & args, std::filesystem::path opath)
90 {
91  std::ofstream os{opath, std::ios::binary};
92  cereal::BinaryOutputArchive oarchive{os};
93  oarchive(args);
94 }
95 
97 struct my_traits : seqan3::sequence_file_input_default_traits_dna
98 {
99  using sequence_alphabet = seqan3::dna4;
100  //TODO: Should I use a bitcompressed_vector to save memory but with the disadvantage of losing speed?
101  //template <typename alph>
102  //using sequence_container = seqan3::bitcompressed_vector<alph>;
103 };
104 
109 template <class IBFType>
110 void load_ibf(IBFType & ibf, std::filesystem::path ipath)
111 {
112  std::ifstream is{ipath, std::ios::binary};
113  cereal::BinaryInputArchive iarchive{is};
114  iarchive(ibf);
115 }
116 
121 template <class IBFType>
122 void store_ibf(IBFType const & ibf,
123  std::filesystem::path opath)
124 {
125  std::ofstream os{opath, std::ios::binary};
126  cereal::BinaryOutputArchive oarchive{os};
127  oarchive(seqan3::interleaved_bloom_filter(ibf));
128 }
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition: ibf.cpp:672
uint64_t shape
Definition: main.cpp:17
void load_ibf(IBFType &ibf, std::filesystem::path ipath)
Function that loads compressed and uncompressed IBFs.
Definition: shared.h:110
void store_ibf(IBFType const &ibf, std::filesystem::path opath)
Function that stores compressed and uncompressed IBFs.
Definition: shared.h:122
arguments used for all tools
Definition: shared.h:25
uint8_t threads
Definition: shared.h:27
std::filesystem::path path_out
Definition: shared.h:26
arguments used for estimate, ibf, ibfmin
Definition: shared.h:41
void load(Archive &archive)
Definition: shared.h:61
std::vector< uint16_t > expression_thresholds
Definition: shared.h:43
uint8_t number_expression_thresholds
Definition: shared.h:44
bool compressed
Definition: shared.h:42
bool samplewise
Definition: shared.h:45
void save(Archive &archive) const
Definition: shared.h:48
arguments used for estimate, ibf, minimiser
Definition: shared.h:32
uint8_t k
Definition: shared.h:33
seqan3::shape shape
Definition: shared.h:35
seqan3::window_size w_size
Definition: shared.h:36
seqan3::seed s
Definition: shared.h:34
Use dna4 instead of default dna5.
Definition: shared.h:98
seqan3::dna4 sequence_alphabet
Definition: shared.h:99