Needle
An application for fast and efficient searches of NGS data.
ibf.h
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/needle/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <iostream>
11 #include <math.h>
12 #include <numeric>
13 #include <string>
14 
15 #include <seqan3/alphabet/container/concatenated_sequences.hpp>
16 #include <seqan3/alphabet/nucleotide/dna4.hpp>
17 #include <filesystem>
18 
19 #include "shared.h"
20 
22 {
23  std::filesystem::path include_file; // Needs to be defined when only minimisers appearing in this file should be stored
24  std::filesystem::path exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
25  std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
26  bool paired = false; // If true, then experiments are seen as paired-end experiments
27  bool experiment_names = false; // Flag indicating whether the names of the experiments should be stored in a txt file
28 };
29 
32  int maxi;
33  RandomGenerator(int max) :
34  maxi(max) {
35  }
36 
37  int operator()() {
38  return rand() % maxi;
39  }
40 };
41 
50 void count(min_arguments const & args, std::vector<std::filesystem::path> sequence_files, std::filesystem::path genome_file,
51  std::filesystem::path exclude_file, bool paired);
52 
58 void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<uint64_t, uint16_t> & hash_table);
59 
66 void read_binary_start(min_arguments & args, std::filesystem::path filename, uint64_t & num_of_minimisers, uint8_t & cutoff);
67 
80 std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files, estimate_ibf_arguments & ibf_args,
81  minimiser_arguments & minimiser_args, std::vector<double> & fpr, std::vector<uint8_t> & cutoffs,
82  std::filesystem::path const expression_by_genome_file = "",
83  size_t num_hash = 1);
84 
95 std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
96  estimate_ibf_arguments & ibf_args, std::vector<double> & fpr,
97  std::filesystem::path const expression_by_genome_file = "",
98  size_t num_hash = 1);
99 
106 void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_arguments const & args,
107  minimiser_arguments & minimiser_args, std::vector<uint8_t> & cutoffs);
void minimiser(std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs)
Create minimiser and header files.
Definition: ibf.cpp:794
void count(min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path genome_file, std::filesystem::path exclude_file, bool paired)
Get the concrete expression values (= median of all counts of one transcript) for given experiments....
Definition: ibf.cpp:143
void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table)
Reads a binary file that needle minimiser creates.
Definition: ibf.cpp:200
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition: ibf.cpp:672
void read_binary_start(min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff)
Reads the beginning of a binary file that needle minimiser creates.
Definition: ibf.cpp:232
Generates a pseudo-random integer strictly less than a given maximum, i.e. in the range [0, maxi) for a positive maximum (computed as rand() % maxi).
Definition: ibf.h:31
int maxi
Definition: ibf.h:32
RandomGenerator(int max)
Definition: ibf.h:33
int operator()()
Definition: ibf.h:37
arguments used for estimate, ibf, ibfmin
Definition: shared.h:41
arguments used for estimate, ibf, minimiser
Definition: shared.h:32
Definition: ibf.h:22
bool paired
Definition: ibf.h:26
std::filesystem::path include_file
Definition: ibf.h:23
std::vector< int > samples
Definition: ibf.h:25
std::filesystem::path exclude_file
Definition: ibf.h:24
bool experiment_names
Definition: ibf.h:27