#include <iostream>
#include <math.h>
#include <numeric>
#include <string>
#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <filesystem>
#include "shared.h"
Go to the source code of this file.
|
void | count (min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path genome_file, std::filesystem::path exclude_file, bool paired) |
| Get the concrete expression values (= median of all counts of one transcript) for given experiments. This function can be used to estimate how good the median approach can be, if all count values are available. More...
|
|
void | read_binary (std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table) |
| Reads a binary file that needle minimiser creates. More...
|
|
void | read_binary_start (min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff) |
| Reads the beginning of a binary file that needle minimiser creates. More...
|
|
std::vector< uint16_t > | ibf (std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1) |
| Creates IBFs. More...
|
|
std::vector< uint16_t > | ibf (std::vector< std::filesystem::path > const &minimiser_files, estimate_ibf_arguments &ibf_args, std::vector< double > &fpr, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1) |
| Creates IBFs based on the minimiser files. More...
|
|
void | minimiser (std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs) |
| Create minimiser and header files. More...
|
|
◆ count()
void count |
( |
min_arguments const & |
args, |
|
|
std::vector< std::filesystem::path > |
sequence_files, |
|
|
std::filesystem::path |
genome_file, |
|
|
std::filesystem::path |
exclude_file, |
|
|
bool |
paired |
|
) |
| |
Get the concrete expression values (= median of all counts of one transcript) for given experiments. This function can be used to estimate how good the median approach can be, if all count values are available.
- Parameters
-
args | The minimiser arguments to use (seed, shape, window size). |
sequence_files | The sequence files, which contain the reads. |
genome_file | A file containing the transcripts whose expression values should be determined. |
exclude_file | A file containing minimisers which should be ignored. |
paired | Flag to indicate if input data is paired or not. |
◆ ibf() [1/2]
std::vector<uint16_t> ibf |
( |
std::vector< std::filesystem::path > const & |
minimiser_files, |
|
|
estimate_ibf_arguments & |
ibf_args, |
|
|
std::vector< double > & |
fpr, |
|
|
std::filesystem::path const |
expression_by_genome_file = "" , |
|
|
size_t |
num_hash = 1 |
|
) |
| |
Creates IBFs based on the minimiser files.
- Parameters
-
minimiser_files | A vector of minimiser file paths. |
ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
fpr | The average false positive rate that should be used. |
expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression thresholds. |
num_hash | The number of hash functions to use. |
- Returns
- The expression thresholds per experiment.
◆ ibf() [2/2]
std::vector<uint16_t> ibf |
( |
std::vector< std::filesystem::path > const & |
sequence_files, |
|
|
estimate_ibf_arguments & |
ibf_args, |
|
|
minimiser_arguments & |
minimiser_args, |
|
|
std::vector< double > & |
fpr, |
|
|
std::vector< uint8_t > & |
cutoffs, |
|
|
std::filesystem::path const |
expression_by_genome_file = "" , |
|
|
size_t |
num_hash = 1 |
|
) |
| |
Creates IBFs.
- Parameters
-
sequence_files | A vector of sequence file paths. |
ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
minimiser_args | The minimiser specific arguments to use. |
fpr | The average false positive rate that should be used. |
cutoffs | List of cutoffs. |
expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression thresholds. |
num_hash | The number of hash functions to use. |
- Returns
- The expression thresholds per experiment.
◆ minimiser()
void minimiser |
( |
std::vector< std::filesystem::path > const & |
sequence_files, |
|
|
min_arguments const & |
args, |
|
|
minimiser_arguments & |
minimiser_args, |
|
|
std::vector< uint8_t > & |
cutoffs |
|
) |
| |
Create minimiser and header files.
- Parameters
-
sequence_files | A vector of sequence file paths. |
args | The minimiser arguments to use (seed, shape, window size). |
minimiser_args | The minimiser specific arguments to use. |
cutoffs | List of cutoffs. |
◆ read_binary()
void read_binary |
( |
std::filesystem::path |
filename, |
|
|
robin_hood::unordered_node_map< uint64_t, uint16_t > & |
hash_table |
|
) |
| |
Reads a binary file that needle minimiser creates.
- Parameters
-
filename | The filename of the binary file. |
hash_table | The hash table to store minimisers into. |
◆ read_binary_start()
void read_binary_start |
( |
min_arguments & |
args, |
|
|
std::filesystem::path |
filename, |
|
|
uint64_t & |
num_of_minimisers, |
|
|
uint8_t & |
cutoff |
|
) |
| |
Reads the beginning of a binary file that needle minimiser creates.
- Parameters
-
args | Min arguments. |
filename | The filename of the binary file. |
num_of_minimisers | Variable in which the number of minimisers should be stored. |
cutoff | The cutoff value. |