1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__ 2 #define BMSPARSEVEC_ALGO__H__INCLUDED__ 24 #ifndef BM__H__INCLUDED__ 27 # error missing include (bm.h or bm64.h) 37 # pragma warning( disable: 4146 ) 65 unsigned sv_plains = svect.plains();
67 BM_ASSERT(sv_plains > high_bit && high_bit > 0);
73 for (i = high_bit+1; i < sv_plains; ++i)
84 for (i = high_bit;
true; --i)
103 template<
typename SV>
106 if (low_bit == 0)
return;
109 unsigned sv_plains = svect.plains();
114 for (i = low_bit+1; i < sv_plains; ++i)
118 bv_acc1.
bit_or(*bv_plain);
125 for (i = low_bit-1;
true; --i)
130 bv_acc2.
bit_or(*bv_plain);
143 bv_acc1.bit_xor(bv_acc2);
144 bv_low_plain->bit_or(bv_acc1);
160 template<
typename SV>
182 void bind(
const SV& sv,
bool sorted);
199 typename SV::value_type value,
215 typename SV::value_type value,
216 typename SV::size_type& pos);
222 const typename SV::value_type* str,
223 typename SV::size_type& pos);
230 bool find_eq_str(
const typename SV::value_type* str,
231 typename SV::size_type& pos);
238 const typename SV::value_type* str,
239 typename SV::size_type& pos);
253 const typename SV::value_type val,
254 typename SV::size_type& pos);
267 const typename SV::value_type* str,
268 typename SV::size_type& pos);
276 typename SV::size_type& pos);
310 template<
typename It>
321 bool any_zero =
false;
322 for (; start < end; ++start)
324 value_type v = *start;
325 any_zero |= (v == 0);
342 typename SV::value_type value,
361 typename SV::value_type value,
363 typename SV::size_type search_limit = 0);
367 typename SV::value_type value,
372 const typename SV::value_type* str,
379 typename SV::value_type value);
383 const typename SV::value_type* str,
384 unsigned octet_start);
390 int compare_str(
const SV& sv, size_type idx,
const value_type* str);
393 int compare(
const SV& sv, size_type idx,
const value_type val);
413 typedef bm::heap_matrix<
typename SV::value_type,
420 allocator_pool_type pool_;
421 bvector_type bv_tmp_;
425 size_type mask_from_;
430 heap_matrix_type block0_elements_cache_;
431 heap_matrix_type block3_elements_cache_;
432 size_type effective_str_max_;
434 value_type remap_value_vect_[SV::max_vector_size];
436 bm::id64_t vector_plain_masks_[SV::max_vector_size];
437 matrix_search_buf_type hmatr_;
452 template<
typename SV>
479 void remap(
const bvector_type& bv_in,
480 bvector_type& bv_out);
488 void remap(
const bvector_type& bv_in,
490 bvector_type& bv_out);
500 bool remap(size_type id_from,
const SV& sv_brel, size_type& id_to);
511 void run(
const bvector_type& bv_in,
513 bvector_type& bv_out)
515 remap(bv_in, sv_brel, bv_out);
528 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
532 void one_pass_run(
const bvector_type& bv_in,
534 bvector_type& bv_out);
537 template<
unsigned BSIZE>
572 template<
typename SV>
574 : sv_ptr_(0), gb_(0), have_stats_(false)
579 SV::throw_bad_alloc();
585 template<
typename SV>
595 template<
typename SV>
605 if (sv_brel->empty() || !compute_stats)
607 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
623 template<
typename SV>
631 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
634 if (!bv_non_null->test(id_from))
637 size_type idx = sv_brel.translate_address(id_from);
638 id_to = sv_brel.get(idx);
644 template<
typename SV>
656 remap(bv_in, bv_out);
661 template<
typename SV>
707 bv_out.set_bit_no_check(0);
716 buf_cnt = nb_old = 0;
719 for (; en.valid(); ++en)
721 typename SV::size_type idx = *en;
722 idx =
sv_ptr_->translate_address(idx);
734 gb_->gather_idx_[buf_cnt++] = idx;
738 gb_->gather_idx_[buf_cnt++] = idx;
758 template<
typename SV>
768 typename SV::const_iterator it = sv_brel.begin();
769 for (; it.valid(); ++it)
771 typename SV::value_type t_id = *it;
775 bv_out.set_bit_no_check(t_id);
785 template<
typename SV>
792 effective_str_max_ = 0;
797 template<
typename SV>
806 effective_str_max_ = sv.effective_vector_max();
808 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
809 block0_elements_cache_.set_zero();
811 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
812 block3_elements_cache_.set_zero();
818 value_type* s0 = block0_elements_cache_.row(nb);
819 sv.get(i, s0,
size_type(block0_elements_cache_.cols()));
823 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
825 sv.get(idx, s1,
size_type(block3_elements_cache_.cols()));
831 for (
unsigned i = 0; i < SV::max_vector_size; ++i)
833 vector_plain_masks_[i] = sv.get_plains_mask(i);
840 template<
typename SV>
844 effective_str_max_ = 0;
849 template<
typename SV>
858 find_nonzero(sv, bv_out);
859 if (sv.is_compressed())
862 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
863 decompress(sv, bv_out);
869 correct_nulls(sv, bv_out);
874 template<
typename SV>
890 bv_out.set_range(sv.size(),
bm::id_max - 1,
false);
896 template<
typename SV>
902 bv_out.bit_and(*bv_null);
907 template<
typename SV>
909 typename SV::value_type value,
911 typename SV::size_type search_limit)
918 find_zero(sv, bv_out);
923 bool found = prepare_and_sub_aggregator(sv, value);
930 bool any = (search_limit == 1);
931 found = agg_.combine_and_sub(bv_out, any);
938 template<
typename SV>
940 typename SV::value_type value,
951 bool found = prepare_and_sub_aggregator(sv, value);
954 found = agg_.find_first_and_sub(idx);
962 template<
typename SV>
964 const typename SV::value_type* str,
976 unsigned common_prefix_len = 0;
980 agg_.set_range_hint(mask_from_, mask_to_);
981 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
986 str = remap_value_vect_;
990 if (sv.is_remap() && str != remap_value_vect_)
992 bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
995 str = remap_value_vect_;
999 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1003 found = agg_.find_first_and_sub(idx);
1010 template<
typename SV>
1012 const typename SV::value_type* str,
1013 unsigned octet_start)
1015 unsigned char bits[64];
1018 for (; str[len] != 0; ++len)
1023 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1025 if (
unsigned(octet_idx) < octet_start)
1028 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1032 if (&sv == bound_sv_)
1033 plains_mask = vector_plain_masks_[octet_idx];
1035 plains_mask = sv.get_plains_mask(
unsigned(octet_idx));
1037 if ((value & plains_mask) != value)
1042 unsigned short bit_count_v =
bm::bitscan(value, bits);
1043 for (
unsigned i = 0; i < bit_count_v; ++i)
1045 unsigned bit_idx = bits[i];
1047 unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1052 unsigned vinv = unsigned(value);
1063 vinv &= unsigned(plains_mask);
1064 for (
unsigned octet_plain = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1068 unsigned plain_idx = octet_plain + bit_idx;
1077 unsigned plain_idx = unsigned(len * 8) + 1;
1078 typename SV::size_type plains;
1079 if (&sv == bound_sv_)
1080 plains = effective_str_max_ * unsigned(
sizeof(
value_type)) * 8;
1082 plains = sv.plains();
1083 for (; plain_idx < plains; ++plain_idx)
1094 template<
typename SV>
1096 typename SV::value_type value)
1098 unsigned char bits[
sizeof(value) * 8];
1099 unsigned short bit_count_v =
bm::bitscan(value, bits);
1106 for (
unsigned i = bit_count_v; i > 0; --i)
1108 unsigned bit_idx = bits[i-1];
1117 unsigned sv_plains = sv.effective_plains();
1118 for (
unsigned i = 0; i < sv_plains; ++i)
1122 if (bv && !(value & mask))
1131 template<
typename SV>
1133 typename SV::value_type value,
1141 find_zero(sv, bv_out);
1145 unsigned char bits[
sizeof(value) * 8];
1146 unsigned short bit_count_v =
bm::bitscan(value, bits);
1151 const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1160 for (
unsigned i = 0; i < bit_count_v; ++i)
1164 bv_out &= *bv_plain;
1173 unsigned sv_plains = sv.effective_plains();
1174 for (
unsigned i = 0; (i < sv_plains) && value; ++i)
1177 if (bv_plain && !(value & (
value_type(1) << i)))
1178 bv_out -= *bv_plain;
1184 template<
typename SV>
1186 typename SV::size_type& pos)
1189 return this->find_eq_str(*bound_sv_, str, pos);
1194 template<
typename SV>
1196 const typename SV::value_type* str,
1197 typename SV::size_type& pos)
1204 bool remaped =
false;
1207 if (sv.is_remap() && str != remap_value_vect_)
1209 remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1212 str = remap_value_vect_;
1217 found = find_first_eq(sv, str, found_pos, remaped);
1223 if (sv.is_compressed())
1224 found = sv.find_rank(found_pos + 1, pos);
1232 find_zero(sv, bv_zero);
1233 found = bv_zero.find(pos);
1240 template<
typename SV>
1242 const typename SV::value_type* str,
1243 typename SV::size_type& pos)
1251 bool remaped =
false;
1255 if (sv.is_remap() && str != remap_value_vect_)
1257 remaped = sv.remap_tosv(
1258 remap_value_vect_, SV::max_vector_size, str);
1264 reset_search_range();
1269 l = 0; r = sv.size()-1;
1276 if (dist < min_distance_cutoff)
1289 int cmp = this->compare_str(sv, mid, str);
1307 int cmp = this->compare_str(sv, mid, str);
1312 cmp = this->compare_str(sv, mid, str);
1317 cmp = this->compare_str(sv, mid, str);
1335 set_search_range(l, r);
1339 typename SV::size_type mid = dist/2+l;
1344 if (nb == 0 && r > bm::gap_max_bits)
1351 int cmp = this->compare_str(sv, mid, str);
1356 set_search_range(l, mid);
1366 found = find_first_eq(sv, str, found_pos, remaped);
1372 if (sv.is_compressed())
1373 found = sv.find_rank(found_pos + 1, pos);
1376 reset_search_range();
1382 find_zero(sv, bv_zero);
1383 found = bv_zero.find(pos);
1390 template<
typename SV>
1392 typename SV::size_type& pos)
1395 return bfind_eq_str(*bound_sv_, str, pos);
1400 template<
typename SV>
1402 const typename SV::value_type val,
1403 typename SV::size_type& pos)
1415 cmp = this->compare(sv, l, val);
1426 cmp = this->compare(sv, r, val);
1434 cmp = this->compare(sv, r, val);
1448 if (dist < linear_cutoff1)
1452 cmp = this->compare(sv, l, val);
1469 cmp = this->compare(sv, mid, val);
1476 cmp = this->compare(sv, i, val);
1490 if (dist < linear_cutoff2)
1492 typename SV::const_iterator it(&sv, l);
1493 for (; it.valid(); ++it, ++l)
1495 typename SV::value_type sv_value = it.value();
1496 if (sv_value == val)
1520 template<
typename SV>
1523 const typename SV::value_type* str,
1524 typename SV::size_type& pos)
1537 cmp = this->compare_str(sv, l, str);
1548 cmp = this->compare_str(sv, r, str);
1556 cmp = this->compare_str(sv, r, str);
1570 if (dist < linear_cutoff1)
1574 cmp = this->compare_str(sv, l, str);
1590 cmp = this->compare_str(sv, mid, str);
1597 cmp = this->compare_str(sv, i, str);
1611 if (dist < linear_cutoff2)
1615 dist = sv.decode(hmatr_, l, dist+1);
1616 for (
unsigned i = 0; i < dist; ++i, ++l)
1618 const typename SV::value_type* hm_str = hmatr_.row(i);
1619 cmp = ::strcmp(hm_str, str);
1631 cmp = this->compare_str(sv, l, str);
1650 template<
typename SV>
1655 if (bound_sv_ == &sv)
1661 value_type* s0 = block0_elements_cache_.row(nb);
1664 sv.get(idx, s0,
size_type(block0_elements_cache_.cols()));
1667 for (
unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1669 char octet = str[i];
char value = s0[i];
1670 res = (value > octet) - (value < octet);
1682 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1684 for (
unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
1686 char octet = str[i];
char value = s1[i];
1687 res = (value > octet) - (value < octet);
1696 return sv.compare(idx, str);
1701 template<
typename SV>
1707 return sv.compare(idx, val);
1712 template<
typename SV>
1714 typename SV::value_type value,
1724 find_zero(sv, bv_out);
1728 find_eq_with_nulls(sv, value, bv_out, 0);
1730 decompress(sv, bv_out);
1731 correct_nulls(sv, bv_out);
1736 template<
typename SV>
1738 typename SV::value_type value,
1739 typename SV::size_type& pos)
1744 find_eq(sv, value, bv_zero);
1745 bool found = bv_zero.find(pos);
1750 bool found = find_first_eq(sv, value, found_pos);
1756 if (sv.is_compressed())
1757 found = sv.find_rank(found_pos + 1, pos);
1765 template<
typename SV>
1770 for (
unsigned i = 0; i < sv.plains(); ++i)
1771 agg_.add(sv.get_plain(i));
1772 agg_.combine_or(bv_out);
1778 template<
typename SV>
1782 if (!sv.is_compressed())
1784 const bvector_type* bv_non_null = sv.get_null_bvector();
1788 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
1789 bv_out.swap(bv_tmp_);
1794 template<
typename SV>
1805 template<
typename SV>
bm::heap_matrix< typename SV::value_type, linear_cutoff2, SV::sv_octet_plains, allocator_type > matrix_search_buf_type
bvector_type * bvector_type_ptr
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
ad-hoc conditional expressions
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
const bvector_type * bvector_type_const_ptr
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
unsigned long long int id64_t
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv)
void find_eq(const SV &sv, It start, It end, typename SV::bvector_type &bv_out)
find all values A IN (C, D, E, F)
int compare(const SV &sv, size_type idx, const value_type val)
compare sv[idx] with input value
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
pre-processor un-defines to avoid global space pollution (internal)
algorithms for sparse_vector scan/seach
void reset_binding()
reset sparse vector binding
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
void reset_search_range()
reset (disable) search range
Algorithms for fast aggregation of N bvectors.
void operator=(const sparse_vector_scanner &)=delete
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
input set is sorted (ascending order)
const unsigned sub_block3_size
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bvector_type::allocator_type allocator_type
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
SV::value_type value_type
allocator_type::allocator_pool_type allocator_pool_type
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
allocator_type::allocator_pool_type allocator_pool_type
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
const unsigned gap_max_bits
bool lower_bound_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
lower bound search for an array position
bool find_first_eq(const SV &sv, typename SV::value_type value, size_type &idx)
find first value (may include NULL indexes)
const unsigned set_block_mask
Constant iterator designed to enumerate "ON" bits.
bool lower_bound(const SV &sv, const typename SV::value_type val, typename SV::size_type &pos)
lower bound search for an array position
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
unsigned short bitscan(V w, B *bits)
SV::bvector_type bvector_type
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
const unsigned set_block_shift
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand OR : this := bv1 OR bv2
sorted and in one block (internal!)
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type