1 #ifndef BMSERIAL__H__INCLUDED__ 2 #define BMSERIAL__H__INCLUDED__ 33 #ifndef BM__H__INCLUDED__ 36 # error missing include (bm.h or bm64.h) 41 #pragma warning( push ) 42 #pragma warning( disable : 4311 4312 4127) 87 typedef byte_buffer<allocator_type>
buffer;
146 unsigned char* buf,
size_t buf_size);
158 const statistics_type* bv_stat = 0);
216 bool inverted =
false);
258 unsigned size_control);
289 void add_model(
unsigned char mod,
unsigned score);
298 typedef bm::heap_vector<bm::gap_word_t, allocator_type> block_arridx_type;
299 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
303 unsigned bit_model_d0_size_;
304 unsigned bit_model_0run_size_;
305 block_arridx_type bit_idx_arr_;
306 unsigned scores_[64];
307 unsigned char models_[64];
310 allocator_type alloc_;
311 size_type* compression_stat_;
313 bool byte_order_serial_;
315 unsigned compression_level_;
316 bool own_temp_block_;
320 allocator_pool_type pool_;
337 void read_gap_block(decoder_type&
decoder,
345 unsigned read_id_list(decoder_type& decoder,
350 void read_bic_arr(decoder_type& decoder,
bm::word_t* blk);
353 void read_bic_gap(decoder_type& decoder,
bm::word_t* blk);
356 void read_bic_arr_inv(decoder_type& decoder,
bm::word_t* blk);
359 void read_digest0_block(decoder_type& decoder,
bm::word_t* blk);
364 void read_0runs_block(decoder_type& decoder,
bm::word_t* blk);
367 const char*
err_msg() {
return "BM::Invalid serialization format"; }
378 template<
class BV,
class DEC>
394 const unsigned char* buf,
400 void deserialize_gap(
unsigned char btype, decoder_type& dec,
401 bvector_type& bv, blocks_manager_type& bman,
404 void decode_bit_block(
unsigned char btype, decoder_type& dec,
405 blocks_manager_type& bman,
425 template<
class BV,
class SerialIterator>
435 serial_iterator_type& sit,
438 bool exit_on_one =
false);
446 void load_id_list(bvector_type& bv,
447 serial_iterator_type& sit,
453 size_type finalize_target_vector(blocks_manager_type& bman,
455 size_type bv_block_idx);
459 size_type process_id_list(bvector_type& bv,
460 serial_iterator_type& sit,
463 const char* err_msg() {
return "BM::de-serialization format error"; }
490 block_idx_type
bv_size()
const {
return bv_size_; }
493 bool is_eof()
const {
return end_of_stream_; }
499 block_idx_type skip_mono_blocks();
511 unsigned dec_size()
const {
return decoder_.size(); }
542 block_idx_type
block_idx()
const {
return this->block_idx_; }
578 return get_bit_block_COUNT(dst_block, tmp_block);
585 bool clear_target=
true);
641 const unsigned char* buf,
644 bool exit_on_one =
false 728 compression_stat_(0),
730 byte_order_serial_(true),
731 compression_level_(
bm::set_compression_default)
733 bit_idx_arr_.resize(65536);
736 temp_block_ = alloc_.alloc_bit_block();
737 own_temp_block_ =
true;
741 temp_block_ = temp_block;
742 own_temp_block_ =
false;
744 compression_stat_ = (
size_type*) alloc_.alloc_bit_block();
745 optimize_ = free_ =
false;
751 compression_stat_(0),
753 byte_order_serial_(true),
754 compression_level_(
bm::set_compression_default)
759 temp_block_ = alloc_.alloc_bit_block();
760 own_temp_block_ =
true;
764 temp_block_ = temp_block;
765 own_temp_block_ =
false;
767 compression_stat_ = (
size_type*) alloc_.alloc_bit_block();
768 optimize_ = free_ =
false;
775 alloc_.free_bit_block(temp_block_);
776 if (compression_stat_)
777 alloc_.free_bit_block((
bm::word_t*)compression_stat_);
784 for (
unsigned i = 0; i < 256; ++i)
785 compression_stat_[i] = 0;
793 compression_level_ = clevel;
805 byte_order_serial_ = value;
813 unsigned char header_flag = 0;
819 if (!byte_order_serial_)
829 enc.
put_8(header_flag);
831 if (byte_order_serial_)
834 enc.
put_8((
unsigned char)bo);
877 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
890 enc.
put_16(gap_block, len-1);
900 if (len > 3 && (compression_level_ > 3))
915 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
929 enc.
put_16(gap_block, len-1);
942 if (compression_level_ > 3 && arr_len > 1)
950 bout.
gamma(prev + 1);
952 for (
unsigned i = 1; i < arr_len; ++i)
955 bout.
gamma(curr - prev);
960 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
962 if (gamma_size >= plain_size)
968 compression_stat_[scode]++;
975 compression_stat_[scode]++;
1001 bout.
gamma(arr_len-4);
1006 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1008 if (enc_size >= raw_size)
1014 compression_stat_[scode]++;
1021 compression_stat_[scode]++;
1028 scores_[mod_size_] = score; models_[mod_size_] = mod;
1042 if (compression_level_ <= 1)
1047 if (compression_level_ <= 5)
1050 if (compression_level_ >= 2)
1059 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc *
sizeof(
bm::word_t)));
1078 if (compression_level_ >= 3)
1082 unsigned arr_size_inv =
1088 if (compression_level_ >= 4)
1090 const unsigned gamma_bits_per_int = 6;
1093 if (compression_level_ == 4)
1097 16 + (bit_gaps-1) * gamma_bits_per_int);
1100 16 + bc * gamma_bits_per_int);
1101 if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc <
bm::gap_equiv_len)
1103 16 + inverted_bc * gamma_bits_per_int);
1106 if (compression_level_ >= 5)
1108 const unsigned bie_bits_per_int = 4;
1112 32 + (bit_gaps-1) * bie_bits_per_int);
1122 if (inverted_bc > 3 && inverted_bc >=
bm::gap_equiv_len && inverted_bc < bie_cut_off)
1139 for (
unsigned i = 0; i < mod_size_; ++i)
1141 if (scores_[i] < min_score)
1143 min_score = scores_[i];
1157 if (compression_level_ <= 2)
1165 if (compression_level_ < 4)
1167 if (compression_level_ == 4)
1172 if (inverted_bc < len)
1174 if (compression_level_ < 4)
1176 if (compression_level_ == 4)
1185 if (compression_level_ == 4)
1199 bool invert =
false;
1201 unsigned char enc_choice = find_gap_best_encoding(gap_block);
1205 gamma_gap_block(gap_block, enc);
1214 enc.
put_16(gap_temp_block[0]);
1228 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
1231 interpolated_encode_gap_block(gap_block, enc);
1242 interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
1245 gamma_gap_block(gap_block, enc);
1256 enc.
put_8((blk[0]==0) ? 0 : 1);
1281 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
1282 ((j+2 < bm::set_block_size) && blk[j+2]))
1292 enc.
put_32(blk + i, j - i);
1310 if (bit_model_0run_size_ < bit_model_d0_size_)
1312 encode_bit_interval(block, enc, 0);
1331 enc.
put_32(block[off+j+0]);
1332 enc.
put_32(block[off+j+1]);
1333 enc.
put_32(block[off+j+2]);
1334 enc.
put_32(block[off+j+3]);
1336 }
while (j < bm::set_block_digest_wave_size);
1347 encode_bit_interval(block, enc, 0);
1366 bv.calc_stat(&stat);
1371 optimize_ = free_ =
false;
1385 optimize_ = free_ =
true;
1390 bv.optimize(temp_block_, BV::opt_compress, &st);
1393 optimize_ = free_ =
false;
1402 unsigned mask = inverted ? ~0u : 0u;
1411 unsigned char scode =
1414 compression_stat_[scode]++;
1417 encode_bit_digest(block, enc, digest0_);
1426 gamma_gap_block(bit_idx_arr_.data(), enc);
1433 unsigned mask = inverted ? ~0u : 0u;
1441 gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1452 unsigned mask = inverted ? ~0u : 0u;
1460 interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1463 encode_bit_digest(block, enc, digest0_);
1472 interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
1497 enc.
put_8((
unsigned char)head);
1504 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1506 if (enc_size >= raw_size)
1512 compression_stat_[scode]++;
1515 encode_bit_digest(block, enc, digest0_);
1522 unsigned mask = inverted ? ~0u : 0u;
1530 unsigned char scode =
1549 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1551 if (enc_size >= raw_size)
1557 if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
1563 compression_stat_[scode]++;
1568 encode_bit_digest(block, enc, digest0_);
1573 #define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \ 1575 enc.put_8(B_1ZERO); \ 1576 else if (nb < 256u) \ 1578 enc.put_8(B_8ZERO); \ 1579 enc.put_8((unsigned char)nb); \ 1581 else if (nb < 65536u) \ 1583 enc.put_8(B_16ZERO); \ 1584 enc.put_16((unsigned short)nb); \ 1586 else if (nb < bm::id_max32) \ 1588 enc.put_8(B_32ZERO); \ 1589 enc.put_32(unsigned(nb)); \ 1593 enc.put_8(B_64ZERO); \ 1597 #define BM_SET_ONE_BLOCKS(x) \ 1599 block_idx_type end_block = i + x; \ 1600 for (;i < end_block; ++i) \ 1601 bman.set_block_all_set(i); \ 1607 typename serializer<BV>::size_type
1609 unsigned char* buf,
size_t buf_size)
1613 reset_compression_stats();
1617 encode_header(bv, enc);
1623 bman.get_block_coord(i, i0, j0);
1625 const bm::word_t* blk = bman.get_block(i0, j0);
1639 if (next_nb == bm::set_total_blocks)
1641 enc.
put_8(set_block_azero);
1646 if (nb > 1 && nb < 128)
1649 unsigned char c = (
unsigned char)((1u << 7) | nb);
1674 bman.get_block_coord(j, i0, j0);
1675 const bm::word_t* blk_next = bman.get_block(i0, j0);
1679 if (j == bm::set_total_blocks)
1681 enc.
put_8(set_block_aone);
1710 unsigned char model = find_bit_best_encoding(blk);
1718 unsigned bit_idx = 0;
1731 encode_bit_array(blk, enc,
false);
1734 encode_bit_array(blk, enc,
true);
1737 gamma_gap_bit_block(blk, enc);
1740 encode_bit_interval(blk, enc, 0);
1743 gamma_arr_bit_block(blk, enc,
false);
1746 gamma_arr_bit_block(blk, enc,
true);
1749 bienc_arr_bit_block(blk, enc,
false);
1752 bienc_arr_bit_block(blk, enc,
true);
1755 interpolated_arr_bit_block(blk, enc,
false);
1758 interpolated_arr_bit_block(blk, enc,
true);
1761 interpolated_gap_bit_block(blk, enc);
1764 bienc_gap_bit_block(blk, enc);
1767 encode_bit_digest(blk, enc, digest0_);
1784 enc.
put_8(set_block_end);
1851 bv_serial.gap_length_serialization(
false);
1853 bv_serial.gap_length_serialization(
true);
1855 return bv_serial.serialize(bv, buf, 0);
1901 const unsigned char* buf,
1907 unsigned char header_flag = dec.
get_8();
1914 if (bo_current == bo)
1939 unsigned block_type,
1949 dst_arr[0] = decoder.
get_16();
1955 decoder.
get_16(dst_arr, len);
1960 bit_in_type bin(decoder);
1966 if (k == 0) --bit_idx;
1969 dst_arr[k] = bit_idx;
1979 bit_in_type bin(decoder);
1982 dst_arr[len-1] = max_v;
1983 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
1989 throw std::logic_error(err_msg());
1991 BM_THROW(BM_ERR_SERIALFORMAT);
2005 unsigned arr_len = dec.
get_16();
2007 bit_in_type bin(dec);
2011 bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
2016 bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
2024 this->read_bic_arr(decoder, blk);
2036 unsigned arr_len = dec.
get_16();
2042 id_array_[0] = head;
2043 id_array_[1] = min_v;
2044 id_array_[arr_len] = 65535;
2046 bit_in_type bin(dec);
2047 bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
2077 }
while (j < bm::set_block_digest_wave_size);
2083 block[off+j+0] |= dec.
get_32();
2084 block[off+j+1] |= dec.
get_32();
2085 block[off+j+2] |= dec.
get_32();
2086 block[off+j+3] |= dec.
get_32();
2088 }
while (j < bm::set_block_digest_wave_size);
2101 unsigned char run_type = dec.
get_8();
2104 unsigned run_length = dec.
get_16();
2107 unsigned run_end = j + run_length;
2108 BM_ASSERT(run_end <= bm::set_block_size);
2109 for (;j < run_end; ++j)
2111 unsigned w = dec.
get_32();
2125 unsigned block_type,
2137 *dst_block = gap_head;
2138 decoder.
get_16(dst_block+1, len - 1);
2166 unsigned arr_len = read_id_list(decoder, block_type, id_array_);
2176 unsigned len = (gap_head >> 3);
2180 *dst_block = gap_head;
2183 bit_in_type bin(decoder);
2187 for (
unsigned i = 1; i < len; ++i)
2191 *(++gap_data_ptr) = gap_sum;
2201 unsigned len = (gap_head >> 3);
2202 *dst_block = gap_head;
2204 dst_block[1] = min_v;
2205 bit_in_type bin(decoder);
2206 bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
2213 throw std::logic_error(err_msg());
2215 BM_THROW(BM_ERR_SERIALFORMAT);
2219 if (block_type == set_block_arrgap_egamma_inv ||
2220 block_type == set_block_arrgap_inv ||
2221 block_type == set_block_arrgap_bienc_inv)
2229 template<
class BV,
class DEC>
2232 temp_block_ = alloc_.alloc_bit_block();
2234 this->id_array_ = bit_idx_arr_.data();
2238 template<
class BV,
class DEC>
2241 alloc_.free_bit_block(temp_block_);
2245 template<
class BV,
class DEC>
2268 *gap_temp_block = gap_head;
2269 dec.
get_16(gap_temp_block+1, len - 1);
2274 blk = bman.get_allocator().alloc_bit_block();
2275 bman.set_block(nb, blk);
2282 bv.combine_operation_with_block(nb,
2296 bman.get_allocator().alloc_gap_block(
unsigned(level), bman.glen());
2298 *gap_blk_ptr = gap_head;
2303 dec.
get_16(gap_blk + 1, len - 1);
2309 *gap_temp_block = gap_head;
2310 dec.
get_16(gap_temp_block + 1, len - 1);
2320 unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
2321 gap_temp_block[0] = 0;
2330 bv.combine_operation_with_block(nb,
2344 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
2348 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
2353 throw std::logic_error(this->err_msg());
2355 BM_THROW(BM_ERR_SERIALFORMAT);
2359 bv.combine_operation_with_block(nb,
2365 template<
class BV,
class DEC>
2374 blk = bman.get_allocator().alloc_bit_block();
2375 bman.set_block(nb, blk);
2380 blk = bman.deoptimize_block(nb);
2388 blk = bman.deoptimize_block(nb);
2392 for (
unsigned k = 0; k < len; ++k)
2401 this->read_bic_arr(dec, blk);
2406 blk = bman.deoptimize_block(nb);
2409 this->read_bic_arr(dec, temp_block_);
2414 this->read_bic_gap(dec, blk);
2417 this->read_digest0_block(dec, blk);
2422 throw std::logic_error(this->err_msg());
2424 BM_THROW(BM_ERR_SERIALFORMAT);
2432 template<
class BV,
class DEC>
2434 const unsigned char* buf,
2438 if (!bman.is_init())
2444 bv.set_new_blocks_strat(
BM_GAP);
2450 unsigned char header_flag = dec.
get_8();
2460 throw std::logic_error(this->err_msg());
2462 BM_THROW(BM_ERR_SERIALFORMAT);
2473 if (header_flag & BM_HM_64_BIT)
2480 if (bv_size > bv.size())
2483 for (
unsigned cnt = dec.
get_32(); cnt; --cnt)
2489 return dec.
size()-1;
2506 if (header_flag & BM_HM_64_BIT)
2512 throw std::logic_error(this->err_msg());
2514 BM_THROW(BM_ERR_SERIALFORMAT);
2521 if (bv_size > bv.size())
2525 unsigned char btype;
2531 btype = dec.
get_8();
2533 bman.get_block_coord(i, i0, j0);
2534 bm::word_t* blk = bman.get_block_ptr(i0, j0);
2537 if (btype & (1 << 7))
2539 nb = btype & ~(1 << 7);
2571 throw std::logic_error(this->err_msg());
2573 BM_THROW(BM_ERR_SERIALFORMAT);
2579 bman.set_all_set(i, bm::set_total_blocks-1);
2583 bman.set_block_all_set(i);
2600 throw std::logic_error(this->err_msg());
2602 BM_THROW(BM_ERR_SERIALFORMAT);
2611 blk = bman.get_allocator().alloc_bit_block();
2612 bman.set_block(i, blk);
2618 bv.combine_operation_with_block(i,
2628 bv.set_bit_no_check(bit_idx);
2634 this->read_0runs_block(dec, temp_block);
2635 bv.combine_operation_with_block(i,
2642 unsigned head_idx, tail_idx;
2648 blk = bman.get_allocator().alloc_bit_block();
2649 bman.set_block(i, blk);
2650 for (
unsigned k = 0; k < head_idx; ++k)
2654 dec.
get_32(blk + head_idx, tail_idx - head_idx + 1);
2662 dec.
get_32(temp_block + head_idx, tail_idx - head_idx + 1);
2664 bv.combine_operation_with_block(i,
2679 deserialize_gap(btype, dec, bv, bman, i, blk);
2687 blk = bman.deoptimize_block(i);
2693 blk = bman.get_allocator().alloc_bit_block();
2694 bman.set_block(i, blk);
2700 for (
unsigned k = 0; k < len; ++k)
2709 for (
unsigned k = 0; k < len; ++k)
2720 decode_bit_block(btype, dec, bman, i, blk);
2723 decode_bit_block(btype, dec, bman, i, blk);
2728 throw std::logic_error(this->err_msg());
2730 BM_THROW(BM_ERR_SERIALFORMAT);
2735 bv.set_new_blocks_strat(strat);
2745 end_of_stream_(false),
2785 unsigned char header_flag =
decoder_.get_8();
2958 throw std::logic_error(this->
err_msg());
2960 BM_THROW(BM_ERR_SERIALFORMAT);
2981 throw std::logic_error(this->
err_msg());
2983 BM_THROW(BM_ERR_SERIALFORMAT);
3011 for (
unsigned k = 0; k < len; ++k)
3019 for (
unsigned k = 0; k < len; ++k)
3045 unsigned char run_type =
decoder_.get_8();
3048 unsigned run_length =
decoder_.get_16();
3051 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
3059 unsigned head_idx =
decoder_.get_16();
3060 unsigned tail_idx =
decoder_.get_16();
3063 for (
unsigned i = 0; i < head_idx; ++i)
3065 decoder_.get_32(dst_block + head_idx,
3066 tail_idx - head_idx + 1);
3072 int pos = int(tail_idx - head_idx) + 1;
3085 throw std::logic_error(this->
err_msg());
3087 BM_THROW(BM_ERR_SERIALFORMAT);
3116 throw std::logic_error(this->
err_msg());
3118 BM_THROW(BM_ERR_SERIALFORMAT);
3138 unsigned head_idx =
decoder_.get_16();
3139 unsigned tail_idx =
decoder_.get_16();
3140 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3146 unsigned char run_type =
decoder_.get_8();
3149 unsigned run_length =
decoder_.get_16();
3152 unsigned run_end = j + run_length;
3153 for (;j < run_end; ++j)
3190 throw std::logic_error(this->
err_msg());
3192 BM_THROW(BM_ERR_SERIALFORMAT);
3213 unsigned char run_type =
decoder_.get_8();
3216 unsigned run_length =
decoder_.get_16();
3218 unsigned run_end = j + run_length;
3221 for (;j < run_end; ++j)
3229 for (;j < run_end; ++j)
3240 unsigned head_idx =
decoder_.get_16();
3241 unsigned tail_idx =
decoder_.get_16();
3243 for ( i = 0; i < head_idx; ++i)
3245 for ( i = head_idx; i <= tail_idx; ++i)
3302 throw std::logic_error(this->
err_msg());
3304 BM_THROW(BM_ERR_SERIALFORMAT);
3327 unsigned char run_type =
decoder_.get_8();
3330 unsigned run_length =
decoder_.get_16();
3333 unsigned run_end = j + run_length;
3334 for (;j < run_end; ++j)
3349 unsigned head_idx =
decoder_.get_16();
3350 unsigned tail_idx =
decoder_.get_16();
3351 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3406 throw std::logic_error(this->
err_msg());
3408 BM_THROW(BM_ERR_SERIALFORMAT);
3427 dst_block[i] &= ~
decoder_.get_32();
3431 unsigned char run_type =
decoder_.get_8();
3434 unsigned run_length =
decoder_.get_16();
3437 unsigned run_end = j + run_length;
3438 for (;j < run_end; ++j)
3441 dst_block[j] &= ~
decoder_.get_32();
3453 unsigned head_idx =
decoder_.get_16();
3454 unsigned tail_idx =
decoder_.get_16();
3455 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3456 dst_block[i] &= ~
decoder_.get_32();
3507 throw std::logic_error(this->
err_msg());
3509 BM_THROW(BM_ERR_SERIALFORMAT);
3533 unsigned char run_type =
decoder_.get_8();
3536 unsigned run_length =
decoder_.get_16();
3539 unsigned run_end = j + run_length;
3540 for (;j < run_end; ++j)
3554 unsigned head_idx =
decoder_.get_16();
3555 unsigned tail_idx =
decoder_.get_16();
3556 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3590 throw std::logic_error(this->
err_msg());
3592 BM_THROW(BM_ERR_SERIALFORMAT);
3619 unsigned char run_type =
decoder_.get_8();
3622 unsigned run_length =
decoder_.get_16();
3625 unsigned run_end = j + run_length;
3626 for (;j < run_end; ++j)
3641 unsigned head_idx =
decoder_.get_16();
3642 unsigned tail_idx =
decoder_.get_16();
3643 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3671 throw std::logic_error(this->
err_msg());
3673 BM_THROW(BM_ERR_SERIALFORMAT);
3699 unsigned char run_type =
decoder_.get_8();
3702 unsigned run_length =
decoder_.get_16();
3705 unsigned run_end = j + run_length;
3706 for (;j < run_end; ++j)
3720 unsigned head_idx =
decoder_.get_16();
3721 unsigned tail_idx =
decoder_.get_16();
3722 for (
unsigned i = head_idx; i <= tail_idx; ++i)
3756 throw std::logic_error(this->
err_msg());
3758 BM_THROW(BM_ERR_SERIALFORMAT);
3791 unsigned char run_type =
decoder_.get_8();
3794 unsigned run_length =
decoder_.get_16();
3795 unsigned run_end = j + run_length;
3798 for (;j < run_end; ++j)
3806 for (;j < run_end; ++j)
3817 unsigned head_idx =
decoder_.get_16();
3818 unsigned tail_idx =
decoder_.get_16();
3821 for (i = 0; i < head_idx; ++i)
3823 for (i = head_idx; i <= tail_idx; ++i)
3856 throw std::logic_error(this->
err_msg());
3858 BM_THROW(BM_ERR_SERIALFORMAT);
3862 return count_adapter.
sum();
3891 unsigned char run_type =
decoder_.get_8();
3894 unsigned run_length =
decoder_.get_16();
3895 unsigned run_end = j + run_length;
3898 for (;j < run_end; ++j)
3906 for (;j < run_end; ++j)
3917 unsigned head_idx =
decoder_.get_16();
3918 unsigned tail_idx =
decoder_.get_16();
3921 for (i = 0; i < head_idx; ++i)
3923 for (i = head_idx; i <= tail_idx; ++i)
3957 throw std::logic_error(this->
err_msg());
3959 BM_THROW(BM_ERR_SERIALFORMAT);
3963 return count_adapter.
sum();
3992 unsigned char run_type =
decoder_.get_8();
3995 unsigned run_length =
decoder_.get_16();
3996 unsigned run_end = j + run_length;
3999 for (;j < run_end; ++j)
4007 for (;j < run_end; ++j)
4018 unsigned head_idx =
decoder_.get_16();
4019 unsigned tail_idx =
decoder_.get_16();
4022 for (i = 0; i < head_idx; ++i)
4024 for (i = head_idx; i <= tail_idx; ++i)
4058 throw std::logic_error(this->
err_msg());
4060 BM_THROW(BM_ERR_SERIALFORMAT);
4064 return count_adapter.
sum();
4093 unsigned char run_type =
decoder_.get_8();
4096 unsigned run_length =
decoder_.get_16();
4097 unsigned run_end = j + run_length;
4100 for (;j < run_end; ++j)
4116 unsigned head_idx =
decoder_.get_16();
4117 unsigned tail_idx =
decoder_.get_16();
4120 for (i = head_idx; i <= tail_idx; ++i)
4152 throw std::logic_error(this->
err_msg());
4154 BM_THROW(BM_ERR_SERIALFORMAT);
4157 return count_adapter.
sum();
4182 for (
unsigned k = 0; k < len; ++k)
4238 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
4249 const unsigned char* buf,
4256 unsigned char header_flag = dec.
get_8();
4262 blocks_manager_type& bman = bv.get_blocks_manager();
4263 bit_block_guard<blocks_manager_type> bg(bman);
4264 if (temp_block == 0)
4266 temp_block = bg.allocate();
4275 if (bo_current == bo)
4301 throw std::logic_error(
"BM::Platform error unknown endian");
4303 BM_THROW(BM_ERR_SERIALFORMAT);
4309 template<
class BV,
class SerialIterator>
4312 serial_iterator_type& sit,
4316 const unsigned win_size = 64;
4321 for (
unsigned i = 0; i <= id_count;)
4324 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
4326 id_buffer[j] = sit.get_id();
4334 for (
unsigned i = 0; i <= id_count;)
4337 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
4339 id_buffer[j] = sit.get_id();
4347 template<
class BV,
class SerialIterator>
4350 blocks_manager_type& bman,
4365 if (bv_block_idx <= nblock_last)
4366 bman.set_all_zero(bv_block_idx, nblock_last);
4374 bman.get_block_coord(bv_block_idx, i, j);
4375 bm::word_t*** blk_root = bman.top_blocks_root();
4376 unsigned top_size = bman.top_block_size();
4377 for (;i < top_size; ++i)
4390 count += bman.block_bitcount(blk_blk[j]);
4400 throw std::logic_error(
err_msg());
4402 BM_THROW(BM_ERR_SERIALFORMAT);
4408 template<
class BV,
class SerialIterator>
4412 serial_iterator_type& sit,
4416 unsigned id_count = sit.get_id_count();
4417 bool set_clear =
true;
4424 load_id_list(bv_tmp, sit, id_count,
true);
4436 load_id_list(bv, sit, id_count, set_clear);
4439 for (
unsigned i = 0; i < id_count; ++i)
4447 for (
unsigned i = 0; i < id_count; ++i)
4457 for (
size_type i = 0; i < id_count; ++i)
4460 count += bv.get_bit(
id);
4468 load_id_list(bv_tmp, sit, id_count,
true);
4476 load_id_list(bv_tmp, sit, id_count,
true);
4484 load_id_list(bv_tmp, sit, id_count,
false);
4485 count += bv_tmp.count();
4491 load_id_list(bv_tmp, sit, id_count,
true);
4499 throw std::logic_error(
err_msg());
4501 BM_THROW(BM_ERR_SERIALFORMAT);
4509 template<
class BV,
class SerialIterator>
4522 gap_temp_block[0] = 0;
4524 blocks_manager_type& bman = bv.get_blocks_manager();
4525 if (!bman.is_init())
4528 if (sit.bv_size() && (sit.bv_size() > bv.size()))
4529 bv.resize(sit.bv_size());
4531 typename serial_iterator_type::iterator_state
state;
4532 state = sit.get_state();
4533 if (state == serial_iterator_type::e_list_ids)
4535 count = process_id_list(bv, sit, op);
4546 count += finalize_target_vector(bman, op, bv_block_idx);
4550 state = sit.state();
4553 case serial_iterator_type::e_blocks:
4556 case serial_iterator_type::e_bit_block:
4558 BM_ASSERT(sit.block_idx() == bv_block_idx);
4560 bman.get_block_coord(bv_block_idx, i0, j0);
4561 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4574 blk = bman.make_bit_block(bv_block_idx);
4587 throw std::logic_error(
err_msg());
4589 BM_THROW(BM_ERR_SERIALFORMAT);
4606 blk = bman.deoptimize_block(bv_block_idx);
4612 unsigned c = sit.get_bit_block(blk, temp_block, sop);
4614 if (exit_on_one && count)
4620 case serial_iterator_type::e_zero_blocks:
4622 BM_ASSERT(bv_block_idx == sit.block_idx());
4629 bv_block_idx = sit.skip_mono_blocks();
4635 bv_block_idx = sit.skip_mono_blocks();
4636 bman.set_all_zero(nb_start, bv_block_idx-1);
4646 bman.get_block_coord(bv_block_idx, i0, j0);
4647 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4658 bman.zero_block(bv_block_idx);
4672 count += blk ? bman.block_bitcount(blk) : 0;
4673 if (exit_on_one && count)
4685 case serial_iterator_type::e_one_blocks:
4687 BM_ASSERT(bv_block_idx == sit.block_idx());
4689 bman.get_block_coord(bv_block_idx, i0, j0);
4690 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4697 bman.set_block_all_set(bv_block_idx);
4705 bman.zero_block(bv_block_idx);
4711 count += blk ? bman.block_bitcount(blk) : 0;
4719 blk = bman.deoptimize_block(bv_block_idx);
4750 bman.set_block_all_set(bv_block_idx);
4764 if (exit_on_one && count)
4769 case serial_iterator_type::e_gap_block:
4771 BM_ASSERT(bv_block_idx == sit.block_idx());
4773 bman.get_block_coord(bv_block_idx, i0, j0);
4774 const bm::word_t* blk = bman.get_block(i0, j0);
4776 sit.get_gap_block(gap_temp_block);
4794 if (exit_on_one && count)
4802 bman.zero_block(bv_block_idx);
4813 bv_block_idx, gap_temp_block, level);
4826 bv.combine_operation_with_block(bv_block_idx,
4834 bv.combine_operation_with_block(
4843 bman.get_block_coord(bv_block_idx, i0, j0);
4844 blk = bman.get_block_ptr(i0, j0);
4860 throw std::logic_error(
err_msg());
4862 BM_THROW(BM_ERR_SERIALFORMAT);
4882 #pragma warning( pop ) bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Bitblock copy operation.
void encode_header(const BV &bv, bm::encoder &enc)
Encode serialization header information.
void gap_add_to_bitset(unsigned *dest, const T *pcurr, unsigned len)
Adds(OR) GAP block to bitblock.
void bit_invert(T *start)
block_arridx_type gap_temp_block_
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control)
Encode BIT block with repeatable runs of zeroes.
void read_bic_arr(decoder_type &decoder, bm::word_t *blk)
Read binary interpolated list into a bit-set.
bm::heap_vector< bm::gap_word_t, allocator_type > block_arridx_type
void put_64(bm::id64_t w)
Puts 64 bits word into encoding buffer.
void put_8(unsigned char c)
Puts one character into the encoding buffer.
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Bit COUNT SUB AB functor.
unsigned id_cnt_
Id counter for id list.
bvector_type::block_idx_type block_idx_type
one or more all-1 bit blocks
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
static size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
const unsigned char set_block_gap
Plain GAP block.
const unsigned set_block_size
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
encode bit-block as interpolated gap block
Base deserialization class.
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
const unsigned set_sub_array_size
serialization_flags
Bit mask flags for serialization algorithm.
const unsigned char set_block_32zero
Up to 4G zero blocks.
const unsigned char set_block_32one
Up to 4G all-set blocks.
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically). Input bit-vector gets optimized an...
unsigned gap_add_value(T *buf, unsigned pos)
Add new value to the end of GAP buffer.
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
void next()
get next block
unsigned char * position_type
#define IS_VALID_ADDR(addr)
deseriaizer_base< DEC >::decoder_type decoder_type
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_compression_level() const
Get compression level (0-5). Default is 5 (recommended). 0 - take as is; 1, 2 - apply light weight RLE/GAP...
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false)
Convert gap block into array of ints corresponding to 1 bits.
void gap_set_all(T *buf, unsigned set_max, unsigned value)
Sets all bits to 0 or 1 (GAP)
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
encode bit-block as interpolated bit block of gaps
SerialIterator serial_iterator_type
unsigned long long int id64_t
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv)
bvector_type::blocks_manager_type blocks_manager_type
const unsigned char set_block_arr_bienc
Interpolated block as int array.
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
save no byte-order info (save some space)
save no GAP info (save some space)
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block)
Determine best representation for GAP block based on current set compression level.
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
void put_32(bm::word_t w)
Puts 32 bits word into encoding buffer.
const unsigned char set_block_bit
Plain bit block.
unsigned bit_block_calc_change(const bm::word_t *block)
bm::id_t last_id_
Last id from the id list.
bvector_type::statistics statistics_type
const unsigned gap_equiv_len
Deserializer for bit-vector.
unsigned bit_block_find(const bm::word_t *block, unsigned nbit, unsigned *pos)
Searches for the next 1 bit in the BIT block.
size_t max_serialize_mem
estimated maximum memory for serialization
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
pre-processor un-defines to avoid global space pollution (internal)
const unsigned bie_cut_off
const unsigned char set_block_bit_digest0
H-compression with digest mask.
void for_each_dgap(const T *gap_buf, Func &func)
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted)
Encode GAP block as an array with binary interpolated coder.
bm::id_t get_id() const
Get last id from the id list.
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
const unsigned char set_block_bit_interval
Interval block.
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *buf)
Returns GAP block length.
Bit-vector serialization class.
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks...
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation and calculates bitcount of the result.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
void set_pos(unsigned char *buf_pos)
Set current memory stream position.
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
#define BM_SET_ONE_BLOCKS(x)
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
bool check_block_one(const bm::word_t *blk, bool deep_scan)
Checks if block has only 1 bits.
bm::word_t sum() const
Get accumulated sum.
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
one or more zero bit blocks
Byte based reader for un-aligned bit streaming.
Statistical information about bitset's memory allocation details.
void flush()
Flush the incomplete 32-bit accumulator word.
Class for decoding data from memory buffer.
block_idx_type block_idx() const
Get current block index.
#define BMSET_PTRGAP(ptr)
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size)
Inspects block for full zero words.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
unsigned get_id_count() const
Number of ids in the inverted list (valid for e_list_ids)
get_bit_func_type bit_func_table_[bm::set_END]
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
const unsigned char set_block_gap_bienc
Interpolated GAP block.
blocks_manager_type::block_idx_type block_idx_type
deseriaizer_base< DEC > parent_type
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted)
Encode bit-block as an array of bits.
~serial_stream_iterator()
block_idx_type bv_size() const
serialized bitvector size
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
void gap_invert(T *buf)
Inverts all bits in the GAP buffer.
block_idx_type skip_mono_blocks()
skip all zero or all-one blocks
const unsigned char set_block_arrbit
List of bits ON.
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
#define IS_FULL_BLOCK(addr)
const unsigned char set_block_azero
All other blocks zero.
void get_inv_arr(bm::word_t *block)
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
bm::short_t get_16()
Reads 16-bit word from the decoding buffer.
unsigned char find_bit_best_encoding(const bm::word_t *block)
Determine best representation for a bit-block.
gap_word_t glevels_[bm::gap_levels]
GAP levels.
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
unsigned get_block_type() const
Get current block type.
bvector_type::allocator_type allocator_type
iterator_state
iterator is a state machine, this enum encodes its key value
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w)
const unsigned gap_max_buff_len
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Bit manipulation primitives (internal)
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Encoding utilities for serialization (internal)
void reset_compression_stats()
Reset all accumulated compression statistics.
Bit-block sum adapter, takes values and sums them (internal).
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
static ByteOrder byte_order()
const unsigned gap_levels
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x)
bvector_type::size_type size_type
size_t serialize(BV &bv, unsigned char *buf, unsigned serialization_flags=0)
Saves bitvector into memory. Allocates temporary memory block for bvector.
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
const unsigned set_compression_default
Default compression level.
unsigned short gap_word_t
Iterator to walk forward the serialized stream.
unsigned gap_set_array(T *buf, const T *arr, unsigned len)
Convert array to GAP buffer.
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
blocks_manager< Alloc > blocks_manager_type
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Byte based writer for un-aligned bit streaming.
int gap_calc_level(unsigned len, const T *glevel_len)
Calculates GAP block capacity level.
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
#define BMPTR_SETBIT0(ptr)
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0)
Encode bit-block using digest (hierarchical compression)
deseriaizer_base< DEC >::decoder_type decoder_type
bm::id64_t get_64()
Reads 64-bit word from the decoding buffer.
const unsigned char set_block_1zero
One all-zero block.
static size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
BV::blocks_manager_type blocks_manager_type
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true)
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
void set_gap_level(T *buf, int level)
Sets GAP block capacity level.
const unsigned char set_block_8zero
Up to 256 zero blocks.
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
void put_16(bm::short_t s)
Puts short word (16 bits) into the encoding buffer.
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
#define FULL_BLOCK_REAL_ADDR
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk)
Read inverted binary interpolated list into a bit-set.
const unsigned char set_block_arrbit_inv
List of bits OFF.
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
set_operation
Codes of set operations.
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation and calculates bitcount of the result.
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
void add_model(unsigned char mod, unsigned score)
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count)
Encode 8-bit prefix + an array.
const unsigned bits_in_block
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false)
Encode GAP block as delta-array with Elias Gamma coder.
const unsigned set_compression_max
Maximum supported compression level.
void read_digest0_block(decoder_type &decoder, bm::word_t *blk)
Read digest0-type bit-block.
bvector_type::size_type size_type
unsigned char get_8()
Reads character from the decoding buffer.
const unsigned gap_max_bits
bvector_type::size_type size_type
Utilities for bit transposition (internal) (experimental!)
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count)
Encode 8-bit prefix + an array.
bvector_type::block_idx_type block_idx_type
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
bool is_eof() const
Returns true if end of bit-stream reached.
gap_word_t * block_idx_arr_
static const char * err_msg()
bm::operation setop2op(bm::set_operation op)
Convert set operation to operation.
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size)
unsigned char * get_pos() const
Get current memory stream position.
serialization_header_mask
iterator_state state() const
Returns iterator internal state.
BV::size_type count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
unsigned(serial_stream_iterator< DEC >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
size_t size() const
Returns size of the current encoding stream.
const unsigned char set_block_16one
Up to 65536 all-set blocks.
const unsigned char set_block_end
End of serialization.
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
void read_bic_gap(decoder_type &decoder, bm::word_t *blk)
Read binary interpolated gap blocks into a bitset.
const unsigned char set_block_8one
Up to 256 all-set blocks.
bvector_type::allocator_type allocator_type
BV::size_type count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Functor for Elias Gamma encoding.
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
const unsigned char set_block_16zero
Up to 65536 zero blocks.
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0)
Convert bit block into an array of ints corresponding to 1 bits.
iterator_state get_state() const
const unsigned set_total_blocks
const unsigned char set_block_1one
One block all-set (1111...)
void gamma(unsigned value)
Elias Gamma encode the specified value.
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
distance_metric
Distance metrics codes defined for vectors A and B.
Serialization stream iterator.
unsigned block_type_
current block type
bm::id64_t calc_block_digest0(const bm::word_t *const block)
Compute digest for 64 non-zero areas.
const unsigned char set_block_aone
All other blocks one.
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block)
decoder_type & decoder()
Get low level access to the decoder (use carefully)
const unsigned char set_block_gapbit
GAP compressed bitblock.
block_arridx_type bit_idx_arr_
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
const unsigned set_block_shift
strategy
Block allocation strategies.
unsigned dec_size() const
Return current decoder size.
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
const unsigned char set_block_64one
lots of all-set blocks
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
size_t size() const
Returns size of the current decoding stream.
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array encode.
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
void set_compression_level(unsigned clevel)
Set compression level.
block_idx_type block_idx_
current block index
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
const size_type * get_compression_stat() const
Return serialization counter vector.
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
ByteOrder
Byte orders recognized by the library.
byte_buffer< allocator_type > buffer
Bit manipulation primitives (internal)
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
bool check_block_zero(const bm::word_t *blk, bool deep_scan)
Checks all conditions and returns true if block consists of only 0 bits.
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
const unsigned char set_block_arrgap
List of bits ON (GAP block)
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
const unsigned char set_block_64zero
lots of zero blocks
const unsigned gap_max_bits_cmrz
bm::word_t get_32()
Reads 32-bit word from the decoding buffer.
const unsigned set_block_digest_wave_size
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos)
Clear 1 bit in a block.