BitMagic-C++
bmutil.h
Go to the documentation of this file.
1 #ifndef BMUTIL__H__INCLUDED__
2 #define BMUTIL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmutil.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include "bmdef.h"
26 #include "bmconst.h"
27 
28 #if defined(_M_AMD64) || defined(_M_X64)
29 #include <intrin.h>
30 #elif defined(BMSSE2OPT) || defined(BMSSE42OPT)
31 #include <emmintrin.h>
32 #elif defined(BMAVX2OPT)
33 #include <emmintrin.h>
34 #include <avx2intrin.h>
35 #endif
36 
37 #ifdef __GNUG__
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 #endif
41 
42 #ifdef _MSC_VER
43 #pragma warning( push )
44 #pragma warning( disable : 4146)
45 #endif
46 
47 
48 namespace bm
49 {
50 
51  /**
52  bit-block array wrapped into union for correct interpretation of
53  32-bit vs 64-bit access vs SIMD
54  @internal
55  */
56  struct bit_block_t
57  {
58  union bunion_t
59  {
62 
63 #if defined(BMAVX512OPT)
65 #endif
66 #if defined(BMAVX2OPT)
68 #endif
69 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
71 #endif
72  } b_;
73 
74  operator bm::word_t*() { return &(b_.w32[0]); }
75  operator const bm::word_t*() const { return &(b_.w32[0]); }
76  explicit operator bm::id64_t*() { return &b_.w64[0]; }
77  explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
78 #ifdef BMAVX512OPT
79  explicit operator __m512i*() { return &b_.w512[0]; }
80  explicit operator const __m512i*() const { return &b_.w512[0]; }
81 #endif
82 #ifdef BMAVX2OPT
83  explicit operator __m256i*() { return &b_.w256[0]; }
84  explicit operator const __m256i*() const { return &b_.w256[0]; }
85 #endif
86 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
87  explicit operator __m128i*() { return &b_.w128[0]; }
88  explicit operator const __m128i*() const { return &b_.w128[0]; }
89 #endif
90 
91  const bm::word_t* begin() const { return (b_.w32 + 0); }
92  bm::word_t* begin() { return (b_.w32 + 0); }
93  const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
94  bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
95  };
96 
97 
/**
    Return the smaller of two values.

    Only operator< is used; when neither argument compares less than the
    other, the second argument is returned.

    @param v1 first value
    @param v2 second value
    @return v1 if (v1 < v2), otherwise v2
*/
template<typename T>
T min_value(T v1, T v2)
{
    if (v1 < v2)
    {
        return v1;
    }
    return v2;
}
106 
107 
/**
    Loop-less integer LOG2 (index of the highest set bit).

    Runs a fixed sequence of threshold tests (16, 8, 4, 2, 1 bits); each
    successful test shifts the value down and records the shift amount.
    Only the low 32 bits worth of magnitude are examined, and an input
    of 0 yields 0.

    @param x value to take the logarithm of
    @return accumulated shift amount converted to T
*/
template<typename T>
T ilog2(T x)
{
    unsigned int bits = 0;

    if (x >= 1<<16)
    {
        bits += 16;
        x = T(x >> 16);
    }
    if (x >= 1<<8)
    {
        bits += 8;
        x = T(x >> 8);
    }
    if (x >= 1<<4)
    {
        bits += 4;
        x = T(x >> 4);
    }
    if (x >= 1<<2)
    {
        bits += 2;
        x = T(x >> 2);
    }
    if (x >= 1<<1)
    {
        bits += 1;
    }
    return T(bits);
}
123 
124 template<>
126 {
127  unsigned int l = 0;
128  if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; }
129  if (x >= 1<<4) { x = (bm::gap_word_t)(x >> 4); l |= 4; }
130  if (x >= 1<<2) { x = (bm::gap_word_t)(x >> 2); l |= 2; }
131  if (x >= 1<<1) l |=1;
132  return (bm::gap_word_t)l;
133 }
134 
/**
    Mini auto-pointer for internal memory management.

    Stores the pointer passed to the constructor and releases it with
    scalar `delete` in the destructor. The guard is non-copyable and
    exposes no access to the stored pointer.

    @internal
*/
template<class T>
class ptr_guard
{
public:
    /// Take over the pointer; it is deleted when the guard is destroyed
    ptr_guard(T* p) : ptr_(p) {}
    ~ptr_guard() { delete ptr_; }

    // copying is disabled: two guards deleting the same pointer would
    // be a double free
    ptr_guard(const ptr_guard<T>& p) = delete;
    ptr_guard& operator=(const ptr_guard<T>& p) = delete;
private:
    T* ptr_;
};
151 
152 /**
153  Portable LZCNT with (uses minimal LUT)
154  @ingroup bitfunc
155  @internal
156 */
157 inline
158 unsigned count_leading_zeros(unsigned x)
159 {
160  unsigned n =
161  (x >= (1U << 16)) ?
162  ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
163  :
164  ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
165  return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
166 }
167 
168 /**
169  Portable TZCNT with (uses 37-LUT)
170  @ingroup bitfunc
171  @internal
172 */
173 inline
174 unsigned count_trailing_zeros(unsigned v)
175 {
176  // (v & -v) isolates the last set bit
177  return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
178 }
179 
180 /**
181  Lookup table based integer LOG2
182 */
183 template<typename T>
184 T ilog2_LUT(T x)
185 {
186  unsigned l = 0;
187  if (x & 0xffff0000)
188  {
189  l += 16; x >>= 16;
190  }
191 
192  if (x & 0xff00)
193  {
194  l += 8; x >>= 8;
195  }
196  return l + T(first_bit_table<true>::_idx[x]);
197 }
198 
199 /**
200  Lookup table based short integer LOG2
201 */
202 template<>
203 inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x)
204 {
205  bm::gap_word_t l = 0;
206  if (x & 0xff00)
207  {
208  l = bm::gap_word_t( + 8u);
209  x = bm::gap_word_t(x >> 8u);
210  }
212 }
213 
214 
215 // if we are running on x86 CPU we can use inline ASM
216 
217 #ifdef BM_x86
218 #ifdef __GNUG__
219 
/**
    Bit-scan forward via the x86 `bsfl` instruction (GCC inline asm).

    Declared inline because the definition lives in a header.

    @param v value to scan; the instruction leaves the destination
             undefined for 0, so callers must pass a non-zero value
    @return index of the lowest set bit of v
*/
inline
unsigned bsf_asm32(unsigned int v)
{
    unsigned r;
    asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
    return r;
}
227 
/**
    Bit-scan reverse via the x86 `bsrl` instruction (GCC inline asm).

    Declared inline because the definition lives in a header.

    @param v value to scan; the instruction leaves the destination
             undefined for 0, so callers must pass a non-zero value
    @return index of the highest set bit of v
*/
inline
unsigned bsr_asm32(unsigned int v)
{
    unsigned r;
    asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
    return r;
}
235 
236 #endif // __GNUG__
237 
238 #ifdef _MSC_VER
239 
240 #if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
241 
243 unsigned int bsr_asm32(unsigned int value)
244 {
245  unsigned long r;
246  _BitScanReverse(&r, value);
247  return r;
248 }
249 
251 unsigned int bsf_asm32(unsigned int value)
252 {
253  unsigned long r;
254  _BitScanForward(&r, value);
255  return r;
256 }
257 
258 #else
259 
261 unsigned int bsr_asm32(unsigned int value)
262 {
263  __asm bsr eax, value
264 }
265 
267 unsigned int bsf_asm32(unsigned int value)
268 {
269  __asm bsf eax, value
270 }
271 
272 #endif
273 
274 #endif // _MSC_VER
275 
276 #endif // BM_x86
277 
278 
279 // From:
280 // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
281 //
282 template<typename T>
284 {
285  return
286  DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
287 }
288 
289 inline
290 unsigned bit_scan_reverse32(unsigned value)
291 {
292  BM_ASSERT(value);
293 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
294  return bm::bsr_asm32(value);
295 #else
296  return bm::ilog2_LUT<unsigned int>(value);
297 #endif
298 }
299 
300 inline
301 unsigned bit_scan_forward32(unsigned value)
302 {
303  BM_ASSERT(value);
304 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
305  return bm::bsf_asm32(value);
306 #else
307  return bit_scan_fwd(value);
308 #endif
309 }
310 
311 
313 unsigned long long bmi_bslr_u64(unsigned long long w)
314 {
315 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
316  return _blsr_u64(w);
317 #else
318  return w & (w - 1);
319 #endif
320 }
321 
323 unsigned long long bmi_blsi_u64(unsigned long long w)
324 {
325 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
326  return _blsi_u64(w);
327 #else
328  return w & (-w);
329 #endif
330 }
331 
332 /// 64-bit bit-scan reverse
333 inline
335 {
336  BM_ASSERT(w);
337 
338 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
339  return (unsigned)_lzcnt_u64(w);
340 #else
341  unsigned z;
342  unsigned w1 = unsigned(w >> 32);
343  if (!w1)
344  {
345  z = 32;
346  w1 = unsigned(w);
347  z += 31 - bm::bit_scan_reverse32(w1);
348  }
349  else
350  {
351  z = 31 - bm::bit_scan_reverse32(w1);
352  }
353  return z;
354 #endif
355 }
356 
357 inline
359 {
360  BM_ASSERT(w);
361 
362 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
363  return (unsigned)_tzcnt_u64(w);
364 #else
365  unsigned z;
366  unsigned w1 = unsigned(w);
367  if (!w1)
368  {
369  z = 32;
370  w1 = unsigned(w >> 32);
371  z += bm::bit_scan_forward32(w1);
372  }
373  else
374  {
375  z = bm::bit_scan_forward32(w1);
376  }
377  return z;
378 #endif
379 }
380 
381 
382 
383 #ifdef __GNUG__
384 #pragma GCC diagnostic pop
385 #endif
386 #ifdef _MSC_VER
387 #pragma warning( pop )
388 #endif
389 
390 
391 } // bm
392 
393 #endif
#define BM_VECT_ALIGN
Definition: bmdef.h:346
const unsigned set_block_size
Definition: bmconst.h:54
Structure for TZCNT constants.
Definition: bmconst.h:325
ptr_guard(T *p)
Definition: bmutil.h:143
Constants, tables and typedefs.
T ilog2_LUT(T x)
Lookup table based integer LOG2.
Definition: bmutil.h:184
unsigned long long int id64_t
Definition: bmconst.h:34
Definition: bm.h:76
T bit_scan_fwd(T v)
Definition: bmutil.h:283
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition: bmutil.h:323
Mini auto-pointer for internal memory management.
Definition: bmutil.h:140
T min_value(T v1, T v2)
Get minimum of 2 values.
Definition: bmutil.h:102
unsigned int word_t
Definition: bmconst.h:38
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w)
Definition: bmutil.h:313
__m128i BM_VECT_ALIGN w128 [bm::set_block_size/4] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:70
bit-block array wrapped into union for correct interpretation of 32-bit vs 64-bit access vs SIMD ...
Definition: bmutil.h:56
unsigned short gap_word_t
Definition: bmconst.h:76
bm::word_t * begin()
Definition: bmutil.h:92
unsigned count_trailing_zeros_u64(bm::id64_t w)
Definition: bmutil.h:358
const bm::word_t * end() const
Definition: bmutil.h:93
bm::id64_t BM_VECT_ALIGN w64 [bm::set_block_size/2] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:61
Definitions(internal)
const bm::word_t * begin() const
Definition: bmutil.h:91
unsigned count_leading_zeros(unsigned x)
Portable LZCNT (uses a minimal LUT)
Definition: bmutil.h:158
Structure for LZCNT constants (4-bit)
Definition: bmconst.h:310
unsigned bit_scan_forward32(unsigned value)
Definition: bmutil.h:301
#define BMFORCEINLINE
Definition: bmdef.h:190
#define BM_ASSERT
Definition: bmdef.h:117
unsigned count_leading_zeros_u64(bm::id64_t w)
64-bit bit-scan reverse
Definition: bmutil.h:334
union bm::bit_block_t::bunion_t b_
unsigned count_trailing_zeros(unsigned v)
Portable TZCNT (uses a 37-entry LUT)
Definition: bmutil.h:174
bm::word_t * end()
Definition: bmutil.h:94
T ilog2(T x)
Fast loop-less function to find LOG2.
Definition: bmutil.h:112
DeBruijn magic table.
Definition: bmconst.h:241
unsigned bit_scan_reverse32(unsigned value)
Definition: bmutil.h:290
bm::word_t BM_VECT_ALIGN w32 [bm::set_block_size] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:60
Structure keeps index of first right 1 bit for every byte.
Definition: bmconst.h:256