Package fabio :: Module compression
[hide private]
[frames] | [no frames]

Source Code for Module fabio.compression

  1  #!/usr/bin/env python 
  2  # coding: utf8 
  3  """ 
  4  Authors: Jérôme Kieffer, ESRF 
  5           email:jerome.kieffer@esrf.fr 
  6   
  7  FabIO library containing compression and decompression algorithm for various 
  8  """ 
  9  __author__ = "Jérôme Kieffer" 
 10  __contact__ = "jerome.kieffer@esrf.eu" 
 11  __license__ = "GPLv3+" 
 12  __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" 
 13   
 14   
 15  import logging, struct, hashlib, base64, StringIO, sys 
 16  if sys.version_info >= (3,): 
 17      str = bytes 
 18  logger = logging.getLogger("compression") 
 19  import numpy 
 20   
 21  try: 
 22      import gzip 
 23  except ImportError: 
 24      logger.error("Unable to import gzip module: disabling gzip compression") 
 25      gzip = None 
 26   
 27  try: 
 28      import bz2 
 29  except ImportError: 
 30      logger.error("Unable to import bz2 module: disabling bz2 compression") 
 31      bz2 = None 
 32   
 33  try: 
 34      import zlib 
 35  except ImportError: 
 36      logger.error("Unable to import zlib module: disabling zlib compression") 
 37      zlib = None 
 38   
def md5sum(blob):
    """
    Return the base64-encoded MD5 digest of the given object.

    @param blob: data to hash
    @return: base64 representation of the MD5 digest
    """
    digest = hashlib.md5(blob).digest()
    return base64.b64encode(digest)
44 45
def endianness():
    """
    Return the native endianness of the system,
    either "LITTLE_ENDIAN" or "BIG_ENDIAN".
    """
    return "LITTLE_ENDIAN" if numpy.little_endian else "BIG_ENDIAN"
54 55
def decGzip(stream):
    """
    Decompress a chunk of data using the gzip algorithm from Python,
    with fall-backs (subprocess gzip, then truncation retries) for the
    historical python-gzip trailing-garbage bug.

    @param stream: compressed data (bytes, or py2 str)
    @return: decompressed data, or None if every strategy failed
    @raise ImportError: when the gzip module could not be imported
    """
    if gzip is None:
        raise ImportError("gzip module is not available")
    # io.BytesIO works on both py2 (str is bytes) and py3, unlike StringIO
    import io
    # initialize so a total failure returns None instead of raising NameError
    rawData = None
    try:
        rawData = gzip.GzipFile(fileobj=io.BytesIO(stream)).read()
    except IOError:
        logger.warning("Encounter the python-gzip bug with trailing garbage, trying subprocess gzip")
        try:
            # This is as an ugly hack against a bug in Python gzip
            import subprocess
            sub = subprocess.Popen(["gzip", "-d", "-f"], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
            rawData, err = sub.communicate(input=stream)
            logger.debug("Gzip subprocess ended with %s err= %s; I got %s bytes back" % (sub.wait(), err, len(rawData)))
        except Exception as error:  # IGNORE:W0703
            logger.warning("Unable to use the subprocess gzip (%s). Is gzip available? " % error)
            # retry with up to 512 trailing bytes stripped off
            for i in range(1, 513):
                try:
                    rawData = gzip.GzipFile(fileobj=io.BytesIO(stream[:-i])).read()
                except IOError:
                    logger.debug("trying with %s bytes less, doesn't work" % i)
                else:
                    break
            else:
                logger.error("I am totally unable to read this gzipped compressed data block, giving up")
    return rawData
89 90
def decBzip2(stream):
    """
    Decompress a chunk of data using the bzip2 algorithm from Python.

    @param stream: compressed data
    @return: decompressed data
    @raise ImportError: when the bz2 module could not be imported
    """
    if bz2 is not None:
        return bz2.decompress(stream)
    raise ImportError("bz2 module is not available")
100 101
def decZlib(stream):
    """
    Decompress a chunk of data using the zlib algorithm from Python.

    @param stream: compressed data
    @return: decompressed data
    @raise ImportError: when the zlib module could not be imported
    """
    if zlib is not None:
        return zlib.decompress(stream)
    raise ImportError("zlib module is not available")
111 112
def decByteOffet_python(stream, size):
    """
    Analyze a stream of char with any length of exception (2, 4, or 8 bytes integers)

    Pure-python reference implementation of the CBF byte-offset decompression.

    @param stream: bytes (py2 str) representing the compressed data
    @param size: the size of the output array (of longInts)
    @return: 1D-ndarray of int64
    """
    logger.debug("CBF decompression using Python with Cython loops")
    dataOut = numpy.zeros((size), dtype=numpy.int64)
    i = 0
    j = 0
    last = 0
    current = 0
    while ((i < len(stream)) and (j < size)):
        # one-byte slices (stream[i:i+1]) behave the same for py2 str and
        # py3 bytes, whereas stream[i] is an int on py3
        if (stream[i:i + 1] == b'\x80'):
            if (stream[i + 1:i + 3] == b"\x00\x80"):
                if (stream[i + 3:i + 7] == b"\x00\x00\x00\x80"):
                    # 64-bit exception: 8-byte little-endian signed int
                    current = struct.unpack("<q", stream[i + 7:i + 15])[0]
                    i += 15
                else:
                    # 32-bit exception
                    current = struct.unpack("<i", stream[i + 3:i + 7])[0]
                    i += 7
            else:
                # 16-bit exception
                current = struct.unpack("<h", stream[i + 1:i + 3])[0]
                i += 3
        else:
            # normal case: one signed byte of delta
            current = struct.unpack("<b", stream[i:i + 1])[0]
            i += 1
        last += current
        dataOut[j] = last
        j += 1
    return dataOut
147
def decByteOffet_weave(stream, size):
    """
    Analyze a stream of char with any length of exception (2,4, or 8 bytes integers)

    NOTE(review): relies on scipy.weave, which is Python2-only and was removed
    from SciPy; on any modern installation this always takes the ImportError
    fall-back to decByteOffet_numpy.

    @param stream: string representing the compressed data
    @param size: the size of the output array (of longInts)
    @return: 1D-ndarray

    """
    logger.debug("CBF decompression using Weave")
    try:
        from scipy import weave
        from scipy.weave import converters
    except ImportError:
        # graceful degradation when the weave JIT compiler is unavailable
        logger.warning("scipy.weave is not available, falling back on slow Numpy implementations")
        return decByteOffet_numpy(stream, size)
    # raw compressed bytes as unsigned chars for the inlined C code
    dataIn = numpy.fromstring(stream, dtype="uint8")
    n = dataIn.size
    dataOut = numpy.zeros(size, dtype="int64")
    # Inlined C: same byte-offset state machine as decByteOffet_python.
    # 0x80 is the escape key; "\x80", "\x80\x00\x80" and
    # "\x80\x00\x80\x00\x00\x00\x80" announce 16/32/64-bit little-endian
    # exceptions which are rebuilt by shifting the following bytes.
    codeC = """
    unsigned char key = 0x80;
    long j = 0;
    long last=0;
    long current=0;
    for (int i=0; i< n; i++){
       if (j>=size){
        //printf("i= %i<%i, j=%i < size= %i %i\\n",i,n,j,size,dataIn(i));
        break;
       }
       if (dataIn(i) == key){
            if ( (dataIn(i+1)==0) and (dataIn(i+2)==key) ){
                if ( (dataIn(i+3)==0) and (dataIn(i+4)==0) and (dataIn(i+5)==0) and (dataIn(i+6)==key) ) {
                    // 64 bits mode
                    char tmp = dataIn(i+14) ;
                    current = (long(tmp)<<56) | (long(dataIn(i+13))<<48) | (long(dataIn(i+12))<<40) | (long(dataIn(i+11))<<32) | (long(dataIn(i+10))<<24) | (long(dataIn(i+9))<<16) | (long(dataIn(i+8))<<8) | (long(dataIn(i+7)));
                    // printf("64 bit int at pos %i, %i, value=%ld \\n",i,j,current);
                    i+=14;
                }else{
                    // 32 bits mode
                    char tmp = dataIn(i+6) ;
                    current = (long(tmp)<<24) | (long(dataIn(i+5))<<16) | (long(dataIn(i+4))<<8) | (long(dataIn(i+3)));
                    // printf("32 bit int at pos %i, %i, value=%ld was %i %i %i %i %i %i %i\\n",i,j,current,dataIn(i),dataIn(i+1),dataIn(i+2),dataIn(i+3),dataIn(i+4),dataIn(i+5),dataIn(i+6));
                    // printf("%ld %ld %ld %ld\\n",(long(tmp)<<24) , (long(dataIn(i+5))<<16) , (long(dataIn(i+4))<<8) ,long(dataIn(i+3)));
                    i+=6;
                }
            }else{
                // 16 bit mode
                char tmp = dataIn(i+2);
                current = (long(tmp)<<8) | (long(dataIn(i+1)));
                // printf("16 bit int at pos %i, %i, value=%ld was %i %i %i\\n",i,j,current,dataIn(i),dataIn(i+1),dataIn(i+2));
                i+=2;
            }
        }else{
            // 8 bit mode
            char tmp = dataIn(i) ;
            current= long(tmp) ;
        }
        last+=current;
        dataOut(j)=last;
        j++ ;
    }
    return_val=0;
    """
    rc = weave.inline(codeC, ["dataIn", "dataOut", "n", "size" ], verbose=2, type_converters=converters.blitz)
    if rc != 0:
        logger.warning("weave binary module return error code %s" % rc)
    return dataOut
215 216 217
def decByteOffet_numpy(stream, size=None):
    """
    Analyze a stream of char with any length of exception:
    2, 4, or 8 bytes integers

    Vectorized CBF byte-offset decompression: scan for the 0x80 escape key,
    collect the plain int8 runs and the 16/32/64-bit little-endian exception
    values, then cumulative-sum the deltas.

    @param stream: bytes (py2 str) representing the compressed data
    @param size: the size of the output array (of longInts); unused here,
                 kept for interface compatibility with the other decoders
    @return: 1D-ndarray of int64
    """
    logger.debug("CBF decompression using Numpy")
    listnpa = []
    # bytes literals work identically on py2 (b"" is str) and py3
    key16 = b"\x80"
    key32 = b"\x00\x80"
    key64 = b"\x00\x00\x00\x80"
    shift = 1
    while True:
        idx = stream.find(key16)
        if idx == -1:
            # no more escapes: the rest is a run of plain int8 deltas
            listnpa.append(numpy.frombuffer(stream, dtype="int8"))
            break
        listnpa.append(numpy.frombuffer(stream[:idx], dtype="int8"))

        if stream[idx + 1:idx + 3] == key32:
            if stream[idx + 3:idx + 7] == key64:
                # long int 64 bits
                listnpa.append(numpy.frombuffer(stream[idx + 7:idx + 15],
                                                dtype="int64"))
                shift = 15
            else:  # 32 bit int
                listnpa.append(numpy.frombuffer(stream[idx + 3:idx + 7],
                                                dtype="int32"))
                shift = 7
        else:  # int16
            listnpa.append(numpy.frombuffer(stream[idx + 1:idx + 3],
                                            dtype="int16"))
            shift = 3
        stream = stream[idx + shift:]
    # astype copies, so the read-only frombuffer views are never mutated
    return (numpy.hstack(listnpa)).astype("int64").cumsum()
257 258
def decByteOffet_cython(stream, size=None):
    """
    Analyze a stream of char with any length of exception:
    2, 4, or 8 bytes integers

    Delegates to the compiled fabio.byte_offset extension when available,
    otherwise falls back on the pure-numpy implementation.

    @param stream: string representing the compressed data
    @param size: the size of the output array (of longInts)
    @return: 1D-ndarray
    """
    logger.debug("CBF decompression using cython")
    try:
        from fabio.byte_offset import analyseCython
    except ImportError:
        # py3-compatible syntax; the bound exception was never used
        logger.error("Failed to import byte_offset cython module, falling back on numpy method")
        return decByteOffet_numpy(stream, size)
    else:
        return analyseCython(stream, size)
277
def compByteOffet_numpy(data):
    """
    Compress a dataset into a string using the byte_offet algorithm

    Deltas that fit a signed byte are stored as int8; larger ones are
    escaped with 0x80 / 0x80 0x00 0x80 / 0x80 0x00 0x80 0x00 0x00 0x00 0x80
    followed by the little-endian 16/32/64-bit value.

    @param data: ndarray of integers
    @return: string/bytes with compressed data

    test = numpy.array([0,1,2,127,0,1,2,128,0,1,2,32767,0,1,2,32768,0,1,2,2147483647,0,1,2,2147483648,0,1,2,128,129,130,32767,32768,128,129,130,32768,2147483647,2147483648])
    """
    flat = data.astype("int64").ravel()
    delta = numpy.zeros_like(flat)
    delta[0] = flat[0]
    delta[1:] = flat[1:] - flat[:-1]
    # | instead of + on boolean arrays: same logical-or result, clearer intent
    mask = ((delta > 127) | (delta < -127))
    exceptions = numpy.nonzero(mask)[0]
    # exceptions are always written little-endian, so swap on big-endian hosts
    byteswap = not numpy.little_endian
    start = 0
    # collect bytes chunks and join once: avoids quadratic str += and the
    # py3 crash from mixing "" with bytes; tobytes() replaces the removed tostring()
    chunks = []
    for stop in exceptions:
        if stop - start > 0:
            chunks.append(delta[start:stop].astype("int8").tobytes())
        exc = delta[stop]
        if (exc > 2147483647) or (exc < -2147483647):  # 2**31-1
            chunks.append(b"\x80\x00\x80\x00\x00\x00\x80")
            big = delta[stop:stop + 1]
        elif (exc > 32767) or (exc < -32767):  # 2**15-1
            chunks.append(b"\x80\x00\x80")
            big = delta[stop:stop + 1].astype("int32")
        else:  # >127
            chunks.append(b"\x80")
            big = delta[stop:stop + 1].astype("int16")
        chunks.append(big.byteswap().tobytes() if byteswap else big.tobytes())
        start = stop + 1
    if start < delta.size:
        chunks.append(delta[start:].astype("int8").tobytes())
    return b"".join(chunks)
326 327
def decTY1(raw_8, raw_16=None, raw_32=None):
    """
    Modified byte offset decompressor used in Oxford Diffraction images

    In raw_8, the escape values 127 and 128 (after the -127 shift) flag that
    the real delta lives in the next slot of raw_16 / raw_32 respectively.

    @param raw_8: string/bytes containing raw data with integer 8 bits
    @param raw_16: string/bytes containing raw data with integer 16 bits
    @param raw_32: string/bytes containing raw data with integer 32 bits
    @return: numpy.ndarray with the smallest signed dtype holding the data
    """
    # frombuffer replaces the removed numpy.fromstring; astype(int) copies,
    # so the read-only buffer view is never written to
    data = numpy.frombuffer(raw_8, dtype="uint8").astype(int)
    data -= 127
    if raw_32 is not None:
        int32 = numpy.frombuffer(raw_32, dtype="int32").astype(int)
        exception32 = numpy.nonzero(data == 128)
    if raw_16 is not None:
        int16 = numpy.frombuffer(raw_16, dtype="int16").astype(int)
        exception16 = numpy.nonzero(data == 127)
        data[exception16] = int16
    if raw_32:
        data[exception32] = int32
    summed = data.cumsum()
    if summed.size == 0:
        # empty input: original crashed on .max(); return an empty int8 array
        return summed.astype("int8")
    # consider the full range, not just the maximum: the original picked the
    # dtype from max() alone, which truncated large negative values
    absmax = max(abs(int(summed.max())), abs(int(summed.min())))
    if absmax > (2 ** 31 - 1):
        bytecode = "int64"
    elif absmax > (2 ** 15 - 1):
        bytecode = "int32"
    elif absmax > (2 ** 7 - 1):
        bytecode = "int16"
    else:
        bytecode = "int8"
    return summed.astype(bytecode)
decKM4CCD = decTY1
def compTY1(data):
    """
    Modified byte offset compressor used in Oxford Diffraction images

    @param data: numpy.ndarray with the input data (integers!)
    @return: 3-tuple of strings: raw_8,raw_16,raw_32 containing raw data with integer of the given size
    """
    fdata = data.flatten()
    diff = numpy.zeros_like(fdata)
    diff[0] = fdata[0]
    diff[1:] = fdata[1:] - fdata[:-1]
    adiff = abs(diff)
    exception32 = (adiff > 32767)  # 2**15-1
    # & ~ instead of boolean subtraction, which modern numpy rejects;
    # exception32 is a subset of (adiff >= 127) so the result is identical
    exception16 = (adiff >= 127) & ~exception32  # 2**7-1
    we16 = numpy.where(exception16)
    we32 = numpy.where(exception32)
    # tobytes() replaces the tostring() alias removed in numpy 2.0
    raw_16 = diff[we16].astype("int16").tobytes()
    raw_32 = diff[we32].astype("int32").tobytes()
    # escape markers: 127 -> look in raw_16, 128 -> look in raw_32
    diff[we16] = 127
    diff[we32] = 128
    diff += 127
    raw_8 = diff.astype("uint8").tobytes()
    return raw_8, raw_16, raw_32
386
def decPCK(stream, dim1=None, dim2=None, overflowPix=None):
    """
    Modified CCP4 pck decompressor used in MAR345 images

    @param stream: string/bytes or file-like object with the compressed image
    @param dim1: first image dimension, forwarded to the extension
    @param dim2: second image dimension, forwarded to the extension
    @param overflowPix: number of overflow pixels, forwarded to the extension
    @return: numpy.ndarray (square array)
    @raise RuntimeError: when the mar345_IO extension cannot be imported
    """
    try:
        from mar345_IO import uncompress_pck
    except ImportError:
        # py3-compatible except syntax; the bound exception was unused
        raise RuntimeError("Unable to import mar345_IO to read compressed dataset")
    if hasattr(stream, "seek"):  # file-like: rewind and slurp
        stream.seek(0)
        raw = stream.read()
    else:
        # NOTE(review): relies on the module-level "str = bytes" rebinding
        # on py3 so that bytes input passes through unchanged — confirm
        raw = str(stream)

    return uncompress_pck(raw, dim1, dim2, overflowPix)
407 408
def compPCK(data):
    """
    Modified CCP4 pck compressor used in MAR345 images

    @param data: numpy.ndarray (square array)
    @return: compressed stream
    @raise RuntimeError: when the mar345_IO extension cannot be imported
    """
    try:
        from mar345_IO import compress_pck
    except ImportError:
        # py3-compatible except syntax; the bound exception was unused
        raise RuntimeError("Unable to import mar345_IO to write compressed dataset")
    return compress_pck(data)
422