1
2
3 """
4 Authors: Jérôme Kieffer, ESRF
5 email:jerome.kieffer@esrf.fr
6
7 FabIO library containing compression and decompression algorithm for various
8 """
9 __author__ = "Jérôme Kieffer"
10 __contact__ = "jerome.kieffer@esrf.eu"
11 __license__ = "GPLv3+"
12 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
13
14
15 import logging, struct, hashlib, base64, StringIO, sys
16 if sys.version_info >= (3,):
17 str = bytes
18 logger = logging.getLogger("compression")
19 import numpy
20
21 try:
22 import gzip
23 except ImportError:
24 logger.error("Unable to import gzip module: disabling gzip compression")
25 gzip = None
26
27 try:
28 import bz2
29 except ImportError:
30 logger.error("Unable to import bz2 module: disabling bz2 compression")
31 bz2 = None
32
33 try:
34 import zlib
35 except ImportError:
36 logger.error("Unable to import zlib module: disabling zlib compression")
37 zlib = None
38
40 """
41 returns the md5sum of an object...
42 """
43 return base64.b64encode(hashlib.md5(blob).digest())
44
45
47 """
48 Return the native endianness of the system
49 """
50 if numpy.little_endian:
51 return "LITTLE_ENDIAN"
52 else:
53 return "BIG_ENDIAN"
54
55
57 """
58
59 Decompress a chunk of data using the gzip algorithm from Python or alternatives if possible
60
61 """
62
63 if gzip is None:
64 raise ImportError("gzip module is not available")
65 fileobj = StringIO.StringIO(stream)
66 try:
67 rawData = gzip.GzipFile(fileobj=fileobj).read()
68 except IOError:
69 logger.warning("Encounter the python-gzip bug with trailing garbage, trying subprocess gzip")
70 try:
71
72 import subprocess
73 sub = subprocess.Popen(["gzip", "-d", "-f"], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
74 rawData, err = sub.communicate(input=stream)
75 logger.debug("Gzip subprocess ended with %s err= %s; I got %s bytes back" % (sub.wait(), err, len(rawData)))
76 except Exception, error:
77 logger.warning("Unable to use the subprocess gzip (%s). Is gzip available? " % error)
78 for i in range(1, 513):
79 try:
80 fileobj = StringIO.StringIO(stream[:-i])
81 rawData = gzip.GzipFile(fileobj=fileobj).read()
82 except IOError:
83 logger.debug("trying with %s bytes less, doesn't work" % i)
84 else:
85 break
86 else:
87 logger.error("I am totally unable to read this gzipped compressed data block, giving up")
88 return rawData
89
90
92 """
93
94 Decompress a chunk of data using the bzip2 algorithm from Python
95
96 """
97 if bz2 is None:
98 raise ImportError("bz2 module is not available")
99 return bz2.decompress(stream)
100
101
103 """
104
105 Decompress a chunk of data using the zlib algorithm from Python
106
107 """
108 if zlib is None:
109 raise ImportError("zlib module is not available")
110 return zlib.decompress(stream)
111
112
114 """
115 Analyze a stream of char with any length of exception (2,4, or 8 bytes integers)
116
117 @param stream: string representing the compressed data
118 @param size: the size of the output array (of longInts)
119 @return: 1D-ndarray
120
121 """
122 logger.debug("CBF decompression using Python with Cython loops")
123 dataOut = numpy.zeros((size), dtype=numpy.int64)
124 i = 0
125 j = 0
126 last = 0
127 current = 0
128 while ((i < len(stream)) and (j < size)):
129 if (stream[i] == '\x80'):
130 if (stream[i + 1:i + 3] == "\x00\x80"):
131 if (stream[i + 3:i + 7] == "\x00\x00\x00\x80"):
132 current = struct.unpack("<q", stream[i + 7:i + 15])[0]
133 i += 15
134 else:
135 current = struct.unpack("<i", stream[i + 3:i + 7])[0]
136 i += 7
137 else:
138 current = struct.unpack("<h", stream[i + 1:i + 3])[0]
139 i += 3
140 else:
141 current = struct.unpack("<b", stream[i])[0]
142 i += 1
143 last += current
144 dataOut[j] = last
145 j += 1
146 return dataOut
147
149 """
150 Analyze a stream of char with any length of exception (2,4, or 8 bytes integers)
151
152 @param stream: string representing the compressed data
153 @param size: the size of the output array (of longInts)
154 @return: 1D-ndarray
155
156 """
157 logger.debug("CBF decompression using Weave")
158 try:
159 from scipy import weave
160 from scipy.weave import converters
161 except ImportError:
162 logger.warning("scipy.weave is not available, falling back on slow Numpy implementations")
163 return decByteOffet_numpy(stream, size)
164 dataIn = numpy.fromstring(stream, dtype="uint8")
165 n = dataIn.size
166 dataOut = numpy.zeros(size, dtype="int64")
167 codeC = """
168 unsigned char key = 0x80;
169 long j = 0;
170 long last=0;
171 long current=0;
172 for (int i=0; i< n; i++){
173 if (j>=size){
174 //printf("i= %i<%i, j=%i < size= %i %i\\n",i,n,j,size,dataIn(i));
175 break;
176 }
177 if (dataIn(i) == key){
178 if ( (dataIn(i+1)==0) and (dataIn(i+2)==key) ){
179 if ( (dataIn(i+3)==0) and (dataIn(i+4)==0) and (dataIn(i+5)==0) and (dataIn(i+6)==key) ) {
180 // 64 bits mode
181 char tmp = dataIn(i+14) ;
182 current = (long(tmp)<<56) | (long(dataIn(i+13))<<48) | (long(dataIn(i+12))<<40) | (long(dataIn(i+11))<<32) | (long(dataIn(i+10))<<24) | (long(dataIn(i+9))<<16) | (long(dataIn(i+8))<<8) | (long(dataIn(i+7)));
183 // printf("64 bit int at pos %i, %i, value=%ld \\n",i,j,current);
184 i+=14;
185 }else{
186 // 32 bits mode
187 char tmp = dataIn(i+6) ;
188 current = (long(tmp)<<24) | (long(dataIn(i+5))<<16) | (long(dataIn(i+4))<<8) | (long(dataIn(i+3)));
189 // printf("32 bit int at pos %i, %i, value=%ld was %i %i %i %i %i %i %i\\n",i,j,current,dataIn(i),dataIn(i+1),dataIn(i+2),dataIn(i+3),dataIn(i+4),dataIn(i+5),dataIn(i+6));
190 // printf("%ld %ld %ld %ld\\n",(long(tmp)<<24) , (long(dataIn(i+5))<<16) , (long(dataIn(i+4))<<8) ,long(dataIn(i+3)));
191 i+=6;
192 }
193 }else{
194 // 16 bit mode
195 char tmp = dataIn(i+2);
196 current = (long(tmp)<<8) | (long (dataIn(i+1)));
197 // printf("16 bit int at pos %i, %i, value=%ld was %i %i %i\\n",i,j,current,dataIn(i),dataIn(i+1),dataIn(i+2));
198 i+=2;
199 }
200 }else{
201 // 8 bit mode
202 char tmp = dataIn(i) ;
203 current= long(tmp) ;
204 }
205 last+=current;
206 dataOut(j)=last;
207 j++ ;
208 }
209 return_val=0;
210 """
211 rc = weave.inline(codeC, ["dataIn", "dataOut", "n", "size" ], verbose=2, type_converters=converters.blitz)
212 if rc != 0:
213 logger.warning("weave binary module return error code %s" % rc)
214 return dataOut
215
216
217
219 """
220 Analyze a stream of char with any length of exception:
221 2, 4, or 8 bytes integers
222
223 @param stream: string representing the compressed data
224 @param size: the size of the output array (of longInts)
225 @return: 1D-ndarray
226
227 """
228 logger.debug("CBF decompression using Numpy")
229 listnpa = []
230 key16 = "\x80"
231 key32 = "\x00\x80"
232 key64 = "\x00\x00\x00\x80"
233 shift = 1
234 while True:
235 idx = stream.find(key16)
236 if idx == -1:
237 listnpa.append(numpy.fromstring(stream, dtype="int8"))
238 break
239 listnpa.append(numpy.fromstring(stream[:idx], dtype="int8"))
240
241 if stream[idx + 1:idx + 3] == key32:
242 if stream[idx + 3:idx + 7] == key64:
243
244 listnpa.append(numpy.fromstring(stream[idx + 7:idx + 15],
245 dtype="int64"))
246 shift = 15
247 else:
248 listnpa.append(numpy.fromstring(stream[idx + 3:idx + 7],
249 dtype="int32"))
250 shift = 7
251 else:
252 listnpa.append(numpy.fromstring(stream[idx + 1:idx + 3],
253 dtype="int16"))
254 shift = 3
255 stream = stream[idx + shift:]
256 return (numpy.hstack(listnpa)).astype("int64").cumsum()
257
258
260 """
261 Analyze a stream of char with any length of exception:
262 2, 4, or 8 bytes integers
263
264 @param stream: string representing the compressed data
265 @param size: the size of the output array (of longInts)
266 @return: 1D-ndarray
267
268 """
269 logger.debug("CBF decompression using cython")
270 try:
271 from fabio.byte_offset import analyseCython
272 except ImportError, error:
273 logger.error("Failed to import byte_offset cython module, falling back on numpy method")
274 return decByteOffet_numpy(stream, size)
275 else:
276 return analyseCython(stream, size)
277
279 """
280 Compress a dataset into a string using the byte_offet algorithm
281
282 @param data: ndarray
283 @return: string/bytes with compressed data
284
285 test = numpy.array([0,1,2,127,0,1,2,128,0,1,2,32767,0,1,2,32768,0,1,2,2147483647,0,1,2,2147483648,0,1,2,128,129,130,32767,32768,128,129,130,32768,2147483647,2147483648])
286
287 """
288 flat = data.astype("int64").ravel()
289 delta = numpy.zeros_like(flat)
290 delta[0] = flat[0]
291 delta[1:] = flat[1:] - flat[:-1]
292 mask = ((delta > 127) + (delta < -127))
293 exceptions = numpy.nonzero(mask)[0]
294 if numpy.little_endian:
295 byteswap = False
296 else:
297 byteswap = True
298 start = 0
299 binary_blob = ""
300 for stop in exceptions:
301 if stop - start > 0:
302 binary_blob += delta[start:stop].astype("int8").tostring()
303 exc = delta[stop]
304 if (exc > 2147483647) or (exc < -2147483647):
305 binary_blob += "\x80\x00\x80\x00\x00\x00\x80"
306 if byteswap:
307 binary_blob += delta[stop:stop + 1].byteswap().tostring()
308 else:
309 binary_blob += delta[stop:stop + 1].tostring()
310 elif (exc > 32767) or (exc < -32767):
311 binary_blob += "\x80\x00\x80"
312 if byteswap:
313 binary_blob += delta[stop:stop + 1].astype("int32").byteswap().tostring()
314 else:
315 binary_blob += delta[stop:stop + 1].astype("int32").tostring()
316 else:
317 binary_blob += "\x80"
318 if byteswap:
319 binary_blob += delta[stop:stop + 1].astype("int16").byteswap().tostring()
320 else:
321 binary_blob += delta[stop:stop + 1].astype("int16").tostring()
322 start = stop + 1
323 if start < delta.size:
324 binary_blob += delta[start:].astype("int8").tostring()
325 return binary_blob
326
327
def decTY1(raw_8, raw_16=None, raw_32=None):
    """
    Modified byte offset decompressor used in Oxford Diffraction images.

    Marker bytes in the 8-bit stream (127 and 128 after the -127 shift)
    are replaced by values taken in order from the 16-bit and 32-bit
    exception streams, then the deltas are cumulated.

    @param raw_8: string/bytes containing raw data with integer 8 bits
    @param raw_16: string/bytes containing raw data with integer 16 bits
    @param raw_32: string/bytes containing raw data with integer 32 bits
    @return: numpy.ndarray with the smallest signed dtype able to hold the data
    """
    # frombuffer replaces the removed numpy.fromstring; astype(int) copies,
    # so the read-only buffer view is never mutated
    data = numpy.frombuffer(raw_8, dtype="uint8").astype(int)
    if data.size == 0:
        # empty input: original raised ValueError on summed.max()
        return numpy.zeros(0, dtype="int8")
    data -= 127
    if raw_32 is not None:
        int32 = numpy.frombuffer(raw_32, dtype="int32").astype(int)
        exception32 = numpy.nonzero(data == 128)
    if raw_16 is not None:
        int16 = numpy.frombuffer(raw_16, dtype="int16").astype(int)
        exception16 = numpy.nonzero(data == 127)
        data[exception16] = int16
    if raw_32 is not None:  # consistent with the guard above (was `if raw_32:`)
        data[exception32] = int32
    summed = data.cumsum()
    smax = summed.max()
    if smax > (2 ** 31 - 1):
        bytecode = "int64"
    elif smax > (2 ** 15 - 1):
        bytecode = "int32"
    elif smax > (2 ** 7 - 1):
        bytecode = "int16"
    else:
        bytecode = "int8"
    return summed.astype(bytecode)
decKM4CCD = decTY1
361
363 """
364 Modified byte offset compressor used in Oxford Diffraction images
365
366 @param data: numpy.ndarray with the input data (integers!)
367 @return: 3-tuple of strings: raw_8,raw_16,raw_32 containing raw data with integer of the given size
368
369 """
370 fdata = data.flatten()
371 diff = numpy.zeros_like(fdata)
372 diff[0] = fdata[0]
373 diff[1:] = fdata[1:] - fdata[:-1]
374 adiff = abs(diff)
375 exception32 = (adiff > 32767)
376 exception16 = (adiff >= 127) - exception32
377 we16 = numpy.where(exception16)
378 we32 = numpy.where(exception32)
379 raw_16 = diff[we16].astype("int16").tostring()
380 raw_32 = diff[we32].astype("int32").tostring()
381 diff[we16] = 127
382 diff[we32] = 128
383 diff += 127
384 raw_8 = diff.astype("uint8").tostring()
385 return raw_8, raw_16, raw_32
386
def decPCK(stream, dim1=None, dim2=None, overflowPix=None):
    """
    Modified CCP4 pck decompressor used in MAR345 images.

    @param stream: byte string or file-like object with the compressed data
    @param dim1: image dimension 1, forwarded to uncompress_pck
    @param dim2: image dimension 2, forwarded to uncompress_pck
    @param overflowPix: overflow pixel information, forwarded to uncompress_pck
    @return: numpy.ndarray (square array)
    @raise RuntimeError: if the mar345_IO extension cannot be imported
    """
    try:
        from mar345_IO import uncompress_pck
    except ImportError as error:
        # surface the underlying import failure instead of discarding it
        raise RuntimeError("Unable to import mar345_IO to read compressed dataset: %s" % error)
    if hasattr(stream, "seek"):
        # file-like object: rewind and read it fully
        stream.seek(0)
        raw = stream.read()
    else:
        # NOTE(review): relies on the module-level `str = bytes` shim on Python 3
        raw = str(stream)
    return uncompress_pck(raw, dim1, dim2, overflowPix)
407
408
410 """
411 Modified CCP4 pck compressor used in MAR345 images
412
413 @param data: numpy.ndarray (square array)
414 @return: compressed stream
415
416 """
417 try:
418 from mar345_IO import compress_pck
419 except ImportError, error:
420 raise RuntimeError("Unable to import mar345_IO to write compressed dataset")
421 return compress_pck(data)
422