Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Lib/bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

from builtins import open as _builtin_open
from compression._common import _streams
import io
import os
import _compression

from _bz2 import BZ2Compressor, BZ2Decompressor

Expand All @@ -23,7 +23,7 @@
_MODE_WRITE = 3


class BZ2File(_compression.BaseStream):
class BZ2File(_streams.BaseStream):

"""A file object providing transparent bzip2 (de)compression.

Expand Down Expand Up @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
raise TypeError("filename must be a str, bytes, file or PathLike object")

if self._mode == _MODE_READ:
raw = _compression.DecompressReader(self._fp,
raw = _streams.DecompressReader(self._fp,
BZ2Decompressor, trailing_error=OSError)
self._buffer = io.BufferedReader(raw)
else:
Expand Down Expand Up @@ -248,7 +248,7 @@ def writelines(self, seq):

Line separators are not added between the written byte strings.
"""
return _compression.BaseStream.writelines(self, seq)
return _streams.BaseStream.writelines(self, seq)

def seek(self, offset, whence=io.SEEK_SET):
"""Change the file position.
Expand Down
Empty file added Lib/compression/__init__.py
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Internal classes used by the gzip, lzma and bz2 modules"""
"""Internal classes used by compression modules"""

import io
import sys
Expand Down
5 changes: 5 additions & 0 deletions Lib/compression/bz2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Lib/compression/bz2.py: alias module that re-exports the top-level ``bz2``
# module under the ``compression`` package.

# Import the real module only long enough to copy its docstring, then drop
# the name so ``bz2`` itself does not remain as an attribute of this module.
import bz2
__doc__ = bz2.__doc__
del bz2

# Re-export whatever the top-level module exposes to a star import.
from bz2 import *
5 changes: 5 additions & 0 deletions Lib/compression/gzip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Lib/compression/gzip.py: alias module that re-exports the top-level ``gzip``
# module under the ``compression`` package.

# Import the real module only long enough to copy its docstring, then drop
# the name so ``gzip`` itself does not remain as an attribute of this module.
import gzip
__doc__ = gzip.__doc__
del gzip

# Re-export whatever the top-level module exposes to a star import.
from gzip import *
5 changes: 5 additions & 0 deletions Lib/compression/lzma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Lib/compression/lzma.py: alias module that re-exports the top-level ``lzma``
# module under the ``compression`` package.

# Import the real module only long enough to copy its docstring, then drop
# the name so ``lzma`` itself does not remain as an attribute of this module.
import lzma
__doc__ = lzma.__doc__
del lzma

# Re-export whatever the top-level module exposes to a star import.
from lzma import *
5 changes: 5 additions & 0 deletions Lib/compression/zlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Lib/compression/zlib.py: alias module that re-exports the top-level ``zlib``
# module under the ``compression`` package.

# Import the real module only long enough to copy its docstring, then drop
# the name so ``zlib`` itself does not remain as an attribute of this module.
import zlib
__doc__ = zlib.__doc__
del zlib

# Re-export whatever the top-level module exposes to a star import.
from zlib import *
242 changes: 242 additions & 0 deletions Lib/compression/zstd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
"""Python bindings to the Zstandard (zstd) compression library (RFC-8878)."""

# Public API of this package: the names a star import of this module
# provides, grouped by where each name comes from.
__all__ = (
    # compression.zstd (defined in this module)
    'COMPRESSION_LEVEL_DEFAULT',
    'compress',
    'CompressionParameter',
    'decompress',
    'DecompressionParameter',
    'finalize_dict',
    'get_frame_info',
    'Strategy',
    'train_dict',

    # compression.zstd._zstdfile
    'open',
    'ZstdFile',

    # _zstd (zstd_version_info is the exception: it is computed in this
    # module from _zstd.zstd_version_number)
    'get_frame_size',
    'zstd_version',
    'zstd_version_info',
    'ZstdCompressor',
    'ZstdDecompressor',
    'ZstdDict',
    'ZstdError',
)

import _zstd
import enum
from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
get_frame_size, zstd_version)
from compression.zstd._zstdfile import ZstdFile, open, _nbytes

# _zstd.zstd_version_number packs the library version into one integer as
# MAJOR * 10000 + MINOR * 100 + RELEASE; split it back into its three fields.
zstd_version_info = (
    _zstd.zstd_version_number // 10_000,
    _zstd.zstd_version_number // 100 % 100,
    _zstd.zstd_version_number % 100,
)
"""Version number of the runtime zstd library as a tuple of integers."""

COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
"""The default compression level for Zstandard, currently '3'."""


class FrameInfo:
    """Information about a Zstandard frame.

    Instances are read-only.  Both attributes are stored once by
    ``__init__``; any later attempt to rebind or delete an attribute raises
    AttributeError, so an instance can never end up half-populated.

    Attributes:
        decompressed_size: the value passed as *decompressed_size*.
        dictionary_id: the value passed as *dictionary_id*.
    """

    __slots__ = 'decompressed_size', 'dictionary_id'

    def __init__(self, decompressed_size, dictionary_id):
        # Store through object.__setattr__: this class's own __setattr__
        # refuses every assignment, including the initial ones.
        super().__setattr__('decompressed_size', decompressed_size)
        super().__setattr__('dictionary_id', dictionary_id)

    def __repr__(self):
        return (f'FrameInfo(decompressed_size={self.decompressed_size}, '
                f'dictionary_id={self.dictionary_id})')

    def __setattr__(self, name, _):
        raise AttributeError(f"can't set attribute {name!r}")

    def __delattr__(self, name):
        # Without this, ``del info.dictionary_id`` would succeed and leave an
        # object whose __repr__ (and attribute reads) raise AttributeError.
        raise AttributeError(f"can't delete attribute {name!r}")


def get_frame_info(frame_buffer):
    """Read the header of a Zstandard frame and return it as a FrameInfo.

    *frame_buffer* is a bytes-like object positioned at the start of a
    frame; it must contain at least the complete frame header (6 to 18
    bytes).

    The returned FrameInfo object exposes two attributes:

    * ``decompressed_size`` -- the size in bytes of the frame's data once
      decompressed, or None when that size is unknown.
    * ``dictionary_id`` -- a non-negative int below 2**32.  The value 0 is
      special: it means no dictionary ID was recorded in the frame header,
      so the frame may or may not need a dictionary to be decoded, and the
      ID of any such dictionary is not specified.
    """
    # The extension module returns the positional fields FrameInfo expects.
    header_fields = _zstd.get_frame_info(frame_buffer)
    return FrameInfo(*header_fields)


def train_dict(samples, dict_size):
    """Train a Zstandard dictionary and return it as a ZstdDict.

    *samples* is an iterable of samples; each sample is a bytes-like object
    representing a file.

    *dict_size* is the maximum size of the dictionary, in bytes.

    Raises TypeError if *dict_size* is not an int, and ValueError if the
    samples contain no data at all.
    """
    if not isinstance(dict_size, int):
        ds_cls = type(dict_size).__qualname__
        raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.')

    # Materialize once: the iterable is walked twice below (join + sizes).
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(_nbytes(item) for item in sample_seq)
    if not joined:
        raise ValueError("samples contained no data; can't train dictionary.")

    # The trainer receives one flat buffer plus the length of each sample.
    return ZstdDict(_zstd.train_dict(joined, sizes, dict_size))


def finalize_dict(zstd_dict, /, samples, dict_size, level):
    """Finalize a Zstandard dictionary and return it as a new ZstdDict.

    Starting from custom content used as the basis of a dictionary, plus a
    set of samples, finalize *zstd_dict* by adding the headers and
    statistics required by the Zstandard dictionary format.

    This lets you hand-compose effective dictionary content, use it as the
    basis dictionary, and then finalize it with some samples.  The basis
    dictionary may be a "raw content" dictionary; see *is_raw* in ZstdDict.

    *samples* is an iterable of samples; each sample is a bytes-like object
    representing a file.
    *dict_size* is the maximum size of the dictionary, in bytes.
    *level* is the expected compression level.  Statistics differ from one
    compression level to another, so tuning the dictionary to the level it
    will be used with can provide improvements.

    Raises TypeError if *zstd_dict* is not a ZstdDict or if *dict_size* or
    *level* is not an int, and ValueError if the samples contain no data.
    """
    # Validate argument types up front, in signature order.
    if not isinstance(zstd_dict, ZstdDict):
        raise TypeError('zstd_dict argument should be a ZstdDict object.')
    if not isinstance(dict_size, int):
        raise TypeError('dict_size argument should be an int object.')
    if not isinstance(level, int):
        raise TypeError('level argument should be an int object.')

    # Materialize once: the iterable is walked twice below (join + sizes).
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(_nbytes(item) for item in sample_seq)
    if not joined:
        raise ValueError("The samples are empty content, can't finalize the "
                         "dictionary.")

    # The basis content, one flat sample buffer and the per-sample lengths
    # are handed to the extension module, which returns the new content.
    finalized = _zstd.finalize_dict(
        zstd_dict.dict_content, joined, sizes, dict_size, level,
    )
    return ZstdDict(finalized)


def compress(data, level=None, options=None, zstd_dict=None):
    """Compress *data* in one shot and return the Zstandard result as bytes.

    *level* is an int giving the compression level to use; it defaults to
    COMPRESSION_LEVEL_DEFAULT ('3').
    *options* is a dict object holding advanced compression parameters.  See
    CompressionParameter for more on options.
    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.

    Use a ZstdCompressor directly when you need incremental compression.
    """
    compressor = ZstdCompressor(
        level=level,
        options=options,
        zstd_dict=zstd_dict,
    )
    # FLUSH_FRAME is passed so the single compress() call is the only one
    # this function needs to make on the compressor.
    frame_mode = ZstdCompressor.FLUSH_FRAME
    return compressor.compress(data, mode=frame_mode)


def decompress(data, zstd_dict=None, options=None):
    """Decompress *data*, which holds one or more Zstandard frames.

    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.
    *options* is a dict object holding advanced decompression parameters.
    See DecompressionParameter for more on options.

    Raises ZstdError if the input ends before a frame is complete.

    Use a ZstdDecompressor directly when you need incremental decompression.
    """
    pieces = []
    remaining = data
    while True:
        # A new decompressor is created for every frame; whatever follows the
        # frame it consumed is read back from ``unused_data``.
        decoder = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
        pieces.append(decoder.decompress(remaining))
        if not decoder.eof:
            raise ZstdError('Compressed data ended before the '
                            'end-of-stream marker was reached')
        remaining = decoder.unused_data
        if not remaining:
            # Nothing left after the last complete frame: done.
            return b''.join(pieces)


class CompressionParameter(enum.IntEnum):
    """Compression parameters.

    Each member takes its value from the ``ZSTD_c_*`` constant of the
    ``_zstd`` extension module named on the same line.
    """

    compression_level = _zstd.ZSTD_c_compressionLevel
    window_log = _zstd.ZSTD_c_windowLog
    hash_log = _zstd.ZSTD_c_hashLog
    chain_log = _zstd.ZSTD_c_chainLog
    search_log = _zstd.ZSTD_c_searchLog
    min_match = _zstd.ZSTD_c_minMatch
    target_length = _zstd.ZSTD_c_targetLength
    strategy = _zstd.ZSTD_c_strategy

    # Long-distance matching: the on/off switch and its ldm_* settings.
    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

    # *_flag members.
    # NOTE(review): presumably these control what is written into the frame
    # (content size, checksum, dictionary ID) -- confirm against libzstd.
    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
    checksum_flag = _zstd.ZSTD_c_checksumFlag
    dict_id_flag = _zstd.ZSTD_c_dictIDFlag

    # NOTE(review): presumably libzstd's multi-threaded compression
    # settings -- confirm against the libzstd manual.
    nb_workers = _zstd.ZSTD_c_nbWorkers
    job_size = _zstd.ZSTD_c_jobSize
    overlap_log = _zstd.ZSTD_c_overlapLog

    def bounds(self):
        """Return the (lower, upper) int bounds of a compression parameter.

        Both the lower and upper bounds are inclusive.
        """
        # is_compress=True tells the extension module to treat self.value as
        # a compression parameter rather than a decompression parameter.
        return _zstd.get_param_bounds(self.value, is_compress=True)


class DecompressionParameter(enum.IntEnum):
    """Decompression parameters.

    Each member takes its value from the ``ZSTD_d_*`` constant of the
    ``_zstd`` extension module named on the same line.
    """

    window_log_max = _zstd.ZSTD_d_windowLogMax

    def bounds(self):
        """Return the (lower, upper) int bounds of a decompression parameter.

        Both the lower and upper bounds are inclusive.
        """
        # is_compress=False tells the extension module to treat self.value as
        # a decompression parameter rather than a compression parameter.
        return _zstd.get_param_bounds(self.value, is_compress=False)


class Strategy(enum.IntEnum):
    """Compression strategies, listed from fastest to strongest.

    Note that new strategies might be added in the future.
    Only the order (from fast to strong) is guaranteed,
    the numeric value might change.
    """

    # Each member takes its value from the ``_zstd`` constant named on the
    # same line; keep the declaration order fastest-to-strongest as the
    # docstring promises.
    fast = _zstd.ZSTD_fast
    dfast = _zstd.ZSTD_dfast
    greedy = _zstd.ZSTD_greedy
    lazy = _zstd.ZSTD_lazy
    lazy2 = _zstd.ZSTD_lazy2
    btlazy2 = _zstd.ZSTD_btlazy2
    btopt = _zstd.ZSTD_btopt
    btultra = _zstd.ZSTD_btultra
    btultra2 = _zstd.ZSTD_btultra2


# Check validity of the CompressionParameter & DecompressionParameter types.
# This runs at import time, after both enum classes have been defined, and
# hands the two classes to the _zstd extension module.
# NOTE(review): presumably _zstd keeps references to these classes for later
# use when interpreting option dicts -- confirm against the C implementation.
_zstd.set_parameter_types(CompressionParameter, DecompressionParameter)
Loading
Loading