From 48f17d83d64df1e55e46679e9a7e2ab74b4e74a4 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Mon, 27 May 2024 21:09:17 -0400 Subject: [PATCH 1/8] Patch for zfs diff bug and add move V feature zfs diff has an issue that spaces in file names are replaced with \0040. Added patch to correct this condition. Also added feature to have a move flag for diffs (V). By default zfs diff returns R for Rename and Move but new flag on get_diffs allows for distinction between rename (R) and move (V). --- README.md | 3 +++ pyproject.toml | 2 +- src/zfslib/zfslib.py | 18 ++++++++++++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9e167eb..c6efcb7 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,9 @@ See examples folder for code examples # - + The path has been created # - M The path has been modified # - R The path has been renamed + # - V The path has moved + # get_move - Derrive the V flag for paths that have moved. + # By default zfs returns R for renamed and moved paths. ``` ### `.snap_path` diff --git a/pyproject.toml b/pyproject.toml index 31eec81..49c7590 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zfslib" -version = "0.12.0" +version = "0.13.0" description = "ZFS Utilities For Python3" license = "MIT" authors = ["Timothy C. 
Quinn"] diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 685cb79..dbe7dbd 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -578,7 +578,8 @@ def __init__(self, pool, name, parent=None): # - + The path has been created # - M The path has been modified # - R The path has been renamed - def get_diffs(self, snap_from, snap_to=None, include=None, exclude=None, file_type=None, chg_type=None): + # - V The path has been moved + def get_diffs(self, snap_from, snap_to=None, include=None, exclude=None, file_type=None, chg_type=None, get_move:bool=False): self.assertHaveMounts() assert self.mounted, "Cannot get diffs for Unmounted Dataset. Verify mounted flag on Dataset before calling" @@ -597,6 +598,7 @@ def __tv(k, v): if isinstance(v, list): return v raise AssertionError(f"{k} can only be a str or list. Got: {type(v)}") + if chg_type == 'V': get_move = True file_type = __tv('file_type', file_type) chg_type = __tv('chg_type', chg_type) @@ -631,7 +633,7 @@ def __row(s): for i, row in enumerate(rows): # if i == 429: # print("HERE") - d = Diff(row, snap_left, snap_right) + d = Diff(row, snap_left, snap_right, get_move=get_move) if d.path_full.find('(on_delete_queue)') > 0: # It looks to be an artefact of ZFS that does not actually exist in FS # https://github.com/openzfs/zfs/blob/master/lib/libzfs/libzfs_diff.c @@ -798,8 +800,9 @@ class Diff(): ,'+': 'The path has been created' ,'M': 'The path has been modified' ,'R': 'The path has been renamed' + ,'V': 'The path has been moved' } - def __init__(self, row, snap_left, snap_right): + def __init__(self, row, snap_left, snap_right, get_move:bool=False): self.no_from_snap=False self.to_present=False if isinstance(snap_left, str) and snap_left == '(na-first)': @@ -829,6 +832,13 @@ def __init__(self, row, snap_left, snap_right): else: raise Exception(f"Unexpected len: {len(row)}. 
Row = {row}") + # Derrive Move change type + if get_move and chg_type == 'R' and path_new is not None: + chg_type = 'V' + + # Fix issue related to https://github.com/openzfs/zfs/issues/6318 + path = path.replace("\\0040", " ") + chg_time = datetime.fromtimestamp(int(inode_ts[:inode_ts.find('.')])) self.chg_ts = inode_ts self.chg_time = chg_time @@ -871,7 +881,7 @@ def _get_snap_path_right(self): if self.to_present: return self.path_full snap_path = self.snap_right.snap_path - path_full = self.path_full_new if self.chg_type == 'R' else self.path_full + path_full = self.path_full_new if self.chg_type in ('R','V') else self.path_full return "{}{}".format(snap_path, path_full.replace(self.snap_left.dataset.mountpoint, '')) snap_path_right = property(_get_snap_path_right) From a5f1fd5f31eb1993e22f0c9769fa37d1fbd50443 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Mon, 27 May 2024 22:32:13 -0400 Subject: [PATCH 2/8] Bug fix for new move feature --- src/zfslib/zfslib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index dbe7dbd..11eb0f2 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -833,8 +833,9 @@ def __init__(self, row, snap_left, snap_right, get_move:bool=False): raise Exception(f"Unexpected len: {len(row)}. Row = {row}") # Derrive Move change type - if get_move and chg_type == 'R' and path_new is not None: - chg_type = 'V' + if get_move and file_type == 'F' and chg_type == 'R': + if splitPath(path)[1] != splitPath(path_new)[1]: + chg_type = 'V' # Fix issue related to https://github.com/openzfs/zfs/issues/6318 path = path.replace("\\0040", " ") From 1b7699220e8b0a3e88aeb1abfc2ddc3a58ec1713 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Tue, 28 May 2024 11:25:12 -0400 Subject: [PATCH 3/8] New ign_xattrdir flag for get_diffs() New ign_xattrdir flag for get_diffs() and fix \\0040 issue for path_new as well as path. 
--- pyproject.toml | 2 +- src/zfslib/zfslib.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 49c7590..22c6d03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zfslib" -version = "0.13.0" +version = "0.13.2" description = "ZFS Utilities For Python3" license = "MIT" authors = ["Timothy C. Quinn"] diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 11eb0f2..8d1edc4 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -579,7 +579,8 @@ def __init__(self, pool, name, parent=None): # - M The path has been modified # - R The path has been renamed # - V The path has been moved - def get_diffs(self, snap_from, snap_to=None, include=None, exclude=None, file_type=None, chg_type=None, get_move:bool=False): + # ign_xattrdir - Filter out entries + def get_diffs(self, snap_from, snap_to=None, include=None, exclude=None, file_type=None, chg_type=None, get_move:bool=False, ign_xattrdir:bool=False): self.assertHaveMounts() assert self.mounted, "Cannot get diffs for Unmounted Dataset. Verify mounted flag on Dataset before calling" @@ -631,6 +632,10 @@ def __row(s): rows = list(map(lambda s: __row(s), stdout.splitlines())) diffs = [] for i, row in enumerate(rows): + + if ign_xattrdir and row[3].find('/') > -1: + continue + # if i == 429: # print("HERE") d = Diff(row, snap_left, snap_right, get_move=get_move) @@ -832,14 +837,16 @@ def __init__(self, row, snap_left, snap_right, get_move:bool=False): else: raise Exception(f"Unexpected len: {len(row)}. 
Row = {row}") + # Fix issue related to https://github.com/openzfs/zfs/issues/6318 + path = path.replace("\\0040", " ") + if path_new: + path_new = path_new.replace("\\0040", " ") + # Derrive Move change type if get_move and file_type == 'F' and chg_type == 'R': if splitPath(path)[1] != splitPath(path_new)[1]: chg_type = 'V' - # Fix issue related to https://github.com/openzfs/zfs/issues/6318 - path = path.replace("\\0040", " ") - chg_time = datetime.fromtimestamp(int(inode_ts[:inode_ts.find('.')])) self.chg_ts = inode_ts self.chg_time = chg_time From d50f547f7b20e054f1b9d29096ae735cc207a895 Mon Sep 17 00:00:00 2001 From: Aaron Whitehouse Date: Wed, 21 May 2025 22:56:42 +0100 Subject: [PATCH 4/8] Update README.md to change "zfs_prop" to "zfs_props", which seems to make it work --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c6efcb7..fcc7dc5 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ See examples folder for code examples # Load poolset. - # zfs properties can be queried here with: zfs_prop=['prop1','prop2',...] + # zfs properties can be queried here with: zfs_props=['prop1','prop2',...] # zpool properties can be queried here with: zpool_props=['prop1','prop2',...] # Default properties: name, creation # If get_mounts=True, mountpoint and mounted are also retrieved automatically @@ -164,4 +164,4 @@ See examples folder for code examples See `test.py` for more sample code -Credits: This code is based heavily on [zfs-tools by Rudd-O](https://github.com/Rudd-O/zfs-tools). \ No newline at end of file +Credits: This code is based heavily on [zfs-tools by Rudd-O](https://github.com/Rudd-O/zfs-tools). 
From a55b18bbd87cf6f20eab470839f00ad432708815 Mon Sep 17 00:00:00 2001 From: yourfate <2371889-youRFate@users.noreply.gitlab.com> Date: Thu, 23 Oct 2025 19:55:12 +0200 Subject: [PATCH 5/8] datasets mounted at / returned a snap_path that start with //, fixed If a dataset was mounted at /, snapshots of that dataset returned a snap_path that starts with //. By using pathlib to join the paths we make this safer. --- src/zfslib/zfslib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 8d1edc4..0fc5f5f 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -741,7 +741,7 @@ def _get_snap_path(self): assert isinstance(self.parent, Dataset), \ "This function is only available for Snapshots of Datasets not Pools" self.parent.assertHaveMounts() - return f"{self.parent.mountpoint}/.zfs/snapshot/{self.name}" + return str(pathlib.Path(self.parent.mountpoint) / f".zfs/snapshot/{self.name}") snap_path = property(_get_snap_path) From b37990cf5326195511db42d77706e4b617c7a0c8 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Sat, 20 Dec 2025 21:03:45 -0500 Subject: [PATCH 6/8] Update path replacement for special characters The ZFS path escape sequence \0040 (representing a space character) is not being properly replaced with an actual space in the compiled PyInstaller binary, causing the diff command to fail because it's looking for files with literal \0040 in their names instead of spaces. 
--- src/zfslib/zfslib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 0fc5f5f..0b9eeb7 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -742,6 +742,7 @@ def _get_snap_path(self): "This function is only available for Snapshots of Datasets not Pools" self.parent.assertHaveMounts() return str(pathlib.Path(self.parent.mountpoint) / f".zfs/snapshot/{self.name}") + snap_path = property(_get_snap_path) @@ -838,9 +839,9 @@ def __init__(self, row, snap_left, snap_right, get_move:bool=False): raise Exception(f"Unexpected len: {len(row)}. Row = {row}") # Fix issue related to https://github.com/openzfs/zfs/issues/6318 - path = path.replace("\\0040", " ") + path = path.replace(r"\0040", " ") if path_new: - path_new = path_new.replace("\\0040", " ") + path_new = path_new.replace(r"\0040", " ") # Derrive Move change type if get_move and file_type == 'F' and chg_type == 'R': From 4842c13e9b7c7e7ca9cc2be2873369098d7d0223 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Sat, 20 Dec 2025 23:24:50 -0500 Subject: [PATCH 7/8] Implement octal escape decoding for ZFS paths Add function to decode octal escape sequences in ZFS paths for a more holistic fix --- src/zfslib/zfslib.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 0b9eeb7..5870b0d 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -13,6 +13,7 @@ import subprocess import os +import re import fnmatch import pathlib import inspect @@ -839,9 +840,11 @@ def __init__(self, row, snap_left, snap_right, get_move:bool=False): raise Exception(f"Unexpected len: {len(row)}. 
Row = {row}") # Fix issue related to https://github.com/openzfs/zfs/issues/6318 - path = path.replace(r"\0040", " ") + # Octal escapes in paths to get around issues with special characters + # a particular issue when compiling an app using pyinstaller + path = decode_octal_escapes(path) if path_new: - path_new = path_new.replace(r"\0040", " ") + path_new = decode_octal_escapes(path_new) # Derrive Move change type if get_move and file_type == 'F' and chg_type == 'R': @@ -1088,6 +1091,28 @@ def f(*popenargs, **kwargs): ''' END LEGACY DUCK PUNCHING ''' +# Compiled regex for matching octal escape sequences (e.g., \0040) +_OCTAL_ESCAPE_PATTERN = re.compile(r'\\(\d{3,4})') + +def decode_octal_escapes(s): + """ + Decode octal escape sequences in ZFS paths to UTF-8 characters. + ZFS uses octal escapes like \0040 for space, \0342\0200\0231 for ', etc. + See: https://github.com/openzfs/zfs/issues/6318 + """ + if not s: + return s + + def replace_octal(match): + try: + return bytes([int(match.group(1), 8)]).decode('utf-8', errors='replace') + except (ValueError, UnicodeDecodeError): + return match.group(0) # Return original if decode fails + + return _OCTAL_ESCAPE_PATTERN.sub(replace_octal, s) + + + # No operation lambda dropin or breakpoint marker def noop(*args, **kwargs): if len(args): return args[0] From 758e56a0ee8885e439b7d2153eca2a302b145a62 Mon Sep 17 00:00:00 2001 From: "Timothy C. Quinn" Date: Sun, 21 Dec 2025 00:39:24 -0500 Subject: [PATCH 8/8] Update decode_octal_escapes to support utf-8 multi-byte Fix UTF-8 decoding of ZFS octal escape sequences in file paths ZFS encodes special characters in paths using octal sequences (e.g., \0040 for space). Multi-byte UTF-8 characters like ’ (U+2019) are encoded as multiple consecutive sequences (\0342\0200\0231). Previous implementation decoded each octal sequence individually, breaking UTF-8 multi-byte characters and causing FileNotFoundError when accessing files with characters like fancy quotes, em-dashes, etc. 
Updated decode_octal_escapes() to: - Buffer consecutive octal sequences before decoding - Decode complete UTF-8 byte sequences together - Handle invalid sequences with latin-1 fallback --- src/zfslib/zfslib.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/zfslib/zfslib.py b/src/zfslib/zfslib.py index 5870b0d..9ec502f 100644 --- a/src/zfslib/zfslib.py +++ b/src/zfslib/zfslib.py @@ -1091,25 +1091,44 @@ def f(*popenargs, **kwargs): ''' END LEGACY DUCK PUNCHING ''' -# Compiled regex for matching octal escape sequences (e.g., \0040) -_OCTAL_ESCAPE_PATTERN = re.compile(r'\\(\d{3,4})') def decode_octal_escapes(s): """ - Decode octal escape sequences in ZFS paths to UTF-8 characters. - ZFS uses octal escapes like \0040 for space, \0342\0200\0231 for ', etc. - See: https://github.com/openzfs/zfs/issues/6318 + Decode ZFS octal escape sequences to UTF-8 characters. + Handles multi-byte UTF-8 sequences like \0342\0200\0231 -> ' """ if not s: return s - def replace_octal(match): + result = [] + byte_buffer = [] + i = 0 + + while i < len(s): + # Check for octal escape sequence \#### (4 digits) + if i + 4 < len(s) and s[i:i+1] == '\\' and s[i+1:i+5].isdigit(): + octal_val = int(s[i+1:i+5], 8) + byte_buffer.append(octal_val) + i += 5 + else: + # Not an octal sequence - flush byte buffer if any + if byte_buffer: + try: + result.append(bytes(byte_buffer).decode('utf-8')) + except UnicodeDecodeError: + result.append(bytes(byte_buffer).decode('latin-1', errors='replace')) + byte_buffer = [] + result.append(s[i]) + i += 1 + + # Flush remaining bytes + if byte_buffer: try: - return bytes([int(match.group(1), 8)]).decode('utf-8', errors='replace') - except (ValueError, UnicodeDecodeError): - return match.group(0) # Return original if decode fails + result.append(bytes(byte_buffer).decode('utf-8')) + except UnicodeDecodeError: + result.append(bytes(byte_buffer).decode('latin-1', errors='replace')) - return 
_OCTAL_ESCAPE_PATTERN.sub(replace_octal, s) + return ''.join(result)