collect: compute size before creating tarball, reject if not enough fs space#42
collect: compute size before creating tarball, reject if not enough fs space#42bloom1 wants to merge 7 commits into
Conversation
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 19s |
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 21s |
| with tempfile.TemporaryDirectory(prefix='wazo-debug-') as temp_directory: | ||
| logger.info('Created temporary directory: "%s"', temp_directory) | ||
|
|
||
| check_free_space(temp_directory) |
There was a problem hiding this comment.
Should check space for both temp_directory and parsed_args.output_file since both can live on different file system.
Since parsed_args.output_file is a compressed file, the required space should be less than the temp_directory (possibly use a standard or worst-case compression ratio, e.g: 80%)
| if free_bytes < needed_bytes: | ||
| raise RuntimeError( | ||
| f'Not enough free space on filesystem hosting "{target_directory}": ' | ||
| f'{free_bytes} bytes available, but {needed_bytes} bytes are required ' |
There was a problem hiding this comment.
Proposition: rewrite the bytes into a meaningful value, e.g: 178291293 bytes available, but 629145600 bytes are required is less readable than 170.03 MiB available but are 600 MiB required
| f'{free_bytes} bytes available, but {needed_bytes} bytes are required ' | |
| f'{_format_bytes(free_bytes)} bytes available, but {_format_bytes(needed_bytes)} bytes are required ' |
where _format_bytes is a small helper (claude generated):
def _format_bytes(n: int) -> str:
size = float(n)
for unit in ('B', 'KiB', 'MiB', 'GiB'):
if abs(size) < 1024:
return f'{size:.1f} {unit}'
size /= 1024
return f'{size:.1f} TiB'| def _path_size(path): | ||
| if os.path.islink(path): | ||
| return 0 | ||
| if os.path.isfile(path): | ||
| try: | ||
| return os.path.getsize(path) | ||
| except OSError: | ||
| return 0 | ||
| if not os.path.isdir(path): | ||
| return 0 | ||
|
|
||
| is_asterisk_log_dir = path == ASTERISK_LOG_DIR | ||
| total = 0 | ||
| for root, dirs, files in os.walk(path): | ||
| if is_asterisk_log_dir and root != path: | ||
| # rsync filter excludes asterisk subdirectories (only keeps full* at top level) | ||
| dirs[:] = [] | ||
| continue | ||
| for name in files: | ||
| if is_asterisk_log_dir and not fnmatch.fnmatch(name, 'full*'): | ||
| continue | ||
| full = os.path.join(root, name) | ||
| if os.path.islink(full): | ||
| continue | ||
| try: | ||
| total += os.path.getsize(full) | ||
| except OSError: | ||
| continue | ||
| return total |
There was a problem hiding this comment.
os.path.getsize returns the logical size. Since we want to calculate the actual space needed, it would be safer to calculate the block used instead. We can use the os.lstat syscall instead
Would need to import stat
| def _path_size(path): | |
| if os.path.islink(path): | |
| return 0 | |
| if os.path.isfile(path): | |
| try: | |
| return os.path.getsize(path) | |
| except OSError: | |
| return 0 | |
| if not os.path.isdir(path): | |
| return 0 | |
| is_asterisk_log_dir = path == ASTERISK_LOG_DIR | |
| total = 0 | |
| for root, dirs, files in os.walk(path): | |
| if is_asterisk_log_dir and root != path: | |
| # rsync filter excludes asterisk subdirectories (only keeps full* at top level) | |
| dirs[:] = [] | |
| continue | |
| for name in files: | |
| if is_asterisk_log_dir and not fnmatch.fnmatch(name, 'full*'): | |
| continue | |
| full = os.path.join(root, name) | |
| if os.path.islink(full): | |
| continue | |
| try: | |
| total += os.path.getsize(full) | |
| except OSError: | |
| continue | |
| return total | |
| def _path_size(path: str) -> int: | |
| try: | |
| st = os.lstat(path) | |
| except OSError: | |
| return 0 | |
| if stat.S_ISLNK(st.st_mode): | |
| return 0 | |
| if stat.S_ISREG(st.st_mode): | |
| return st.st_blocks * 512 # actual block size used (512 is POSIX standard, not FS block size) | |
| if not stat.S_ISDIR(st.st_mode): | |
| return 0 | |
| is_asterisk_log_dir = path == ASTERISK_LOG_DIR | |
| total = 0 | |
| for root, dirs, files in os.walk(path, followlinks=False): | |
| if is_asterisk_log_dir and root != path: | |
| # rsync filter excludes asterisk subdirectories (only keeps full* at top level) | |
| dirs[:] = [] | |
| continue | |
| for name in files: | |
| if is_asterisk_log_dir and not fnmatch.fnmatch(name, 'full*'): | |
| continue | |
| try: | |
| entry_st = os.lstat(os.path.join(root, name)) | |
| except OSError: | |
| continue | |
| if stat.S_ISLNK(entry_st.st_mode): | |
| continue | |
| total += entry_st.st_blocks * 512 | |
| return total |
…_file Why: they could be on different filesystems
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 24s |
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 22s |
Why: the compression method depends on the filename given by the user. If the extension is only .tar, there is _no_ compression. Assuming double the size is a safe alternative.
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 23s |
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 23s |
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
❌ Bugbot Autofix is OFF. To automatically fix reported issues with cloud agents, enable autofix in the Cursor dashboard.
Reviewed by Cursor Bugbot for commit 3e8d937. Configure here.
Why: remove multiple calls to shutil and remove noisy info logger
3e8d937 to
903f3e1
Compare
|
Build succeeded. ✔️ tox-linters SUCCESS in 2m 20s |

Note
Low Risk
Operational guardrails on a support/debug CLI; failures are explicit before heavy I/O, with no auth or production traffic impact.
Overview
wazo-debug collectnow estimates how much data will be gathered and fails early with a clearRuntimeErrorif disk space is insufficient, instead of filling/tmpor the output path mid-run.Before
gather_facts,check_free_spacecompares free space on the temp dir and the output file’s filesystem (viashutil.disk_usageandst_dev). It assumes a worst-case tarball as large as the gathered tree (matchingtar cafbehavior). When temp and output share one filesystem, it requires room for both the staging copy and the archive (required_bytes * 2), plus a 500 MiB buffer. Error messages use human-readable sizes from_format_bytes.compute_gathering_sizewalks the same log/config/engine paths as rsync, centralized in_log_source_paths,_config_source_paths, and_engine_info_source_paths. Size usesst_blocks * 512(allocated blocks) and mirrors the Asterisk rsync rules: only top-levelfull*files under/var/log/asterisk, no subdirs. Gather commands were updated to use these helpers so estimates match what is actually copied.Reviewed by Cursor Bugbot for commit 903f3e1. Bugbot is set up for automated code reviews on this repo. Configure here.