From a8d211680893895299d2bb1b81a082a93f6deff9 Mon Sep 17 00:00:00 2001 From: j Date: Sun, 12 Oct 2025 20:37:39 +1300 Subject: [PATCH] feat: benchmark 1080p H.264 to 720p H.265 transcoding with HEVC encoder detection --- README.md | 20 ++++++---- transcode_bench.py | 97 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index f79cfdc..cd916f1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Video Transcoding Benchmark -A simple CLI tool to benchmark video transcoding performance on your system. Measures how many simultaneous 1080p video streams can be transcoded in real-time or better. +A simple CLI tool to benchmark video transcoding performance on your system. Measures how many simultaneous 1080p H.264 → 720p H.265 transcodes can be performed in real-time or better. ## Features @@ -68,8 +68,8 @@ python3 transcode_bench.py This will: 1. Detect available hardware acceleration -2. Generate a 30-second 1080p test video -3. Run benchmarks to find the maximum number of simultaneous streams +2. Generate a 10-second 1080p H.264 test video +3. Run benchmarks to find the maximum number of simultaneous 1080p H.264 → 720p H.265 transcodes 4. Output the result ### Command-Line Options @@ -136,14 +136,18 @@ Benchmark Score: 54 ## How It Works -1. **Detection**: Scans for available hardware encoders in your FFmpeg build -2. **Test Video**: Generates a synthetic 1080p video with test patterns -3. **Binary Search**: Uses binary search to efficiently find the maximum number of streams that can be transcoded simultaneously while maintaining real-time performance (e30 FPS) -4. **Parallel Execution**: Runs multiple FFmpeg processes in parallel to simulate concurrent transcoding workloads +1. **Detection**: Scans for available hardware encoders in your FFmpeg build and tests both H.264 decode and H.265 encode capabilities +2. **Test Video**: Generates a synthetic 1080p H.264 video with test patterns +3. 
**Realistic Transcoding**: Each stream transcodes 1080p H.264 → 720p H.265 with: + - Hardware-accelerated H.264 decode + - CPU-based scaling (1080p → 720p) + - Hardware-accelerated H.265 encode +4. **Adaptive Binary Search**: Uses adaptive binary search to efficiently find the maximum number of streams, based on single-stream performance +5. **Parallel Execution**: Runs multiple FFmpeg processes in parallel to simulate concurrent transcoding workloads ## Interpreting Results -The "Benchmark Score" represents how many 1080p video streams your system can transcode simultaneously while maintaining real-time or better performance. This is useful for: +The "Benchmark Score" represents how many 1080p H.264 → 720p H.265 transcodes your system can perform simultaneously while maintaining real-time or better performance (≥30 FPS). This is useful for: - Comparing hardware performance - Capacity planning for video processing workloads diff --git a/transcode_bench.py b/transcode_bench.py index 544b6ae..32149a4 100755 --- a/transcode_bench.py +++ b/transcode_bench.py @@ -330,12 +330,13 @@ class TestVideo: class TranscodeJob: """Represents a single transcode job.""" - def __init__(self, input_file: str, output_file: str, encoder: str, hwaccel_args: str, hw_decode: bool): + def __init__(self, input_file: str, output_file: str, encoder: str, hwaccel_args: str, hw_decode: bool, hevc_encoder: str): self.input_file = input_file self.output_file = output_file self.encoder = encoder self.hwaccel_args = hwaccel_args self.hw_decode = hw_decode + self.hevc_encoder = hevc_encoder # May be hardware or software self.process = None self.start_time = None self.end_time = None @@ -354,19 +355,40 @@ class TranscodeJob: cmd.extend(['-i', self.input_file]) - # For VA-API without HW decode, we need to upload to hardware - if 'vaapi' in self.encoder and not self.hw_decode: - cmd.extend(['-vf', 'format=nv12,hwupload']) + # Build video filter chain for realistic transcoding + vf_filters = [] - # Build 
encoding parameters
-        encode_params = ['-c:v', self.encoder]
-
-        # For VA-API, use CQP mode instead of bitrate if needed
+        # For VA-API, we need to scale and upload to hardware
         if 'vaapi' in self.encoder:
-            # Use constant quality mode (lower is better quality, 20-30 is good)
-            encode_params.extend(['-qp', '23'])
+            if self.hw_decode:
+                # HW decode: download from GPU, scale on CPU, upload back
+                # (GPU scaling not widely supported, so use CPU)
+                vf_filters.append('hwdownload')
+                vf_filters.append('format=nv12')
+
+            # Scale on CPU (works everywhere)
+            vf_filters.append('scale=1280x720')
+            vf_filters.append('format=nv12')
+            vf_filters.append('hwupload')
         else:
-            # Use bitrate mode for other encoders
+            # Other encoders: just scale
+            vf_filters.append('scale=1280x720')
+
+        if vf_filters:
+            cmd.extend(['-vf', ','.join(vf_filters)])
+
+        # Build encoding parameters - use HEVC/H.265 for output
+        encode_params = ['-c:v', self.hevc_encoder]
+
+        # Configure encoding parameters based on encoder type
+        if 'vaapi' in self.hevc_encoder:
+            # VA-API: use CQP mode
+            encode_params.extend(['-qp', '23'])
+        elif self.hevc_encoder == 'libx265':
+            # Software HEVC: use CRF mode
+            encode_params.extend(['-preset', 'medium', '-crf', '23'])
+        else:
+            # Other hardware encoders: use bitrate
             encode_params.extend(['-b:v', '4M'])
 
         cmd.extend(encode_params)
@@ -467,6 +489,51 @@ class Benchmark:
         self.accel_name = accel_name
         self.hw_decode = hw_decode
 
+        # Determine HEVC encoder (hardware or software fallback)
+        self.hevc_encoder = self._detect_hevc_encoder()
+
+    def _detect_hevc_encoder(self) -> str:
+        """Return a usable hardware HEVC encoder name, or 'libx265' as fallback.
+
+        The candidate name is derived from the H.264 encoder name and probed by
+        encoding a single 1080p frame scaled to 720p.
+        """
+        hw_hevc_encoder = self.encoder.replace('h264', 'hevc').replace('264', '265')
+        yuv_file = None
+        try:
+            with tempfile.NamedTemporaryFile(suffix='.yuv', delete=False) as f:
+                yuv_file = f.name
+            # Generate 1 raw frame; check=True aborts the probe if this fails
+            cmd1 = [
+                'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
+                '-f', 'lavfi', '-i', 'testsrc2=size=1920x1080:duration=0.1:rate=1',
+                '-frames:v', '1', '-pix_fmt', 'nv12', yuv_file
+            ]
+            subprocess.run(cmd1, capture_output=True, timeout=5, check=True)
+            # Try to encode with hardware HEVC
+            cmd2 = ['ffmpeg', '-y', '-hide_banner', '-loglevel', 'error']
+            if self.hwaccel_args:
+                cmd2.extend(self.hwaccel_args.split())
+            cmd2.extend([
+                '-f', 'rawvideo', '-pix_fmt', 'nv12', '-s:v', '1920x1080', '-i', yuv_file,
+                '-vf', 'scale=1280x720,format=nv12,hwupload' if 'vaapi' in hw_hevc_encoder else 'scale=1280x720',
+                '-frames:v', '1', '-c:v', hw_hevc_encoder, '-qp', '23',
+                '-f', 'null', '-'
+            ])
+            if subprocess.run(cmd2, capture_output=True, timeout=5).returncode == 0:
+                print(f" Using hardware HEVC encoder: {hw_hevc_encoder}")
+                return hw_hevc_encoder
+        except (OSError, subprocess.SubprocessError):
+            pass  # ffmpeg missing, timeout or failed probe: use the software encoder
+        finally:  # remove the temp frame on every path, including the early return
+            if yuv_file is not None:
+                try:
+                    os.unlink(yuv_file)
+                except OSError:
+                    pass  # best-effort cleanup
+        print(" Hardware HEVC not available, falling back to software (libx265)")
+        return 'libx265'
+
     def run_parallel_transcodes(self, num_streams: int, timeout: int = 60) -> Tuple[bool, float]:
         """
         Run multiple parallel transcode jobs.
@@ -478,7 +545,7 @@ class Benchmark:
         # Create jobs
         for i in range(num_streams):
             output_file = f'/dev/null' if os.name != 'nt' else 'NUL'
-            job = TranscodeJob(self.test_video, output_file, self.encoder, self.hwaccel_args, self.hw_decode)
+            job = TranscodeJob(self.test_video, output_file, self.encoder, self.hwaccel_args, self.hw_decode, self.hevc_encoder)
             jobs.append(job)
 
         # Start all jobs in parallel
@@ -522,7 +589,7 @@ class Benchmark:
         A stream is considered "real-time" if it achieves >= min_fps.
""" print(f"\nBenchmarking with {self.accel_name}...") - print("Finding maximum simultaneous 1080p streams at real-time or better...\n") + print("Finding maximum simultaneous 1080p H.264 → 720p H.265 transcode streams at real-time or better...\n") # First verify that 1 stream works print(f"Testing 1 simultaneous stream...", end=' ', flush=True) @@ -641,7 +708,9 @@ def main(): print("BENCHMARK RESULTS") print("=" * 60) print(f"Hardware Acceleration: {accel_name}") - print(f"Maximum Simultaneous 1080p Streams: {max_streams}") + print(f"HEVC Encoder: {benchmark.hevc_encoder}") + print(f"Transcode Task: 1080p H.264 → 720p H.265") + print(f"Maximum Simultaneous Streams: {max_streams}") print(f"(at {args.min_fps} FPS or better)") print("=" * 60)