// StreamAnalyzer.java
package SdkService.StreamService;

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

/**
 * StreamAnalyzer - logs a detailed, NALU-by-NALU analysis of a raw stream chunk received in a callback.
 *
 * Usage:
 *   byte[] bytes = ...; // obtained from the callback
 *   StreamAnalyzer.analyze(bytes, true); // pass true to also write every NALU to a file for verification
 *
 * Goals:
 *   - recognise Annex-B start codes (00 00 01 / 00 00 00 01)
 *   - list the position, type and length of every NALU and parse H.264 SPS in depth
 *   - strip emulation prevention bytes to obtain the RBSP (needed for bit-level decoding)
 *   - print clear "because ... so ..." style logs that help beginners follow the reasoning
 */
public class StreamAnalyzer {

    /** Start code written in front of a dumped NAL when the input carried none. */
    private static final byte[] LONG_START_CODE = {0, 0, 0, 1};

    /**
     * Scans {@code bytes} for Annex-B start codes and logs an analysis of every NAL unit to stdout.
     * If no start code is present the whole buffer is analysed as a single NAL unit.
     *
     * @param bytes     raw stream chunk; a null or empty chunk is reported and ignored
     * @param dumpFiles when true, each NAL unit is also written to a file in the working directory
     */
    public static void analyze(byte[] bytes, boolean dumpFiles) {
        System.out.println("=== StreamAnalyzer START ===");
        if (bytes == null || bytes.length == 0) {
            System.out.println("Because we received an empty chunk, so there is nothing to analyze.");
            System.out.println("=== StreamAnalyzer END ===\n");
            return;
        }
        System.out.printf("Because we received a chunk of %d bytes, so we will scan for NAL start codes (00 00 01 or 00 00 00 01).\n", bytes.length);

        List<Integer> starts = findStartCodes(bytes);
        if (starts.isEmpty()) {
            // Some SDKs deliver bare length+payload data without start codes (e.g. RTP payloads); say so.
            System.out.println("No 0x000001 / 0x00000001 start codes found. Because some streams use length-prefixed NALs (typical in MP4/RTSP), so you might need a different parser.");
            // Still try treating the whole buffer as one NAL; there is no start code to skip.
            analyzeSingleNal(bytes, 0, bytes.length, 0, dumpFiles);
        } else {
            for (int i = 0; i < starts.size(); i++) {
                int start = starts.get(i);
                int end = (i + 1 < starts.size()) ? starts.get(i + 1) : bytes.length;
                analyzeSingleNal(bytes, start, end, startCodeLengthAt(bytes, start), dumpFiles);
            }
        }
        System.out.println("=== StreamAnalyzer END ===\n");
    }

    /** Returns 3 or 4 for a start code position previously reported by {@link #findStartCodes}. */
    private static int startCodeLengthAt(byte[] bytes, int start) {
        // findStartCodes guarantees start + 2 is in range; 00 00 01 is the short form, otherwise 00 00 00 01.
        return (bytes[start + 2] == 1) ? 3 : 4;
    }

    /**
     * Logs one NAL unit occupying {@code bytes[start, end)}.
     *
     * @param startCodeLen number of start-code bytes at {@code start} (0 when the buffer had none)
     */
    private static void analyzeSingleNal(byte[] bytes, int start, int end, int startCodeLen, boolean dumpFiles) {
        if (start + startCodeLen >= end) {
            System.out.printf("Found start code at %d but no payload. (startCodeLen=%d)\n", start, startCodeLen);
            return;
        }
        int nalHeaderIndex = start + startCodeLen;
        int nalLen = end - start;

        System.out.printf("\nFound NALU at pos=%d len=%d (includes startcode %d bytes). So we'll parse header and RBSP.\n",
                start, nalLen, startCodeLen);

        // Quick glance: up to 32 bytes as hex (start code included) and the first 32 bits after the start code.
        System.out.printf("  HEX (first up to 32 bytes): %s\n", bytesToHex(bytes, start, Math.min(32, nalLen)));
        System.out.printf("  BIT (first up to 32 bits): %s\n", bytesToBits(bytes, nalHeaderIndex, Math.min(4, end - nalHeaderIndex)));

        // Detect H.264 vs H.265 with header-layout heuristics:
        // H.264: 1-byte header, nal_unit_type = byte & 0x1F (1..12 are the common types, 7 = SPS)
        // H.265: nal_unit_type sits in bits 1..6 of the first byte -> (byte & 0x7E) >> 1
        int firstByte = bytes[nalHeaderIndex] & 0xFF;
        int nalTypeH264 = firstByte & 0x1F;
        int nalTypeH265 = (firstByte & 0x7E) >> 1;

        boolean maybeH265 = nalTypeH265 >= 32 || nalTypeH264 == 0; // heuristic
        boolean maybeH264 = nalTypeH264 != 0 && nalTypeH264 <= 12;

        if (maybeH264 && !maybeH265) {
            System.out.printf("  Heuristic: looks like H.264 (because nal_unit_type = %d fits AVC range 1..12). So we'll parse AVC NAL header.\n", nalTypeH264);
            parseH264Nal(bytes, start, nalHeaderIndex, end, dumpFiles);
        } else if (maybeH265 && !maybeH264) {
            System.out.printf("  Heuristic: looks like H.265/HEVC (because nal_unit_type = %d via HEVC formula). So we'll parse HEVC header minimally.\n", nalTypeH265);
            parseH265Nal(bytes, start, nalHeaderIndex, end, dumpFiles);
        } else {
            // Ambiguous: print both interpretations and suggest next steps.
            System.out.printf("  Ambiguous header: H.264 nal_type=%d, H.265 nal_type=%d. Because some bytes can overlap, so choose based on your camera settings or look for SPS/PPS patterns (0x67/0x68 for AVC, 0x42/0x44/0x40 for HEVC).\n",
                    nalTypeH264, nalTypeH265);
            // Attempt the H.264 parse, then show the HEVC reading as a hint.
            parseH264Nal(bytes, start, nalHeaderIndex, end, dumpFiles);
            System.out.printf("  Also showing HEVC interpretation: nal_type=%d\n", nalTypeH265);
        }
    }

    /**
     * Detailed H.264 handling: logs the header fields, optionally dumps the NAL, and bit-parses an SPS.
     *
     * @param nalStart       index of the first start-code byte (equals {@code nalHeaderIndex} if none)
     * @param nalHeaderIndex index of the 1-byte NAL header
     * @param end            exclusive end of the NAL unit
     */
    private static void parseH264Nal(byte[] bytes, int nalStart, int nalHeaderIndex, int end, boolean dumpFiles) {
        int header = bytes[nalHeaderIndex] & 0xFF;
        int nalType = header & 0x1F;
        int nalRefIdc = (header & 0x60) >> 5;
        System.out.printf("  [H.264] nal_ref_idc=%d, nal_unit_type=%d -> %s\n",
                nalRefIdc, nalType, h264NalName(nalType));

        int payloadStart = nalHeaderIndex + 1;
        int payloadLen = end - payloadStart;
        byte[] rbsp = removeEmulationPreventionBytes(bytes, payloadStart, end);

        System.out.printf("  Because emulation prevention bytes (0x03 after 0x0000) may exist, so we removed them. originalPayload=%d rbsp=%d bytes\n",
                payloadLen, rbsp.length);

        if (dumpFiles) {
            dumpNal(bytes, nalStart, nalHeaderIndex, end,
                    String.format("nal_h264_type_%d_pos_%d.bin", nalType, nalHeaderIndex));
        }

        switch (nalType) {
            case 7 -> { // SPS
                System.out.println("  Because this NAL is SPS, so we'll bit-parse it to extract profile/level/width/height.");
                try {
                    H264SPS sps = parseH264SPS(rbsp);
                    System.out.printf("  -> SPS parsed: profile_idc=%d(%s) level_idc=%d, seq_parameter_set_id=%d\n",
                            sps.profile_idc, sps.profileStr(), sps.level_idc, sps.spsId);
                    System.out.printf("  -> Derived resolution: width=%d height=%d (because pic_width_in_mbs_minus1=%d, pic_height_in_map_units_minus1=%d, frame_mbs_only_flag=%d, crop_left=%d crop_right=%d crop_top=%d crop_bottom=%d)\n",
                            sps.width, sps.height, sps.pic_width_in_mbs_minus1, sps.pic_height_in_map_units_minus1, sps.frame_mbs_only_flag,
                            sps.crop_left, sps.crop_right, sps.crop_top, sps.crop_bottom);
                } catch (IOException | IllegalArgumentException ex) {
                    // A truncated or malformed SPS is expected input for a diagnostic tool: report and continue.
                    System.out.printf("  Failed to parse SPS: %s\n", ex.getMessage());
                    System.out.println("  If parsing fails, you can still inspect RBSP bits printed below to manually decode fields using H.264 spec.");
                    System.out.printf("  RBSP hex (first 64 bytes): %s\n", bytesToHex(rbsp, 0, Math.min(rbsp.length, 64)));
                    System.out.printf("  RBSP bits (first 128 bits): %s\n", bytesToBits(rbsp, 0, Math.min(16, rbsp.length)));
                }
            }
            case 8 -> { // PPS
                System.out.println("  Because this NAL is PPS, it contains picture-level encoding params - often small (we don't parse in detail but print hex).");
                System.out.printf("  PPS RBSP hex: %s\n", bytesToHex(rbsp, 0, Math.min(rbsp.length, 64)));
            }
            case 5 -> { // IDR
                System.out.println("  This is IDR (keyframe). Because IDR contains all data to decode a frame independently, so the decoder can start here.");
                System.out.printf("  IDR RBSP first bytes hex: %s\n", bytesToHex(rbsp, 0, Math.min(rbsp.length, 64)));
            }
            default -> {
                System.out.printf("  Other NAL type (%d). We'll print RBSP hex & bits so you can inspect.\n", nalType);
                System.out.printf("  RBSP hex (first 64): %s\n", bytesToHex(rbsp, 0, Math.min(rbsp.length, 64)));
                System.out.printf("  RBSP bits (first 128): %s\n", bytesToBits(rbsp, 0, Math.min(16, rbsp.length)));
            }
        }
    }

    /**
     * Minimal H.265 handling: names the NAL type, optionally dumps the NAL, and prints RBSP hex/bits.
     * Deep parsing of the HEVC SPS is lengthy and not implemented here.
     */
    private static void parseH265Nal(byte[] bytes, int nalStart, int nalHeaderIndex, int end, boolean dumpFiles) {
        int firstByte = bytes[nalHeaderIndex] & 0xFF;
        int nalType = (firstByte & 0x7E) >> 1; // bits 1..6
        System.out.printf("  [HEVC] nal_unit_type=%d -> %s\n", nalType, h265NalName(nalType));

        // The HEVC NAL header is 2 bytes; clamp so a truncated NAL yields an empty payload
        // instead of re-reading the second header byte as data.
        int payloadStart = Math.min(nalHeaderIndex + 2, end);
        byte[] rbsp = removeEmulationPreventionBytes(bytes, payloadStart, end);

        if (dumpFiles) {
            dumpNal(bytes, nalStart, nalHeaderIndex, end,
                    String.format("nal_h265_type_%d_pos_%d.bin", nalType, nalHeaderIndex));
        }

        System.out.printf("  RBSP hex (first 64): %s\n", bytesToHex(rbsp, 0, Math.min(rbsp.length, 64)));
        System.out.printf("  RBSP bits (first 128): %s\n", bytesToBits(rbsp, 0, Math.min(16, rbsp.length)));
        System.out.println("  Note: HEVC SPS parsing is more complex (profile_tier_level, many flags). If you need width/height extraction, I can add a HEVC SPS parser; for now, please supply a sample SPS and I will parse it.");
    }

    /**
     * Writes the NAL unit {@code bytes[nalStart, end)} to {@code fileName}. When the input carried no
     * start code ({@code nalStart == nalHeaderIndex}) a 4-byte start code is prepended to the file.
     * I/O failures are logged, not thrown.
     */
    private static void dumpNal(byte[] bytes, int nalStart, int nalHeaderIndex, int end, String fileName) {
        try (FileOutputStream fos = new FileOutputStream(fileName)) {
            if (nalStart == nalHeaderIndex) {
                fos.write(LONG_START_CODE);
            }
            fos.write(bytes, nalStart, end - nalStart);
            System.out.printf("  Wrote NAL to file '%s' for ffplay/ffprobe validation. Because real players need a file to inspect.\n", fileName);
        } catch (IOException e) {
            System.out.printf("  Failed to write NAL file: %s\n", e.getMessage());
        }
    }

    /**
     * Finds the positions of all 00 00 01 and 00 00 00 01 start codes.
     * Each start code is reported exactly once, at the index of its first byte.
     */
    private static List<Integer> findStartCodes(byte[] bytes) {
        List<Integer> positions = new ArrayList<>();
        int i = 0;
        while (i + 2 < bytes.length) {
            if (bytes[i] == 0 && bytes[i + 1] == 0) {
                if (bytes[i + 2] == 1) {
                    positions.add(i);
                    i += 3;
                    continue;
                }
                if (i + 3 < bytes.length && bytes[i + 2] == 0 && bytes[i + 3] == 1) {
                    positions.add(i);
                    // Jump past the whole code so its trailing 00 00 01 is not reported a second time.
                    i += 4;
                    continue;
                }
            }
            i++;
        }
        return positions;
    }

    /**
     * Copies {@code input[from, to)} while dropping every emulation_prevention_three_byte
     * (a 0x03 that directly follows two or more 0x00 bytes).
     */
    private static byte[] removeEmulationPreventionBytes(byte[] input, int from, int to) {
        ByteBuffer out = ByteBuffer.allocate(Math.max(0, to - from));
        int zeroRun = 0;
        for (int i = from; i < to; i++) {
            byte b = input[i];
            if (zeroRun >= 2 && b == 0x03) {
                zeroRun = 0;
                continue;
            }
            out.put(b);
            zeroRun = (b == 0) ? zeroRun + 1 : 0;
        }
        byte[] rbsp = new byte[out.position()];
        out.flip();
        out.get(rbsp);
        return rbsp;
    }

    /** Formats up to {@code len} bytes starting at {@code offset} as upper-case hex, each followed by a space. */
    private static String bytesToHex(byte[] bytes, int offset, int len) {
        StringBuilder sb = new StringBuilder();
        int limit = Math.min(offset + len, bytes.length);
        for (int i = offset; i < limit; i++) {
            sb.append(String.format("%02X ", bytes[i]));
        }
        return sb.toString();
    }

    /** Formats up to {@code lenBytes} bytes starting at {@code offset} as space-separated 8-bit groups. */
    private static String bytesToBits(byte[] bytes, int offset, int lenBytes) {
        StringBuilder sb = new StringBuilder();
        for (int i = offset; i < offset + lenBytes && i < bytes.length; i++) {
            sb.append(String.format("%8s", Integer.toBinaryString(bytes[i] & 0xFF)).replace(' ', '0'));
            if (i < offset + lenBytes - 1) {
                sb.append(' ');
            }
        }
        return sb.toString();
    }

    /** Human-readable name for an H.264 nal_unit_type. */
    private static String h264NalName(int type) {
        return switch (type) {
            case 1 -> "Non-IDR slice (P/B)";
            case 2 -> "Slice data partition A";
            case 3 -> "Slice data partition B";
            case 4 -> "Slice data partition C";
            case 5 -> "IDR (keyframe)";
            case 6 -> "SEI";
            case 7 -> "SPS (Sequence Parameter Set)";
            case 8 -> "PPS (Picture Parameter Set)";
            case 9 -> "AUD (Access Unit Delimiter)";
            case 10 -> "End of sequence";
            case 11 -> "End of stream";
            case 12 -> "Filler data";
            default -> "Other/Unknown";
        };
    }

    /** Human-readable name for an H.265 nal_unit_type (not exhaustive; common types only). */
    private static String h265NalName(int type) {
        return switch (type) {
            case 0 -> "TRAIL_N";
            case 1 -> "TRAIL_R";
            case 19 -> "IDR_W_RADL";
            case 20 -> "IDR_N_LP";
            case 21 -> "CRA_NUT";
            case 32 -> "VPS";
            case 33 -> "SPS";
            case 34 -> "PPS";
            case 35 -> "AUD";
            case 39 -> "SEI (prefix)";
            case 40 -> "SEI (suffix)";
            default -> (type >= 0 && type <= 31) ? "Other/Reserved" : "Unknown/Extended";
        };
    }

    // --------------------------
    // H.264 SPS Parser (basic, covers most common cases)
    // --------------------------

    /** Fields extracted from an H.264 SPS; names mirror the syntax elements of the specification. */
    private static final class H264SPS {
        int profile_idc;
        int constraint_set_flags;
        int level_idc;
        int spsId;
        int chroma_format_idc;
        int pic_width_in_mbs_minus1;
        int pic_height_in_map_units_minus1;
        int frame_mbs_only_flag;
        int width;
        int height;
        int crop_left, crop_right, crop_top, crop_bottom;

        String profileStr() {
            return switch (profile_idc) {
                case 66 -> "Baseline";
                case 77 -> "Main";
                case 88 -> "Extended";
                case 100 -> "High";
                case 110 -> "High10";
                case 122 -> "High422";
                case 244 -> "High444";
                default -> "Unknown";
            };
        }
    }

    /** MSB-first bit reader over an RBSP byte array. */
    private static final class BitReader {
        private final byte[] data;
        private int bytePos = 0;
        private int bitPos = 0; // 0..7, counted from the most significant bit

        BitReader(byte[] data) {
            this.data = data;
        }

        /**
         * Reads {@code n} bits (1..32) and returns them as an int.
         *
         * @throws IOException if the data runs out
         */
        int readBits(int n) throws IOException {
            if (n <= 0 || n > 32) {
                throw new IllegalArgumentException("readBits supports 1..32");
            }
            int val = 0;
            for (int i = 0; i < n; i++) {
                if (bytePos >= data.length) {
                    throw new IOException("Out of data");
                }
                int currentByte = data[bytePos] & 0xFF;
                int bit = (currentByte >> (7 - bitPos)) & 0x01;
                val = (val << 1) | bit;
                bitPos++;
                if (bitPos == 8) {
                    bitPos = 0;
                    bytePos++;
                }
            }
            return val;
        }

        /** Reads a single bit. */
        int readBit() throws IOException {
            return readBits(1);
        }

        /** Reads an unsigned Exp-Golomb code, ue(v). */
        int readUE() throws IOException {
            int zeros = 0;
            while (readBit() == 0) {
                zeros++;
                // More than 30 leading zeros cannot be represented in an int; treat as corrupt data.
                if (zeros > 30) {
                    throw new IOException("Exp-Golomb code too long");
                }
            }
            int info = (zeros > 0) ? readBits(zeros) : 0;
            return (1 << zeros) - 1 + info;
        }

        /** Reads a signed Exp-Golomb code, se(v): odd code numbers map to positive values. */
        int readSE() throws IOException {
            int ue = readUE();
            return ((ue & 1) == 1) ? ((ue + 1) / 2) : (-(ue / 2));
        }
    }

    /** Returns true for profiles whose SPS carries chroma_format_idc, bit depths and scaling matrices. */
    private static boolean hasHighProfileFields(int profileIdc) {
        return switch (profileIdc) {
            case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135 -> true;
            default -> false;
        };
    }

    /** Consumes one scaling_list() of {@code size} entries without keeping its values. */
    private static void skipScalingList(BitReader br, int size) throws IOException {
        int lastScale = 8;
        int nextScale = 8;
        for (int j = 0; j < size; j++) {
            if (nextScale != 0) {
                int deltaScale = br.readSE();
                nextScale = Math.floorMod(lastScale + deltaScale + 256, 256);
            }
            lastScale = (nextScale == 0) ? lastScale : nextScale;
        }
    }

    /**
     * Parses an H.264 SPS RBSP (NAL header already removed) up to the frame-cropping fields and
     * derives the cropped picture size. VUI parameters are not read.
     *
     * @throws IOException if the RBSP is truncated or contains an invalid value
     */
    private static H264SPS parseH264SPS(byte[] rbsp) throws IOException {
        BitReader br = new BitReader(rbsp);
        H264SPS sps = new H264SPS();
        sps.profile_idc = br.readBits(8);
        sps.constraint_set_flags = br.readBits(8); // six flags plus two reserved zero bits, kept packed
        sps.level_idc = br.readBits(8);
        sps.spsId = br.readUE();

        // chroma_format_idc is inferred to be 1 (4:2:0) when the profile does not signal it.
        int chromaFormatIdc = 1;
        int separateColourPlaneFlag = 0;
        if (hasHighProfileFields(sps.profile_idc)) {
            chromaFormatIdc = br.readUE();
            if (chromaFormatIdc > 3) {
                throw new IOException("Invalid chroma_format_idc " + chromaFormatIdc);
            }
            if (chromaFormatIdc == 3) {
                separateColourPlaneFlag = br.readBit();
            }
            br.readUE();  // bit_depth_luma_minus8
            br.readUE();  // bit_depth_chroma_minus8
            br.readBit(); // qpprime_y_zero_transform_bypass_flag
            if (br.readBit() == 1) { // seq_scaling_matrix_present_flag
                // The lists must be consumed, otherwise every following field is read from the wrong bit.
                int listCount = (chromaFormatIdc != 3) ? 8 : 12;
                for (int i = 0; i < listCount; i++) {
                    if (br.readBit() == 1) { // seq_scaling_list_present_flag[i]
                        skipScalingList(br, i < 6 ? 16 : 64);
                    }
                }
            }
        }
        sps.chroma_format_idc = chromaFormatIdc;

        br.readUE(); // log2_max_frame_num_minus4
        int picOrderCntType = br.readUE();
        if (picOrderCntType == 0) {
            br.readUE(); // log2_max_pic_order_cnt_lsb_minus4
        } else if (picOrderCntType == 1) {
            br.readBit(); // delta_pic_order_always_zero_flag
            br.readSE();  // offset_for_non_ref_pic
            br.readSE();  // offset_for_top_to_bottom_field
            int numRefFramesInCycle = br.readUE();
            for (int i = 0; i < numRefFramesInCycle; i++) {
                br.readSE(); // offset_for_ref_frame[i]
            }
        }

        br.readUE();  // max_num_ref_frames
        br.readBit(); // gaps_in_frame_num_value_allowed_flag
        sps.pic_width_in_mbs_minus1 = br.readUE();
        sps.pic_height_in_map_units_minus1 = br.readUE();
        sps.frame_mbs_only_flag = br.readBit();
        if (sps.frame_mbs_only_flag == 0) {
            br.readBit(); // mb_adaptive_frame_field_flag
        }
        br.readBit(); // direct_8x8_inference_flag
        if (br.readBit() == 1) { // frame_cropping_flag
            sps.crop_left = br.readUE();
            sps.crop_right = br.readUE();
            sps.crop_top = br.readUE();
            sps.crop_bottom = br.readUE();
        }

        // Uncropped size: macroblocks are 16x16; field-coded streams store half the frame height.
        int frameHeightFactor = 2 - sps.frame_mbs_only_flag;
        int width = (sps.pic_width_in_mbs_minus1 + 1) * 16;
        int height = frameHeightFactor * (sps.pic_height_in_map_units_minus1 + 1) * 16;

        // Crop offsets are expressed in chroma-dependent units:
        // 4:2:0 -> SubWidthC=2, SubHeightC=2; 4:2:2 -> 2,1; 4:4:4 or monochrome/separate planes -> 1,1.
        int chromaArrayType = (separateColourPlaneFlag == 1) ? 0 : chromaFormatIdc;
        int subWidthC = (chromaArrayType == 1 || chromaArrayType == 2) ? 2 : 1;
        int subHeightC = (chromaArrayType == 1) ? 2 : 1;
        int cropUnitX = subWidthC;
        int cropUnitY = subHeightC * frameHeightFactor;

        sps.width = width - (sps.crop_left + sps.crop_right) * cropUnitX;
        sps.height = height - (sps.crop_top + sps.crop_bottom) * cropUnitY;
        return sps;
    }
}