{ "schema_version": "2", "repo": "FluidInference/pocket-tts-coreml", "repo_url": "https://huggingface.co/FluidInference/pocket-tts-coreml", "generated": "2026-04-26T05:04:10+00:00", "design": "Per-submodel quantization selection. The four PocketTTS submodels (cond_step, flowlm_step, flow_decoder, mimi_decoder) are independently swappable between fp16 and int8 \u2014 the int8 .mlpackage has the exact same input/output signature as the fp16 one, so a user can mix-and-match any subset (e.g. flow_decoder=fp16, the other three=int8) by loading different paths into the same PocketTTS pipeline.", "quantization": { "scheme": "CoreML weight-only int8", "granularity": "per-channel symmetric", "weight_threshold": 100000, "activation_precision": "fp16", "tool": "coremltools.optimize.coreml.linear_quantize_weights", "notes": "Only body weights with \u2265100K elements are quantized; small head/tail linears stay fp16. Activations are NOT quantized (W8A16 weight-only, not W8A8). Audio quality A/B was run on English 6L only \u2014 non-English not yet A/B'd." }, "ab_test": { "language": "english", "layer_count": "6L", "prompt": "The quick brown fox jumps over the lazy dog. 
Pack my box with five dozen liquor jugs.", "voice": "alba", "seed": 42, "metrics": [ "Pearson r (waveform-level correlation vs fp16 baseline)", "Speaker similarity (256-dim embedding cosine; 0.65 = identity threshold)" ] }, "per_submodel": { "cond_step": { "audio_quality_int8_only": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" }, "swap_independent": true }, "flowlm_step": { "audio_quality_int8_only": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" }, "swap_independent": true }, "flow_decoder": { "audio_quality_int8_only": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" }, "swap_independent": true }, "mimi_decoder": { "audio_quality_int8_only": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" }, "swap_independent": true } }, "selection_profiles": { "all_fp16_baseline": { "description": "Reference; everything fp16. No int8 used.", "size_english_mlmodelc": "\u2248601 MiB (existing fp16 ship)", "models": { "cond_step": "fp16", "flowlm_step": "fp16", "flow_decoder": "fp16", "mimi_decoder": "fp16" }, "expected_quality": "reference" }, "safe_int8": { "description": "Quantize only the three transparent/safe submodels. Recommended for production.", "models": { "cond_step": "int8", "flowlm_step": "int8", "flow_decoder": "fp16", "mimi_decoder": "int8" }, "english_mlmodelc_size": "\u2248147 MiB for the three int8 submodels only; the fp16 flow_decoder is not included in this figure (vs 601 MiB fp16; -75%)", "expected_quality": "speaker similarity \u22650.98 vs fp16" }, "aggressive_int8": { "description": "All four int8. 
Max compression but flow_decoder introduces audible drift.", "models": { "cond_step": "int8", "flowlm_step": "int8", "flow_decoder": "int8", "mimi_decoder": "int8" }, "english_mlmodelc_size": "\u2248156 MiB (vs 601 MiB fp16; -74%)", "expected_quality": "speaker similarity 0.940 (still above 0.65 threshold)" }, "user_custom": { "description": "Any of the 2^4 = 16 combinations is valid. Pick per submodel based on the audio_quality_int8_only field above and your own ear / target binary size.", "example": { "cond_step": "int8", "flowlm_step": "fp16", "flow_decoder": "fp16", "mimi_decoder": "int8" } } }, "languages": { "english": { "layer_count": "6L", "int8_dir": "languages/english/int8", "manifest_txt": "languages/english/english.txt", "size_bytes": 328626045, "size_human": "313.4 MiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/english/int8/cond_step.mlpackage", "size_bytes": 66955176, "size_human": "63.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/english/int8/cond_step.mlmodelc", "size_bytes": 66995543, "size_human": "63.9 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/english/int8/flowlm_step.mlpackage", "size_bytes": 76498561, "size_human": "73.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/english/int8/flowlm_step.mlmodelc", "size_bytes": 76542686, "size_human": "73.0 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, 
"flow_decoder": { "fp16": { "path_in_repo": "flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/english/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/english/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/english/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/english/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "german": { "layer_count": "6L", "int8_dir": "languages/german/int8", "manifest_txt": "languages/german/german.txt", "size_bytes": 328626042, "size_human": "313.4 MiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/german/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german/int8/cond_step.mlpackage", "size_bytes": 66955175, "size_human": "63.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german/int8/cond_step.mlmodelc", "size_bytes": 66995541, "size_human": "63.9 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } 
}, "flowlm_step": { "fp16": { "path_in_repo": "languages/german/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german/int8/flowlm_step.mlpackage", "size_bytes": 76498561, "size_human": "73.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german/int8/flowlm_step.mlmodelc", "size_bytes": 76542686, "size_human": "73.0 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/german/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/german/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "italian": { "layer_count": "6L", "int8_dir": "languages/italian/int8", "manifest_txt": "languages/italian/italian.txt", "size_bytes": 
328626042, "size_human": "313.4 MiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/italian/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian/int8/cond_step.mlpackage", "size_bytes": 66955175, "size_human": "63.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian/int8/cond_step.mlmodelc", "size_bytes": 66995541, "size_human": "63.9 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/italian/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian/int8/flowlm_step.mlpackage", "size_bytes": 76498561, "size_human": "73.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian/int8/flowlm_step.mlmodelc", "size_bytes": 76542686, "size_human": "73.0 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/italian/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/italian/mimi_decoder.mlmodelc", 
"format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "portuguese": { "layer_count": "6L", "int8_dir": "languages/portuguese/int8", "manifest_txt": "languages/portuguese/portuguese.txt", "size_bytes": 328626042, "size_human": "313.4 MiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/portuguese/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese/int8/cond_step.mlpackage", "size_bytes": 66955175, "size_human": "63.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese/int8/cond_step.mlmodelc", "size_bytes": 66995541, "size_human": "63.9 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/portuguese/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese/int8/flowlm_step.mlpackage", "size_bytes": 76498561, "size_human": "73.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese/int8/flowlm_step.mlmodelc", "size_bytes": 76542686, "size_human": "73.0 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { 
"path_in_repo": "languages/portuguese/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/portuguese/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "spanish": { "layer_count": "6L", "int8_dir": "languages/spanish/int8", "manifest_txt": "languages/spanish/spanish.txt", "size_bytes": 328626043, "size_human": "313.4 MiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/spanish/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish/int8/cond_step.mlpackage", "size_bytes": 66955175, "size_human": "63.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish/int8/cond_step.mlmodelc", "size_bytes": 66995541, "size_human": "63.9 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, 
well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/spanish/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish/int8/flowlm_step.mlpackage", "size_bytes": 76498561, "size_human": "73.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish/int8/flowlm_step.mlmodelc", "size_bytes": 76542686, "size_human": "73.0 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/spanish/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/spanish/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish/int8/mimi_decoder.mlmodelc", "size_bytes": 10903111, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "french_24l": { "layer_count": "24L", "int8_dir": "languages/french_24l/int8", "manifest_txt": 
"languages/french_24l/french_24l.txt", "size_bytes": 1239353135, "size_human": "1.2 GiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/french_24l/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/french_24l/int8/cond_step.mlpackage", "size_bytes": 294576395, "size_human": "280.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/french_24l/int8/cond_step.mlmodelc", "size_bytes": 294737549, "size_human": "281.1 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/french_24l/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/french_24l/int8/flowlm_step.mlpackage", "size_bytes": 304120099, "size_human": "290.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/french_24l/int8/flowlm_step.mlmodelc", "size_bytes": 304285013, "size_human": "290.2 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/french_24l/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/french_24l/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/french_24l/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, 
"mimi_decoder": { "fp16": { "path_in_repo": "languages/french_24l/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/french_24l/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/french_24l/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "german_24l": { "layer_count": "24L", "int8_dir": "languages/german_24l/int8", "manifest_txt": "languages/german_24l/german_24l.txt", "size_bytes": 1239353135, "size_human": "1.2 GiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/german_24l/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german_24l/int8/cond_step.mlpackage", "size_bytes": 294576395, "size_human": "280.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german_24l/int8/cond_step.mlmodelc", "size_bytes": 294737549, "size_human": "281.1 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/german_24l/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german_24l/int8/flowlm_step.mlpackage", "size_bytes": 304120099, "size_human": "290.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german_24l/int8/flowlm_step.mlmodelc", "size_bytes": 304285013, "size_human": "290.2 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": 
"safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/german_24l/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german_24l/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german_24l/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/german_24l/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/german_24l/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/german_24l/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "italian_24l": { "layer_count": "24L", "int8_dir": "languages/italian_24l/int8", "manifest_txt": "languages/italian_24l/italian_24l.txt", "size_bytes": 1239353135, "size_human": "1.2 GiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/italian_24l/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian_24l/int8/cond_step.mlpackage", "size_bytes": 294576395, "size_human": "280.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian_24l/int8/cond_step.mlmodelc", "size_bytes": 294737549, 
"size_human": "281.1 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/italian_24l/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian_24l/int8/flowlm_step.mlpackage", "size_bytes": 304120099, "size_human": "290.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian_24l/int8/flowlm_step.mlmodelc", "size_bytes": 304285013, "size_human": "290.2 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/italian_24l/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian_24l/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian_24l/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/italian_24l/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, 
"verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "portuguese_24l": { "layer_count": "24L", "int8_dir": "languages/portuguese_24l/int8", "manifest_txt": "languages/portuguese_24l/portuguese_24l.txt", "size_bytes": 1239353135, "size_human": "1.2 GiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/portuguese_24l/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/cond_step.mlpackage", "size_bytes": 294576395, "size_human": "280.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/cond_step.mlmodelc", "size_bytes": 294737549, "size_human": "281.1 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/portuguese_24l/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlpackage", "size_bytes": 304120099, "size_human": "290.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlmodelc", "size_bytes": 304285013, "size_human": "290.2 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/portuguese_24l/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": 
"languages/portuguese_24l/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/portuguese_24l/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlmodelc", "size_bytes": 10903110, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } }, "spanish_24l": { "layer_count": "24L", "int8_dir": "languages/spanish_24l/int8", "manifest_txt": "languages/spanish_24l/spanish_24l.txt", "size_bytes": 1239353136, "size_human": "1.2 GiB", "submodels": { "cond_step": { "fp16": { "path_in_repo": "languages/spanish_24l/cond_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish_24l/int8/cond_step.mlpackage", "size_bytes": 294576395, "size_human": "280.9 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish_24l/int8/cond_step.mlmodelc", "size_bytes": 294737549, "size_human": "281.1 MiB" } }, "audio_quality": { "speaker_sim": 0.984, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flowlm_step": { "fp16": { "path_in_repo": "languages/spanish_24l/flowlm_step.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": 
"languages/spanish_24l/int8/flowlm_step.mlpackage", "size_bytes": 304120099, "size_human": "290.0 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish_24l/int8/flowlm_step.mlmodelc", "size_bytes": 304285013, "size_human": "290.2 MiB" } }, "audio_quality": { "speaker_sim": 0.989, "pearson": 0.94, "verdict": "safe", "summary": "minor drift, well within identity threshold" } }, "flow_decoder": { "fp16": { "path_in_repo": "languages/spanish_24l/flow_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlpackage", "size_bytes": 9923605, "size_human": "9.5 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlmodelc", "size_bytes": 9939641, "size_human": "9.5 MiB" } }, "audio_quality": { "speaker_sim": 0.981, "pearson": 0.78, "verdict": "risky", "summary": "audible drift; 8-step LSD inner loop compounds quantization error" } }, "mimi_decoder": { "fp16": { "path_in_repo": "languages/spanish_24l/mimi_decoder.mlmodelc", "format": "mlmodelc", "note": "shipped separately; not part of this int8 upload" }, "int8": { "mlpackage": { "present": true, "path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlpackage", "size_bytes": 10867723, "size_human": "10.4 MiB" }, "mlmodelc": { "present": true, "path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlmodelc", "size_bytes": 10903111, "size_human": "10.4 MiB" } }, "audio_quality": { "speaker_sim": 0.998, "pearson": 1.0, "verdict": "transparent", "summary": "no audible difference vs fp16" } } } } }, "totals": { "size_bytes": 7839895890, "size_human": "7.3 GiB", "file_count": 80 } }