| { |
| "schema_version": "2", |
| "repo": "FluidInference/pocket-tts-coreml", |
| "repo_url": "https://huggingface.co/FluidInference/pocket-tts-coreml", |
| "generated": "2026-04-26T05:04:10+00:00", |
| "design": "Per-submodel quantization selection. The four PocketTTS submodels (cond_step, flowlm_step, flow_decoder, mimi_decoder) are independently swappable between fp16 and int8 \u2014 the int8 .mlpackage has the exact same input/output signature as the fp16 one, so a user can mix-and-match any subset (e.g. flow_decoder=fp16, the other three=int8) by loading different paths into the same PocketTTS pipeline.", |
| "quantization": { |
| "scheme": "CoreML weight-only int8", |
| "granularity": "per-channel symmetric", |
| "weight_threshold": 100000, |
| "activation_precision": "fp16", |
| "tool": "coremltools.optimize.coreml.linear_quantize_weights", |
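| "notes": "Only body weights with \u2265100K elements are quantized; small head/tail linears stay fp16. Activations are NOT quantized (W8A16 weight-only, not W8A8). Audio quality A/B was run on English 6L only \u2014 non-English and 24L variants have not yet been A/B tested, so the audio_quality values repeated under each of those entries in \"languages\" are the English 6L measurements, not independent results." |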
| }, |
| "ab_test": { |
| "language": "english", |
| "layer_count": "6L", |
| "prompt": "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.", |
| "voice": "alba", |
| "seed": 42, |
| "metrics": [ |
| "Pearson r (waveform-level correlation vs fp16 baseline)", |
| "Speaker similarity (256-dim embedding cosine; 0.65 = identity threshold)" |
| ] |
| }, |
| "per_submodel": { |
| "cond_step": { |
| "audio_quality_int8_only": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| }, |
| "swap_independent": true |
| }, |
| "flowlm_step": { |
| "audio_quality_int8_only": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| }, |
| "swap_independent": true |
| }, |
| "flow_decoder": { |
| "audio_quality_int8_only": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| }, |
| "swap_independent": true |
| }, |
| "mimi_decoder": { |
| "audio_quality_int8_only": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| }, |
| "swap_independent": true |
| } |
| }, |
| "selection_profiles": { |
| "all_fp16_baseline": { |
| "description": "Reference; everything fp16. No int8 used.", |
| "english_mlmodelc_size": "\u2248601 MiB (existing fp16 ship)", |
| "models": { |
| "cond_step": "fp16", |
| "flowlm_step": "fp16", |
| "flow_decoder": "fp16", |
| "mimi_decoder": "fp16" |
| }, |
| "expected_quality": "reference" |
| }, |
| "safe_int8": { |
| "description": "Quantize only the three transparent/safe submodels. Recommended for production.", |
| "models": { |
| "cond_step": "int8", |
| "flowlm_step": "int8", |
| "flow_decoder": "fp16", |
| "mimi_decoder": "int8" |
| }, |
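| "english_mlmodelc_size": "\u2248147 MiB for the three int8 submodels (cond_step + flowlm_step + mimi_decoder); the fp16 flow_decoder is not included in this figure (vs 601 MiB fp16; -75% before adding the fp16 flow_decoder)", |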
| "expected_quality": "speaker similarity \u22650.98 vs fp16" |
| }, |
| "aggressive_int8": { |
| "description": "All four int8. Max compression but flow_decoder introduces audible drift.", |
| "models": { |
| "cond_step": "int8", |
| "flowlm_step": "int8", |
| "flow_decoder": "int8", |
| "mimi_decoder": "int8" |
| }, |
| "english_mlmodelc_size": "\u2248156 MiB (vs 601 MiB fp16; -74%)", |
| "expected_quality": "speaker similarity 0.940 (still above 0.65 threshold)" |
| }, |
| "user_custom": { |
| "description": "Any of the 2^4 = 16 combinations is valid. Pick per submodel based on each submodel's audio_quality_int8_only entry under \"per_submodel\", your own ear, and your target binary size.", |
| "example": { |
| "cond_step": "int8", |
| "flowlm_step": "fp16", |
| "flow_decoder": "fp16", |
| "mimi_decoder": "int8" |
| } |
| } |
| }, |
| "languages": { |
| "english": { |
| "layer_count": "6L", |
| "int8_dir": "languages/english/int8", |
| "manifest_txt": "languages/english/english.txt", |
| "size_bytes": 328626045, |
| "size_human": "313.4 MiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/cond_step.mlpackage", |
| "size_bytes": 66955176, |
| "size_human": "63.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/cond_step.mlmodelc", |
| "size_bytes": 66995543, |
| "size_human": "63.9 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/flowlm_step.mlpackage", |
| "size_bytes": 76498561, |
| "size_human": "73.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/flowlm_step.mlmodelc", |
| "size_bytes": 76542686, |
| "size_human": "73.0 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/english/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "german": { |
| "layer_count": "6L", |
| "int8_dir": "languages/german/int8", |
| "manifest_txt": "languages/german/german.txt", |
| "size_bytes": 328626042, |
| "size_human": "313.4 MiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/german/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/cond_step.mlpackage", |
| "size_bytes": 66955175, |
| "size_human": "63.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/cond_step.mlmodelc", |
| "size_bytes": 66995541, |
| "size_human": "63.9 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/german/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/flowlm_step.mlpackage", |
| "size_bytes": 76498561, |
| "size_human": "73.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/flowlm_step.mlmodelc", |
| "size_bytes": 76542686, |
| "size_human": "73.0 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/german/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/german/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "italian": { |
| "layer_count": "6L", |
| "int8_dir": "languages/italian/int8", |
| "manifest_txt": "languages/italian/italian.txt", |
| "size_bytes": 328626042, |
| "size_human": "313.4 MiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/italian/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/cond_step.mlpackage", |
| "size_bytes": 66955175, |
| "size_human": "63.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/cond_step.mlmodelc", |
| "size_bytes": 66995541, |
| "size_human": "63.9 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/italian/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/flowlm_step.mlpackage", |
| "size_bytes": 76498561, |
| "size_human": "73.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/flowlm_step.mlmodelc", |
| "size_bytes": 76542686, |
| "size_human": "73.0 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/italian/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/italian/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "portuguese": { |
| "layer_count": "6L", |
| "int8_dir": "languages/portuguese/int8", |
| "manifest_txt": "languages/portuguese/portuguese.txt", |
| "size_bytes": 328626042, |
| "size_human": "313.4 MiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/cond_step.mlpackage", |
| "size_bytes": 66955175, |
| "size_human": "63.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/cond_step.mlmodelc", |
| "size_bytes": 66995541, |
| "size_human": "63.9 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/flowlm_step.mlpackage", |
| "size_bytes": 76498561, |
| "size_human": "73.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/flowlm_step.mlmodelc", |
| "size_bytes": 76542686, |
| "size_human": "73.0 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "spanish": { |
| "layer_count": "6L", |
| "int8_dir": "languages/spanish/int8", |
| "manifest_txt": "languages/spanish/spanish.txt", |
| "size_bytes": 328626043, |
| "size_human": "313.4 MiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/spanish/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/cond_step.mlpackage", |
| "size_bytes": 66955175, |
| "size_human": "63.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/cond_step.mlmodelc", |
| "size_bytes": 66995541, |
| "size_human": "63.9 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/spanish/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/flowlm_step.mlpackage", |
| "size_bytes": 76498561, |
| "size_human": "73.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/flowlm_step.mlmodelc", |
| "size_bytes": 76542686, |
| "size_human": "73.0 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/spanish/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/spanish/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903111, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "french_24l": { |
| "layer_count": "24L", |
| "int8_dir": "languages/french_24l/int8", |
| "manifest_txt": "languages/french_24l/french_24l.txt", |
| "size_bytes": 1239353135, |
| "size_human": "1.2 GiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/french_24l/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/cond_step.mlpackage", |
| "size_bytes": 294576395, |
| "size_human": "280.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/cond_step.mlmodelc", |
| "size_bytes": 294737549, |
| "size_human": "281.1 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/french_24l/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/flowlm_step.mlpackage", |
| "size_bytes": 304120099, |
| "size_human": "290.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/flowlm_step.mlmodelc", |
| "size_bytes": 304285013, |
| "size_human": "290.2 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/french_24l/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/french_24l/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/french_24l/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "german_24l": { |
| "layer_count": "24L", |
| "int8_dir": "languages/german_24l/int8", |
| "manifest_txt": "languages/german_24l/german_24l.txt", |
| "size_bytes": 1239353135, |
| "size_human": "1.2 GiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/german_24l/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/cond_step.mlpackage", |
| "size_bytes": 294576395, |
| "size_human": "280.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/cond_step.mlmodelc", |
| "size_bytes": 294737549, |
| "size_human": "281.1 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/german_24l/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/flowlm_step.mlpackage", |
| "size_bytes": 304120099, |
| "size_human": "290.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/flowlm_step.mlmodelc", |
| "size_bytes": 304285013, |
| "size_human": "290.2 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/german_24l/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/german_24l/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/german_24l/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "italian_24l": { |
| "layer_count": "24L", |
| "int8_dir": "languages/italian_24l/int8", |
| "manifest_txt": "languages/italian_24l/italian_24l.txt", |
| "size_bytes": 1239353135, |
| "size_human": "1.2 GiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/italian_24l/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/cond_step.mlpackage", |
| "size_bytes": 294576395, |
| "size_human": "280.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/cond_step.mlmodelc", |
| "size_bytes": 294737549, |
| "size_human": "281.1 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/italian_24l/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/flowlm_step.mlpackage", |
| "size_bytes": 304120099, |
| "size_human": "290.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/flowlm_step.mlmodelc", |
| "size_bytes": 304285013, |
| "size_human": "290.2 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/italian_24l/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/italian_24l/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "portuguese_24l": { |
| "layer_count": "24L", |
| "int8_dir": "languages/portuguese_24l/int8", |
| "manifest_txt": "languages/portuguese_24l/portuguese_24l.txt", |
| "size_bytes": 1239353135, |
| "size_human": "1.2 GiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese_24l/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/cond_step.mlpackage", |
| "size_bytes": 294576395, |
| "size_human": "280.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/cond_step.mlmodelc", |
| "size_bytes": 294737549, |
| "size_human": "281.1 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese_24l/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlpackage", |
| "size_bytes": 304120099, |
| "size_human": "290.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlmodelc", |
| "size_bytes": 304285013, |
| "size_human": "290.2 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese_24l/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/portuguese_24l/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903110, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| }, |
| "spanish_24l": { |
| "layer_count": "24L", |
| "int8_dir": "languages/spanish_24l/int8", |
| "manifest_txt": "languages/spanish_24l/spanish_24l.txt", |
| "size_bytes": 1239353136, |
| "size_human": "1.2 GiB", |
| "submodels": { |
| "cond_step": { |
| "fp16": { |
| "path_in_repo": "languages/spanish_24l/cond_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/cond_step.mlpackage", |
| "size_bytes": 294576395, |
| "size_human": "280.9 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/cond_step.mlmodelc", |
| "size_bytes": 294737549, |
| "size_human": "281.1 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.984, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flowlm_step": { |
| "fp16": { |
| "path_in_repo": "languages/spanish_24l/flowlm_step.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/flowlm_step.mlpackage", |
| "size_bytes": 304120099, |
| "size_human": "290.0 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/flowlm_step.mlmodelc", |
| "size_bytes": 304285013, |
| "size_human": "290.2 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.989, |
| "pearson": 0.94, |
| "verdict": "safe", |
| "summary": "minor drift, well within identity threshold" |
| } |
| }, |
| "flow_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/spanish_24l/flow_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlpackage", |
| "size_bytes": 9923605, |
| "size_human": "9.5 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlmodelc", |
| "size_bytes": 9939641, |
| "size_human": "9.5 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.981, |
| "pearson": 0.78, |
| "verdict": "risky", |
| "summary": "audible drift; 8-step LSD inner loop compounds quantization error" |
| } |
| }, |
| "mimi_decoder": { |
| "fp16": { |
| "path_in_repo": "languages/spanish_24l/mimi_decoder.mlmodelc", |
| "format": "mlmodelc", |
| "note": "shipped separately; not part of this int8 upload" |
| }, |
| "int8": { |
| "mlpackage": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlpackage", |
| "size_bytes": 10867723, |
| "size_human": "10.4 MiB" |
| }, |
| "mlmodelc": { |
| "present": true, |
| "path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlmodelc", |
| "size_bytes": 10903111, |
| "size_human": "10.4 MiB" |
| } |
| }, |
| "audio_quality": { |
| "speaker_sim": 0.998, |
| "pearson": 1.0, |
| "verdict": "transparent", |
| "summary": "no audible difference vs fp16" |
| } |
| } |
| } |
| } |
| }, |
| "totals": { |
| "size_bytes": 7839895890, |
| "size_human": "7.3 GiB", |
| "file_count": 80 |
| } |
| } |
|
|