pocket-tts-coreml / manifest.json
alexwengg's picture
Upload manifest.json
943c1eb verified
{
"schema_version": "2",
"repo": "FluidInference/pocket-tts-coreml",
"repo_url": "https://huggingface.co/FluidInference/pocket-tts-coreml",
"generated": "2026-04-26T05:04:10+00:00",
"design": "Per-submodel quantization selection. The four PocketTTS submodels (cond_step, flowlm_step, flow_decoder, mimi_decoder) are independently swappable between fp16 and int8 \u2014 the int8 .mlpackage has the exact same input/output signature as the fp16 one, so a user can mix-and-match any subset (e.g. flow_decoder=fp16, the other three=int8) by loading different paths into the same PocketTTS pipeline.",
"quantization": {
"scheme": "CoreML weight-only int8",
"granularity": "per-channel symmetric",
"weight_threshold": 100000,
"activation_precision": "fp16",
"tool": "coremltools.optimize.coreml.linear_quantize_weights",
"notes": "Only body weights with \u2265100K elements are quantized; small head/tail linears stay fp16. Activations are NOT quantized (W8A16 weight-only, not W8A8). Audio quality A/B was run on English 6L only \u2014 non-English not yet A/B'd."
},
"ab_test": {
"language": "english",
"layer_count": "6L",
"prompt": "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.",
"voice": "alba",
"seed": 42,
"metrics": [
"Pearson r (waveform-level correlation vs fp16 baseline)",
"Speaker similarity (256-dim embedding cosine; 0.65 = identity threshold)"
]
},
"per_submodel": {
"cond_step": {
"audio_quality_int8_only": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
},
"swap_independent": true
},
"flowlm_step": {
"audio_quality_int8_only": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
},
"swap_independent": true
},
"flow_decoder": {
"audio_quality_int8_only": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
},
"swap_independent": true
},
"mimi_decoder": {
"audio_quality_int8_only": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
},
"swap_independent": true
}
},
"selection_profiles": {
"all_fp16_baseline": {
"description": "Reference; everything fp16. No int8 used.",
"size_english_mlmodelc": "\u2248601 MiB (existing fp16 ship)",
"models": {
"cond_step": "fp16",
"flowlm_step": "fp16",
"flow_decoder": "fp16",
"mimi_decoder": "fp16"
},
"expected_quality": "reference"
},
"safe_int8": {
"description": "Quantize only the three transparent/safe submodels. Recommended for production.",
"models": {
"cond_step": "int8",
"flowlm_step": "int8",
"flow_decoder": "fp16",
"mimi_decoder": "int8"
},
"english_mlmodelc_size": "\u2248147 MiB (vs 601 MiB fp16; -75%)",
"expected_quality": "speaker similarity \u22650.98 vs fp16"
},
"aggressive_int8": {
"description": "All four int8. Max compression but flow_decoder introduces audible drift.",
"models": {
"cond_step": "int8",
"flowlm_step": "int8",
"flow_decoder": "int8",
"mimi_decoder": "int8"
},
"english_mlmodelc_size": "\u2248156 MiB (vs 601 MiB fp16; -74%)",
"expected_quality": "speaker similarity 0.940 (still above 0.65 threshold)"
},
"user_custom": {
"description": "Any of the 2^4 = 16 combinations is valid. Pick per submodel based on the audio_quality_int8_only field above and your own ear / target binary size.",
"example": {
"cond_step": "int8",
"flowlm_step": "fp16",
"flow_decoder": "fp16",
"mimi_decoder": "int8"
}
}
},
"languages": {
"english": {
"layer_count": "6L",
"int8_dir": "languages/english/int8",
"manifest_txt": "languages/english/english.txt",
"size_bytes": 328626045,
"size_human": "313.4 MiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/english/int8/cond_step.mlpackage",
"size_bytes": 66955176,
"size_human": "63.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/english/int8/cond_step.mlmodelc",
"size_bytes": 66995543,
"size_human": "63.9 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/english/int8/flowlm_step.mlpackage",
"size_bytes": 76498561,
"size_human": "73.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/english/int8/flowlm_step.mlmodelc",
"size_bytes": 76542686,
"size_human": "73.0 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/english/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/english/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/english/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/english/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"german": {
"layer_count": "6L",
"int8_dir": "languages/german/int8",
"manifest_txt": "languages/german/german.txt",
"size_bytes": 328626042,
"size_human": "313.4 MiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/german/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german/int8/cond_step.mlpackage",
"size_bytes": 66955175,
"size_human": "63.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german/int8/cond_step.mlmodelc",
"size_bytes": 66995541,
"size_human": "63.9 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/german/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german/int8/flowlm_step.mlpackage",
"size_bytes": 76498561,
"size_human": "73.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german/int8/flowlm_step.mlmodelc",
"size_bytes": 76542686,
"size_human": "73.0 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/german/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/german/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"italian": {
"layer_count": "6L",
"int8_dir": "languages/italian/int8",
"manifest_txt": "languages/italian/italian.txt",
"size_bytes": 328626042,
"size_human": "313.4 MiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/italian/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian/int8/cond_step.mlpackage",
"size_bytes": 66955175,
"size_human": "63.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian/int8/cond_step.mlmodelc",
"size_bytes": 66995541,
"size_human": "63.9 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/italian/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian/int8/flowlm_step.mlpackage",
"size_bytes": 76498561,
"size_human": "73.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian/int8/flowlm_step.mlmodelc",
"size_bytes": 76542686,
"size_human": "73.0 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/italian/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/italian/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"portuguese": {
"layer_count": "6L",
"int8_dir": "languages/portuguese/int8",
"manifest_txt": "languages/portuguese/portuguese.txt",
"size_bytes": 328626042,
"size_human": "313.4 MiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/portuguese/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese/int8/cond_step.mlpackage",
"size_bytes": 66955175,
"size_human": "63.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese/int8/cond_step.mlmodelc",
"size_bytes": 66995541,
"size_human": "63.9 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/portuguese/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese/int8/flowlm_step.mlpackage",
"size_bytes": 76498561,
"size_human": "73.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese/int8/flowlm_step.mlmodelc",
"size_bytes": 76542686,
"size_human": "73.0 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/portuguese/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/portuguese/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"spanish": {
"layer_count": "6L",
"int8_dir": "languages/spanish/int8",
"manifest_txt": "languages/spanish/spanish.txt",
"size_bytes": 328626043,
"size_human": "313.4 MiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/spanish/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish/int8/cond_step.mlpackage",
"size_bytes": 66955175,
"size_human": "63.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish/int8/cond_step.mlmodelc",
"size_bytes": 66995541,
"size_human": "63.9 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/spanish/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish/int8/flowlm_step.mlpackage",
"size_bytes": 76498561,
"size_human": "73.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish/int8/flowlm_step.mlmodelc",
"size_bytes": 76542686,
"size_human": "73.0 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/spanish/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/spanish/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903111,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"french_24l": {
"layer_count": "24L",
"int8_dir": "languages/french_24l/int8",
"manifest_txt": "languages/french_24l/french_24l.txt",
"size_bytes": 1239353135,
"size_human": "1.2 GiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/french_24l/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/french_24l/int8/cond_step.mlpackage",
"size_bytes": 294576395,
"size_human": "280.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/french_24l/int8/cond_step.mlmodelc",
"size_bytes": 294737549,
"size_human": "281.1 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/french_24l/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/french_24l/int8/flowlm_step.mlpackage",
"size_bytes": 304120099,
"size_human": "290.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/french_24l/int8/flowlm_step.mlmodelc",
"size_bytes": 304285013,
"size_human": "290.2 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/french_24l/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/french_24l/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/french_24l/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/french_24l/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/french_24l/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/french_24l/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"german_24l": {
"layer_count": "24L",
"int8_dir": "languages/german_24l/int8",
"manifest_txt": "languages/german_24l/german_24l.txt",
"size_bytes": 1239353135,
"size_human": "1.2 GiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/german_24l/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german_24l/int8/cond_step.mlpackage",
"size_bytes": 294576395,
"size_human": "280.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german_24l/int8/cond_step.mlmodelc",
"size_bytes": 294737549,
"size_human": "281.1 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/german_24l/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german_24l/int8/flowlm_step.mlpackage",
"size_bytes": 304120099,
"size_human": "290.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german_24l/int8/flowlm_step.mlmodelc",
"size_bytes": 304285013,
"size_human": "290.2 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/german_24l/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german_24l/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german_24l/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/german_24l/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/german_24l/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/german_24l/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"italian_24l": {
"layer_count": "24L",
"int8_dir": "languages/italian_24l/int8",
"manifest_txt": "languages/italian_24l/italian_24l.txt",
"size_bytes": 1239353135,
"size_human": "1.2 GiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/italian_24l/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/cond_step.mlpackage",
"size_bytes": 294576395,
"size_human": "280.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/cond_step.mlmodelc",
"size_bytes": 294737549,
"size_human": "281.1 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/italian_24l/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/flowlm_step.mlpackage",
"size_bytes": 304120099,
"size_human": "290.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/flowlm_step.mlmodelc",
"size_bytes": 304285013,
"size_human": "290.2 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/italian_24l/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/italian_24l/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/italian_24l/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"portuguese_24l": {
"layer_count": "24L",
"int8_dir": "languages/portuguese_24l/int8",
"manifest_txt": "languages/portuguese_24l/portuguese_24l.txt",
"size_bytes": 1239353135,
"size_human": "1.2 GiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/portuguese_24l/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/cond_step.mlpackage",
"size_bytes": 294576395,
"size_human": "280.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/cond_step.mlmodelc",
"size_bytes": 294737549,
"size_human": "281.1 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/portuguese_24l/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlpackage",
"size_bytes": 304120099,
"size_human": "290.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/flowlm_step.mlmodelc",
"size_bytes": 304285013,
"size_human": "290.2 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/portuguese_24l/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/portuguese_24l/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/portuguese_24l/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903110,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
},
"spanish_24l": {
"layer_count": "24L",
"int8_dir": "languages/spanish_24l/int8",
"manifest_txt": "languages/spanish_24l/spanish_24l.txt",
"size_bytes": 1239353136,
"size_human": "1.2 GiB",
"submodels": {
"cond_step": {
"fp16": {
"path_in_repo": "languages/spanish_24l/cond_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/cond_step.mlpackage",
"size_bytes": 294576395,
"size_human": "280.9 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/cond_step.mlmodelc",
"size_bytes": 294737549,
"size_human": "281.1 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.984,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flowlm_step": {
"fp16": {
"path_in_repo": "languages/spanish_24l/flowlm_step.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/flowlm_step.mlpackage",
"size_bytes": 304120099,
"size_human": "290.0 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/flowlm_step.mlmodelc",
"size_bytes": 304285013,
"size_human": "290.2 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.989,
"pearson": 0.94,
"verdict": "safe",
"summary": "minor drift, well within identity threshold"
}
},
"flow_decoder": {
"fp16": {
"path_in_repo": "languages/spanish_24l/flow_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlpackage",
"size_bytes": 9923605,
"size_human": "9.5 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/flow_decoder.mlmodelc",
"size_bytes": 9939641,
"size_human": "9.5 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.981,
"pearson": 0.78,
"verdict": "risky",
"summary": "audible drift; 8-step LSD inner loop compounds quantization error"
}
},
"mimi_decoder": {
"fp16": {
"path_in_repo": "languages/spanish_24l/mimi_decoder.mlmodelc",
"format": "mlmodelc",
"note": "shipped separately; not part of this int8 upload"
},
"int8": {
"mlpackage": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlpackage",
"size_bytes": 10867723,
"size_human": "10.4 MiB"
},
"mlmodelc": {
"present": true,
"path_in_repo": "languages/spanish_24l/int8/mimi_decoder.mlmodelc",
"size_bytes": 10903111,
"size_human": "10.4 MiB"
}
},
"audio_quality": {
"speaker_sim": 0.998,
"pearson": 1.0,
"verdict": "transparent",
"summary": "no audible difference vs fp16"
}
}
}
}
},
"totals": {
"size_bytes": 7839895890,
"size_human": "7.3 GiB",
"file_count": 80
}
}