diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..daf9bcb4c23d1fc7d5601d3c3bdf8e681dc46c00 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +--- +license: apache-2.0 +language: +- en +- es +- fr +- de +- it +- pt +- ru +- ar +- hi +- ko +- zh +library_name: mlx +base_model: arcee-ai/Trinity-Large-Preview +pipeline_tag: text-generation +tags: +- mlx +--- + +This model [finding1/Trinity-Large-Preview-MLX-6.5bpw](https://huggingface.co/finding1/Trinity-Large-Preview-MLX-6.5bpw) was +converted to MLX format from [arcee-ai/Trinity-Large-Preview](https://huggingface.co/arcee-ai/Trinity-Large-Preview) +using mlx-lm version **0.30.5** `mlx_lm.convert --hf-path arcee-ai/Trinity-Large-Preview --mlx-path Trinity-Large-Preview-MLX-6.5bpw --quantize --q-bits 6`. diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..05f0cd0bf1db4af2bf06100e0c636120d1b7b480 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,65 @@ +{{ bos_token }}{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role + '\n' }} + {% generation %} + {{- content}} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>' }} + {% endgeneration%} + {{- '\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1420e404c2c81a1b3242bf269a9972d8f011bf0 --- /dev/null +++ b/config.json @@ -0,0 +1,551 @@ +{ + "architectures": [ + "AfmoeForCausalLM" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_afmoe.AfmoeConfig", + "AutoModel": "modeling_afmoe.AfmoeModel", + "AutoModelForCausalLM": "modeling_afmoe.AfmoeForCausalLM" + }, + "dtype": "bfloat16", + "eos_token_id": 3, + "global_attn_every_n_layers": 4, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "load_balance_coeff": 5e-05, + "max_position_embeddings": 262144, + "model_type": "afmoe", + "moe_intermediate_size": 3072, + "mup_enabled": true, + "n_group": 1, + "num_attention_heads": 48, + "num_dense_layers": 6, + "num_expert_groups": 1, + "num_experts": 256, + "num_experts_per_tok": 4, + "num_hidden_layers": 60, + "num_key_value_heads": 8, + "num_limited_groups": 1, + "num_shared_experts": 1, + "quantization": { + "group_size": 64, + "bits": 6, + "mode": "affine", + "model.layers.6.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.7.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.8.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.9.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.10.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.11.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.12.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.13.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.14.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.15.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.16.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.17.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.18.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.19.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.20.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.21.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.22.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.23.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.24.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.25.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.26.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.27.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.28.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.29.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.30.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.31.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.32.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.33.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.34.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.35.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.36.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.37.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.38.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.39.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.40.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.41.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.42.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.43.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.44.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.45.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.46.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.47.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.48.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.49.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.50.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.51.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.52.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.53.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.54.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.55.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.56.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.57.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.58.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.59.mlp.router.gate": { + "group_size": 64, + "bits": 8 + } + }, + "quantization_config": { + "group_size": 64, + "bits": 6, + "mode": "affine", + "model.layers.6.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.7.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.8.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.9.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.10.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.11.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.12.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.13.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.14.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.15.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.16.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.17.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.18.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.19.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.20.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.21.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.22.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.23.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.24.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.25.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.26.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.27.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.28.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.29.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.30.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.31.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.32.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.33.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.34.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.35.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.36.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.37.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.38.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.39.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.40.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.41.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.42.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.43.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.44.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.45.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.46.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.47.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.48.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.49.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.50.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.51.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.52.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.53.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.54.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.55.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.56.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.57.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.58.mlp.router.gate": { + "group_size": 64, + "bits": 8 + }, + "model.layers.59.mlp.router.gate": { + "group_size": 64, + "bits": 8 + } + }, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000, + "route_norm": true, + "route_scale": 2.448, + "score_func": "sigmoid", + "sliding_window": 4096, + "tie_word_embeddings": false, + "topk_group": 1, + "transformers_version": "4.57.1", + "use_cache": true, + "use_grouped_mm": true, + "vocab_size": 200192 +} \ No newline at end of file diff --git a/configuration_afmoe.py b/configuration_afmoe.py new file mode 100644 index 0000000000000000000000000000000000000000..9efecdd517e8e6168f46ebecb3d282bdea34c5dc --- /dev/null +++ b/configuration_afmoe.py @@ -0,0 +1,133 @@ +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from transformers.configuration_utils import PretrainedConfig +from transformers.modeling_rope_utils import rope_config_validation +from transformers.configuration_utils import layer_type_validation +from transformers.utils import logging + +logger = logging.get_logger(__name__) + +class AfmoeConfig(PretrainedConfig): + """ + n_group (`int`, *optional*, defaults to 1): + Number of groups for routed experts. + topk_group (`int`, *optional*, defaults to 1): + Number of selected groups for each token(for each token, ensuring the selected experts is only within `topk_group` groups). + """ + model_type = "afmoe" + base_model_pp_plan = { + "embed_tokens": (["input_ids"], ["inputs_embeds"]), + "layers": (["hidden_states", "attention_mask"], ["hidden_states"]), + "norm": (["hidden_states"], ["hidden_states"]), + } + + def __init__( + self, + num_hidden_layers: int = 32, + vocab_size: int = 200192, + hidden_size: int = 2048, + intermediate_size: int = 6144, + moe_intermediate_size=1408, + num_dense_layers=1, + num_attention_heads=16, + num_key_value_heads=None, + head_dim=128, + hidden_act="silu", + max_position_embeddings=16384, + initializer_range=0.02, + rms_norm_eps=1e-5, + use_cache=True, + tie_word_embeddings=False, + rope_theta=10000.0, + rope_scaling=None, + num_experts=64, + num_experts_per_tok=6, + num_shared_experts=2, + num_expert_groups=1, + num_limited_groups=1, + score_func="sigmoid", + route_norm=True, + route_scale=1.0, + global_attn_every_n_layers=4, + sliding_window=1024, + mup_enabled=False, + layer_types=None, + attention_dropout: float = 0.0, + n_group: int = 1, + topk_group: int = 1, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_dense_layers = num_dense_layers + self.num_attention_heads = num_attention_heads + self.head_dim = head_dim + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.use_cache = use_cache + self.rope_theta = rope_theta + self.rope_scaling = rope_scaling + + + # MoE specific + self.moe_intermediate_size = moe_intermediate_size + self.num_experts_per_tok = num_experts_per_tok + self.n_group = n_group + self.topk_group = topk_group + self.num_experts = num_experts + self.num_shared_experts = num_shared_experts + self.num_expert_groups = num_expert_groups + self.num_limited_groups = num_limited_groups + self.score_func = score_func + self.route_norm = route_norm + self.route_scale = route_scale + + + # Attention specific + self.attention_dropout = attention_dropout + self.global_attn_every_n_layers = global_attn_every_n_layers + self.sliding_window = sliding_window + self.layer_types = layer_types + if self.layer_types is None: + self.layer_types = [ + "sliding_attention" if bool((i + 1) % global_attn_every_n_layers) else "full_attention" for i in range(self.num_hidden_layers) + ] + layer_type_validation(self.layer_types) + + # muP specific + self.mup_enabled = mup_enabled + + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + + + # Validate rope configs + if self.rope_scaling is not None and "type" in self.rope_scaling: + self.rope_scaling["rope_type"] = self.rope_scaling["type"] + rope_config_validation(self) + + super().__init__( + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) + + +__all__ = ["AfmoeConfig"] diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..71ac125ab114655c051bfbb2ed3d3fddd27887ba --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 3, + "pad_token_id": 12, + "transformers_version": "4.57.3", + "temperature": 0.8, + "top_p": 0.8 +} \ No newline at end of file diff --git a/model-00001-of-00081.safetensors b/model-00001-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27f7163a0cee93565b877d24d89dd36ae616e1ac --- /dev/null +++ b/model-00001-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103dab9e86215b42ae3eda748b1fd96d0a98dc3cffa345ba350125b57feb1e79 +size 5336460769 diff --git a/model-00002-of-00081.safetensors b/model-00002-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f1cb8436b499a8b840869f019dd04436c428716 --- /dev/null +++ b/model-00002-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89909275dea0e2225bb743a3e486e499de25915c52bed7092fbc60246e8335aa +size 4000855628 diff --git a/model-00003-of-00081.safetensors b/model-00003-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a24962534f1059f2bf7f3b2945db055df97b582 --- /dev/null +++ b/model-00003-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cae076e6d476865a14d6e3155cb6943dde88792b98f81425591f0924d6729bc +size 4000855568 diff --git a/model-00004-of-00081.safetensors b/model-00004-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73a9fe05e9162f1d0f4705c4c73f7931bc8ae696 --- /dev/null +++ b/model-00004-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c3c89fe95e0074afc806002c890751af49c49c996be9fd66c8170910324088 +size 3925869308 diff --git a/model-00005-of-00081.safetensors b/model-00005-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7b30e3d4f72e58d5d7c2134fd9c9d279a804eb9 --- /dev/null +++ b/model-00005-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6092cc0e95ec25d4556b543cdeb06c771573e75c78aa17929d3828187d31db +size 4000855654 diff --git a/model-00006-of-00081.safetensors b/model-00006-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34519707cd64532ce14d6959c441c991b6b205be --- /dev/null +++ b/model-00006-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308a7915dff2123cc4f4170ea2b8fce1262ba27e613b72cabb8297f44370b01f +size 4000855601 diff --git a/model-00007-of-00081.safetensors b/model-00007-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..641c0f1747f173efba619c20bba5aa8aa291b7a9 --- /dev/null +++ b/model-00007-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7267ea012b3958ace58feeca443e265bb14c6eb2944a0233aea928c343bed5d5 +size 3925869314 diff --git a/model-00008-of-00081.safetensors b/model-00008-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99adda54971324281a8b46cb7a085c3ef748259b --- /dev/null +++ b/model-00008-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b1dee7170942f410c82d22b8525e216012cfc4975e280a070640f6151270b2 +size 4000855750 diff --git a/model-00009-of-00081.safetensors b/model-00009-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b597e769130eb319e36d7050d3825d247f137d0e --- /dev/null +++ b/model-00009-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96595e88cad42464f5163409c9bf8b7817b6dc67c75121acaeeb32b092fa5909 +size 4000855632 diff --git a/model-00010-of-00081.safetensors b/model-00010-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52dc4f60692803948adccf0e6ef90c9a1879053b --- /dev/null +++ b/model-00010-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4d3b4a09b7b916aa7bd23bd5f70c8cd76b1180a3a1ab52b19fa4fa37320c41 +size 3925869316 diff --git a/model-00011-of-00081.safetensors b/model-00011-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9bdedbe83c47e1a251f74fd5c8762d2a2083d97 --- /dev/null +++ b/model-00011-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2883c0b74971bd9a6f4ee49ee4ddcab5201946d62d5458df3faba0e7159a7349 +size 4000855750 diff --git a/model-00012-of-00081.safetensors b/model-00012-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7bb31f3323e55896851a08deedde7671900906f --- /dev/null +++ b/model-00012-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d866d0f25db1876b73664eb3a835adc62aa1ce64a31b8ffcfe9bdd2c6142267b +size 4000855632 diff --git a/model-00013-of-00081.safetensors b/model-00013-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3748d4fac321eae009cdf189e7d5a6457969ebfc --- /dev/null +++ b/model-00013-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ae62a1ea422a554088730deb3a85da5dc458324c80d0cc43ab16bc5d296115 +size 3925869316 diff --git a/model-00014-of-00081.safetensors b/model-00014-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d23078321b736e6a9f7b68d5d54d7c594fd0aa9 --- /dev/null +++ b/model-00014-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:922ea296aca3a36bb635d3ac082d93823a647fe548398a4261d7b3ca973c3937 +size 4000855754 diff --git a/model-00015-of-00081.safetensors b/model-00015-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da970c2794c3852adabc4e554476246cd7d9fb06 --- /dev/null +++ b/model-00015-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a374ea9121f04d3934be84cca4b3fb31e5a264e79e21d3b9a33654d704d3b5d3 +size 4000855638 diff --git a/model-00016-of-00081.safetensors b/model-00016-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8599fefb30004d1bba403b2a4a0f15572757692 --- /dev/null +++ b/model-00016-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d168b770f457e5a25b08089479b761afda75527940abb79b7f47b32b6c9fd0e3 +size 3925869320 diff --git a/model-00017-of-00081.safetensors b/model-00017-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0cf02bc8966f76c67b389878fb328c4c0763c27 --- /dev/null +++ b/model-00017-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb051b16d87ac8130925c4611341a569005cb6d78b617c27a8246646e3a0d8aa +size 4000855704 diff --git a/model-00018-of-00081.safetensors b/model-00018-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6114d8722bcff166694749194535df34dc1fbfb4 --- /dev/null +++ b/model-00018-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a243b9b7883e21922e0b877fbf67b04ab69a5025fe6af6582ecbacd728b9a6ec +size 4000855626 diff --git a/model-00019-of-00081.safetensors b/model-00019-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1a6aa8ab4845ef12fc85bd277ebc515cfff7468 --- /dev/null +++ b/model-00019-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eed8ecea29e94e1c6e8b305244e270401c833cdba33b855c30077acf23d7f4f +size 3925869316 diff --git a/model-00020-of-00081.safetensors b/model-00020-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5b623f3790275b2ced10653e75161cd99bea18c --- /dev/null +++ b/model-00020-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc28249a3ee089a56c7090cc63b6a9be67d3f7a9fed856f9fb06685573edca09 +size 4000855674 diff --git a/model-00021-of-00081.safetensors b/model-00021-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cca5b624cc805c5b341abdaef08c3678118b4409 --- /dev/null +++ b/model-00021-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd47e3e641bf03ac56824258421b0ebde9af725ac5624456969fa9cec682c45 +size 4000855654 diff --git a/model-00022-of-00081.safetensors b/model-00022-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b68dbe77822ea0e15aa7b55b593d35a179da0d9 --- /dev/null +++ b/model-00022-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2012d9e80079f0edd68bda18339cc98dea7300e4eda967a7b20551e2ebec0d +size 3925869310 diff --git a/model-00023-of-00081.safetensors b/model-00023-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12119a67daf688b1fc8d64f8823f84412598ee32 --- /dev/null +++ b/model-00023-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bb67260e2d7f666f72d4b5008469d9c289533467a79da336910d9c55705dbf +size 4000855720 diff --git a/model-00024-of-00081.safetensors b/model-00024-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af790646c2de4cebfd4a9c94b17921560f5fc5fc --- /dev/null +++ b/model-00024-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec38ba3338a0c1ed06db02e07d68311439345c96693ba5ceefd1e9d840ec337d +size 4000855666 diff --git a/model-00025-of-00081.safetensors b/model-00025-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..355995f57e542556f267cbee850e85218e7cecb2 --- /dev/null +++ b/model-00025-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0698c5def766ff1eebea0fa6480793341ac9144552a12d1551f881f1e3acc50c +size 3925869314 diff --git a/model-00026-of-00081.safetensors b/model-00026-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b1d2d1a7a28b1e34b3415c4ac4efc8874459586 --- /dev/null +++ b/model-00026-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff44427449a1c3ceed916f1d69fc6950f170f41999c479a81e0a837f2e0779bf +size 4000855750 diff --git a/model-00027-of-00081.safetensors b/model-00027-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eafbf4df5de3623078585e31436b9005fdf95171 --- /dev/null +++ b/model-00027-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8717a50f4abf1cfc5bc1797b69114c1da4a63d7e993f708060564eefe4c57d9 +size 4000855630 diff --git a/model-00028-of-00081.safetensors b/model-00028-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e204956029c4cce22f18c6757487f9accc2d4d5b --- /dev/null +++ b/model-00028-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf5d7a51f294a8bbd44d34cd43cb05b39ac0bdbc7d08dceb790f27fd562c43a +size 3925869314 diff --git a/model-00029-of-00081.safetensors b/model-00029-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2d62705aee881dfab838a6e406e77678a39b55f --- /dev/null +++ b/model-00029-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c42d21d3bca679e9c468ff861a6b08488cfc13b8b9e324cf3902d92b5648455 +size 4000855750 diff --git a/model-00030-of-00081.safetensors b/model-00030-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ecc329f3927eaa548c85912a22b0d93af73f55a --- /dev/null +++ b/model-00030-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e1992bb8eea49fef2d20e62aff87977422d55757f8958b9dc88464a91c7d56 +size 4000855658 diff --git a/model-00031-of-00081.safetensors b/model-00031-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab5f1e6b85685d94b97c490c6cabeedc94ab0309 --- /dev/null +++ b/model-00031-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddc27ed212f49304534a307f51940b52363bff6d8d283270e0294a561322f87a +size 3925869316 diff --git a/model-00032-of-00081.safetensors b/model-00032-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a42362165d3bc8b1165bea73d11ae6130c5f257 --- /dev/null +++ b/model-00032-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b6340648bb012ef56533a6044bcc31924b55e1cb9f30b6798c39971477c06b +size 4000855754 diff --git a/model-00033-of-00081.safetensors b/model-00033-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..361cb5e50edc65d2a8b02ba437094eef83b71cf3 --- /dev/null +++ b/model-00033-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5702cc3ddb915abb6297dda1eb0282a6517c857d8666a5fbc5bdddf40869bb96 +size 4000855630 diff --git a/model-00034-of-00081.safetensors b/model-00034-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01ca8e78175aeabdffd915dfaec880363f1eade3 --- /dev/null +++ b/model-00034-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09726c4ffc13bcf0aba33ab47b17ab1a6aeecb9a6f8389e896a6c8b7fb9eedd +size 3925869310 diff --git a/model-00035-of-00081.safetensors b/model-00035-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d80eeeb81780272dc01c98a6a28c841a4d96b0f9 --- /dev/null +++ b/model-00035-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85e97f3c813701c007c037a0cbd5407f44e7db18645910574f1c0518ec7817d +size 4000855748 diff --git a/model-00036-of-00081.safetensors b/model-00036-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4b26f6a2f29007e9f8c4957ab41dbe56e5768dd --- /dev/null +++ b/model-00036-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd075ae060d11a8b6334411428c1a0e29381fc61f9ccdf5704ce58318247ab1 +size 4000855686 diff --git a/model-00037-of-00081.safetensors b/model-00037-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..046f685068cbc7a582acc70401115c61ea18ecaa --- /dev/null +++ b/model-00037-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773ff86bc69f7a40e338d1315848ffb1684894247e8318dc6f353c580f65d616 +size 3925869320 diff --git a/model-00038-of-00081.safetensors b/model-00038-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00416a19cb52a495096e23eaaab23e6ca78a557a --- /dev/null +++ b/model-00038-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e981bb7a788311b9c062a18ea899a78ed0157bae0a30b425cc4d9b7820dbd45 +size 4000855748 diff --git a/model-00039-of-00081.safetensors b/model-00039-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7aa67d7dfa21bbb8e622a2a5d35e516013419f9b --- /dev/null +++ b/model-00039-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ece3e61af73c514b33fe38565968a771e61df6700f70f9a5acfde8bed6cfb4 +size 4000855672 diff --git a/model-00040-of-00081.safetensors b/model-00040-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa84d827d65eedc15e074bcaa1154cb373fbe2c4 --- /dev/null +++ b/model-00040-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b932a14f1b52d0591ccb185b7e200993f3f8f78fc6c04be3eed8705122be39f2 +size 3925869316 diff --git a/model-00041-of-00081.safetensors b/model-00041-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6799d236466e6d1a7d4c153981280b83a4dffc4c --- /dev/null +++ b/model-00041-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1391db3ce71d84dd5d2dc4b90431c629d954102a6d714314039978c76e98928 +size 4000855722 diff --git a/model-00042-of-00081.safetensors b/model-00042-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d0fce2857c19afd9e191d7c17e8dbcbbc7326ac --- /dev/null +++ b/model-00042-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061400a0d0088ff0b7f64f4752380f88013564459d33cc5c9ed6fac93da211d8 +size 4000855634 diff --git a/model-00043-of-00081.safetensors b/model-00043-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4021617eefcf2906bbe31aff2ee6c03cabb9f195 --- /dev/null +++ b/model-00043-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c068d5bc32dd9707c2b83588e09fc1b542a25d55ce6f6f475b3fcb85522bb27 +size 3925869314 diff --git a/model-00044-of-00081.safetensors b/model-00044-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4df2d76a8dc23cca98976db2fd55830a490671a9 --- /dev/null +++ b/model-00044-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b21b97fd85c25a51c4cb457acb0bb7bbd8308b80e9685bfc3ead2ca26aab343 +size 4000855740 diff --git a/model-00045-of-00081.safetensors b/model-00045-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a3d7ebb633a566dbfe8c71c7f6ec3102a7aae44 --- /dev/null +++ b/model-00045-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b2e516bf037d1cd2ce2585478e98c118aeeb4084f35d311346ad52d69a207d +size 4000855628 diff --git a/model-00046-of-00081.safetensors b/model-00046-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa4ed76a328caf17e29ead5b2c287bb9017c7d76 --- /dev/null +++ b/model-00046-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f506a67f8be062c09deb8f58aa6b8ea9c79a321d141a4813c32d20e0700748e +size 3925869312 diff --git a/model-00047-of-00081.safetensors b/model-00047-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f63a7fac9007f95b5764bb39e9a4db9c0145934b --- /dev/null +++ b/model-00047-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b3ca9957c40a71c63d2f44bebae7ad2454b11c1c53de1263123e9d3e09a65b +size 4000855732 diff --git a/model-00048-of-00081.safetensors b/model-00048-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a059889ecb0b954412573eccb6d2871350bc71c1 --- /dev/null +++ b/model-00048-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55261f6a9a38c356278ceca9538b8303c3a11fa1b4bf10c03d58be8a2589c575 +size 4000855658 diff --git a/model-00049-of-00081.safetensors b/model-00049-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3661b177f9f9cbe49b42a7575bb8ea720fbbcd1c --- /dev/null +++ b/model-00049-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520c1e184d46614e5ef97f6f27c876772ff42ded6840f114c74fd40f9299df5e +size 3925869316 diff --git a/model-00050-of-00081.safetensors b/model-00050-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7df2ec70daf46517323961457aa66bc7dd346ff --- /dev/null +++ b/model-00050-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19232ab3e7b1148b720002bfcef83cb6bc4b6b73893a445e9d76137d48707147 +size 4000855754 diff --git a/model-00051-of-00081.safetensors b/model-00051-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44ba6aca72696a15762272477786754de2d3b56d --- /dev/null +++ b/model-00051-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2c0c7bd076755a70d1e417eee6289db394469ae74e9a85d17bbefc65722ccc +size 4000855644 diff --git a/model-00052-of-00081.safetensors b/model-00052-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3427279bc7e93dd97f569767c7e0fa991a7b155 --- /dev/null +++ b/model-00052-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa216acef63723c56b6b4750d8d32bdbaacdd1bbcc65d89b617b689ab8b61019 +size 3925869310 diff --git a/model-00053-of-00081.safetensors b/model-00053-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d091efcd183936b383c9df9fd247d37798e3f0fa --- /dev/null +++ b/model-00053-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63375d6c145162e0021a243ace2775668e19f96033f348a07f1385ebd9fef83c +size 4000855710 diff --git a/model-00054-of-00081.safetensors b/model-00054-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9792f7551c5e780c82b9adb617a52bbcc2728ec --- /dev/null +++ b/model-00054-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610998ce00f62d726a04e2112018631656e91598e9374c0648cb8eff6e4fa73e +size 4000855646 diff --git a/model-00055-of-00081.safetensors b/model-00055-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfba9aaa385da30b42433b3b6005bf71100132be --- /dev/null +++ b/model-00055-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d34757ad9a8c3a7df8588b19965e232e14eedf9c2731f36c364139012bc3489 +size 3925869314 diff --git a/model-00056-of-00081.safetensors b/model-00056-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19ed68357a2f63690f07c99b2632ff27d17d4878 --- /dev/null +++ b/model-00056-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89cf6cbd9a89302f6148c395906c4ee2575d0d2af94bdb667905158b51a9167 +size 4000855748 diff --git a/model-00057-of-00081.safetensors b/model-00057-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b654b3c6a6c0a9bdf08118deec9bf0c73aa15a66 --- /dev/null +++ b/model-00057-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86480ebffa843f8a0315c35e234ecbba80f811f3f2447bde6bff6fe6c117b15d +size 4000855676 diff --git a/model-00058-of-00081.safetensors b/model-00058-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b0ea448e1f5ac6ab0c8d898fd88adc2912ba47e --- /dev/null +++ b/model-00058-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c847d30ae24b4007c7a8a52caa1fbd0c2361edf5834b6c3feae5e7fabf5355f +size 3925869310 diff --git a/model-00059-of-00081.safetensors b/model-00059-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d6ba98cdd2f38e32bc05af5ed98009e825fa959 --- /dev/null +++ b/model-00059-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc336983d9681fd5475c91c79d5c162e3d7bfbe6fac67c9f43e6dc780c7100c +size 4000855748 diff --git a/model-00060-of-00081.safetensors b/model-00060-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5e32852f8083c1860bce2322f7db4b1f323e047 --- /dev/null +++ b/model-00060-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de8371ddb350acd37ff8e050df1913e5a1306e5b924c2c36d4545d7af049abe +size 4000855618 diff --git a/model-00061-of-00081.safetensors b/model-00061-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3d3d3eb77febd72465ff85a16ca15227beaf2bc --- /dev/null +++ b/model-00061-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c35015d817a6289ecbf51c4b121167b4c9929b7dff938c2ad4c8fe4bc1fa1f85 +size 3925869316 diff --git a/model-00062-of-00081.safetensors b/model-00062-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83424535e1605ccc514b4fe0c1cd34eb76621b8a --- /dev/null +++ b/model-00062-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3261ed0687b1816f01c42eb86d3035a5fabbf7b44522b2d8365a861e5c29adf2 +size 4000855682 diff --git a/model-00063-of-00081.safetensors b/model-00063-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c68046b405cce1bb5ec70c8a7a9d5634ef80c65 --- /dev/null +++ b/model-00063-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305d93b4b1250a89e8d634db175720fadefdb7a3685ec87d3ce113f220f363d3 +size 4000855690 diff --git a/model-00064-of-00081.safetensors b/model-00064-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7b35e6e3ba2826da726d28cedbbd987d2eab4b1 --- /dev/null +++ b/model-00064-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2729f59d04ce1f7e15dab43cc3779c4f46eb22925d19ab388d7cc0c0dc352c7 +size 3925869316 diff --git a/model-00065-of-00081.safetensors b/model-00065-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dfc2e80216278943dad30107fcf5b481fc7f387 --- /dev/null +++ b/model-00065-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20894251493bf37419cded1faaebe8232d5492311923ce8d6f5794ecde9ca391 +size 4000855730 diff --git a/model-00066-of-00081.safetensors b/model-00066-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97278112036b2a0ff996bcec979886edf3f48495 --- /dev/null +++ b/model-00066-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94067ba005871eca929756f139ed85f09440156c66537c9202bddb0adfc2e73 +size 4000855606 diff --git a/model-00067-of-00081.safetensors b/model-00067-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7602f6bf070cb034376bd64a6aefffa3c687f3a8 --- /dev/null +++ b/model-00067-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8e521c1bdee876256352f33114b0b3bdc59f65d16be5e6352e33b6c0f5ac2a +size 3925869312 diff --git a/model-00068-of-00081.safetensors b/model-00068-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..392e8b3723209f9212d65949c66b9281a1c041dc --- /dev/null +++ b/model-00068-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f04dd070e90d54a67081e1162460c581569c3a2a6ed2f4ae1e98af47c65df1f +size 4000855750 diff --git a/model-00069-of-00081.safetensors b/model-00069-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b0f84faa5e8a07d9429512171744481f03b62c6 --- /dev/null +++ b/model-00069-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e041e688ad881f65e344f9a8570c8dae8cfc129614b61bb756d0e4d47f87c7d5 +size 4000855706 diff --git a/model-00070-of-00081.safetensors b/model-00070-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..862a218fc12e295bc3662fe159124d3a4715a8e2 --- /dev/null +++ b/model-00070-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad13f3a8de2afea357ce0c83e107a644e9b793ff5972f6aa2cdfa9fd3c46975 +size 3925869312 diff --git a/model-00071-of-00081.safetensors b/model-00071-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44f196bda84a1ff766000796fbd2fc5ea988cf1e --- /dev/null +++ b/model-00071-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f8bc57bd369c887c2a3e4f0e83746486b4c777665ea5eab5e63f8f0e674aca8 +size 4000855750 diff --git a/model-00072-of-00081.safetensors b/model-00072-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..745176aba8478e6fb12a18a30679a3ee99ce2fd5 --- /dev/null +++ b/model-00072-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dffda4061e69dd7518f9a51e59a005c1dbae7c8e658b4e3629369d86b91218c +size 4000855632 diff --git a/model-00073-of-00081.safetensors b/model-00073-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c5df70809ac3cae8c32f305d0d67b50919f201f --- /dev/null +++ b/model-00073-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92d80d8c235d28662b538508bd747ccd4d5dc5835211e821e3898f1e1365e8c4 +size 3925869316 diff --git a/model-00074-of-00081.safetensors b/model-00074-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30721ed18c2d02d2487409363b9f2ac6c51874de --- /dev/null +++ b/model-00074-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308fccb59fabec5344e506aef732033e9c64e04e15a93f08a8b4d2de9a3bbe2a +size 4000855682 diff --git a/model-00075-of-00081.safetensors b/model-00075-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e13921593794ffefad7ab68c08373b55d4acea4 --- /dev/null +++ b/model-00075-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9243ee43eea3e06ea7750c56730956ee31686e566b14c4563c855aecd124ed8e +size 4000855628 diff --git a/model-00076-of-00081.safetensors b/model-00076-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b519416651ae58f1a201a4db5fd7d723e7e5ec2 --- /dev/null +++ b/model-00076-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a2e7d697a7a04acb04279623929d0723a8ea1cdaa5a9765aa1619bc635e693 +size 3925869316 diff --git a/model-00077-of-00081.safetensors b/model-00077-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb4bbfbaf772b9daf2f1d1fb97a529f13f17caa7 --- /dev/null +++ b/model-00077-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a6b55cb91f25c67688c3b6cff2c8ddc807b17f90dfde1c22c382b279a3e573 +size 4000855754 diff --git a/model-00078-of-00081.safetensors b/model-00078-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24ba375e1ac71a715f178263b1e1568a7c899719 --- /dev/null +++ b/model-00078-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32ec00f2e49bb9c61d4d66b690bed6c0bbe82401a7e35c89145f32e9eddc136 +size 4000855624 diff --git a/model-00079-of-00081.safetensors b/model-00079-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c400c680b7d2ef9c228c8d45497f631a16f9252 --- /dev/null +++ b/model-00079-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d949f6ae9f750cadaf8daf67100b8e2a08e0cdd4af43efe7231856a7cd763c +size 3925869316 diff --git a/model-00080-of-00081.safetensors b/model-00080-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f46c37511813a377bb2a3f5b890f47326f8c27cd --- /dev/null +++ b/model-00080-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abc5f9edc7a2966eb7e0229b4bfca12ae7d4c399965bbb69eebc8c9b6fe858e +size 4000855706 diff --git a/model-00081-of-00081.safetensors b/model-00081-of-00081.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0ce55c22cd73521495faded34867cad7715f104 --- /dev/null +++ b/model-00081-of-00081.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0ec76e5ad8794acafcd77d8fde314dcacd50017aa90a80159e1b1ed293f1df +size 4448584346 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..7497af3bab36cac86e7f79de80f634fe81b6f433 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2517 @@ +{ + "metadata": { + "total_size": 323902700544, + "total_parameters": 398635272192 + }, + "weight_map": { + "lm_head.biases": "model-00081-of-00081.safetensors", + "lm_head.scales": "model-00081-of-00081.safetensors", + "lm_head.weight": "model-00081-of-00081.safetensors", + "model.embed_tokens.biases": "model-00001-of-00081.safetensors", + "model.embed_tokens.scales": "model-00001-of-00081.safetensors", + "model.embed_tokens.weight": "model-00001-of-00081.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.10.input_layernorm.weight": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.expert_bias": "model-00006-of-00081.safetensors", + "model.layers.10.mlp.experts.down_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.experts.down_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.experts.down_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.experts.gate_proj.biases": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.experts.gate_proj.scales": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.experts.gate_proj.weight": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.experts.up_proj.biases": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.experts.up_proj.scales": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.experts.up_proj.weight": "model-00007-of-00081.safetensors", + "model.layers.10.mlp.router.gate.biases": "model-00006-of-00081.safetensors", + "model.layers.10.mlp.router.gate.scales": "model-00006-of-00081.safetensors", + "model.layers.10.mlp.router.gate.weight": "model-00006-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00008-of-00081.safetensors", + "model.layers.10.post_mlp_layernorm.weight": "model-00008-of-00081.safetensors", + "model.layers.10.pre_mlp_layernorm.weight": "model-00008-of-00081.safetensors", + "model.layers.10.self_attn.gate_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.gate_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.gate_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.k_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.k_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.o_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.o_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.q_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.q_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.v_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.v_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.11.input_layernorm.weight": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.expert_bias": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.experts.down_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.experts.down_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.experts.down_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.experts.gate_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.experts.gate_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.experts.gate_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.experts.up_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.experts.up_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.experts.up_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.router.gate.biases": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.router.gate.scales": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.router.gate.weight": "model-00008-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00081.safetensors", + "model.layers.11.post_mlp_layernorm.weight": "model-00009-of-00081.safetensors", + "model.layers.11.pre_mlp_layernorm.weight": "model-00009-of-00081.safetensors", + "model.layers.11.self_attn.gate_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.gate_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.gate_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.k_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.k_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.o_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.o_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.q_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.q_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.v_proj.biases": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.v_proj.scales": "model-00008-of-00081.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00008-of-00081.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.expert_bias": "model-00009-of-00081.safetensors", + "model.layers.12.mlp.experts.down_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.experts.down_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.experts.down_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.experts.gate_proj.biases": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.experts.gate_proj.scales": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.experts.gate_proj.weight": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.experts.up_proj.biases": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.experts.up_proj.scales": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.experts.up_proj.weight": "model-00010-of-00081.safetensors", + "model.layers.12.mlp.router.gate.biases": "model-00009-of-00081.safetensors", + "model.layers.12.mlp.router.gate.scales": "model-00009-of-00081.safetensors", + "model.layers.12.mlp.router.gate.weight": "model-00009-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00081.safetensors", + "model.layers.12.post_mlp_layernorm.weight": "model-00011-of-00081.safetensors", + "model.layers.12.pre_mlp_layernorm.weight": "model-00011-of-00081.safetensors", + "model.layers.12.self_attn.gate_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.gate_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.gate_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.k_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.k_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.o_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.o_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.q_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.q_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.v_proj.biases": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.v_proj.scales": "model-00009-of-00081.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00081.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.expert_bias": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.experts.down_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.experts.down_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.experts.down_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.experts.gate_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.experts.gate_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.experts.gate_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.experts.up_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.experts.up_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.experts.up_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.router.gate.biases": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.router.gate.scales": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.router.gate.weight": "model-00011-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00081.safetensors", + "model.layers.13.post_mlp_layernorm.weight": "model-00012-of-00081.safetensors", + "model.layers.13.pre_mlp_layernorm.weight": "model-00012-of-00081.safetensors", + "model.layers.13.self_attn.gate_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.gate_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.gate_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.k_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.k_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.o_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.o_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.q_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.q_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.v_proj.biases": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.v_proj.scales": "model-00011-of-00081.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00081.safetensors", + "model.layers.14.input_layernorm.weight": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.expert_bias": "model-00012-of-00081.safetensors", + "model.layers.14.mlp.experts.down_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.experts.down_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.experts.down_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.experts.gate_proj.biases": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.experts.gate_proj.scales": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.experts.gate_proj.weight": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.experts.up_proj.biases": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.experts.up_proj.scales": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.experts.up_proj.weight": "model-00013-of-00081.safetensors", + "model.layers.14.mlp.router.gate.biases": "model-00012-of-00081.safetensors", + "model.layers.14.mlp.router.gate.scales": "model-00012-of-00081.safetensors", + "model.layers.14.mlp.router.gate.weight": "model-00012-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00014-of-00081.safetensors", + "model.layers.14.post_mlp_layernorm.weight": "model-00014-of-00081.safetensors", + "model.layers.14.pre_mlp_layernorm.weight": "model-00014-of-00081.safetensors", + "model.layers.14.self_attn.gate_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.gate_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.gate_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.k_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.k_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.o_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.o_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.q_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.q_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.v_proj.biases": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.v_proj.scales": "model-00012-of-00081.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00081.safetensors", + "model.layers.15.input_layernorm.weight": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.expert_bias": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.experts.down_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.experts.down_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.experts.down_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.experts.gate_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.experts.gate_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.experts.gate_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.experts.up_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.experts.up_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.experts.up_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.router.gate.biases": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.router.gate.scales": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.router.gate.weight": "model-00014-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00015-of-00081.safetensors", + "model.layers.15.post_mlp_layernorm.weight": "model-00015-of-00081.safetensors", + "model.layers.15.pre_mlp_layernorm.weight": "model-00015-of-00081.safetensors", + "model.layers.15.self_attn.gate_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.gate_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.gate_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.k_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.k_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.o_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.o_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.q_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.q_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.v_proj.biases": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.v_proj.scales": "model-00014-of-00081.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00014-of-00081.safetensors", + "model.layers.16.input_layernorm.weight": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.expert_bias": "model-00015-of-00081.safetensors", + "model.layers.16.mlp.experts.down_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.experts.down_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.experts.down_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.experts.gate_proj.biases": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.experts.gate_proj.scales": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.experts.gate_proj.weight": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.experts.up_proj.biases": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.experts.up_proj.scales": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.experts.up_proj.weight": "model-00016-of-00081.safetensors", + "model.layers.16.mlp.router.gate.biases": "model-00015-of-00081.safetensors", + "model.layers.16.mlp.router.gate.scales": "model-00015-of-00081.safetensors", + "model.layers.16.mlp.router.gate.weight": "model-00015-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00017-of-00081.safetensors", + "model.layers.16.post_mlp_layernorm.weight": "model-00017-of-00081.safetensors", + "model.layers.16.pre_mlp_layernorm.weight": "model-00017-of-00081.safetensors", + "model.layers.16.self_attn.gate_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.gate_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.gate_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.k_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.k_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.o_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.o_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.q_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.q_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.v_proj.biases": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.v_proj.scales": "model-00015-of-00081.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00015-of-00081.safetensors", + "model.layers.17.input_layernorm.weight": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.expert_bias": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.experts.down_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.experts.down_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.experts.down_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.experts.gate_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.experts.gate_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.experts.gate_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.experts.up_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.experts.up_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.experts.up_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.router.gate.biases": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.router.gate.scales": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.router.gate.weight": "model-00017-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00018-of-00081.safetensors", + "model.layers.17.post_mlp_layernorm.weight": "model-00018-of-00081.safetensors", + "model.layers.17.pre_mlp_layernorm.weight": "model-00018-of-00081.safetensors", + "model.layers.17.self_attn.gate_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.gate_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.gate_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.k_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.k_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.o_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.o_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.q_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.q_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.v_proj.biases": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.v_proj.scales": "model-00017-of-00081.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00017-of-00081.safetensors", + "model.layers.18.input_layernorm.weight": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.expert_bias": "model-00018-of-00081.safetensors", + "model.layers.18.mlp.experts.down_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.experts.down_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.experts.down_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.experts.gate_proj.biases": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.experts.gate_proj.scales": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.experts.gate_proj.weight": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.experts.up_proj.biases": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.experts.up_proj.scales": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.experts.up_proj.weight": "model-00019-of-00081.safetensors", + "model.layers.18.mlp.router.gate.biases": "model-00018-of-00081.safetensors", + "model.layers.18.mlp.router.gate.scales": "model-00018-of-00081.safetensors", + "model.layers.18.mlp.router.gate.weight": "model-00018-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00081.safetensors", + "model.layers.18.post_mlp_layernorm.weight": "model-00020-of-00081.safetensors", + "model.layers.18.pre_mlp_layernorm.weight": "model-00020-of-00081.safetensors", + "model.layers.18.self_attn.gate_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.gate_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.gate_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.k_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.k_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.o_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.o_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.q_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.q_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.v_proj.biases": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.v_proj.scales": "model-00018-of-00081.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00018-of-00081.safetensors", + "model.layers.19.input_layernorm.weight": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.expert_bias": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.experts.down_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.experts.down_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.experts.down_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.experts.gate_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.experts.gate_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.experts.gate_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.experts.up_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.experts.up_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.experts.up_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.router.gate.biases": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.router.gate.scales": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.router.gate.weight": "model-00020-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00081.safetensors", + "model.layers.19.post_mlp_layernorm.weight": "model-00021-of-00081.safetensors", + "model.layers.19.pre_mlp_layernorm.weight": "model-00021-of-00081.safetensors", + "model.layers.19.self_attn.gate_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.gate_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.gate_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.k_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.k_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.o_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.o_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.q_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.q_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.v_proj.biases": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.v_proj.scales": "model-00020-of-00081.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00081.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.20.input_layernorm.weight": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.expert_bias": "model-00021-of-00081.safetensors", + "model.layers.20.mlp.experts.down_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.experts.down_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.experts.down_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.experts.gate_proj.biases": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.experts.gate_proj.scales": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.experts.gate_proj.weight": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.experts.up_proj.biases": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.experts.up_proj.scales": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.experts.up_proj.weight": "model-00022-of-00081.safetensors", + "model.layers.20.mlp.router.gate.biases": "model-00021-of-00081.safetensors", + "model.layers.20.mlp.router.gate.scales": "model-00021-of-00081.safetensors", + "model.layers.20.mlp.router.gate.weight": "model-00021-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00023-of-00081.safetensors", + "model.layers.20.post_mlp_layernorm.weight": "model-00023-of-00081.safetensors", + "model.layers.20.pre_mlp_layernorm.weight": "model-00023-of-00081.safetensors", + "model.layers.20.self_attn.gate_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.gate_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.gate_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.k_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.k_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.o_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.o_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.q_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.q_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.v_proj.biases": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.v_proj.scales": "model-00021-of-00081.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00081.safetensors", + "model.layers.21.input_layernorm.weight": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.expert_bias": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.experts.down_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.experts.down_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.experts.down_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.experts.gate_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.experts.gate_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.experts.gate_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.experts.up_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.experts.up_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.experts.up_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.router.gate.biases": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.router.gate.scales": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.router.gate.weight": "model-00023-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00024-of-00081.safetensors", + "model.layers.21.post_mlp_layernorm.weight": "model-00024-of-00081.safetensors", + "model.layers.21.pre_mlp_layernorm.weight": "model-00024-of-00081.safetensors", + "model.layers.21.self_attn.gate_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.gate_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.gate_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.k_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.k_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.o_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.o_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.q_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.q_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.v_proj.biases": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.v_proj.scales": "model-00023-of-00081.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00023-of-00081.safetensors", + "model.layers.22.input_layernorm.weight": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.expert_bias": "model-00024-of-00081.safetensors", + "model.layers.22.mlp.experts.down_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.experts.down_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.experts.down_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.experts.gate_proj.biases": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.experts.gate_proj.scales": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.experts.gate_proj.weight": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.experts.up_proj.biases": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.experts.up_proj.scales": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.experts.up_proj.weight": "model-00025-of-00081.safetensors", + "model.layers.22.mlp.router.gate.biases": "model-00024-of-00081.safetensors", + "model.layers.22.mlp.router.gate.scales": "model-00024-of-00081.safetensors", + "model.layers.22.mlp.router.gate.weight": "model-00024-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00026-of-00081.safetensors", + "model.layers.22.post_mlp_layernorm.weight": "model-00026-of-00081.safetensors", + "model.layers.22.pre_mlp_layernorm.weight": "model-00026-of-00081.safetensors", + "model.layers.22.self_attn.gate_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.gate_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.gate_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.k_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.k_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.o_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.o_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.q_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.q_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.v_proj.biases": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.v_proj.scales": "model-00024-of-00081.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00024-of-00081.safetensors", + "model.layers.23.input_layernorm.weight": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.expert_bias": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.experts.down_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.experts.down_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.experts.down_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.experts.gate_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.experts.gate_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.experts.gate_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.experts.up_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.experts.up_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.experts.up_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.router.gate.biases": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.router.gate.scales": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.router.gate.weight": "model-00026-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00027-of-00081.safetensors", + "model.layers.23.post_mlp_layernorm.weight": "model-00027-of-00081.safetensors", + "model.layers.23.pre_mlp_layernorm.weight": "model-00027-of-00081.safetensors", + "model.layers.23.self_attn.gate_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.gate_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.gate_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.k_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.k_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.o_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.o_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.q_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.q_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.v_proj.biases": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.v_proj.scales": "model-00026-of-00081.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00026-of-00081.safetensors", + "model.layers.24.input_layernorm.weight": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.expert_bias": "model-00027-of-00081.safetensors", + "model.layers.24.mlp.experts.down_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.experts.down_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.experts.down_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.experts.gate_proj.biases": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.experts.gate_proj.scales": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.experts.gate_proj.weight": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.experts.up_proj.biases": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.experts.up_proj.scales": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.experts.up_proj.weight": "model-00028-of-00081.safetensors", + "model.layers.24.mlp.router.gate.biases": "model-00027-of-00081.safetensors", + "model.layers.24.mlp.router.gate.scales": "model-00027-of-00081.safetensors", + "model.layers.24.mlp.router.gate.weight": "model-00027-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00029-of-00081.safetensors", + "model.layers.24.post_mlp_layernorm.weight": "model-00029-of-00081.safetensors", + "model.layers.24.pre_mlp_layernorm.weight": "model-00029-of-00081.safetensors", + "model.layers.24.self_attn.gate_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.gate_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.gate_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.k_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.k_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.o_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.o_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.q_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.q_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.v_proj.biases": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.v_proj.scales": "model-00027-of-00081.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00027-of-00081.safetensors", + "model.layers.25.input_layernorm.weight": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.expert_bias": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.experts.down_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.experts.down_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.experts.down_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.experts.gate_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.experts.gate_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.experts.gate_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.experts.up_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.experts.up_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.experts.up_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.router.gate.biases": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.router.gate.scales": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.router.gate.weight": "model-00029-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00030-of-00081.safetensors", + "model.layers.25.post_mlp_layernorm.weight": "model-00030-of-00081.safetensors", + "model.layers.25.pre_mlp_layernorm.weight": "model-00030-of-00081.safetensors", + "model.layers.25.self_attn.gate_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.gate_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.gate_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.k_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.k_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.o_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.o_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.q_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.q_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.v_proj.biases": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.v_proj.scales": "model-00029-of-00081.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00029-of-00081.safetensors", + "model.layers.26.input_layernorm.weight": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.expert_bias": "model-00030-of-00081.safetensors", + "model.layers.26.mlp.experts.down_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.experts.down_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.experts.down_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.experts.gate_proj.biases": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.experts.gate_proj.scales": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.experts.gate_proj.weight": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.experts.up_proj.biases": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.experts.up_proj.scales": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.experts.up_proj.weight": "model-00031-of-00081.safetensors", + "model.layers.26.mlp.router.gate.biases": "model-00030-of-00081.safetensors", + "model.layers.26.mlp.router.gate.scales": "model-00030-of-00081.safetensors", + "model.layers.26.mlp.router.gate.weight": "model-00030-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00032-of-00081.safetensors", + "model.layers.26.post_mlp_layernorm.weight": "model-00032-of-00081.safetensors", + "model.layers.26.pre_mlp_layernorm.weight": "model-00032-of-00081.safetensors", + "model.layers.26.self_attn.gate_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.gate_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.gate_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.k_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.k_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.o_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.o_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.q_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.q_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.v_proj.biases": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.v_proj.scales": "model-00030-of-00081.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00030-of-00081.safetensors", + "model.layers.27.input_layernorm.weight": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.expert_bias": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.experts.down_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.experts.down_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.experts.down_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.experts.gate_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.experts.gate_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.experts.gate_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.experts.up_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.experts.up_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.experts.up_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.router.gate.biases": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.router.gate.scales": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.router.gate.weight": "model-00032-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00033-of-00081.safetensors", + "model.layers.27.post_mlp_layernorm.weight": "model-00033-of-00081.safetensors", + "model.layers.27.pre_mlp_layernorm.weight": "model-00033-of-00081.safetensors", + "model.layers.27.self_attn.gate_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.gate_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.gate_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.k_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.k_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.o_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.o_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.q_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.q_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.v_proj.biases": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.v_proj.scales": "model-00032-of-00081.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00032-of-00081.safetensors", + "model.layers.28.input_layernorm.weight": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.expert_bias": "model-00033-of-00081.safetensors", + "model.layers.28.mlp.experts.down_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.experts.down_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.experts.down_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.experts.gate_proj.biases": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.experts.gate_proj.scales": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.experts.gate_proj.weight": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.experts.up_proj.biases": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.experts.up_proj.scales": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.experts.up_proj.weight": "model-00034-of-00081.safetensors", + "model.layers.28.mlp.router.gate.biases": "model-00033-of-00081.safetensors", + "model.layers.28.mlp.router.gate.scales": "model-00033-of-00081.safetensors", + "model.layers.28.mlp.router.gate.weight": "model-00033-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00035-of-00081.safetensors", + "model.layers.28.post_mlp_layernorm.weight": "model-00035-of-00081.safetensors", + "model.layers.28.pre_mlp_layernorm.weight": "model-00035-of-00081.safetensors", + "model.layers.28.self_attn.gate_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.gate_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.gate_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.k_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.k_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.o_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.o_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.q_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.q_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.v_proj.biases": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.v_proj.scales": "model-00033-of-00081.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00033-of-00081.safetensors", + "model.layers.29.input_layernorm.weight": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.expert_bias": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.experts.down_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.experts.down_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.experts.down_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.experts.gate_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.experts.gate_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.experts.gate_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.experts.up_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.experts.up_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.experts.up_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.router.gate.biases": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.router.gate.scales": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.router.gate.weight": "model-00035-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00036-of-00081.safetensors", + "model.layers.29.post_mlp_layernorm.weight": "model-00036-of-00081.safetensors", + "model.layers.29.pre_mlp_layernorm.weight": "model-00036-of-00081.safetensors", + "model.layers.29.self_attn.gate_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.gate_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.gate_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.k_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.k_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.o_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.o_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.q_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.q_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.v_proj.biases": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.v_proj.scales": "model-00035-of-00081.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00035-of-00081.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.30.input_layernorm.weight": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.expert_bias": "model-00036-of-00081.safetensors", + "model.layers.30.mlp.experts.down_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.experts.down_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.experts.down_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.experts.gate_proj.biases": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.experts.gate_proj.scales": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.experts.gate_proj.weight": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.experts.up_proj.biases": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.experts.up_proj.scales": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.experts.up_proj.weight": "model-00037-of-00081.safetensors", + "model.layers.30.mlp.router.gate.biases": "model-00036-of-00081.safetensors", + "model.layers.30.mlp.router.gate.scales": "model-00036-of-00081.safetensors", + "model.layers.30.mlp.router.gate.weight": "model-00036-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00038-of-00081.safetensors", + "model.layers.30.post_mlp_layernorm.weight": "model-00038-of-00081.safetensors", + "model.layers.30.pre_mlp_layernorm.weight": "model-00038-of-00081.safetensors", + "model.layers.30.self_attn.gate_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.gate_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.gate_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.k_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.k_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.o_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.o_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.q_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.q_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.v_proj.biases": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.v_proj.scales": "model-00036-of-00081.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00036-of-00081.safetensors", + "model.layers.31.input_layernorm.weight": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.expert_bias": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.experts.down_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.experts.down_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.experts.down_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.experts.gate_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.experts.gate_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.experts.gate_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.experts.up_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.experts.up_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.experts.up_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.router.gate.biases": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.router.gate.scales": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.router.gate.weight": "model-00038-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00039-of-00081.safetensors", + "model.layers.31.post_mlp_layernorm.weight": "model-00039-of-00081.safetensors", + "model.layers.31.pre_mlp_layernorm.weight": "model-00039-of-00081.safetensors", + "model.layers.31.self_attn.gate_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.gate_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.gate_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.k_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.k_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.o_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.o_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.q_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.q_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.v_proj.biases": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.v_proj.scales": "model-00038-of-00081.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00038-of-00081.safetensors", + "model.layers.32.input_layernorm.weight": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.expert_bias": "model-00039-of-00081.safetensors", + "model.layers.32.mlp.experts.down_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.experts.down_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.experts.down_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.experts.gate_proj.biases": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.experts.gate_proj.scales": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.experts.gate_proj.weight": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.experts.up_proj.biases": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.experts.up_proj.scales": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.experts.up_proj.weight": "model-00040-of-00081.safetensors", + "model.layers.32.mlp.router.gate.biases": "model-00039-of-00081.safetensors", + "model.layers.32.mlp.router.gate.scales": "model-00039-of-00081.safetensors", + "model.layers.32.mlp.router.gate.weight": "model-00039-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00041-of-00081.safetensors", + "model.layers.32.post_mlp_layernorm.weight": "model-00041-of-00081.safetensors", + "model.layers.32.pre_mlp_layernorm.weight": "model-00041-of-00081.safetensors", + "model.layers.32.self_attn.gate_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.gate_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.gate_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.k_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.k_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.o_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.o_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.q_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.q_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.v_proj.biases": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.v_proj.scales": "model-00039-of-00081.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00039-of-00081.safetensors", + "model.layers.33.input_layernorm.weight": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.expert_bias": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.experts.down_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.experts.down_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.experts.down_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.experts.gate_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.experts.gate_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.experts.gate_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.experts.up_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.experts.up_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.experts.up_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.router.gate.biases": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.router.gate.scales": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.router.gate.weight": "model-00041-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00042-of-00081.safetensors", + "model.layers.33.post_mlp_layernorm.weight": "model-00042-of-00081.safetensors", + "model.layers.33.pre_mlp_layernorm.weight": "model-00042-of-00081.safetensors", + "model.layers.33.self_attn.gate_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.gate_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.gate_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.k_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.k_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.o_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.o_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.q_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.q_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.v_proj.biases": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.v_proj.scales": "model-00041-of-00081.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00041-of-00081.safetensors", + "model.layers.34.input_layernorm.weight": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.expert_bias": "model-00042-of-00081.safetensors", + "model.layers.34.mlp.experts.down_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.experts.down_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.experts.down_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.experts.gate_proj.biases": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.experts.gate_proj.scales": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.experts.gate_proj.weight": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.experts.up_proj.biases": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.experts.up_proj.scales": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.experts.up_proj.weight": "model-00043-of-00081.safetensors", + "model.layers.34.mlp.router.gate.biases": "model-00042-of-00081.safetensors", + "model.layers.34.mlp.router.gate.scales": "model-00042-of-00081.safetensors", + "model.layers.34.mlp.router.gate.weight": "model-00042-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00044-of-00081.safetensors", + "model.layers.34.post_mlp_layernorm.weight": "model-00044-of-00081.safetensors", + "model.layers.34.pre_mlp_layernorm.weight": "model-00044-of-00081.safetensors", + "model.layers.34.self_attn.gate_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.gate_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.gate_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.k_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.k_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.o_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.o_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.q_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.q_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.v_proj.biases": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.v_proj.scales": "model-00042-of-00081.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00042-of-00081.safetensors", + "model.layers.35.input_layernorm.weight": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.expert_bias": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.experts.down_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.experts.down_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.experts.down_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.experts.gate_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.experts.gate_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.experts.gate_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.experts.up_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.experts.up_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.experts.up_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.router.gate.biases": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.router.gate.scales": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.router.gate.weight": "model-00044-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00045-of-00081.safetensors", + "model.layers.35.post_mlp_layernorm.weight": "model-00045-of-00081.safetensors", + "model.layers.35.pre_mlp_layernorm.weight": "model-00045-of-00081.safetensors", + "model.layers.35.self_attn.gate_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.gate_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.gate_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.k_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.k_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.o_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.o_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.q_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.q_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.v_proj.biases": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.v_proj.scales": "model-00044-of-00081.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00044-of-00081.safetensors", + "model.layers.36.input_layernorm.weight": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.expert_bias": "model-00045-of-00081.safetensors", + "model.layers.36.mlp.experts.down_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.experts.down_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.experts.down_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.experts.gate_proj.biases": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.experts.gate_proj.scales": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.experts.gate_proj.weight": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.experts.up_proj.biases": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.experts.up_proj.scales": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.experts.up_proj.weight": "model-00046-of-00081.safetensors", + "model.layers.36.mlp.router.gate.biases": "model-00045-of-00081.safetensors", + "model.layers.36.mlp.router.gate.scales": "model-00045-of-00081.safetensors", + "model.layers.36.mlp.router.gate.weight": "model-00045-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00047-of-00081.safetensors", + "model.layers.36.post_mlp_layernorm.weight": "model-00047-of-00081.safetensors", + "model.layers.36.pre_mlp_layernorm.weight": "model-00047-of-00081.safetensors", + "model.layers.36.self_attn.gate_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.gate_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.gate_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.k_norm.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.k_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.k_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.o_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.o_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.q_norm.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.q_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.q_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.v_proj.biases": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.v_proj.scales": "model-00045-of-00081.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00045-of-00081.safetensors", + "model.layers.37.input_layernorm.weight": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.expert_bias": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.experts.down_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.experts.down_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.experts.down_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.experts.gate_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.experts.gate_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.experts.gate_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.experts.up_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.experts.up_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.experts.up_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.router.gate.biases": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.router.gate.scales": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.router.gate.weight": "model-00047-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00048-of-00081.safetensors", + "model.layers.37.post_mlp_layernorm.weight": "model-00048-of-00081.safetensors", + "model.layers.37.pre_mlp_layernorm.weight": "model-00048-of-00081.safetensors", + "model.layers.37.self_attn.gate_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.gate_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.gate_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.k_norm.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.k_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.k_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.o_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.o_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.q_norm.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.q_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.q_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.v_proj.biases": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.v_proj.scales": "model-00047-of-00081.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00047-of-00081.safetensors", + "model.layers.38.input_layernorm.weight": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.expert_bias": "model-00048-of-00081.safetensors", + "model.layers.38.mlp.experts.down_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.experts.down_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.experts.down_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.experts.gate_proj.biases": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.experts.gate_proj.scales": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.experts.gate_proj.weight": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.experts.up_proj.biases": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.experts.up_proj.scales": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.experts.up_proj.weight": "model-00049-of-00081.safetensors", + "model.layers.38.mlp.router.gate.biases": "model-00048-of-00081.safetensors", + "model.layers.38.mlp.router.gate.scales": "model-00048-of-00081.safetensors", + "model.layers.38.mlp.router.gate.weight": "model-00048-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00050-of-00081.safetensors", + "model.layers.38.post_mlp_layernorm.weight": "model-00050-of-00081.safetensors", + "model.layers.38.pre_mlp_layernorm.weight": "model-00050-of-00081.safetensors", + "model.layers.38.self_attn.gate_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.gate_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.gate_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.k_norm.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.k_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.k_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.o_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.o_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.q_norm.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.q_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.q_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.v_proj.biases": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.v_proj.scales": "model-00048-of-00081.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00048-of-00081.safetensors", + "model.layers.39.input_layernorm.weight": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.expert_bias": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.experts.down_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.experts.down_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.experts.down_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.experts.gate_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.experts.gate_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.experts.gate_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.experts.up_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.experts.up_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.experts.up_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.router.gate.biases": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.router.gate.scales": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.router.gate.weight": "model-00050-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00051-of-00081.safetensors", + "model.layers.39.post_mlp_layernorm.weight": "model-00051-of-00081.safetensors", + "model.layers.39.pre_mlp_layernorm.weight": "model-00051-of-00081.safetensors", + "model.layers.39.self_attn.gate_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.gate_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.gate_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.k_norm.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.k_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.k_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.o_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.o_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.q_norm.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.q_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.q_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.v_proj.biases": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.v_proj.scales": "model-00050-of-00081.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00050-of-00081.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.40.input_layernorm.weight": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.expert_bias": "model-00051-of-00081.safetensors", + "model.layers.40.mlp.experts.down_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.experts.down_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.experts.down_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.experts.gate_proj.biases": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.experts.gate_proj.scales": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.experts.gate_proj.weight": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.experts.up_proj.biases": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.experts.up_proj.scales": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.experts.up_proj.weight": "model-00052-of-00081.safetensors", + "model.layers.40.mlp.router.gate.biases": "model-00051-of-00081.safetensors", + "model.layers.40.mlp.router.gate.scales": "model-00051-of-00081.safetensors", + "model.layers.40.mlp.router.gate.weight": "model-00051-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00053-of-00081.safetensors", + "model.layers.40.post_mlp_layernorm.weight": "model-00053-of-00081.safetensors", + "model.layers.40.pre_mlp_layernorm.weight": "model-00053-of-00081.safetensors", + "model.layers.40.self_attn.gate_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.gate_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.gate_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.k_norm.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.k_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.k_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.o_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.o_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.q_norm.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.q_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.q_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.v_proj.biases": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.v_proj.scales": "model-00051-of-00081.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00051-of-00081.safetensors", + "model.layers.41.input_layernorm.weight": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.expert_bias": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.experts.down_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.experts.down_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.experts.down_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.experts.gate_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.experts.gate_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.experts.gate_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.experts.up_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.experts.up_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.experts.up_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.router.gate.biases": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.router.gate.scales": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.router.gate.weight": "model-00053-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00054-of-00081.safetensors", + "model.layers.41.post_mlp_layernorm.weight": "model-00054-of-00081.safetensors", + "model.layers.41.pre_mlp_layernorm.weight": "model-00054-of-00081.safetensors", + "model.layers.41.self_attn.gate_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.gate_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.gate_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.k_norm.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.k_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.k_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.o_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.o_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.q_norm.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.q_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.q_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.v_proj.biases": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.v_proj.scales": "model-00053-of-00081.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00053-of-00081.safetensors", + "model.layers.42.input_layernorm.weight": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.expert_bias": "model-00054-of-00081.safetensors", + "model.layers.42.mlp.experts.down_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.experts.down_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.experts.down_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.experts.gate_proj.biases": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.experts.gate_proj.scales": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.experts.gate_proj.weight": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.experts.up_proj.biases": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.experts.up_proj.scales": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.experts.up_proj.weight": "model-00055-of-00081.safetensors", + "model.layers.42.mlp.router.gate.biases": "model-00054-of-00081.safetensors", + "model.layers.42.mlp.router.gate.scales": "model-00054-of-00081.safetensors", + "model.layers.42.mlp.router.gate.weight": "model-00054-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00056-of-00081.safetensors", + "model.layers.42.post_mlp_layernorm.weight": "model-00056-of-00081.safetensors", + "model.layers.42.pre_mlp_layernorm.weight": "model-00056-of-00081.safetensors", + "model.layers.42.self_attn.gate_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.gate_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.gate_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.k_norm.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.k_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.k_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.o_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.o_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.q_norm.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.q_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.q_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.v_proj.biases": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.v_proj.scales": "model-00054-of-00081.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00054-of-00081.safetensors", + "model.layers.43.input_layernorm.weight": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.expert_bias": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.experts.down_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.experts.down_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.experts.down_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.experts.gate_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.experts.gate_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.experts.gate_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.experts.up_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.experts.up_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.experts.up_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.router.gate.biases": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.router.gate.scales": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.router.gate.weight": "model-00056-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00057-of-00081.safetensors", + "model.layers.43.post_mlp_layernorm.weight": "model-00057-of-00081.safetensors", + "model.layers.43.pre_mlp_layernorm.weight": "model-00057-of-00081.safetensors", + "model.layers.43.self_attn.gate_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.gate_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.gate_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.k_norm.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.k_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.k_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.o_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.o_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.q_norm.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.q_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.q_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.v_proj.biases": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.v_proj.scales": "model-00056-of-00081.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00056-of-00081.safetensors", + "model.layers.44.input_layernorm.weight": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.expert_bias": "model-00057-of-00081.safetensors", + "model.layers.44.mlp.experts.down_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.experts.down_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.experts.down_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.experts.gate_proj.biases": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.experts.gate_proj.scales": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.experts.gate_proj.weight": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.experts.up_proj.biases": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.experts.up_proj.scales": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.experts.up_proj.weight": "model-00058-of-00081.safetensors", + "model.layers.44.mlp.router.gate.biases": "model-00057-of-00081.safetensors", + "model.layers.44.mlp.router.gate.scales": "model-00057-of-00081.safetensors", + "model.layers.44.mlp.router.gate.weight": "model-00057-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00059-of-00081.safetensors", + "model.layers.44.post_mlp_layernorm.weight": "model-00059-of-00081.safetensors", + "model.layers.44.pre_mlp_layernorm.weight": "model-00059-of-00081.safetensors", + "model.layers.44.self_attn.gate_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.gate_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.gate_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.k_norm.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.k_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.k_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.o_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.o_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.q_norm.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.q_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.q_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.v_proj.biases": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.v_proj.scales": "model-00057-of-00081.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00057-of-00081.safetensors", + "model.layers.45.input_layernorm.weight": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.expert_bias": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.experts.down_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.experts.down_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.experts.down_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.experts.gate_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.experts.gate_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.experts.gate_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.experts.up_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.experts.up_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.experts.up_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.router.gate.biases": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.router.gate.scales": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.router.gate.weight": "model-00059-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00060-of-00081.safetensors", + "model.layers.45.post_mlp_layernorm.weight": "model-00060-of-00081.safetensors", + "model.layers.45.pre_mlp_layernorm.weight": "model-00060-of-00081.safetensors", + "model.layers.45.self_attn.gate_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.gate_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.gate_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.k_norm.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.k_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.k_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.o_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.o_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.q_norm.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.q_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.q_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.v_proj.biases": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.v_proj.scales": "model-00059-of-00081.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00059-of-00081.safetensors", + "model.layers.46.input_layernorm.weight": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.expert_bias": "model-00060-of-00081.safetensors", + "model.layers.46.mlp.experts.down_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.experts.down_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.experts.down_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.experts.gate_proj.biases": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.experts.gate_proj.scales": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.experts.gate_proj.weight": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.experts.up_proj.biases": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.experts.up_proj.scales": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.experts.up_proj.weight": "model-00061-of-00081.safetensors", + "model.layers.46.mlp.router.gate.biases": "model-00060-of-00081.safetensors", + "model.layers.46.mlp.router.gate.scales": "model-00060-of-00081.safetensors", + "model.layers.46.mlp.router.gate.weight": "model-00060-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00062-of-00081.safetensors", + "model.layers.46.post_mlp_layernorm.weight": "model-00062-of-00081.safetensors", + "model.layers.46.pre_mlp_layernorm.weight": "model-00062-of-00081.safetensors", + "model.layers.46.self_attn.gate_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.gate_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.gate_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.k_norm.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.k_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.k_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.o_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.o_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.q_norm.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.q_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.q_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.v_proj.biases": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.v_proj.scales": "model-00060-of-00081.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00060-of-00081.safetensors", + "model.layers.47.input_layernorm.weight": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.expert_bias": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.experts.down_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.experts.down_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.experts.down_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.experts.gate_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.experts.gate_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.experts.gate_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.experts.up_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.experts.up_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.experts.up_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.router.gate.biases": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.router.gate.scales": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.router.gate.weight": "model-00062-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00063-of-00081.safetensors", + "model.layers.47.post_mlp_layernorm.weight": "model-00063-of-00081.safetensors", + "model.layers.47.pre_mlp_layernorm.weight": "model-00063-of-00081.safetensors", + "model.layers.47.self_attn.gate_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.gate_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.gate_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.k_norm.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.k_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.k_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.o_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.o_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.q_norm.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.q_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.q_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.v_proj.biases": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.v_proj.scales": "model-00062-of-00081.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00062-of-00081.safetensors", + "model.layers.48.input_layernorm.weight": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.expert_bias": "model-00063-of-00081.safetensors", + "model.layers.48.mlp.experts.down_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.experts.down_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.experts.down_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.experts.gate_proj.biases": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.experts.gate_proj.scales": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.experts.gate_proj.weight": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.experts.up_proj.biases": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.experts.up_proj.scales": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.experts.up_proj.weight": "model-00064-of-00081.safetensors", + "model.layers.48.mlp.router.gate.biases": "model-00063-of-00081.safetensors", + "model.layers.48.mlp.router.gate.scales": "model-00063-of-00081.safetensors", + "model.layers.48.mlp.router.gate.weight": "model-00063-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00065-of-00081.safetensors", + "model.layers.48.post_mlp_layernorm.weight": "model-00065-of-00081.safetensors", + "model.layers.48.pre_mlp_layernorm.weight": "model-00065-of-00081.safetensors", + "model.layers.48.self_attn.gate_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.gate_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.gate_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.k_norm.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.k_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.k_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.o_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.o_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.q_norm.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.q_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.q_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.v_proj.biases": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.v_proj.scales": "model-00063-of-00081.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00063-of-00081.safetensors", + "model.layers.49.input_layernorm.weight": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.expert_bias": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.experts.down_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.experts.down_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.experts.down_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.experts.gate_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.experts.gate_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.experts.gate_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.experts.up_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.experts.up_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.experts.up_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.router.gate.biases": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.router.gate.scales": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.router.gate.weight": "model-00065-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00066-of-00081.safetensors", + "model.layers.49.post_mlp_layernorm.weight": "model-00066-of-00081.safetensors", + "model.layers.49.pre_mlp_layernorm.weight": "model-00066-of-00081.safetensors", + "model.layers.49.self_attn.gate_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.gate_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.gate_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.k_norm.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.k_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.k_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.o_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.o_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.q_norm.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.q_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.q_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.v_proj.biases": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.v_proj.scales": "model-00065-of-00081.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00065-of-00081.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.down_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.down_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.post_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.pre_mlp_layernorm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.50.input_layernorm.weight": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.expert_bias": "model-00066-of-00081.safetensors", + "model.layers.50.mlp.experts.down_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.experts.down_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.experts.down_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.experts.gate_proj.biases": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.experts.gate_proj.scales": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.experts.gate_proj.weight": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.experts.up_proj.biases": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.experts.up_proj.scales": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.experts.up_proj.weight": "model-00067-of-00081.safetensors", + "model.layers.50.mlp.router.gate.biases": "model-00066-of-00081.safetensors", + "model.layers.50.mlp.router.gate.scales": "model-00066-of-00081.safetensors", + "model.layers.50.mlp.router.gate.weight": "model-00066-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00068-of-00081.safetensors", + "model.layers.50.post_mlp_layernorm.weight": "model-00068-of-00081.safetensors", + "model.layers.50.pre_mlp_layernorm.weight": "model-00068-of-00081.safetensors", + "model.layers.50.self_attn.gate_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.gate_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.gate_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.k_norm.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.k_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.k_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.o_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.o_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.q_norm.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.q_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.q_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.v_proj.biases": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.v_proj.scales": "model-00066-of-00081.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00066-of-00081.safetensors", + "model.layers.51.input_layernorm.weight": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.expert_bias": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.experts.down_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.experts.down_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.experts.down_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.experts.gate_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.experts.gate_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.experts.gate_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.experts.up_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.experts.up_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.experts.up_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.router.gate.biases": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.router.gate.scales": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.router.gate.weight": "model-00068-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.down_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.down_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.down_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.gate_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.gate_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.gate_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.up_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.up_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.51.mlp.shared_experts.up_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00069-of-00081.safetensors", + "model.layers.51.post_mlp_layernorm.weight": "model-00069-of-00081.safetensors", + "model.layers.51.pre_mlp_layernorm.weight": "model-00069-of-00081.safetensors", + "model.layers.51.self_attn.gate_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.gate_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.gate_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.k_norm.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.k_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.k_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.o_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.o_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.q_norm.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.q_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.q_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.v_proj.biases": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.v_proj.scales": "model-00068-of-00081.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00068-of-00081.safetensors", + "model.layers.52.input_layernorm.weight": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.expert_bias": "model-00069-of-00081.safetensors", + "model.layers.52.mlp.experts.down_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.experts.down_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.experts.down_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.experts.gate_proj.biases": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.experts.gate_proj.scales": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.experts.gate_proj.weight": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.experts.up_proj.biases": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.experts.up_proj.scales": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.experts.up_proj.weight": "model-00070-of-00081.safetensors", + "model.layers.52.mlp.router.gate.biases": "model-00069-of-00081.safetensors", + "model.layers.52.mlp.router.gate.scales": "model-00069-of-00081.safetensors", + "model.layers.52.mlp.router.gate.weight": "model-00069-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.down_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.down_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.down_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.gate_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.gate_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.gate_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.up_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.up_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.52.mlp.shared_experts.up_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00071-of-00081.safetensors", + "model.layers.52.post_mlp_layernorm.weight": "model-00071-of-00081.safetensors", + "model.layers.52.pre_mlp_layernorm.weight": "model-00071-of-00081.safetensors", + "model.layers.52.self_attn.gate_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.gate_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.gate_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.k_norm.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.k_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.k_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.o_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.o_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.q_norm.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.q_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.q_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.v_proj.biases": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.v_proj.scales": "model-00069-of-00081.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00069-of-00081.safetensors", + "model.layers.53.input_layernorm.weight": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.expert_bias": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.experts.down_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.experts.down_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.experts.down_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.experts.gate_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.experts.gate_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.experts.gate_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.experts.up_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.experts.up_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.experts.up_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.router.gate.biases": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.router.gate.scales": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.router.gate.weight": "model-00071-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.down_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.down_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.down_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.gate_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.gate_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.gate_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.up_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.up_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.53.mlp.shared_experts.up_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00072-of-00081.safetensors", + "model.layers.53.post_mlp_layernorm.weight": "model-00072-of-00081.safetensors", + "model.layers.53.pre_mlp_layernorm.weight": "model-00072-of-00081.safetensors", + "model.layers.53.self_attn.gate_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.gate_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.gate_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.k_norm.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.k_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.k_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.o_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.o_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.q_norm.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.q_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.q_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.v_proj.biases": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.v_proj.scales": "model-00071-of-00081.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00071-of-00081.safetensors", + "model.layers.54.input_layernorm.weight": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.expert_bias": "model-00072-of-00081.safetensors", + "model.layers.54.mlp.experts.down_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.experts.down_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.experts.down_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.experts.gate_proj.biases": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.experts.gate_proj.scales": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.experts.gate_proj.weight": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.experts.up_proj.biases": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.experts.up_proj.scales": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.experts.up_proj.weight": "model-00073-of-00081.safetensors", + "model.layers.54.mlp.router.gate.biases": "model-00072-of-00081.safetensors", + "model.layers.54.mlp.router.gate.scales": "model-00072-of-00081.safetensors", + "model.layers.54.mlp.router.gate.weight": "model-00072-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.down_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.down_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.down_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.gate_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.gate_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.gate_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.up_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.up_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.54.mlp.shared_experts.up_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00074-of-00081.safetensors", + "model.layers.54.post_mlp_layernorm.weight": "model-00074-of-00081.safetensors", + "model.layers.54.pre_mlp_layernorm.weight": "model-00074-of-00081.safetensors", + "model.layers.54.self_attn.gate_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.gate_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.gate_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.k_norm.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.k_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.k_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.o_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.o_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.q_norm.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.q_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.q_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.v_proj.biases": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.v_proj.scales": "model-00072-of-00081.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00072-of-00081.safetensors", + "model.layers.55.input_layernorm.weight": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.expert_bias": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.experts.down_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.experts.down_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.experts.down_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.experts.gate_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.experts.gate_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.experts.gate_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.experts.up_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.experts.up_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.experts.up_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.router.gate.biases": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.router.gate.scales": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.router.gate.weight": "model-00074-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.down_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.down_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.down_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.gate_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.gate_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.gate_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.up_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.up_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.55.mlp.shared_experts.up_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00075-of-00081.safetensors", + "model.layers.55.post_mlp_layernorm.weight": "model-00075-of-00081.safetensors", + "model.layers.55.pre_mlp_layernorm.weight": "model-00075-of-00081.safetensors", + "model.layers.55.self_attn.gate_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.gate_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.gate_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.k_norm.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.k_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.k_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.o_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.o_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.q_norm.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.q_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.q_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.v_proj.biases": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.v_proj.scales": "model-00074-of-00081.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00074-of-00081.safetensors", + "model.layers.56.input_layernorm.weight": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.expert_bias": "model-00075-of-00081.safetensors", + "model.layers.56.mlp.experts.down_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.experts.down_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.experts.down_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.experts.gate_proj.biases": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.experts.gate_proj.scales": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.experts.gate_proj.weight": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.experts.up_proj.biases": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.experts.up_proj.scales": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.experts.up_proj.weight": "model-00076-of-00081.safetensors", + "model.layers.56.mlp.router.gate.biases": "model-00075-of-00081.safetensors", + "model.layers.56.mlp.router.gate.scales": "model-00075-of-00081.safetensors", + "model.layers.56.mlp.router.gate.weight": "model-00075-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.down_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.down_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.down_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.gate_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.gate_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.gate_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.up_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.up_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.56.mlp.shared_experts.up_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00077-of-00081.safetensors", + "model.layers.56.post_mlp_layernorm.weight": "model-00077-of-00081.safetensors", + "model.layers.56.pre_mlp_layernorm.weight": "model-00077-of-00081.safetensors", + "model.layers.56.self_attn.gate_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.gate_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.gate_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.k_norm.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.k_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.k_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.o_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.o_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.q_norm.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.q_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.q_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.v_proj.biases": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.v_proj.scales": "model-00075-of-00081.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00075-of-00081.safetensors", + "model.layers.57.input_layernorm.weight": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.expert_bias": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.experts.down_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.experts.down_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.experts.down_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.experts.gate_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.experts.gate_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.experts.gate_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.experts.up_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.experts.up_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.experts.up_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.router.gate.biases": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.router.gate.scales": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.router.gate.weight": "model-00077-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.down_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.down_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.down_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.gate_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.gate_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.gate_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.up_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.up_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.57.mlp.shared_experts.up_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00078-of-00081.safetensors", + "model.layers.57.post_mlp_layernorm.weight": "model-00078-of-00081.safetensors", + "model.layers.57.pre_mlp_layernorm.weight": "model-00078-of-00081.safetensors", + "model.layers.57.self_attn.gate_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.gate_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.gate_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.k_norm.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.k_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.k_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.o_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.o_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.q_norm.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.q_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.q_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.v_proj.biases": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.v_proj.scales": "model-00077-of-00081.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00077-of-00081.safetensors", + "model.layers.58.input_layernorm.weight": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.expert_bias": "model-00078-of-00081.safetensors", + "model.layers.58.mlp.experts.down_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.experts.down_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.experts.down_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.experts.gate_proj.biases": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.experts.gate_proj.scales": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.experts.gate_proj.weight": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.experts.up_proj.biases": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.experts.up_proj.scales": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.experts.up_proj.weight": "model-00079-of-00081.safetensors", + "model.layers.58.mlp.router.gate.biases": "model-00078-of-00081.safetensors", + "model.layers.58.mlp.router.gate.scales": "model-00078-of-00081.safetensors", + "model.layers.58.mlp.router.gate.weight": "model-00078-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.down_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.down_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.down_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.gate_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.gate_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.gate_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.up_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.up_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.58.mlp.shared_experts.up_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00080-of-00081.safetensors", + "model.layers.58.post_mlp_layernorm.weight": "model-00080-of-00081.safetensors", + "model.layers.58.pre_mlp_layernorm.weight": "model-00080-of-00081.safetensors", + "model.layers.58.self_attn.gate_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.gate_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.gate_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.k_norm.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.k_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.k_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.o_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.o_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.q_norm.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.q_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.q_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.v_proj.biases": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.v_proj.scales": "model-00078-of-00081.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00078-of-00081.safetensors", + "model.layers.59.input_layernorm.weight": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.expert_bias": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.experts.down_proj.biases": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.experts.down_proj.scales": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.experts.down_proj.weight": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.experts.gate_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.experts.gate_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.experts.gate_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.experts.up_proj.biases": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.experts.up_proj.scales": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.experts.up_proj.weight": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.router.gate.biases": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.router.gate.scales": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.router.gate.weight": "model-00080-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.down_proj.biases": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.down_proj.scales": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.down_proj.weight": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.gate_proj.biases": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.gate_proj.scales": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.gate_proj.weight": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.up_proj.biases": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.up_proj.scales": "model-00081-of-00081.safetensors", + "model.layers.59.mlp.shared_experts.up_proj.weight": "model-00081-of-00081.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00081-of-00081.safetensors", + "model.layers.59.post_mlp_layernorm.weight": "model-00081-of-00081.safetensors", + "model.layers.59.pre_mlp_layernorm.weight": "model-00081-of-00081.safetensors", + "model.layers.59.self_attn.gate_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.gate_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.gate_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.k_norm.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.k_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.k_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.o_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.o_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.q_norm.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.q_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.q_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.v_proj.biases": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.v_proj.scales": "model-00080-of-00081.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00080-of-00081.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.expert_bias": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.down_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.experts.down_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.experts.down_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.experts.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.up_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.up_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.experts.up_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.router.gate.biases": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.router.gate.scales": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.router.gate.weight": "model-00001-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00081.safetensors", + "model.layers.6.post_mlp_layernorm.weight": "model-00002-of-00081.safetensors", + "model.layers.6.pre_mlp_layernorm.weight": "model-00002-of-00081.safetensors", + "model.layers.6.self_attn.gate_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.gate_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.gate_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.k_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.k_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.o_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.o_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.q_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.q_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.v_proj.biases": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00081.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00081.safetensors", + "model.layers.7.input_layernorm.weight": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.expert_bias": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.experts.down_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.experts.down_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.experts.down_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.experts.gate_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.experts.gate_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.experts.gate_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.experts.up_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.experts.up_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.experts.up_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.router.gate.biases": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.router.gate.scales": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.router.gate.weight": "model-00002-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00081.safetensors", + "model.layers.7.post_mlp_layernorm.weight": "model-00003-of-00081.safetensors", + "model.layers.7.pre_mlp_layernorm.weight": "model-00003-of-00081.safetensors", + "model.layers.7.self_attn.gate_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.gate_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.gate_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.k_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.k_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.o_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.o_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.q_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.q_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.v_proj.biases": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.v_proj.scales": "model-00002-of-00081.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00081.safetensors", + "model.layers.8.input_layernorm.weight": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.expert_bias": "model-00003-of-00081.safetensors", + "model.layers.8.mlp.experts.down_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.experts.down_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.experts.down_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.experts.gate_proj.biases": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.experts.gate_proj.scales": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.experts.gate_proj.weight": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.experts.up_proj.biases": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.experts.up_proj.scales": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.experts.up_proj.weight": "model-00004-of-00081.safetensors", + "model.layers.8.mlp.router.gate.biases": "model-00003-of-00081.safetensors", + "model.layers.8.mlp.router.gate.scales": "model-00003-of-00081.safetensors", + "model.layers.8.mlp.router.gate.weight": "model-00003-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00005-of-00081.safetensors", + "model.layers.8.post_mlp_layernorm.weight": "model-00005-of-00081.safetensors", + "model.layers.8.pre_mlp_layernorm.weight": "model-00005-of-00081.safetensors", + "model.layers.8.self_attn.gate_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.gate_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.gate_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.k_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.k_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.o_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.o_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.q_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.q_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.v_proj.biases": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.v_proj.scales": "model-00003-of-00081.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00081.safetensors", + "model.layers.9.input_layernorm.weight": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.expert_bias": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.experts.down_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.experts.down_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.experts.down_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.experts.gate_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.experts.gate_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.experts.gate_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.experts.up_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.experts.up_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.experts.up_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.router.gate.biases": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.router.gate.scales": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.router.gate.weight": "model-00005-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.biases": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.scales": "model-00006-of-00081.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00006-of-00081.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00006-of-00081.safetensors", + "model.layers.9.post_mlp_layernorm.weight": "model-00006-of-00081.safetensors", + "model.layers.9.pre_mlp_layernorm.weight": "model-00006-of-00081.safetensors", + "model.layers.9.self_attn.gate_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.gate_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.gate_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.k_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.k_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.o_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.o_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.q_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.q_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.v_proj.biases": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.v_proj.scales": "model-00005-of-00081.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00005-of-00081.safetensors", + "model.norm.weight": "model-00081-of-00081.safetensors" + } +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..681605e5cc3898f9937deea97b32dbf9d6bd7479 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d864fe0d9c300d44c54006f5960548946d507d8ec05a082a3bff3e49de58208 +size 14614721 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce7388b1a654f6bff046cae75317c8d989bfd8f3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,14 @@ +{ + "add_prefix_space": null, + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "is_local": true, + "model_max_length": 65536, + "model_specific_special_tokens": {}, + "pad_token": "<|pad|>", + "tokenizer_class": "TokenizersBackend", + "tool_parser_type": "json_tools", + "use_default_system_prompt": false +}