DeepHermes 3 Preview is the latest version of the flagship Hermes series of LLMs by Nous Research, and one of the first models in the world to unify reasoning (long chains of thought that improve answer accuracy) and normal LLM response modes in a single model.

8b

674 pulls · updated 4 weeks ago

64dd3b9c4eaf · 4.9GB
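The two response modes are selected purely by system prompt. A minimal sketch of toggling them through the Ollama Python client, assuming the tag on this page pulls as `deephermes3:8b` (a placeholder name) and using the reasoning-toggle system prompt published with DeepHermes 3 (wording is an assumption, verify against the model card):

```python
# Minimal sketch, assuming the `ollama` Python client is installed and the
# model has been pulled as "deephermes3:8b" (placeholder tag, not verified).
import ollama

# Reasoning toggle: DeepHermes enables long chain-of-thought via a system
# prompt; this wording follows the published model card (assumption).
THINK_PROMPT = (
    "You are a deep thinking AI, you may use extremely long chains of "
    "thought to deeply consider the problem and deliberate with yourself "
    "via systematic reasoning processes to help come to a correct solution "
    "prior to answering. You should enclose your thoughts and internal "
    "monologue inside <think> </think> tags, and then provide your solution "
    "or response to the problem."
)

resp = ollama.chat(
    model="deephermes3:8b",  # placeholder tag
    messages=[
        {"role": "system", "content": THINK_PROMPT},  # omit for normal mode
        {"role": "user", "content": "What is 17 * 24?"},
    ],
)
print(resp["message"]["content"])  # reasoning arrives inside <think>...</think>
```

Dropping the system message yields an ordinary direct response from the same weights.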
    Metadata
  • general.architecture
    llama
  • general.file_type
    Q4_K_M
  • llama.attention.head_count
    32
  • llama.attention.head_count_kv
    8
  • llama.attention.key_length
    128
  • llama.attention.layer_norm_rms_epsilon
    1e-05
  • llama.attention.value_length
    128
  • llama.block_count
    32
  • llama.context_length
    131072
  • llama.embedding_length
    4096
  • llama.feed_forward_length
    14336
  • llama.rope.dimension_count
    128
  • llama.rope.freq_base
    500000
  • llama.vocab_size
    128256
  • tokenizer.chat_template.tool_use
    {%- macro json_to_python_type(json_spec) %}
    {%- set basic_type_map = { "string": "str", "number": "float", "integer": "int", "boolean": "bool" } %}
    {%- if basic_type_map[json_spec.type] is defined %}
    {{- basic_type_map[json_spec.type] }}
    {%- elif json_spec.type == "array" %}
    {{- "list[" + json_to_python_type(json_spec|items) + "]"}}
    {%- elif json_spec.type == "object" %}
    {%- if json_spec.additionalProperties is defined %}
    {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}}
    {%- else %}
    {{- "dict" }}
    {%- endif %}
    {%- elif json_spec.type is iterable %}
    {{- "Union[" }}
    {%- for t in json_spec.type %}
    {{- json_to_python_type({"type": t}) }}
    {%- if not loop.last %}
    {{- "," }}
    {%- endif %}
    {%- endfor %}
    {{- "]" }}
    {%- else %}
    {{- "Any" }}
    {%- endif %}
    {%- endmacro %}
    {{- bos_token }}
    {{- '<|im_start|>system ' }}
    {{- "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
    {%- for tool in tools %}
    {%- if tool.function is defined %}
    {%- set tool = tool.function %}
    {%- endif %}
    {{- '{"type": "function", "function": ' }}
    {{- '{"name": "' + tool.name + '", ' }}
    {{- '"description": "' + tool.name + '(' }}
    {%- for param_name, param_fields in tool.parameters.properties|items %}
    {{- param_name + ": " + json_to_python_type(param_fields) }}
    {%- if not loop.last %}
    {{- ", " }}
    {%- endif %}
    {%- endfor %}
    {{- ")" }}
    {%- if tool.return is defined %}
    {{- " -> " + json_to_python_type(tool.return) }}
    {%- endif %}
    {{- " - " + tool.description + " " }}
    {%- for param_name, param_fields in tool.parameters.properties|items %}
    {%- if loop.first %}
    {{- " Args: " }}
    {%- endif %}
    {{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
    {%- endfor %}
    {%- if tool.return is defined and tool.return.description is defined %}
    {{- " Returns: " + tool.return.description }}
    {%- endif %}
    {{- '"' }}
    {{- ', "parameters": ' }}
    {%- if tool.parameters.properties | length == 0 %}
    {{- "{}" }}
    {%- else %}
    {{- tool.parameters|tojson }}
    {%- endif %}
    {{- "}" }}
    {%- if not loop.last %}
    {{- " " }}
    {%- endif %}
    {%- endfor %}
    {{- " </tools>" }}
    {{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} ' }}
    {{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: " }}
    {{- "<tool_call> " }}
    {{- '{"name": <function-name>, "arguments": <args-dict>} ' }}
    {{- '</tool_call><|im_end|> ' }}
    {%- for message in messages %}
    {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
    {{- '<|im_start|>' + message.role + ' ' + message.content + '<|im_end|>' + ' ' }}
    {%- elif message.role == "assistant" %}
    {{- '<|im_start|>' + message.role }}
    {%- for tool_call in message.tool_calls %}
    {{- ' <tool_call> ' }}
    {%- if tool_call.function is defined %}
    {%- set tool_call = tool_call.function %}
    {%- endif %}
    {{- '{' }}
    {{- '"name": "' }}
    {{- tool_call.name }}
    {{- '"' }}
    {{- ', '}}
    {%- if tool_call.arguments is defined %}
    {{- '"arguments": ' }}
    {%- if tool_call.arguments is string %}
    {{- tool_call.arguments }}
    {%- else %}
    {{- tool_call.arguments|tojson }}
    {%- endif %}
    {%- endif %}
    {{- '}' }}
    {{- ' </tool_call>' }}
    {%- endfor %}
    {{- '<|im_end|> ' }}
    {%- elif message.role == "tool" %}
    {%- if loop.previtem and loop.previtem.role != "tool" %}
    {{- '<|im_start|>tool ' }}
    {%- endif %}
    {{- '<tool_response> ' }}
    {{- message.content }}
    {%- if not loop.last %}
    {{- ' </tool_response> ' }}
    {%- else %}
    {{- ' </tool_response>' }}
    {%- endif %}
    {%- if not loop.last and loop.nextitem.role != "tool" %}
    {{- '<|im_end|>' }}
    {%- elif loop.last %}
    {{- '<|im_end|>' }}
    {%- endif %}
    {%- endif %}
    {%- endfor %}
    {%- if add_generation_prompt %}
    {{- '<|im_start|>assistant ' }}
    {%- endif %}
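    Rendered against a toy tool definition, this template produces the Hermes-style system prompt that instructs the model to emit calls inside <tool_call></tool_call> tags. A minimal sketch, assuming Jinja 3.1+ (the template uses the |items filter) and that the template text above has been saved to tool_use.jinja; the file name and the get_weather tool are purely illustrative:

```python
# Sketch: render the tool_use chat template above with plain jinja2.
# Assumes Jinja 3.1+ for the |items filter; "tool_use.jinja" is a
# hypothetical file containing the template text verbatim.
from jinja2 import Environment

template = Environment().from_string(open("tool_use.jinja").read())

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # illustrative tool, not part of the model
        "description": "Look up the current weather.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
            },
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What's the weather in Paris?"}]

# bos_token matches tokenizer.ggml.bos_token_id 128000 in Llama 3's vocab.
print(template.render(tools=tools, messages=messages,
                      bos_token="<|begin_of_text|>",
                      add_generation_prompt=True))
```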
  • tokenizer.chat_templates
    [tool_use]
  • tokenizer.ggml.add_bos_token
    true
  • tokenizer.ggml.bos_token_id
    128000
  • tokenizer.ggml.eos_token_id
    128009
  • tokenizer.ggml.merges
    [Ġ Ġ, Ġ ĠĠĠ, ĠĠ ĠĠ, ĠĠĠ Ġ, i n, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    128001
  • tokenizer.ggml.pre
    llama-bpe
  • tokenizer.ggml.token_type
    [1, 1, 1, 1, 1, ...]
  • tokenizer.ggml.tokens
    [!, ", #, $, %, ...]
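    The attention metadata above implies grouped-query attention: 32 query heads share 8 KV heads (a 4:1 ratio), which directly sets the KV-cache cost at long context. A quick back-of-the-envelope from the listed values, assuming an F16 cache:

```python
# KV-cache size implied by the metadata above (F16 cache assumed).
block_count   = 32       # llama.block_count
head_count_kv = 8        # llama.attention.head_count_kv
key_length    = 128      # llama.attention.key_length (== value_length)
context_len   = 131072   # llama.context_length
bytes_fp16    = 2

# K and V, per layer, per KV head, per head dimension
per_token = 2 * block_count * head_count_kv * key_length * bytes_fp16
print(per_token)                         # 131072 bytes = 128 KiB per token
print(per_token * context_len / 2**30)   # 16.0 GiB at the full 131072 context
```

Without GQA (32 KV heads instead of 8), the same context would need 64 GiB of cache, which is why the 4:1 grouping matters for running long contexts on consumer hardware.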
    Tensor
  • token_embd.weight
    Q4_K
    [4096, 128256]
  • blk.0
  • blk.0.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.0.attn_norm.weight
    F32
    [4096]
  • blk.0.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.0.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.0.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.0.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.0.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.0.ffn_norm.weight
    F32
    [4096]
  • blk.0.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.1
  • blk.1.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.1.attn_norm.weight
    F32
    [4096]
  • blk.1.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.1.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.1.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.1.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.1.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.1.ffn_norm.weight
    F32
    [4096]
  • blk.1.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.2
  • blk.2.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.2.attn_norm.weight
    F32
    [4096]
  • blk.2.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.2.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.2.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.2.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.2.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.2.ffn_norm.weight
    F32
    [4096]
  • blk.2.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.3
  • blk.3.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.3.attn_norm.weight
    F32
    [4096]
  • blk.3.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.3.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.3.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.3.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.3.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.3.ffn_norm.weight
    F32
    [4096]
  • blk.3.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.4
  • blk.4.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.4.attn_norm.weight
    F32
    [4096]
  • blk.4.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.4.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.4.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.4.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.4.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.4.ffn_norm.weight
    F32
    [4096]
  • blk.4.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.5
  • blk.5.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.5.attn_norm.weight
    F32
    [4096]
  • blk.5.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.5.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.5.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.5.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.5.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.5.ffn_norm.weight
    F32
    [4096]
  • blk.5.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.6
  • blk.6.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.6.attn_norm.weight
    F32
    [4096]
  • blk.6.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.6.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.6.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.6.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.6.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.6.ffn_norm.weight
    F32
    [4096]
  • blk.6.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.7
  • blk.7.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.7.attn_norm.weight
    F32
    [4096]
  • blk.7.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.7.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.7.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.7.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.7.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.7.ffn_norm.weight
    F32
    [4096]
  • blk.7.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.8
  • blk.8.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.8.attn_norm.weight
    F32
    [4096]
  • blk.8.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.8.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.8.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.8.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.8.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.8.ffn_norm.weight
    F32
    [4096]
  • blk.8.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.9
  • blk.9.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.9.attn_norm.weight
    F32
    [4096]
  • blk.9.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.9.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.9.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.9.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.9.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.9.ffn_norm.weight
    F32
    [4096]
  • blk.9.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.10
  • blk.10.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.10.attn_norm.weight
    F32
    [4096]
  • blk.10.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.10.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.10.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.10.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.10.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.10.ffn_norm.weight
    F32
    [4096]
  • blk.10.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.11
  • blk.11.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.11.attn_norm.weight
    F32
    [4096]
  • blk.11.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.11.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.11.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.11.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.11.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.11.ffn_norm.weight
    F32
    [4096]
  • blk.11.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.12
  • blk.12.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.12.attn_norm.weight
    F32
    [4096]
  • blk.12.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.12.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.12.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.12.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.12.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.12.ffn_norm.weight
    F32
    [4096]
  • blk.12.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.13
  • blk.13.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.13.attn_norm.weight
    F32
    [4096]
  • blk.13.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.13.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.13.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.13.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.13.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.13.ffn_norm.weight
    F32
    [4096]
  • blk.13.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.14
  • blk.14.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.14.attn_norm.weight
    F32
    [4096]
  • blk.14.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.14.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.14.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.14.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.14.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.14.ffn_norm.weight
    F32
    [4096]
  • blk.14.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.15
  • blk.15.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.15.attn_norm.weight
    F32
    [4096]
  • blk.15.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.15.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.15.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.15.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.15.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.15.ffn_norm.weight
    F32
    [4096]
  • blk.15.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.16
  • blk.16.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.16.attn_norm.weight
    F32
    [4096]
  • blk.16.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.16.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.16.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.16.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.16.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.16.ffn_norm.weight
    F32
    [4096]
  • blk.16.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.17
  • blk.17.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.17.attn_norm.weight
    F32
    [4096]
  • blk.17.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.17.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.17.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.17.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.17.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.17.ffn_norm.weight
    F32
    [4096]
  • blk.17.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.18
  • blk.18.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.18.attn_norm.weight
    F32
    [4096]
  • blk.18.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.18.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.18.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.18.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.18.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.18.ffn_norm.weight
    F32
    [4096]
  • blk.18.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.19
  • blk.19.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.19.attn_norm.weight
    F32
    [4096]
  • blk.19.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.19.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.19.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.19.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.19.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.19.ffn_norm.weight
    F32
    [4096]
  • blk.19.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.20
  • blk.20.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.20.attn_norm.weight
    F32
    [4096]
  • blk.20.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.20.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.20.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.20.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.20.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.20.ffn_norm.weight
    F32
    [4096]
  • blk.20.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.21
  • blk.21.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.21.attn_norm.weight
    F32
    [4096]
  • blk.21.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.21.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.21.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.21.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.21.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.21.ffn_norm.weight
    F32
    [4096]
  • blk.21.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.22
  • blk.22.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.22.attn_norm.weight
    F32
    [4096]
  • blk.22.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.22.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.22.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.22.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.22.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.22.ffn_norm.weight
    F32
    [4096]
  • blk.22.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.23
  • blk.23.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.23.attn_norm.weight
    F32
    [4096]
  • blk.23.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.23.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.23.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.23.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.23.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.23.ffn_norm.weight
    F32
    [4096]
  • blk.23.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.24
  • blk.24.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.24.attn_norm.weight
    F32
    [4096]
  • blk.24.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.24.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.24.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.24.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.24.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.24.ffn_norm.weight
    F32
    [4096]
  • blk.24.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.25
  • blk.25.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.25.attn_norm.weight
    F32
    [4096]
  • blk.25.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.25.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.25.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.25.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.25.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.25.ffn_norm.weight
    F32
    [4096]
  • blk.25.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.26
  • blk.26.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.26.attn_norm.weight
    F32
    [4096]
  • blk.26.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.26.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.26.attn_v.weight
    Q4_K
    [4096, 1024]
  • blk.26.ffn_down.weight
    Q4_K
    [14336, 4096]
  • blk.26.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.26.ffn_norm.weight
    F32
    [4096]
  • blk.26.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.27
  • blk.27.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.27.attn_norm.weight
    F32
    [4096]
  • blk.27.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.27.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.27.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.27.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.27.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.27.ffn_norm.weight
    F32
    [4096]
  • blk.27.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.28
  • blk.28.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.28.attn_norm.weight
    F32
    [4096]
  • blk.28.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.28.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.28.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.28.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.28.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.28.ffn_norm.weight
    F32
    [4096]
  • blk.28.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.29
  • blk.29.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.29.attn_norm.weight
    F32
    [4096]
  • blk.29.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.29.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.29.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.29.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.29.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.29.ffn_norm.weight
    F32
    [4096]
  • blk.29.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.30
  • blk.30.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.30.attn_norm.weight
    F32
    [4096]
  • blk.30.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.30.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.30.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.30.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.30.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.30.ffn_norm.weight
    F32
    [4096]
  • blk.30.ffn_up.weight
    Q4_K
    [4096, 14336]
  • blk.31
  • blk.31.attn_k.weight
    Q4_K
    [4096, 1024]
  • blk.31.attn_norm.weight
    F32
    [4096]
  • blk.31.attn_output.weight
    Q4_K
    [4096, 4096]
  • blk.31.attn_q.weight
    Q4_K
    [4096, 4096]
  • blk.31.attn_v.weight
    Q6_K
    [4096, 1024]
  • blk.31.ffn_down.weight
    Q6_K
    [14336, 4096]
  • blk.31.ffn_gate.weight
    Q4_K
    [4096, 14336]
  • blk.31.ffn_norm.weight
    F32
    [4096]
  • blk.31.ffn_up.weight
    Q4_K
    [4096, 14336]
  • output.weight
    Q6_K
    [4096, 128256]
  • rope_freqs.weight
    F32
    [64]
  • output_norm.weight
    F32
    [4096]
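As a cross-check, the tensor shapes listed above pin down the parameter count: the embedding, 32 identical blocks, and the output head sum to roughly 8.03B parameters, consistent with the 8b tag and with the 4.9GB file at Q4_K_M (about 4.9 bits per weight on disk):

```python
# Parameter count reconstructed from the tensor shapes listed above.
embed = 4096 * 128256                        # token_embd.weight
attn  = 2 * 4096 * 4096 + 2 * 4096 * 1024    # attn_q/attn_output + attn_k/attn_v (GQA)
ffn   = 3 * 4096 * 14336                     # ffn_gate, ffn_up, ffn_down
norms = 2 * 4096                             # attn_norm + ffn_norm (F32)
block = attn + ffn + norms                   # 218,112,000 per block

# output.weight mirrors the embedding shape; output_norm and rope_freqs are tiny.
total = embed + 32 * block + embed + 4096 + 64
print(f"{total/1e9:.2f}B parameters")                  # ~8.03B
print(f"{4.9e9 * 8 / total:.1f} bits/weight on disk")  # ~4.9, i.e. Q4_K_M territory
```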