Fine-tuned Gemma with OpenAI Function Call Support

A fine-tuned version of Gemma 7B Instruct that supports direct function calling. This capability mirrors the function-calling behavior of OpenAI's models, allowing Gemma to interact with external tools and data sources.
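As a rough illustration of how such a model might be driven, the sketch below sends an OpenAI-style function schema to a locally served copy of the model through Ollama's /api/chat endpoint and tries to parse a JSON function call out of the reply. The local model tag (gemma-function-calling), the prompt wording, and the expectation that the model answers with a bare JSON object are assumptions; the exact prompt template used during fine-tuning is not documented on this page.

```python
# Hedged sketch: drive the fine-tune through Ollama's REST API.
# Assumptions (not confirmed by the model card): the local model tag is
# "gemma-function-calling", the function schema can simply be embedded in the
# system prompt, and the model replies with a single JSON object describing
# the call. Adjust to the actual template the fine-tune was trained on.
import json
import requests

OLLAMA_URL = "http://localhost:11434/api/chat"  # default Ollama endpoint
MODEL = "gemma-function-calling"                # assumed local model tag

functions = [{
    "name": "get_current_weather",
    "description": "Get the current weather for a city",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}]

messages = [
    {"role": "system",
     "content": "You can call these functions. Reply with a JSON object of "
                "the form {\"name\": ..., \"arguments\": {...}}.\n"
                + json.dumps(functions, indent=2)},
    {"role": "user", "content": "What's the weather in Berlin right now?"},
]

resp = requests.post(OLLAMA_URL,
                     json={"model": MODEL, "messages": messages, "stream": False},
                     timeout=120)
resp.raise_for_status()
reply = resp.json()["message"]["content"]

try:
    call = json.loads(reply)  # expected: {"name": ..., "arguments": {...}}
    print("function:", call["name"], "args:", call["arguments"])
except (json.JSONDecodeError, KeyError):
    print("model replied with plain text:", reply)
```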


Metadata
  • general.architecture: gemma
  • general.file_type: Q4_0
  • gemma.attention.head_count: 16
  • gemma.attention.head_count_kv: 16
  • gemma.attention.key_length: 256
  • gemma.attention.layer_norm_rms_epsilon: 1e-06
  • gemma.attention.value_length: 256
  • gemma.block_count: 28
  • gemma.context_length: 8192
  • gemma.embedding_length: 3072
  • gemma.feed_forward_length: 24576
  • tokenizer.ggml.add_bos_token: true
  • tokenizer.ggml.add_eos_token: false
  • tokenizer.ggml.bos_token_id: 2
  • tokenizer.ggml.eos_token_id: 1
  • tokenizer.ggml.model: llama
  • tokenizer.ggml.padding_token_id: 0
  • tokenizer.ggml.scores: [0, 0, 0, 0, 0, ...]
  • tokenizer.ggml.token_type: [3, 3, 3, 2, 1, ...]
  • tokenizer.ggml.tokens: [<pad>, <eos>, <bos>, <unk>, <mask>, ...]
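The keys above come straight from the GGUF header, so they can be inspected programmatically. The sketch below uses the gguf Python package that ships with llama.cpp (pip install gguf) to dump metadata keys and tensor entries from a local copy of the file. The file path is a placeholder, and the reader attributes reflect my understanding of gguf-py's GGUFReader rather than anything stated on this page.

```python
# Hedged sketch: inspect the GGUF header with llama.cpp's gguf-py package.
# "model.gguf" is a placeholder path; GGUFReader.fields / .tensors are the
# reader attributes as I understand them in gguf-py, not documented here.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")

# Metadata keys such as gemma.block_count or tokenizer.ggml.model
for key in reader.fields:
    print(key)

# Tensor names, quantization types, and shapes,
# e.g. token_embd.weight Q6_K [3072, 256000]
for t in reader.tensors:
    print(t.name, t.tensor_type.name, list(t.shape))
```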
Tensor
  • token_embd.weight          Q6_K   [3072, 256000]
  • blk.0
      • blk.0.attn_k.weight        Q4_0   [3072, 4096]
      • blk.0.attn_norm.weight     F32    [3072]
      • blk.0.attn_output.weight   Q4_0   [4096, 3072]
      • blk.0.attn_q.weight        Q4_0   [3072, 4096]
      • blk.0.attn_v.weight        Q4_0   [3072, 4096]
      • blk.0.ffn_down.weight      Q4_0   [24576, 3072]
      • blk.0.ffn_gate.weight      Q4_0   [3072, 24576]
      • blk.0.ffn_norm.weight      F32    [3072]
      • blk.0.ffn_up.weight        Q4_0   [3072, 24576]
  • blk.1 through blk.27: identical to blk.0 (each of the remaining 27 blocks contains the same tensor names, quantization types, and shapes)
  • output_norm.weight         F32    [3072]
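As a sanity check, the shapes listed above are enough to recover the parameter count: one embedding matrix, 28 identical transformer blocks, and a final norm. The short calculation below gives roughly 8.54 billion parameters, consistent with the commonly cited total for Gemma 7B once the 256,000-token embedding matrix is included.

```python
# Parameter count derived from the tensor shapes listed above.
d_model, d_ff, n_heads, head_dim, n_blocks, vocab = 3072, 24576, 16, 256, 28, 256000

embed = d_model * vocab                     # token_embd.weight [3072, 256000]
attn  = 4 * d_model * (n_heads * head_dim)  # q, k, v: [3072, 4096]; output: [4096, 3072]
ffn   = 3 * d_model * d_ff                  # gate, up: [3072, 24576]; down: [24576, 3072]
norms = 2 * d_model                         # attn_norm + ffn_norm per block
block = attn + ffn + norms

total = embed + n_blocks * block + d_model  # + output_norm.weight [3072]
print(f"{total:,}")                         # ~8.54 billion parameters
```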