pdevine/glm-4.7-flash:int8

pdevine/glm-4.7-flash:int8

51 downloads · Updated 5 months ago

Experimental version of glm-4.7-flash

tools · thinking

ollama run pdevine/glm-4.7-flash:int8

curl http://localhost:11434/api/chat \
  -d '{
    "model": "pdevine/glm-4.7-flash:int8",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'

from ollama import chat

response = chat(
    model='pdevine/glm-4.7-flash:int8',
    messages=[{'role': 'user', 'content': 'Hello!'}],
)
print(response.message.content)

import ollama from 'ollama'

const response = await ollama.chat({
  model: 'pdevine/glm-4.7-flash:int8',
  messages: [{role: 'user', content: 'Hello!'}],
})
console.log(response.message.content)

Details

Updated 5 months ago

5 months ago

0239b4ae15ab · 34GB

json

{ "architectures": [ "Glm4MoeLiteForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0,

1.1kB

json

{ "_from_model_config": true, "eos_token_id": [ 154820, 154827, 154829 ], "pad_token_id": 154820, "t

181B

json

{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 154820, "content"

20MB

json

{ "added_tokens_decoder": { "154820": { "content": "<|endoftext|>", "single_word": false, "lstrip":

7.2kB

template

13B

632 tensors

34GB

Readme

No readme