Ollama: ollama run kongxiangyiren/Neko-Chat
Hugging Face: https://huggingface.co/violeteverisland/Neko-Chat
A lightweight Chinese chat model fine-tuned from DeepSeek-R1-Distill-Qwen-1.5B, with a built-in catgirl speech style and an affectionate tone.
Author: violeteverisland
GitHub: github.com/violeteverisland
Bilibili: space.bilibili.com/631367902
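
If you run the model through Ollama, you can also talk to it over Ollama's local REST API instead of the interactive CLI. A minimal sketch, assuming an Ollama server on its default port 11434:

import requests

# Ollama's local chat endpoint (default address).
url = "http://localhost:11434/api/chat"

payload = {
    "model": "kongxiangyiren/Neko-Chat",
    "messages": [{"role": "user", "content": "主人今天想做什么?"}],
    "stream": False,  # request a single JSON response instead of a token stream
}

resp = requests.post(url, json=payload, timeout=120)
print(resp.json()["message"]["content"])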
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer from the Hugging Face Hub
# (device_map="auto" requires the accelerate package).
model = AutoModelForCausalLM.from_pretrained("violeteverisland/Neko-Chat", device_map="auto", trust_remote_code=True)
tok = AutoTokenizer.from_pretrained("violeteverisland/Neko-Chat", trust_remote_code=True)

prompt = "主人今天想做什么?"
inputs = tok(prompt, return_tensors="pt").to(model.device)

# Sample up to 60 new tokens with nucleus sampling (top_p=0.8).
out = model.generate(**inputs, max_new_tokens=60, do_sample=True, top_p=0.8)
print(tok.decode(out[0], skip_special_tokens=True))
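
For interactive use you may want tokens to appear as they are generated rather than all at once. A minimal sketch using transformers' TextStreamer, reusing the model and tok objects from the snippet above:

from transformers import TextStreamer

# Print tokens to stdout as they are generated; skip_prompt hides the echoed input.
streamer = TextStreamer(tok, skip_prompt=True, skip_special_tokens=True)

inputs = tok("主人今天想做什么?", return_tensors="pt").to(model.device)
model.generate(**inputs, max_new_tokens=60, do_sample=True, top_p=0.8, streamer=streamer)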
If you have cloned this repository with git, you can use the code below to chat with the catgirl directly:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_path = r"your-model-path"  # ← change this to your local checkout
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # fp16 only on GPU
    device_map="auto",
    trust_remote_code=True,
)
def chat(prompt, history=None):
    """One chat turn: append the user message, generate a reply, update history."""
    if history is None:
        history = []
    # Build the prompt with the model's chat template; add_generation_prompt
    # appends the assistant marker so the model knows it is its turn to speak.
    text = tokenizer.apply_chat_template(
        history + [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.05,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    answer = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    history.append({"role": "user", "content": prompt})
    history.append({"role": "assistant", "content": answer})
    return answer, history
if __name__ == "__main__":
    hist = []
    while True:
        q = input(">>> ")
        if q.lower() in {"exit", "quit"}:
            break
        resp, hist = chat(q, hist)
        print("Bot:", resp)