74 7 months ago

Reasoning model distilled from DeepSeek-R1, enhanced with GRPO using supplementary reasoning datasets.

14b
... /
template
11300ac09be3 · 586B
{{- /*
  Chat prompt template using the <|im_start|>ROLE<|im_sep|>CONTENT<|im_end|> turn format.

  Inputs read: .System (string) and .Messages (list of entries with .Role and .Content).

  Rendering rules:
    - If .System is non-empty it is emitted once as a leading system turn.
    - Every message is emitted as one turn and closed with <|im_end|>, except a
      trailing assistant message, which is left open so the model continues it.
    - If the final message is not from the assistant, a newline and an open
      assistant header are appended as the generation prompt.
*/ -}}
{{- if .System }}<|im_start|>system<|im_sep|>{{ .System }}<|im_end|>{{ end }}
{{- range $i, $_ := .Messages -}}
{{- /* $last is true only for the final entry of .Messages. */ -}}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- /*
  When the .System header above was emitted, skip system-role entries here so the
  system prompt is not rendered a second time.
  NOTE(review): assumes the runtime also leaves system messages inside .Messages; confirm.
*/ -}}
{{- if not (and $.System (eq .Role "system")) -}}
<|im_start|>{{ .Role }}<|im_sep|>{{ .Content }}
{{- /*
  Assistant content is emitted verbatim. Reasoning markup such as </think> or <answer>
  from earlier turns is NOT stripped by this template; if the model needs it removed,
  that has to happen before the messages reach the template.
*/ -}}
{{- if not (and $last (eq .Role "assistant")) }}<|im_end|>{{ end }}
{{- end -}}
{{- if and $last (ne .Role "assistant") }}
<|im_start|>assistant<|im_sep|>
{{- end -}}
{{- end -}}