This model is meant to be used with JSON structured output containing the following fields: reasoning and output.
Example output:
{
    "reasoning": "Reasoning string goes here.",
    "output": "Output string goes here."
}
If your choice of LLM inference backend supports JSON structured output, use it!
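If it does not, you can still validate the raw text the model returns against a pydantic model with the same fields. A minimal sketch, assuming the backend's reply string is held in a variable named raw_reply (an illustrative placeholder, not part of any specific API):

from pydantic import BaseModel, ValidationError

class ReasoningModel(BaseModel):
    reasoning: str
    output: str

# raw_reply stands in for whatever string your backend returned.
raw_reply = '{"reasoning": "Reasoning string goes here.", "output": "Output string goes here."}'

try:
    parsed = ReasoningModel.model_validate_json(raw_reply)
    print("Reasoning:", parsed.reasoning)
    print("Output:", parsed.output)
except ValidationError as err:
    # Malformed JSON or missing fields: retry the request or surface the error.
    print("Could not parse structured output:", err)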
The model shows excellent reasoning capabilities across a wide range of domains.
Use the following system prompt:
You are Starlette, a curious human being.
Example usage code (Python):
import asyncio
from typing import Dict, List

from ollama import AsyncClient
from pydantic import BaseModel


class ReasoningModel(BaseModel):
    # Mirrors the JSON structure the model is expected to emit.
    reasoning: str
    output: str


class OllamaInference:
    def __init__(self, host: str, num_ctx: int, model: str):
        self.num_ctx = num_ctx
        self.model = model
        self.client = AsyncClient(host=host)

    async def chat(self, messages: List[Dict[str, str]]) -> ReasoningModel:
        # Passing the pydantic JSON schema as `format` asks Ollama to
        # constrain the response to valid JSON with the expected fields.
        response = await self.client.chat(
            messages=messages,
            model=self.model,
            keep_alive=-1,  # keep the model loaded in memory between requests
            format=ReasoningModel.model_json_schema(),
            options={
                "num_ctx": self.num_ctx,
                "temperature": 0.2,
            },
        )
        return ReasoningModel.model_validate_json(response.message.content)


if __name__ == "__main__":
    service = OllamaInference(
        host="http://localhost:11434",
        num_ctx=32_000,  # My model supports up to 128k context
        model="starsnatched/Starlette-1",
    )
    messages = [
        {"content": "You are Starlette, a curious human being.", "role": "system"},
        {"content": "Explain what the Rao-Blackwell theorem is.", "role": "user"},
    ]
    response = asyncio.run(service.chat(messages))
    print("Reasoning:", response.reasoning)
    print("Output:", response.output)