539 downloads · Updated 4 months ago
ollama run danchev/granite-docling
Granite Docling is a family of instruction-tuned models designed for document understanding tasks. These models are fine-tuned on a diverse set of tasks including document classification, information extraction, and question answering. The models are optimized for performance on document-centric tasks and can handle a variety of document formats and layouts.
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["docling>=2.60.0", "requests>=2.32.5"]
# ///
import tempfile
import requests
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, ResponseFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
from pydantic import AnyUrl
# PDF to download and convert to Markdown.
pdf_url = "https://arxiv.org/pdf/1706.03762.pdf"

# NamedTemporaryFile deletes the file when the `with` block exits, so both the
# download and the conversion (which opens the file by name) stay inside it.
with tempfile.NamedTemporaryFile(suffix=".pdf") as f:
    response = requests.get(pdf_url, timeout=60)
    # Fail fast on an HTTP error rather than handing an error page to docling.
    response.raise_for_status()
    f.write(response.content)
    # Make sure the bytes are on disk before the converter reads f.name.
    f.flush()

    # Route page conversion through a VLM served over HTTP on localhost; the
    # model is asked for DocTags output, which docling parses into a document.
    pipeline_options = VlmPipelineOptions(
        enable_remote_services=True,
        vlm_options=ApiVlmOptions(
            url=AnyUrl("http://localhost:11434/v1/chat/completions"),
            params={"model": "danchev/granite-docling"},
            prompt="Convert this page to docling.",
            temperature=0.0,
            response_format=ResponseFormat.DOCTAGS,
        ),
    )

    # Use the VLM pipeline (instead of the default one) for PDF inputs.
    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
                pipeline_cls=VlmPipeline,
            )
        }
    )

    markdown = doc_converter.convert(f.name).document.export_to_markdown()

print(markdown)
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["docling-core>=2.50.0", "Pillow>=11.3.0", "requests>=2.32.5"]
# ///
import base64
import io
import requests
from docling_core.types.doc.document import DoclingDocument, DocTagsDocument
from PIL import Image
from requests import Response
# Page image to download and convert to Markdown.
img_url = "https://ibm.biz/docling-page-with-list"

response: Response = requests.get(img_url, timeout=60)
# Fail fast on an HTTP error rather than sending an error page to the model.
response.raise_for_status()
png_bytes: bytes = response.content

# Ask the model served on localhost to convert the image; the image is sent
# base64-encoded alongside the prompt, and streaming is disabled so the whole
# answer arrives in a single JSON body.
response = requests.post(
    url="http://localhost:11434/api/chat",
    json={
        "messages": [
            {
                "role": "user",
                "content": "Convert this image to docling.",
                "images": [base64.b64encode(png_bytes).decode("utf-8")],
            }
        ],
        "model": "danchev/granite-docling",
        "stream": False,
    },
    # Bound only the connection attempt; generation time is left unbounded
    # because it depends on the local hardware.
    timeout=(10, None),
)
# Surface server-side failures as an HTTPError instead of a KeyError on the
# "message" lookup below.
response.raise_for_status()
doctags: str = response.json()["message"]["content"]

# Pair the generated DocTags with the source image and build the document.
doc: DoclingDocument = DoclingDocument.load_from_doctags(
    doctag_document=DocTagsDocument.from_doctags_and_image_pairs(
        doctags=[doctags],
        images=[Image.open(io.BytesIO(png_bytes))],
    ),
)

markdown: str = doc.export_to_markdown()
print(markdown)