Files
llama.cpp/tools/server/tests/unit/test_compat_oai_responses.py
손희준 fbbf3ad190 server: /v1/responses (partial) (#18486)
* from previous PR

* Make instructions (system) the first message

* Convert [input_message] (text/image/file)

* Rename convert_responses_to_chatcmpl(body) -> response_body

* Initial tool call support

* Erase instructions field from chatcmpl body

* Feed reasoning texts to chat template

* Use std::vector instead of opaque json array

* Make output_item.added events consistent

* Move `server_task_result_cmpl_partial::update` from header to source

* Match ID of output_item.added and .done events

* Add function_call only if there is no "fc_" prefix

* Add function call output to the non-streaming API

* Test if ID is persistent

* Add doc

* Fix style - use trailing comma

* Rewrite state management

* Catch up with upstream/master

* Fix style - "type" is the first item of SSE data

* Explicitly check "instructions" from response_body

* Make lambdas static

* Check if reasoning content exists

* Add `oai_resp_id` to task_result_state (also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final

* Reject `input_file` since it is not supported by chatcmpl

* Add "fc_" prefix to non-straming function call id as coderabbit pointed out

---------

Co-authored-by: openingnow <>
2026-01-21 17:47:23 +01:00
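The bullets above describe how a `/v1/responses` request is converted into a chat-completion request: `instructions` becomes the leading system message (and is then erased from the chatcmpl body), response IDs carry a `resp_` prefix, message output items a `msg_` prefix, and function-call items an `fc_` prefix. Below is a minimal sketch of that request/response shape over raw HTTP, assuming the `requests` package and OpenAI Responses API field names; it is an illustration of the mapping, not part of the test file that follows.

# Hedged sketch (not part of the test file): exercise /v1/responses over raw HTTP.
# Field names follow the OpenAI Responses API; "instructions" is assumed to be
# folded in as the first (system) message per the commit notes above.
import requests

def sketch_responses_request(base_url: str) -> dict:
    body = {
        "model": "gpt-4.1",
        "instructions": "Book",
        "input": [
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    res = requests.post(f"{base_url}/v1/responses", json=body, timeout=30).json()
    # Response IDs use "resp_"; message output items use "msg_" (function calls "fc_").
    assert res["id"].startswith("resp_")
    assert res["output"][0]["id"].startswith("msg_")
    return res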


import pytest
from openai import OpenAI
from utils import *

server: ServerProcess


@pytest.fixture(autouse=True)
def create_server():
    global server
    server = ServerPreset.tinyllama2()


def test_responses_with_openai_library():
    global server
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
    )
    assert res.id.startswith("resp_")
    assert res.output[0].id is not None
    assert res.output[0].id.startswith("msg_")
    assert match_regex("(Suddenly)+", res.output_text)


def test_responses_stream_with_openai_library():
    global server
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    stream = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
        stream=True,
    )
    gathered_text = ''
    resp_id = ''
    msg_id = ''
    for r in stream:
        if r.type == "response.created":
            assert r.response.id.startswith("resp_")
            resp_id = r.response.id
        if r.type == "response.in_progress":
            assert r.response.id == resp_id
        if r.type == "response.output_item.added":
            assert r.item.id is not None
            assert r.item.id.startswith("msg_")
            msg_id = r.item.id
        if (r.type == "response.content_part.added" or
                r.type == "response.output_text.delta" or
                r.type == "response.output_text.done" or
                r.type == "response.content_part.done"):
            assert r.item_id == msg_id
        if r.type == "response.output_item.done":
            assert r.item.id == msg_id
        if r.type == "response.output_text.delta":
            gathered_text += r.delta
        if r.type == "response.completed":
            assert r.response.id.startswith("resp_")
            assert r.response.output[0].id is not None
            assert r.response.output[0].id.startswith("msg_")
            assert gathered_text == r.response.output_text
            assert match_regex("(Suddenly)+", r.response.output_text)
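

The tests above only cover plain text output. Given the commit notes on tool-call support (non-streaming function-call output items carry an "fc_" id prefix), a follow-up check might look roughly like the sketch below. The tool name get_weather and its schema are invented for illustration, and whether tinyllama2 actually emits a function call is model-dependent, so treat this as a sketch rather than a drop-in test.

def sketch_responses_function_call_ids():
    # Hedged sketch, not part of the file above: verify the "fc_" prefix on
    # non-streaming function-call output items described in the commit message.
    global server
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.responses.create(
        model="gpt-4.1",
        input=[{"role": "user", "content": "What is the weather in Paris?"}],
        tools=[{
            "type": "function",      # Responses-style flat tool schema (assumption)
            "name": "get_weather",   # hypothetical tool, not defined anywhere in the repo
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }],
        max_output_tokens=32,
    )
    assert res.id.startswith("resp_")
    for item in res.output:
        if item.type == "function_call":
            assert item.id.startswith("fc_")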