import pytest
from openai import OpenAI
from utils import *

server: ServerProcess
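
# Each test gets a fresh tinyllama2 server preset so no state leaks between cases.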
@pytest.fixture(autouse=True)
def create_server():
    global server
    server = ServerPreset.tinyllama2()
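
# Basic non-streaming request to /v1/responses through the official OpenAI
# client: the response and its first output item must carry "resp_"/"msg_"
# prefixed IDs, and the text must match the preset model's expected output.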
def test_responses_with_openai_library():
    global server
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
    )
    assert res.id.startswith("resp_")
    assert res.output[0].id is not None
    assert res.output[0].id.startswith("msg_")
    assert match_regex("(Suddenly)+", res.output_text)
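
# Streaming variant: walk the SSE events and check that the IDs announced by
# response.created and response.output_item.added stay consistent across the
# whole event stream.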
def test_responses_stream_with_openai_library():
    global server
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    stream = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
        stream=True,
    )
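
    # IDs captured from the early events, checked against every later event.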
    gathered_text = ''
    resp_id = ''
    msg_id = ''
    for r in stream:
        if r.type == "response.created":
            assert r.response.id.startswith("resp_")
            resp_id = r.response.id
        if r.type == "response.in_progress":
            assert r.response.id == resp_id
        if r.type == "response.output_item.added":
            assert r.item.id is not None
            assert r.item.id.startswith("msg_")
            msg_id = r.item.id
        if (r.type == "response.content_part.added" or
            r.type == "response.output_text.delta" or
            r.type == "response.output_text.done" or
            r.type == "response.content_part.done"):
            assert r.item_id == msg_id
        if r.type == "response.output_item.done":
            assert r.item.id == msg_id
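
        # Accumulate the streamed deltas; the completed event must report the
        # same text as the concatenation of all deltas.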
        if r.type == "response.output_text.delta":
            gathered_text += r.delta
        if r.type == "response.completed":
            assert r.response.id.startswith("resp_")
            assert r.response.output[0].id is not None
            assert r.response.output[0].id.startswith("msg_")
            assert gathered_text == r.response.output_text
            assert match_regex("(Suddenly)+", r.response.output_text)