This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 445b6c5

Merge pull request #412 from janhq/404-feat-nitro-support-for-queue-with-embedding
feat: add queue for embedding
2 parents: 68cfdad + a8f1735

File tree

1 file changed: +19 -2 lines changed


controllers/llamaCPP.cc

Lines changed: 19 additions & 2 deletions
@@ -407,17 +407,31 @@ void llamaCPP::embedding(
     std::function<void(const HttpResponsePtr &)> &&callback) {
   check_model_loaded(llama, req, callback);
 
+  auto state = create_inference_state(this);
+
   const auto &jsonBody = req->getJsonObject();
 
   Json::Value responseData(Json::arrayValue);
 
   if (jsonBody->isMember("input")) {
+    // If single queue is busy, we will wait if not we will just go ahead and
+    // process and make it busy, and yet i'm aware not DRY, i have the same
+    // stuff on chatcompletion as well
+    if (state->instance->llama.params.n_parallel == 1) {
+      while (state->instance->single_queue_is_busy) {
+        LOG_INFO << "Waiting for task to be released status:"
+                 << state->instance->single_queue_is_busy;
+        std::this_thread::sleep_for(
+            std::chrono::milliseconds(500)); // Waiting in 500 miliseconds step
+      }
+    }
     const Json::Value &input = (*jsonBody)["input"];
     if (input.isString()) {
       // Process the single string input
-      const int task_id = llama.request_completion(
+      state->task_id = llama.request_completion(
           {{"prompt", input.asString()}, {"n_predict", 0}}, false, true, -1);
-      task_result result = llama.next_result(task_id);
+      state->instance->single_queue_is_busy = true;
+      task_result result = llama.next_result(state->task_id);
       std::vector<float> embedding_result = result.result_json["embedding"];
       responseData.append(create_embedding_payload(embedding_result, 0));
     } else if (input.isArray()) {
@@ -434,6 +448,9 @@ void llamaCPP::embedding(
     }
   }
 
+  // We already got result of the embedding so no longer busy
+  state->instance->single_queue_is_busy = false;
+
   auto resp = nitro_utils::nitroHttpResponse();
   Json::Value root;
   root["data"] = responseData;
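For reference, the waiting logic in the diff boils down to a single busy flag that the embedding path polls before claiming the sole llama.cpp slot, and clears once the result has been collected. Below is a minimal, self-contained C++ sketch of that idea, not the actual nitro code: the flag name single_queue_is_busy mirrors the commit, but run_embedding, the thread setup, and the use of std::atomic with compare_exchange_strong (which folds the "is it busy?" check and the "mark it busy" write into one atomic step, where the committed code performs them as two separate statements) are illustrative assumptions.

#include <atomic>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// Mirrors the flag polled in the diff; an atomic here so the sketch is safe
// to drive from plain std::thread.
std::atomic<bool> single_queue_is_busy{false};

// Hypothetical stand-in for llama.request_completion(...) + llama.next_result(...).
std::vector<float> run_embedding(const std::string &prompt) {
  std::this_thread::sleep_for(std::chrono::milliseconds(200)); // pretend work
  return std::vector<float>(4, static_cast<float>(prompt.size()));
}

void handle_embedding_request(const std::string &prompt) {
  // Claim the single slot; if it is busy, wait in 500 ms steps like the
  // n_parallel == 1 branch in the diff. Unlike the diff, the check and the
  // claim happen in one atomic compare-exchange.
  bool expected = false;
  while (!single_queue_is_busy.compare_exchange_strong(expected, true)) {
    std::cout << "Waiting for task to be released\n";
    std::this_thread::sleep_for(std::chrono::milliseconds(500));
    expected = false; // compare_exchange replaced it with the current value
  }

  std::vector<float> embedding = run_embedding(prompt);

  // Result obtained, so the slot is no longer busy (the diff clears the flag
  // the same way once the embedding has been appended to the response).
  single_queue_is_busy.store(false);

  std::cout << "embedding(\"" << prompt << "\") -> " << embedding.size()
            << " floats\n";
}

int main() {
  // Two concurrent requests: the second waits until the first releases the slot.
  std::thread a(handle_embedding_request, "hello");
  std::thread b(handle_embedding_request, "world");
  a.join();
  b.join();
  return 0;
}

Note that when n_parallel is greater than 1 the wait loop in the diff is skipped entirely, so the flag only serializes embedding requests when llama.cpp runs with a single slot.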

0 commit comments