@@ -96,18 +96,19 @@ std::string create_return_json(const std::string &id, const std::string &model,
 }
 
 void llamaCPP::warmupModel() {
-  // json pseudo;
-  //
-  // pseudo["prompt"] = "Hello";
-  // pseudo["n_predict"] = 10;
-  // const int task_id = llama.request_completion(pseudo, false);
-  // std::string completion_text;
-  // task_result result = llama.next_result(task_id);
-  // if (!result.error && result.stop) {
-  //   LOG_INFO << result.result_json.dump(-1, ' ', false,
-  //                                       json::error_handler_t::replace);
-  // }
-  // return;
+  json pseudo;
+
+  pseudo["prompt"] = "Hello";
+  pseudo["n_predict"] = 10;
+  pseudo["stream"] = false;
+  const int task_id = llama.request_completion(pseudo, false, false);
+  std::string completion_text;
+  task_result result = llama.next_result(task_id);
+  if (!result.error && result.stop) {
+    LOG_INFO << result.result_json.dump(-1, ' ', false,
+                                        json::error_handler_t::replace);
+  }
+  return;
 }
 
 void llamaCPP::chatCompletion(
@@ -365,10 +366,11 @@ void llamaCPP::loadModel(
   jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-  // warmupModel();
 
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
+  warmupModel();
+
   callback(resp);
 }
 