Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use thread pool #400

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ zig-out/
zig-cache/

*.dot

.gitignore/
build-clang/
build-icx/
2 changes: 2 additions & 0 deletions examples/dolly-v2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ bool dollyv2_model_load(const std::string & fname, dollyv2_model & model, gpt_vo
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -492,6 +493,7 @@ bool dollyv2_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/dolly-v2/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
2 changes: 2 additions & 0 deletions examples/gpt-2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -425,6 +426,7 @@ bool gpt2_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/gpt-2/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
2 changes: 2 additions & 0 deletions examples/gpt-j/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -421,6 +422,7 @@ bool gptj_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/gpt-j/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
2 changes: 2 additions & 0 deletions examples/gpt-neox/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -472,6 +473,7 @@ bool gpt_neox_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/gpt-neox/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
1 change: 1 addition & 0 deletions examples/mnist/main-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ int mnist_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx_work = ggml_init(params);
Expand Down
1 change: 1 addition & 0 deletions examples/mnist/main-mtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ int mnist_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

struct ggml_context * ctx_work = ggml_init(params);
Expand Down
2 changes: 2 additions & 0 deletions examples/mnist/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ bool mnist_model_load(const std::string & fname, mnist_model & model) {
/*.mem_size =*/ ctx_size + 1024*1024,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -182,6 +183,7 @@ int mnist_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 2 additions & 0 deletions examples/mpt/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -495,6 +496,7 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/mpt/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = {0, NULL, false};
struct ggml_init_params params = {0, NULL, false, 1};
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
2 changes: 2 additions & 0 deletions examples/replit/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ bool replit_model_load(const std::string & fname, replit_model & model, replit_t
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -472,6 +473,7 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/replit/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = {0, NULL, false};
struct ggml_init_params params = {0, NULL, false, 1};
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
2 changes: 2 additions & 0 deletions examples/starcoder/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -460,6 +461,7 @@ bool starcoder_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/starcoder/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
3 changes: 3 additions & 0 deletions examples/starcoder/starcoder-mmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -450,6 +451,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
c_params.mem_size = model.cache.buf.size;
c_params.mem_buffer = model.cache.buf.addr;
c_params.no_alloc = false;
c_params.n_threads = 1;

model.cache.ctx = ggml_init(c_params);

Expand Down Expand Up @@ -667,6 +669,7 @@ bool starcoder_eval(
/*.mem_size =*/ buf_size,
/*.mem_buffer =*/ buf,
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down
2 changes: 1 addition & 1 deletion examples/whisper/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ int main(int argc, char ** argv) {

// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = { 0, NULL, false, 1 };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
Expand Down
6 changes: 6 additions & 0 deletions examples/whisper/whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ static bool kv_cache_init(
/*.mem_size =*/ cache.buf.size(),
/*.mem_buffer =*/ cache.buf.data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

cache.ctx = ggml_init(params);
Expand Down Expand Up @@ -777,6 +778,7 @@ static bool kv_cache_reinit(struct whisper_kv_cache & cache) {
/*.mem_size =*/ cache.buf.size(),
/*.mem_buffer =*/ cache.buf.data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

cache.ctx = ggml_init(params);
Expand Down Expand Up @@ -1136,6 +1138,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
/*.mem_size =*/ wctx.model.buf->size(),
/*.mem_buffer =*/ wctx.model.buf->data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ 1,
};

model.ctx = ggml_init(params);
Expand Down Expand Up @@ -1456,6 +1459,7 @@ static bool whisper_encode_internal(
/*.mem_size =*/ wstate.buf_compute.size(),
/*.mem_buffer =*/ wstate.buf_compute.data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down Expand Up @@ -1935,6 +1939,7 @@ static bool whisper_decode_internal(
/*.mem_size =*/ wstate.buf_compute.size(),
/*.mem_buffer =*/ wstate.buf_compute.data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(params);
Expand Down Expand Up @@ -5084,6 +5089,7 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
/*.mem_size =*/ buf.size(),
/*.mem_buffer =*/ buf.data(),
/*.no_alloc =*/ false,
/*.n_threads =*/ n_threads,
};

struct ggml_context * ctx0 = ggml_init(gparams);
Expand Down
3 changes: 2 additions & 1 deletion include/ggml/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ extern "C" {
size_t mem_size; // bytes
void * mem_buffer; // if NULL, memory will be allocated internally
bool no_alloc; // don't allocate memory for the tensor data
int n_threads; // number of threads for the thread pool
};


Expand Down Expand Up @@ -1350,7 +1351,7 @@ extern "C" {
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan, void * tpool);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);

// same as ggml_graph_compute() but the work data is allocated as a part of the context
Expand Down