Skip to content

Commit

Permalink
Fix OOM on cuBLAS-enabled quantized models
Browse files Browse the repository at this point in the history
  • Loading branch information
LoganDark committed Jun 10, 2023
1 parent f0ec611 commit ff8e3d8
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions rwkv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,11 @@ bool rwkv_build_sequence_graph(
}

// Estimates the scratch-buffer size (in bytes) needed to evaluate the graph:
// one per-thread work slice sized for the matmul intermediate of the largest
// tensor (the FFN key), plus slack between consecutive thread slices.
size_t rwkv_estimate_graph_work(const enum ggml_type type, const size_t ffn_key_size, const uint32_t n_threads, const size_t sequence_len = 1) {
#ifdef GGML_USE_CUBLAS
    // With cuBLAS enabled, size the work buffer for F16 regardless of the
    // stored weight type — presumably ggml's cuBLAS path converts operands
    // to F16 before multiplying (NOTE(review): confirm against ggml source).
    const enum ggml_type mul_mat_type = GGML_TYPE_F16;
#else
    // CPU path: quantized weight types are handled via Q8_1 intermediates;
    // non-quantized types are used as-is.
    enum ggml_type mul_mat_type;
    if (ggml_is_quantized(type)) {
        mul_mat_type = GGML_TYPE_Q8_1;
    } else {
        mul_mat_type = type;
    }
#endif
    const size_t per_thread = rwkv_tensor_size(mul_mat_type, ffn_key_size, sequence_len);
    // 64 extra bytes for each thread after the first — presumably alignment
    // padding between slices (TODO confirm).
    return rwkv_tensor_size(GGML_TYPE_I8, per_thread * n_threads + 64 * (n_threads - 1));
}

Expand Down

0 comments on commit ff8e3d8

Please sign in to comment.