Commit

Trying to call mnist model in main
Signed-off-by: Aisuko <[email protected]>
Aisuko committed Nov 18, 2023
1 parent 660cc49 commit b91b79c
Showing 4 changed files with 159 additions and 52 deletions.
178 changes: 136 additions & 42 deletions backend/rust/backend-burn/src/main.rs
@@ -1,5 +1,8 @@
use std::collections::HashMap;
use std::net::SocketAddr;
use std::process::{id, Command};
use std::result;
use std::sync::{Arc, Mutex};

use bunker::pb::Result as PbResult;
use bunker::pb::{
@@ -15,16 +18,12 @@ use tonic::{Request, Response, Status};
use async_trait::async_trait;

use tracing::{event, span, Level};
use tracing_subscriber::filter::LevelParseError;

use std::fs;
use std::process::{Command,id};

use models::*;
// Implement the BackendService trait defined in bunker

#[derive(Default, Debug)]
struct BurnBackend;
pub struct BurnBackend;

#[async_trait]
impl BackendService for BurnBackend {
@@ -42,33 +41,48 @@ impl BackendService for BurnBackend {

#[tracing::instrument]
async fn predict(&self, request: Request<PredictOptions>) -> Result<Response<Reply>, Status> {
let mut models: Vec<Box<dyn LLM>> = vec![Box::new(models::MNINST::new())];
let result = models[0].predict(request.into_inner());

match result {
Ok(res) => {
let reply = Reply {
message: res.into(),
};
let res = Response::new(reply);
Ok(res)
}
Err(e) => {
let reply = Reply {
message: e.to_string().into(),
};
let res = Response::new(reply);
Ok(res)
}
}
// TODO: how do we get the model that load_model produced?
todo!("How to get the model from load_model?")
}

#[tracing::instrument]
async fn load_model(
&self,
request: Request<ModelOptions>,
) -> Result<Response<PbResult>, Status> {
todo!()
let result = match request.get_ref().model.as_str() {
"mnist" => {
let mut model = MNINST::new("model.bin");
let result = model.load_model(request.get_ref().clone());
match result {
Ok(_) => {
let model = Arc::new(Mutex::new(model));
let model = model.clone();
let result = PbResult {
message: "Model loaded successfully".into(),
success: true,
};
Ok(Response::new(result))
}
Err(e) => {
let result = PbResult {
message: format!("Failed to load model: {}", e),
success: false,
};
Err(Status::internal(result.message))
}
}
}
_ => {
let result = PbResult {
message: format!("Model {} not found", request.get_ref().model),
success: false,
};
Err(Status::internal(result.message))
}
};
// TODO: add the model to the backend. How do we hand it over so that the predict function can use it? (see the sketch after this file's diff)
result
}

#[tracing::instrument]
@@ -121,35 +135,34 @@ impl BackendService for BurnBackend {
&self,
request: Request<HealthMessage>,
) -> Result<Response<StatusResponse>, Status> {

// We do not need to cover the Windows platform here
let mut breakdown = HashMap::new();
let mut memory_usage: u64=0;
let mut memory_usage: u64 = 0;

#[cfg(target_os = "linux")]
{
let pid =id();
let stat = fs::read_to_string(format!("/proc/{}/stat", pid)).expect("Failed to read stat file");
let pid = id();
let stat = fs::read_to_string(format!("/proc/{}/stat", pid))
.expect("Failed to read stat file");

let stats: Vec<&str> = stat.split_whitespace().collect();
memory_usage = stats[23].parse::<u64>().expect("Failed to parse RSS");
}

#[cfg(target_os="macos")]
#[cfg(target_os = "macos")]
{
let output=Command::new("ps")
.arg("-p")
.arg(id().to_string())
.arg("-o")
.arg("rss=")
.output()
.expect("failed to execute process");

memory_usage = String::from_utf8_lossy(&output.stdout)
.trim()
.parse::<u64>()
.expect("Failed to parse memory usage");
let output = Command::new("ps")
.arg("-p")
.arg(id().to_string())
.arg("-o")
.arg("rss=")
.output()
.expect("failed to execute process");

memory_usage = String::from_utf8_lossy(&output.stdout)
.trim()
.parse::<u64>()
.expect("Failed to parse memory usage");
}
breakdown.insert("RSS".to_string(), memory_usage);

@@ -167,6 +180,87 @@ impl BackendService for BurnBackend {
}
}

#[cfg(test)]
mod tests {
use super::*;
use tonic::Request;

#[tokio::test]
async fn test_health() {
let backend = BurnBackend::default();
let request = Request::new(HealthMessage {});
let response = backend.health(request).await;

assert!(response.is_ok());
let response = response.unwrap();
let message_str = String::from_utf8(response.get_ref().message.clone()).unwrap();
assert_eq!(message_str, "OK");
}
#[tokio::test]
async fn test_status() {
let backend = BurnBackend::default();
let request = Request::new(HealthMessage {});
let response = backend.status(request).await;

assert!(response.is_ok());
let response = response.unwrap();
let state = response.get_ref().state;
assert_eq!(state, 0);
}

#[tokio::test]
async fn test_load_model() {
let backend = BurnBackend::default();
let request = Request::new(ModelOptions {
model: "test".to_string(),
context_size: 0,
seed: 0,
n_batch: 0,
f16_memory: false,
m_lock: false,
m_map: false,
vocab_only: false,
low_vram: false,
embeddings: false,
numa: false,
ngpu_layers: 0,
main_gpu: "".to_string(),
tensor_split: "".to_string(),
threads: 1,
library_search_path: "".to_string(),
rope_freq_base: 0.0,
rope_freq_scale: 0.0,
rms_norm_eps: 0.0,
ngqa: 0,
model_file: "".to_string(),
device: "".to_string(),
use_triton: false,
model_base_name: "".to_string(),
use_fast_tokenizer: false,
pipeline_type: "".to_string(),
scheduler_type: "".to_string(),
cuda: false,
cfg_scale: 0.0,
img2img: false,
clip_model: "".to_string(),
clip_subfolder: "".to_string(),
clip_skip: 0,
tokenizer: "".to_string(),
lora_base: "".to_string(),
lora_adapter: "".to_string(),
no_mul_mat_q: false,
draft_model: "".to_string(),
audio_path: "".to_string(),
quantization: "".to_string(),
});
let response = backend.load_model(request).await;

assert!(response.is_ok());
let response = response.unwrap();
// TODO: add a test for the response
}
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let subscriber = tracing_subscriber::fmt()
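The two TODOs above (getting a model into predict, and handing the model from load_model over to the backend) could be addressed by keeping the loaded model in shared state on the service struct. A minimal sketch, not part of this commit; it assumes the ndarray Backend alias is visible from the models crate, and the field and error messages are illustrative only:

use std::sync::{Arc, Mutex};

use models::MNINST;

pub struct BurnBackend {
    // None until load_model has succeeded. (The Default/Debug derives on the
    // current unit struct would need revisiting, since MNINST may not provide them.)
    model: Arc<Mutex<Option<MNINST<Backend>>>>,
}

// In load_model, once the model has been built and loaded:
//     *self.model.lock().unwrap() = Some(model);
//
// In predict, borrow it back instead of constructing a new one:
//     let mut guard = self.model.lock().unwrap();
//     let model = guard
//         .as_mut()
//         .ok_or_else(|| Status::failed_precondition("model not loaded"))?;
//     let message = model
//         .predict(request.into_inner())
//         .map_err(|e| Status::internal(e.to_string()))?;
//     Ok(Response::new(Reply { message: message.into() }))

A Mutex is the simplest fit here because the LLM trait methods take &mut self; if predict calls need to run concurrently, a per-request clone or an async-aware lock would be worth considering.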
11 changes: 9 additions & 2 deletions backend/rust/models/src/lib.rs
@@ -1,9 +1,16 @@
use bunker::pb::{ModelOptions, PredictOptions};

pub(crate) mod mnist;
pub use mnist::mnist::MNINST;

use bunker::pb::{ModelOptions, PredictOptions};

/// Trait for implementing a Language Model.
pub trait LLM {
/// Loads the model from the given options.
fn load_model(&mut self, request: ModelOptions) -> Result<String, Box<dyn std::error::Error>>;
/// Predicts the output for the given input options.
fn predict(&mut self, request: PredictOptions) -> Result<String, Box<dyn std::error::Error>>;
}

pub struct LLModel {
model: Box<dyn LLM + 'static>,
}
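LLModel is added here with no methods yet. For illustration only (the constructor and method names below are assumptions, not part of this commit), one possible shape is a thin wrapper that forwards to the boxed trait object:

impl LLModel {
    /// Wrap any concrete model behind the LLM trait object.
    pub fn new(model: Box<dyn LLM + 'static>) -> Self {
        Self { model }
    }

    /// Forward to the wrapped model's load_model implementation.
    pub fn load(&mut self, opts: ModelOptions) -> Result<String, Box<dyn std::error::Error>> {
        self.model.load_model(opts)
    }

    /// Forward to the wrapped model's predict implementation.
    pub fn predict(&mut self, opts: PredictOptions) -> Result<String, Box<dyn std::error::Error>> {
        self.model.predict(opts)
    }
}

// Usage sketch, assuming the ndarray Backend alias is in scope:
// let mut wrapped = LLModel::new(Box::new(MNINST::<Backend>::new("model.bin")));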
10 changes: 5 additions & 5 deletions backend/rust/models/src/mnist/mnist.rs
@@ -4,15 +4,14 @@
//! Adapted by Aisuko

use burn::{
backend::wgpu::{compute::init_async, AutoGraphicsApi, WgpuDevice},
backend::wgpu::{AutoGraphicsApi, WgpuDevice},
module::Module,
nn::{self, BatchNorm, PaddingConfig2d},
record::{BinBytesRecorder, FullPrecisionSettings, Recorder},
tensor::{backend::Backend, Tensor},
};

// https://github.com/burn-rs/burn/blob/main/examples/mnist-inference-web/model.bin
static STATE_ENCODED: &[u8] = include_bytes!("model.bin");

const NUM_CLASSES: usize = 10;

@@ -36,7 +35,7 @@ pub struct MNINST<B: Backend> {
}

impl<B: Backend> MNINST<B> {
pub fn new() -> Self {
pub fn new(model_name: &str) -> Self {
let conv1 = ConvBlock::new([1, 8], [3, 3]); // 1 input channel, 8 output channels, 3x3 kernel size
let conv2 = ConvBlock::new([8, 16], [3, 3]); // 8 input channels, 16 output channels, 3x3 kernel size
let conv3 = ConvBlock::new([16, 24], [3, 3]); // 16 input channels, 24 output channels, 3x3 kernel size
@@ -59,8 +58,9 @@ impl<B: Backend> MNINST<B> {
fc2: fc2,
activation: nn::GELU::new(),
};
let state_encoded: &[u8] = &std::fs::read(model_name).expect("Failed to load model");
let record = BinBytesRecorder::<FullPrecisionSettings>::default()
.load(STATE_ENCODED.to_vec())
.load(state_encoded.to_vec())
.expect("Failed to decode state");

instance.load_record(record)
Expand Down Expand Up @@ -178,7 +178,7 @@ mod tests {
pub type Backend = burn::backend::NdArrayBackend<f32>;
#[test]
fn test_inference() {
let mut model = MNINST::<Backend>::new();
let mut model = MNINST::<Backend>::new("model.bin");
let output = model.inference(&[0.0; 28 * 28]).unwrap();
assert_eq!(output.len(), 10);
}
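For illustration, a hypothetical helper (not part of this commit) showing how the runtime-loaded model might be used to classify a single image; it assumes model.bin is in the working directory and that inference returns one score per MNIST class, as the existing test checks:

type NdBackend = burn::backend::NdArrayBackend<f32>;

fn predict_digit(image: &[f32]) -> Option<usize> {
    // Reads the weights from disk, runs one forward pass, and returns the
    // index of the highest score as the predicted digit (0-9).
    let mut model = MNINST::<NdBackend>::new("model.bin");
    let scores = model.inference(image).expect("inference failed");
    scores
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.partial_cmp(b.1).expect("score was NaN"))
        .map(|(digit, _)| digit)
}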
12 changes: 9 additions & 3 deletions backend/rust/models/src/mnist/mod.rs
@@ -1,14 +1,20 @@
use crate::LLM;
use bunker::pb::{ModelOptions, PredictOptions};

pub(crate) mod mnist;

use mnist::MNINST;

use bunker::pb::{ModelOptions, PredictOptions};

#[cfg(feature = "ndarray")]
pub type Backend = burn::backend::NdArrayBackend<f32>;

impl LLM for mnist::MNINST<Backend> {
impl LLM for MNINST<Backend> {
fn load_model(&mut self, request: ModelOptions) -> Result<String, Box<dyn std::error::Error>> {
todo!("load model")
let model = request.model_file;
let instance = MNINST::<Backend>::new(&model);
*self = instance;
Ok("".to_string())
}

fn predict(&mut self, pre_ops: PredictOptions) -> Result<String, Box<dyn std::error::Error>> {
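For illustration only (none of this is in the commit), a sketch of driving this trait implementation end to end; it assumes the prost-generated ModelOptions implements Default and that the weights file exists on disk:

#[cfg(feature = "ndarray")]
fn load_mnist() -> Result<MNINST<Backend>, Box<dyn std::error::Error>> {
    // load_model builds a fresh instance from model_file and swaps it into self,
    // so the caller ends up with a model backed by the requested weights file.
    let mut model = MNINST::<Backend>::new("model.bin");
    let opts = ModelOptions {
        model: "mnist".to_string(),
        model_file: "model.bin".to_string(),
        ..Default::default()
    };
    model.load_model(opts)?;
    Ok(model)
}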
