diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml index 96258e7097..55c3308da6 100644 --- a/.github/workflows/jan-electron-linter-and-test.yml +++ b/.github/workflows/jan-electron-linter-and-test.yml @@ -22,6 +22,7 @@ on: branches: - main - dev + - release/** paths: - "electron/**" - .github/workflows/jan-electron-linter-and-test.yml diff --git a/core/package.json b/core/package.json index 2f4f6b576f..c4d0d475df 100644 --- a/core/package.json +++ b/core/package.json @@ -46,7 +46,7 @@ }, "devDependencies": { "@types/jest": "^29.5.12", - "@types/node": "^12.0.2", + "@types/node": "^20.11.4", "eslint": "8.57.0", "eslint-plugin-jest": "^27.9.0", "jest": "^29.7.0", diff --git a/core/src/api/index.ts b/core/src/api/index.ts index f97593934f..8e41da0d17 100644 --- a/core/src/api/index.ts +++ b/core/src/api/index.ts @@ -33,7 +33,7 @@ export enum AppRoute { stopServer = 'stopServer', log = 'log', logServer = 'logServer', - systemInformations = 'systemInformations', + systemInformation = 'systemInformation', showToast = 'showToast', } @@ -95,6 +95,8 @@ export enum FileManagerRoute { getUserHomePath = 'getUserHomePath', fileStat = 'fileStat', writeBlob = 'writeBlob', + mkdir = 'mkdir', + rm = 'rm', } export type ApiFunction = (...args: any[]) => any diff --git a/core/src/core.ts b/core/src/core.ts index b8cbd3162c..47c0fe6f2c 100644 --- a/core/src/core.ts +++ b/core/src/core.ts @@ -1,4 +1,4 @@ -import { DownloadRequest, FileStat, NetworkConfig } from './types' +import { DownloadRequest, FileStat, NetworkConfig, SystemInformation } from './types' /** * Execute a extension module function in main process @@ -110,7 +110,8 @@ const isSubdirectory: (from: string, to: string) => Promise = (from: st * Get system information * @returns {Promise} - A promise that resolves with the system information. */ -const systemInformations: () => Promise = () => global.core.api?.systemInformations() +const systemInformation: () => Promise = () => + global.core.api?.systemInformation() /** * Show toast message from browser processes. @@ -146,7 +147,7 @@ export { log, isSubdirectory, getUserHomePath, - systemInformations, + systemInformation, showToast, FileStat, } diff --git a/core/src/extension.ts b/core/src/extension.ts index 22accb4b47..973d4778a7 100644 --- a/core/src/extension.ts +++ b/core/src/extension.ts @@ -19,6 +19,7 @@ export interface Compatibility { const ALL_INSTALLATION_STATE = [ 'NotRequired', // not required. 'Installed', // require and installed. Good to go. + 'Updatable', // require and installed but need to be updated. 'NotInstalled', // require to be installed. 'Corrupted', // require but corrupted. Need to redownload. ] as const @@ -59,6 +60,13 @@ export abstract class BaseExtension implements ExtensionType { return undefined } + /** + * Determine if the extension is updatable. + */ + updatable(): boolean { + return false + } + /** * Determine if the prerequisites for the extension are installed. 
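The `systemInformations` → `systemInformation` rename above changes the public `@janhq/core` surface, and the new `SystemInformation` payload (defined later in this diff) bundles GPU and OS details. A minimal caller-side sketch, assuming the exports shown in this diff; `logSystemInfo` is a hypothetical helper:

```ts
import { systemInformation, SystemInformation } from '@janhq/core'

// Sketch: migrating a caller from `systemInformations()` to the renamed
// `systemInformation()`. `logSystemInfo` is a hypothetical helper.
const logSystemInfo = async (): Promise<void> => {
  const info: SystemInformation = await systemInformation()
  // `osInfo` is optional on SystemInformation, and `arch` is optional per GPU
  const arch = info.gpuSetting?.gpus[0]?.arch ?? 'unknown'
  console.log(`platform=${info.osInfo?.platform ?? 'unknown'}, gpuArch=${arch}`)
}
```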
* diff --git a/core/src/extensions/ai-engines/LocalOAIEngine.ts b/core/src/extensions/ai-engines/LocalOAIEngine.ts index 79dbcbf5e7..89444ff0fc 100644 --- a/core/src/extensions/ai-engines/LocalOAIEngine.ts +++ b/core/src/extensions/ai-engines/LocalOAIEngine.ts @@ -1,4 +1,4 @@ -import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core' +import { executeOnMain, getJanDataFolderPath, joinPath, systemInformation } from '../../core' import { events } from '../../events' import { Model, ModelEvent } from '../../types' import { OAIEngine } from './OAIEngine' @@ -30,11 +30,11 @@ export abstract class LocalOAIEngine extends OAIEngine { if (model.engine.toString() !== this.provider) return const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id]) - + const systemInfo = await systemInformation() const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, { modelFolder, model, - }) + }, systemInfo) if (res?.error) { events.emit(ModelEvent.OnModelFail, { diff --git a/core/src/extensions/monitoring.ts b/core/src/extensions/monitoring.ts index 8d61580fca..2d75e0218b 100644 --- a/core/src/extensions/monitoring.ts +++ b/core/src/extensions/monitoring.ts @@ -1,5 +1,5 @@ import { BaseExtension, ExtensionTypeEnum } from '../extension' -import { GpuSetting, MonitoringInterface } from '../index' +import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../index' /** * Monitoring extension for system monitoring. @@ -16,4 +16,5 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit abstract getGpuSetting(): Promise abstract getResourcesInfo(): Promise abstract getCurrentLoad(): Promise + abstract getOsInfo(): Promise } diff --git a/core/src/fs.ts b/core/src/fs.ts index 71538ae9cc..dacdbb6d6f 100644 --- a/core/src/fs.ts +++ b/core/src/fs.ts @@ -37,12 +37,17 @@ const readdirSync = (...args: any[]) => global.core.api?.readdirSync(...args) */ const mkdirSync = (...args: any[]) => global.core.api?.mkdirSync(...args) +const mkdir = (...args: any[]) => global.core.api?.mkdir(...args) + /** * Removes a directory at the specified path. * @returns {Promise} A Promise that resolves when the directory is removed successfully. */ const rmdirSync = (...args: any[]) => global.core.api?.rmdirSync(...args, { recursive: true, force: true }) + +const rm = (path: string) => global.core.api?.rm(path) + /** * Deletes a file from the local file system. * @param {string} path - The path of the file to delete. 
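With the `LocalOAIEngine` change above, the renderer-collected `SystemInformation` is forwarded as an extra `executeOnMain` argument, so an engine's node module needs a matching second parameter. An illustrative sketch of such a handler; the body is a stub, and the real TensorRT-LLM handler appears later in this diff:

```ts
import { SystemInformation } from '@janhq/core'

// Illustrative node-module entry point matching the new call shape:
// executeOnMain(nodeModule, loadModelFunctionName, args, systemInfo).
async function loadModel(
  args: { modelFolder: string; model: { id: string } },
  systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
  if (!systemInfo?.gpuSetting?.gpus.length) {
    return { error: new Error('No GPU information available') }
  }
  console.debug(`Loading ${args.model.id} from ${args.modelFolder}`)
  return { error: undefined }
}

export default { loadModel }
```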
@@ -92,7 +97,9 @@ export const fs = { existsSync, readdirSync, mkdirSync, + mkdir, rmdirSync, + rm, unlinkSync, appendFileSync, copyFileSync, diff --git a/core/src/node/api/processors/fsExt.ts b/core/src/node/api/processors/fsExt.ts index 4787da65b3..9b88cfef9f 100644 --- a/core/src/node/api/processors/fsExt.ts +++ b/core/src/node/api/processors/fsExt.ts @@ -88,4 +88,28 @@ export class FSExt implements Processor { }) }) } + + mkdir(path: string): Promise<void> { + return new Promise((resolve, reject) => { + fs.mkdir(path, { recursive: true }, (err) => { + if (err) { + reject(err) + } else { + resolve() + } + }) + }) + } + + rm(path: string): Promise<void> { + return new Promise((resolve, reject) => { + fs.rm(path, { recursive: true }, (err) => { + if (err) { + reject(err) + } else { + resolve() + } + }) + }) + } } diff --git a/core/src/node/extension/store.ts b/core/src/node/extension/store.ts index 93b1aeb2b6..630756485d 100644 --- a/core/src/node/extension/store.ts +++ b/core/src/node/extension/store.ts @@ -93,8 +93,7 @@ export function persistExtensions() { */ export async function installExtensions(extensions: any) { const installed: Extension[] = [] - for (const ext of extensions) { - // Set install options and activation based on input type + const installations = extensions.map((ext: any): Promise<void> => { const isObject = typeof ext === 'object' const spec = isObject ? [ext.specifier, ext] : [ext] const activate = isObject ? ext.activate !== false : true @@ -102,15 +101,17 @@ export async function installExtensions(extensions: any) { // Install and possibly activate extension const extension = new Extension(...spec) if (!extension.origin) { - continue + return Promise.resolve() } - await extension._install() - if (activate) extension.setActive(true) + return extension._install().then(() => { + if (activate) extension.setActive(true) + // Add extension to store if needed + addExtension(extension) + installed.push(extension) + }) + }) - // Add extension to store if needed - addExtension(extension) - installed.push(extension) - } + await Promise.all(installations) // Return list of all installed extensions return installed diff --git a/core/src/node/helper/config.ts b/core/src/node/helper/config.ts index 81bc64611b..b5ec2e029a 100644 --- a/core/src/node/helper/config.ts +++ b/core/src/node/helper/config.ts @@ -82,26 +82,34 @@ export const getJanExtensionsPath = (): string => { */ export const physicalCpuCount = async (): Promise<number> => { const platform = os.platform() - if (platform === 'linux') { - const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') - return parseInt(output.trim(), 10) - } else if (platform === 'darwin') { - const output = await exec('sysctl -n hw.physicalcpu_max') - return parseInt(output.trim(), 10) - } else if (platform === 'win32') { - const output = await exec('WMIC CPU Get NumberOfCores') - return output - .split(os.EOL) - .map((line: string) => parseInt(line)) - .filter((value: number) => !isNaN(value)) - .reduce((sum: number, number: number) => sum + number, 1) - } else { - const cores = os.cpus().filter((cpu: any, index: number) => { - const hasHyperthreading = cpu.model.includes('Intel') - const isOdd = index % 2 === 1 - return !hasHyperthreading || isOdd - }) - return cores.length + try { + if (platform === 'linux') { + const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') + return parseInt(output.trim(), 10) + } else if (platform === 'darwin') { + const output = await exec('sysctl -n hw.physicalcpu_max') + return 
parseInt(output.trim(), 10) + } else if (platform === 'win32') { + const output = await exec('WMIC CPU Get NumberOfCores') + return output + .split(os.EOL) + .map((line: string) => parseInt(line)) + .filter((value: number) => !isNaN(value)) + .reduce((sum: number, number: number) => sum + number, 1) + } else { + const cores = os.cpus().filter((cpu: any, index: number) => { + const hasHyperthreading = cpu.model.includes('Intel') + const isOdd = index % 2 === 1 + return !hasHyperthreading || isOdd + }) + return cores.length + } + } catch (err) { + console.warn('Failed to get physical CPU count', err) + // Divide by 2 to get rid of hyper threading + const coreCount = Math.ceil(os.cpus().length / 2) + console.debug('Using node API to get physical CPU count:', coreCount) + return coreCount } } diff --git a/core/src/node/helper/resource.ts b/core/src/node/helper/resource.ts index c79a63688b..faaaace05e 100644 --- a/core/src/node/helper/resource.ts +++ b/core/src/node/helper/resource.ts @@ -1,6 +1,6 @@ import { SystemResourceInfo } from '../../types' import { physicalCpuCount } from './config' -import { log, logServer } from './log' +import { log } from './log' export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => { const cpu = await physicalCpuCount() diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts index f7dd4a82ba..fb059b1ba8 100644 --- a/core/src/types/miscellaneous/systemResourceInfo.ts +++ b/core/src/types/miscellaneous/systemResourceInfo.ts @@ -30,3 +30,27 @@ export type GpuSettingInfo = { name: string arch?: string } + +export type SystemInformation = { + gpuSetting: GpuSetting + osInfo?: OperatingSystemInfo +} + +export const SupportedPlatforms = ['win32', 'linux', 'darwin'] as const +export type SupportedPlatformTuple = typeof SupportedPlatforms +export type SupportedPlatform = SupportedPlatformTuple[number] + +export type OperatingSystemInfo = { + platform: SupportedPlatform | 'unknown' + arch: string + release: string + machine: string + version: string + totalMem: number + freeMem: number +} + +export type CpuCoreInfo = { + model: string + speed: number +} diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index e2970b8f9d..6072758842 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -38,7 +38,7 @@ export default class JanModelExtension extends ModelExtension { private static readonly _tensorRtEngineFormat = '.engine' private static readonly _configDirName = 'config' private static readonly _defaultModelFileName = 'default-model.json' - private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada'] + private static readonly _supportedGpuArch = ['ampere', 'ada'] /** * Called when the extension is loaded. diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts index c7f53455dd..7ef40e7bec 100644 --- a/extensions/monitoring-extension/src/index.ts +++ b/extensions/monitoring-extension/src/index.ts @@ -1,4 +1,9 @@ -import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core' +import { + GpuSetting, + MonitoringExtension, + OperatingSystemInfo, + executeOnMain, +} from '@janhq/core' /** * JanMonitoringExtension is an extension that provides system monitoring functionality. 
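The `installExtensions` rewrite above replaces a sequential `for … await` loop with `Promise.all`, so extensions install concurrently. The same pattern in isolation, as a stand-alone sketch; `install` is a stub standing in for `Extension._install()`:

```ts
// Sketch of the concurrency change in installExtensions: start every
// installation, then await the whole batch together.
type ExtensionSpec = { specifier: string; activate?: boolean }

// Stub standing in for Extension._install()
const install = async (spec: ExtensionSpec): Promise<string> => spec.specifier

async function installAll(specs: ExtensionSpec[]): Promise<string[]> {
  const installed: string[] = []
  await Promise.all(
    specs.map((spec) =>
      install(spec).then((name) => {
        installed.push(name) // runs as each installation settles
      })
    )
  )
  return installed
}
```

One behavioral consequence worth noting: unlike the old loop, a single rejected installation now rejects the entire `Promise.all`.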
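The `physicalCpuCount` change above wraps the per-platform shell probes in try/catch and falls back to halving the logical core count reported by Node. A runnable sketch of just that fallback:

```ts
import os from 'os'

// The catch-branch fallback from physicalCpuCount: when the platform-specific
// probe fails, halve the logical core count, since SMT/hyper-threading
// typically exposes two logical cores per physical core.
const fallbackPhysicalCpuCount = (): number => Math.ceil(os.cpus().length / 2)

console.log(`Approximate physical cores: ${fallbackPhysicalCpuCount()}`)
```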
@@ -41,4 +46,8 @@ export default class JanMonitoringExtension extends MonitoringExtension { getCurrentLoad(): Promise { return executeOnMain(NODE, 'getCurrentLoad') } + + getOsInfo(): Promise { + return executeOnMain(NODE, 'getOsInfo') + } } diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts index 25f1511120..ca767d348f 100644 --- a/extensions/monitoring-extension/src/node/index.ts +++ b/extensions/monitoring-extension/src/node/index.ts @@ -1,9 +1,16 @@ -import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core' +import { + GpuSetting, + GpuSettingInfo, + OperatingSystemInfo, + ResourceInfo, + SupportedPlatforms, +} from '@janhq/core' import { getJanDataFolderPath, log } from '@janhq/core/node' import { mem, cpu } from 'node-os-utils' import { exec } from 'child_process' import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs' import path from 'path' +import os from 'os' /** * Path to the settings directory @@ -174,8 +181,7 @@ const updateNvidiaDriverInfo = async () => const getGpuArch = (gpuName: string): string => { if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown' - if (gpuName.includes('20')) return 'turing' - else if (gpuName.includes('30')) return 'ampere' + if (gpuName.includes('30')) return 'ampere' else if (gpuName.includes('40')) return 'ada' else return 'unknown' } @@ -320,3 +326,20 @@ const updateCudaExistence = ( data.is_initial = false return data } + +export const getOsInfo = (): OperatingSystemInfo => { + const platform = + SupportedPlatforms.find((p) => p === process.platform) || 'unknown' + + const osInfo: OperatingSystemInfo = { + platform: platform, + arch: process.arch, + release: os.release(), + machine: os.machine(), + version: os.version(), + totalMem: os.totalmem(), + freeMem: os.freemem(), + } + + return osInfo +} diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json index 7f95940b71..a27cf059d2 100644 --- a/extensions/tensorrt-llm-extension/models.json +++ b/extensions/tensorrt-llm-extension/models.json @@ -3,27 +3,31 @@ "sources": [ { "filename": "config.json", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/config.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json" }, { - "filename": "rank0.engine", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/rank0.engine" + "filename": "mistral_float16_tp1_rank0.engine", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine" }, { "filename": "tokenizer.model", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/tokenizer.model" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model" }, { "filename": "special_tokens_map.json", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json" }, { "filename": "tokenizer.json", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/tokenizer.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json" }, { "filename": "tokenizer_config.json", - "url": "https://delta.jan.ai/dist/models///LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json" + "url": 
"https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json" + }, + { + "filename": "model.cache", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache" } ], "id": "llamacorn-1.1b-chat-fp16", @@ -50,27 +54,31 @@ "sources": [ { "filename": "config.json", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/config.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json" }, { - "filename": "rank0.engine", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/rank0.engine" + "filename": "mistral_float16_tp1_rank0.engine", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine" }, { "filename": "tokenizer.model", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/tokenizer.model" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model" }, { "filename": "special_tokens_map.json", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/special_tokens_map.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json" }, { "filename": "tokenizer.json", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/tokenizer.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json" }, { "filename": "tokenizer_config.json", - "url": "https://delta.jan.ai/dist/models///TinyJensen-1.1B-Chat-fp16/tokenizer_config.json" + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json" + }, + { + "filename": "model.cache", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache" } ], "id": "tinyjensen-1.1b-chat-fp16", @@ -92,5 +100,57 @@ "size": 2151000000 }, "engine": "nitro-tensorrt-llm" + }, + { + "sources": [ + { + "filename": "config.json", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json" + }, + { + "filename": "mistral_float16_tp1_rank0.engine", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine" + }, + { + "filename": "tokenizer.model", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model" + }, + { + "filename": "special_tokens_map.json", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json" + }, + { + "filename": "tokenizer.json", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json" + }, + { + "filename": "tokenizer_config.json", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json" + }, + { + "filename": "model.cache", + "url": "https://delta.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache" + } + ], + "id": "mistral-7b-instruct-int4", + "object": "model", + "name": "Mistral 7B Instruct v0.1 INT4", + "version": "1.0", + "description": "Mistral 7B Instruct v0.1 INT4", + "format": "TensorRT-LLM", + "settings": { + "ctx_len": 2048, + "text_model": false, + "prompt_template": "[INST] {prompt} [/INST]" + }, + "parameters": { + "max_tokens": 4096 + }, + 
"metadata": { + "author": "MistralAI", + "tags": ["TensorRT-LLM", "7B", "Finetuned"], + "size": 3840000000 + }, + "engine": "nitro-tensorrt-llm" } ] diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json index af1dba8539..d3521669e2 100644 --- a/extensions/tensorrt-llm-extension/package.json +++ b/extensions/tensorrt-llm-extension/package.json @@ -18,6 +18,8 @@ "0.1.0" ] }, + "tensorrtVersion": "0.1.8", + "provider": "nitro-tensorrt-llm", "scripts": { "build": "tsc --module commonjs && rollup -c rollup.config.ts", "build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", diff --git a/extensions/tensorrt-llm-extension/rollup.config.ts b/extensions/tensorrt-llm-extension/rollup.config.ts index 33e45823be..e602bc7205 100644 --- a/extensions/tensorrt-llm-extension/rollup.config.ts +++ b/extensions/tensorrt-llm-extension/rollup.config.ts @@ -16,11 +16,12 @@ export default [ plugins: [ replace({ EXTENSION_NAME: JSON.stringify(packageJson.name), - TENSORRT_VERSION: JSON.stringify('0.1.5'), + TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), + PROVIDER: JSON.stringify(packageJson.provider), DOWNLOAD_RUNNER_URL: - process.platform === 'darwin' || process.platform === 'win32' + process.platform === 'win32' ? JSON.stringify( - 'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v/nitro-windows-v-amd64-tensorrt-llm-.tar.gz' + 'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v-tensorrt-llm-v0.7.1/nitro-windows-v-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz' ) : JSON.stringify( 'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v/nitro-linux-v-amd64-tensorrt-llm-.tar.gz' @@ -52,11 +53,14 @@ export default [ }, plugins: [ replace({ + EXTENSION_NAME: JSON.stringify(packageJson.name), + TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), + PROVIDER: JSON.stringify(packageJson.provider), LOAD_MODEL_URL: JSON.stringify( `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel` ), TERMINATE_ENGINE_URL: JSON.stringify( - `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy` + `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/processmanager/destroy` ), ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'), ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? 
'3928'), diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts index 905e86380a..9cf5b60900 100644 --- a/extensions/tensorrt-llm-extension/src/@types/global.d.ts +++ b/extensions/tensorrt-llm-extension/src/@types/global.d.ts @@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string declare const TENSORRT_VERSION: string declare const COMPATIBILITY: object declare const EXTENSION_NAME: string +declare const PROVIDER: string diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index 02c6768413..d2d08e8a71 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -16,11 +16,12 @@ import { executeOnMain, joinPath, showToast, - systemInformations, + systemInformation, LocalOAIEngine, fs, MessageRequest, ModelEvent, + getJanDataFolderPath, } from '@janhq/core' import models from '../models.json' @@ -34,11 +35,13 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { * Override custom function name for loading and unloading model * Which are implemented from node module */ - override provider = 'nitro-tensorrt-llm' + override provider = PROVIDER override inferenceUrl = INFERENCE_URL override nodeModule = NODE - private supportedGpuArch = ['turing', 'ampere', 'ada'] + private supportedGpuArch = ['ampere', 'ada'] + private supportedPlatform = ['win32', 'linux'] + private isUpdateAvailable = false compatibility() { return COMPATIBILITY as unknown as Compatibility @@ -54,7 +57,9 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { } override async install(): Promise { - const info = await systemInformations() + await this.removePopulatedModels() + + const info = await systemInformation() console.debug( `TensorRTLLMExtension installing pre-requisites... 
${JSON.stringify(info)}` ) @@ -83,12 +88,19 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { return } - const binaryFolderPath = await executeOnMain( - this.nodeModule, - 'binaryFolder' - ) - if (!(await fs.existsSync(binaryFolderPath))) { - await fs.mkdirSync(binaryFolderPath) + const janDataFolderPath = await getJanDataFolderPath() + const engineVersion = TENSORRT_VERSION + + const executableFolderPath = await joinPath([ + janDataFolderPath, + 'engines', + this.provider, + engineVersion, + firstGpu.arch, + ]) + + if (!(await fs.existsSync(executableFolderPath))) { + await fs.mkdir(executableFolderPath) } const placeholderUrl = DOWNLOAD_RUNNER_URL @@ -100,7 +112,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { const tarball = await baseName(url) - const tarballFullPath = await joinPath([binaryFolderPath, tarball]) + const tarballFullPath = await joinPath([executableFolderPath, tarball]) const downloadRequest: DownloadRequest = { url, localPath: tarballFullPath, @@ -109,12 +121,16 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { } downloadFile(downloadRequest) - // TODO: wrap this into a Promise const onFileDownloadSuccess = async (state: DownloadState) => { // if other download, ignore if (state.fileName !== tarball) return events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath) + await executeOnMain( + this.nodeModule, + 'decompressRunner', + tarballFullPath, + executableFolderPath + ) events.emit(DownloadEvent.onFileUnzipSuccess, state) // Prepopulate models as soon as it's ready @@ -128,6 +144,22 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) } + async removePopulatedModels(): Promise { + console.debug(`removePopulatedModels`, JSON.stringify(models)) + const janDataFolderPath = await getJanDataFolderPath() + const modelFolderPath = await joinPath([janDataFolderPath, 'models']) + + for (const model of models) { + const modelPath = await joinPath([modelFolderPath, model.id]) + console.debug(`modelPath: ${modelPath}`) + if (await fs.existsSync(modelPath)) { + console.debug(`Removing model ${modelPath}`) + await fs.rmdirSync(modelPath) + } + } + events.emit(ModelEvent.OnModelsUpdate, {}) + } + async onModelInit(model: Model): Promise { if (model.engine !== this.provider) return @@ -143,14 +175,70 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { } } + updatable() { + return this.isUpdateAvailable + } + override async installationState(): Promise { - // For now, we just check the executable of nitro x tensor rt - const isNitroExecutableAvailable = await executeOnMain( - this.nodeModule, - 'isNitroExecutableAvailable' - ) + const info = await systemInformation() + + const gpuSetting: GpuSetting | undefined = info.gpuSetting + if (gpuSetting === undefined) { + console.warn( + 'No GPU setting found. TensorRT-LLM extension is not installed' + ) + return 'NotInstalled' // TODO: maybe disabled / incompatible is more appropriate + } + + if (gpuSetting.gpus.length === 0) { + console.warn('No GPU found. TensorRT-LLM extension is not installed') + return 'NotInstalled' + } + + const firstGpu = gpuSetting.gpus[0] + if (!firstGpu.name.toLowerCase().includes('nvidia')) { + console.error('No Nvidia GPU found. Please check your GPU setting.') + return 'NotInstalled' + } - return isNitroExecutableAvailable ? 
'Installed' : 'NotInstalled' + if (firstGpu.arch === undefined) { + console.error('No GPU architecture found. Please check your GPU setting.') + return 'NotInstalled' + } + + if (!this.supportedGpuArch.includes(firstGpu.arch)) { + console.error( + `Your GPU: ${firstGpu.name} is not supported. Only 30xx and 40xx series are supported.` + ) + return 'NotInstalled' + } + + const osInfo = info.osInfo + if (!osInfo) { + console.error('No OS information found. Please check your OS setting.') + return 'NotInstalled' + } + + if (!this.supportedPlatform.includes(osInfo.platform)) { + console.error( + `Your OS: ${osInfo.platform} is not supported. Only Windows and Linux are supported.` + ) + return 'NotInstalled' + } + const janDataFolderPath = await getJanDataFolderPath() + const engineVersion = TENSORRT_VERSION + + const enginePath = await joinPath([ + janDataFolderPath, + 'engines', + this.provider, + engineVersion, + firstGpu.arch, + osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro', + ]) + + // For now, we just check the executable of nitro x tensor rt + return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled' } override onInferenceStopped() { diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index 252468fc1b..1afebb950f 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -2,13 +2,17 @@ import path from 'path' import { ChildProcessWithoutNullStreams, spawn } from 'child_process' import tcpPortUsed from 'tcp-port-used' import fetchRT from 'fetch-retry' -import { log } from '@janhq/core/node' -import { existsSync } from 'fs' +import { log, getJanDataFolderPath } from '@janhq/core/node' import decompress from 'decompress' +import { SystemInformation } from '@janhq/core' +import { PromptTemplate } from '@janhq/core' // Polyfill fetch with retry const fetchRetry = fetchRT(fetch) +const supportedPlatform = (): string[] => ['win32', 'linux'] +const supportedGpuArch = (): string[] => ['ampere', 'ada'] + /** * The response object for model init operation. */ @@ -24,16 +28,34 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined * Initializes an engine subprocess to load a machine learning model. * @param params - The model load settings. */ -async function loadModel(params: any): Promise<{ error: Error | undefined }> { +async function loadModel( + params: any, + systemInfo?: SystemInformation +): Promise<{ error: Error | undefined }> { // modelFolder is the absolute path to the running model folder // e.g. ~/jan/models/llama-2 let modelFolder = params.modelFolder + if (params.model.settings.prompt_template) { + const promptTemplate = params.model.settings.prompt_template + const prompt = promptTemplateConverter(promptTemplate) + if (prompt?.error) { + return Promise.reject(prompt.error) + } + params.model.settings.system_prompt = prompt.system_prompt + params.model.settings.user_prompt = prompt.user_prompt + params.model.settings.ai_prompt = prompt.ai_prompt + } + const settings: ModelLoadParams = { engine_path: modelFolder, ctx_len: params.model.settings.ctx_len ?? 2048, + ...params.model.settings, + } + if (!systemInfo) { + throw new Error('Cannot get system info. Unable to start nitro x tensorrt.') } - return runEngineAndLoadModel(settings) + return runEngineAndLoadModel(settings, systemInfo) } /** @@ -67,9 +89,12 @@ function unloadModel(): Promise<void> { * 2. 
Load model into engine subprocess * @returns */ -async function runEngineAndLoadModel(settings: ModelLoadParams) { +async function runEngineAndLoadModel( + settings: ModelLoadParams, + systemInfo: SystemInformation +) { return unloadModel() - .then(runEngine) + .then(() => runEngine(systemInfo)) .then(() => loadModelRequest(settings)) .catch((err) => { // TODO: Broadcast error so app could display proper error message @@ -81,7 +106,7 @@ async function runEngineAndLoadModel(settings: ModelLoadParams) { /** * Loads a LLM model into the Engine subprocess by sending a HTTP POST request. */ -function loadModelRequest( +async function loadModelRequest( settings: ModelLoadParams ): Promise<{ error: Error | undefined }> { debugLog(`Loading model with params ${JSON.stringify(settings)}`) @@ -107,23 +132,66 @@ function loadModelRequest( /** * Spawns engine subprocess. */ -function runEngine(): Promise { +async function runEngine(systemInfo: SystemInformation): Promise { debugLog(`Spawning engine subprocess...`) + if (systemInfo.gpuSetting == null) { + return Promise.reject( + 'No GPU information found. Please check your GPU setting.' + ) + } + + if (systemInfo.gpuSetting.gpus.length === 0) { + return Promise.reject('No GPU found. Please check your GPU setting.') + } + + if (systemInfo.osInfo == null) { + return Promise.reject( + 'No OS information found. Please check your OS setting.' + ) + } + const platform = systemInfo.osInfo.platform + if (platform == null || supportedPlatform().includes(platform) === false) { + return Promise.reject( + 'No OS architecture found. Please check your OS setting.' + ) + } + + const gpu = systemInfo.gpuSetting.gpus[0] + if (gpu.name.toLowerCase().includes('nvidia') === false) { + return Promise.reject('No Nvidia GPU found. Please check your GPU setting.') + } + const gpuArch = gpu.arch + if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) { + return Promise.reject( + `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join( + ', ' + )} series are supported.` + ) + } + const janDataFolderPath = await getJanDataFolderPath() + const tensorRtVersion = TENSORRT_VERSION + const provider = PROVIDER return new Promise((resolve, reject) => { // Current directory by default - let binaryFolder = path.join(__dirname, '..', 'bin') - // Binary path - const binary = path.join( - binaryFolder, - process.platform === 'win32' ? 'nitro.exe' : 'nitro' + + const executableFolderPath = path.join( + janDataFolderPath, + 'engines', + provider, + tensorRtVersion, + gpuArch + ) + const nitroExecutablePath = path.join( + executableFolderPath, + platform === 'win32' ? 
'nitro.exe' : 'nitro' ) const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT] // Execute the binary - debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`) - subprocess = spawn(binary, args, { - cwd: binaryFolder, + debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`) + subprocess = spawn(nitroExecutablePath, args, { + cwd: executableFolderPath, env: { ...process.env, }, @@ -155,12 +223,7 @@ function debugLog(message: string, level: string = 'Debug') { log(`[TENSORRT_LLM_NITRO]::${level}:${message}`) } -const binaryFolder = async (): Promise<string> => { - return path.join(__dirname, '..', 'bin') } - -const decompressRunner = async (zipPath: string) => { - const output = path.join(__dirname, '..', 'bin') +const decompressRunner = async (zipPath: string, output: string) => { console.debug(`Decompressing ${zipPath} to ${output}...`) try { const files = await decompress(zipPath, output) @@ -170,22 +233,57 @@ } } -const isNitroExecutableAvailable = async (): Promise<boolean> => { - const binary = path.join( - __dirname, - '..', - 'bin', - process.platform === 'win32' ? 'nitro.exe' : 'nitro' - ) +/** + * Parse prompt template into args settings + * @param promptTemplate Template as string + * @returns + */ +function promptTemplateConverter(promptTemplate: string): PromptTemplate { + // Split the string using the markers + const systemMarker = '{system_message}' + const promptMarker = '{prompt}' + + if ( + promptTemplate.includes(systemMarker) && + promptTemplate.includes(promptMarker) + ) { + // Find the indices of the markers + const systemIndex = promptTemplate.indexOf(systemMarker) + const promptIndex = promptTemplate.indexOf(promptMarker) + + // Extract the parts of the string + const system_prompt = promptTemplate.substring(0, systemIndex) + const user_prompt = promptTemplate.substring( + systemIndex + systemMarker.length, + promptIndex + ) + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length + ) + + // Return the split parts + return { system_prompt, user_prompt, ai_prompt } + } else if (promptTemplate.includes(promptMarker)) { + // Extract the parts of the string for the case where only promptMarker is present + const promptIndex = promptTemplate.indexOf(promptMarker) + const user_prompt = promptTemplate.substring(0, promptIndex) + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length + ) + + // Return the split parts + return { user_prompt, ai_prompt } + } - return existsSync(binary) + // Return an error if none of the conditions are met + return { error: 'Cannot split prompt template' } } export default { - binaryFolder, + supportedPlatform, + supportedGpuArch, decompressRunner, loadModel, unloadModel, dispose: unloadModel, - isNitroExecutableAvailable, } diff --git a/web/containers/Providers/index.tsx b/web/containers/Providers/index.tsx index e70a56ca87..10c6c7547a 100644 --- a/web/containers/Providers/index.tsx +++ b/web/containers/Providers/index.tsx @@ -1,6 +1,6 @@ 'use client' -import { PropsWithChildren, useEffect, useState } from 'react' +import { PropsWithChildren, useCallback, useEffect, useState } from 'react' import { Toaster } from 'react-hot-toast' @@ -37,7 +37,7 @@ const Providers = (props: PropsWithChildren) => { const [activated, setActivated] = useState(false) const [settingUp, setSettingUp] = useState(false) - async function setupExtensions() { + const setupExtensions = useCallback(async () => { // Register all active extensions await 
extensionManager.registerActive() @@ -57,7 +57,7 @@ const Providers = (props: PropsWithChildren) => { setSettingUp(false) setActivated(true) }, 500) - } + }, [pathname]) // Services Setup useEffect(() => { @@ -78,7 +78,7 @@ const Providers = (props: PropsWithChildren) => { setActivated(true) } } - }, [setupCore]) + }, [setupCore, setupExtensions]) return ( diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 11a57a5988..0bbc779a6f 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -102,7 +102,6 @@ export default function useSendChatMessage() { console.error('No active thread') return } - setIsGeneratingResponse(true) updateThreadWaiting(activeThreadRef.current.id, true) const messages: ChatCompletionMessage[] = [ activeThreadRef.current.assistants[0]?.instructions, @@ -148,7 +147,7 @@ export default function useSendChatMessage() { await waitForModelStarting(modelId) setQueuedMessage(false) } - + setIsGeneratingResponse(true) if (currentMessage.role !== ChatCompletionRole.User) { // Delete last response before regenerating deleteMessage(currentMessage.id ?? '') @@ -171,7 +170,6 @@ export default function useSendChatMessage() { console.error('No active thread') return } - setIsGeneratingResponse(true) if (engineParamsUpdate) setReloadModel(true) @@ -361,7 +359,7 @@ export default function useSendChatMessage() { await waitForModelStarting(modelId) setQueuedMessage(false) } - + setIsGeneratingResponse(true) events.emit(MessageEvent.OnMessageSent, messageRequest) setReloadModel(false) diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts index 9ff89827e8..378ca33faf 100644 --- a/web/hooks/useSettings.ts +++ b/web/hooks/useSettings.ts @@ -70,11 +70,6 @@ export const useSettings = () => { } } await fs.writeFileSync(settingsFile, JSON.stringify(settings)) - - // Relaunch to apply settings - if (vulkan != null) { - window.location.reload() - } } return { diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index 3cc43e744e..67ebf81d52 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -90,12 +90,38 @@ const Advanced = () => { [setPartialProxy, setProxy] ) - const updateQuickAskEnabled = async (e: boolean) => { + const updateQuickAskEnabled = async ( + e: boolean, + relaunch: boolean = true + ) => { const appConfiguration: AppConfiguration = await window.core?.api?.getAppConfigurations() appConfiguration.quick_ask = e await window.core?.api?.updateAppConfiguration(appConfiguration) - window.core?.api?.relaunch() + if (relaunch) window.core?.api?.relaunch() + } + + const updateVulkanEnabled = async (e: boolean, relaunch: boolean = true) => { + toaster({ + title: 'Reload', + description: 'Vulkan settings updated. 
Reload now to apply the changes.', + }) + stopModel() + setVulkanEnabled(e) + await saveSettings({ vulkan: e, gpusInUse: [] }) + // Relaunch to apply settings + if (relaunch) window.location.reload() + } + + const updateExperimentalEnabled = async (e: boolean) => { + setExperimentalEnabled(e) + if (e) return + + // It affects other settings, so we need to reset them + const isRelaunch = quickAskEnabled || vulkanEnabled + if (quickAskEnabled) await updateQuickAskEnabled(false, false) + if (vulkanEnabled) await updateVulkanEnabled(false, false) + if (isRelaunch) window.core?.api?.relaunch() } useEffect(() => { @@ -179,7 +205,7 @@ const Advanced = () => { @@ -381,16 +407,7 @@ const Advanced = () => { { - toaster({ - title: 'Reload', - description: - 'Vulkan settings updated. Reload now to apply the changes.', - }) - stopModel() - saveSettings({ vulkan: e, gpusInUse: [] }) - setVulkanEnabled(e) - }} + onCheckedChange={(e) => updateVulkanEnabled(e)} /> )} diff --git a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx index c11041ffe0..fb0214536a 100644 --- a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx +++ b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx @@ -5,7 +5,7 @@ import { GpuSetting, InstallationState, abortDownload, - systemInformations, + systemInformation, } from '@janhq/core' import { Badge, @@ -23,6 +23,8 @@ import { useAtomValue } from 'jotai' import { Marked, Renderer } from 'marked' +import UpdateExtensionModal from './UpdateExtensionModal' + import { extensionManager } from '@/extension' import Extension from '@/extension/Extension' import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom' @@ -39,7 +41,7 @@ const TensorRtExtensionItem: React.FC = ({ item }) => { useState('NotRequired') const installingExtensions = useAtomValue(installingExtensionAtom) const [isGpuSupported, setIsGpuSupported] = useState(false) - + const [promptUpdateModal, setPromptUpdateModal] = useState(false) const isInstalling = installingExtensions.some( (e) => e.extensionId === item.name ) @@ -51,7 +53,7 @@ const TensorRtExtensionItem: React.FC = ({ item }) => { useEffect(() => { const getSystemInfos = async () => { - const info = await systemInformations() + const info = await systemInformation() if (!info) { setIsGpuSupported(false) return @@ -69,7 +71,7 @@ const TensorRtExtensionItem: React.FC = ({ item }) => { return } - const supportedGpuArch = ['turing', 'ampere', 'ada'] + const supportedGpuArch = ['ampere', 'ada'] setIsGpuSupported(supportedGpuArch.includes(arch)) } getSystemInfos() @@ -112,7 +114,7 @@ const TensorRtExtensionItem: React.FC = ({ item }) => { } const description = marked.parse(item.description ?? '', { async: false }) - console.log(description) + return (
@@ -138,6 +140,7 @@ const TensorRtExtensionItem: React.FC = ({ item }) => { installProgress={progress} installState={installState} onInstallClick={onInstallClick} + onUpdateClick={() => setPromptUpdateModal(true)} onCancelClick={onCancelInstallingClick} />
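Both `install()` and `installationState()` earlier in this diff derive the runner location from the same layout under the Jan data folder. A sketch of that path convention; all arguments are caller-supplied, and the commented values come from this diff (provider `nitro-tensorrt-llm`, `tensorrtVersion` `0.1.8`):

```ts
import path from 'path'

// Directory convention used by the TensorRT-LLM extension in this diff:
// <janDataFolder>/engines/<provider>/<tensorrtVersion>/<gpuArch>/nitro[.exe]
const engineExecutablePath = (
  janDataFolderPath: string,
  provider: string, // 'nitro-tensorrt-llm'
  tensorrtVersion: string, // '0.1.8'
  gpuArch: string, // 'ampere' | 'ada'
  platform: NodeJS.Platform
): string =>
  path.join(
    janDataFolderPath,
    'engines',
    provider,
    tensorrtVersion,
    gpuArch,
    platform === 'win32' ? 'nitro.exe' : 'nitro'
  )
```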
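The new `onUpdateClick` prop above feeds `InstallStateIndicator`, which gains an Update action for the new `'Updatable'` state. A sketch of the state-to-action mapping it implements, expressed without Jan's UI kit (the `Action` shape is illustrative, not the component's actual markup):

```ts
import { InstallationState } from '@janhq/core'

// Sketch: each installation state yields the action the UI should offer.
type Action =
  | { kind: 'button'; label: 'Update' | 'Install'; onClick: () => void }
  | { kind: 'badge'; label: 'Installed' }
  | { kind: 'none' }

const actionFor = (
  state: InstallationState,
  onInstallClick: () => void,
  onUpdateClick: () => void
): Action => {
  switch (state) {
    case 'Updatable':
      return { kind: 'button', label: 'Update', onClick: onUpdateClick }
    case 'NotInstalled':
      return { kind: 'button', label: 'Install', onClick: onInstallClick }
    case 'Installed':
      return { kind: 'badge', label: 'Installed' }
    default:
      // 'NotRequired' and 'Corrupted' render no action in this sketch
      return { kind: 'none' }
  }
}
```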
@@ -177,6 +180,9 @@ const TensorRtExtensionItem: React.FC = ({ item }) => {
)} + {promptUpdateModal && ( + + )} ) } @@ -185,6 +191,7 @@ type InstallStateProps = { installProgress: number installState: InstallationState onInstallClick: () => void + onUpdateClick: () => void onCancelClick: () => void } @@ -192,6 +199,7 @@ const InstallStateIndicator: React.FC = ({ installProgress, installState, onInstallClick, + onUpdateClick, onCancelClick, }) => { if (installProgress !== -1) { @@ -218,6 +226,12 @@ const InstallStateIndicator: React.FC = ({ Installed ) + case 'Updatable': + return ( + + ) case 'NotInstalled': return ( + + + + + + + + + ) +} + +export default React.memo(UpdateExtensionModal) diff --git a/web/services/appService.ts b/web/services/appService.ts index 9327d55c34..16060e2d94 100644 --- a/web/services/appService.ts +++ b/web/services/appService.ts @@ -1,20 +1,32 @@ -import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core' +import { + ExtensionTypeEnum, + MonitoringExtension, + SystemInformation, +} from '@janhq/core' import { toaster } from '@/containers/Toast' import { extensionManager } from '@/extension' export const appService = { - systemInformations: async () => { - const gpuSetting = await extensionManager - ?.get(ExtensionTypeEnum.SystemMonitoring) - ?.getGpuSetting() + systemInformation: async (): Promise => { + const monitorExtension = extensionManager?.get( + ExtensionTypeEnum.SystemMonitoring + ) + if (!monitorExtension) { + console.warn('System monitoring extension not found') + return undefined + } + + const gpuSetting = await monitorExtension.getGpuSetting() + const osInfo = await monitorExtension.getOsInfo() return { gpuSetting, - // TODO: Other system information + osInfo, } }, + showToast: (title: string, description: string) => { toaster({ title,
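A worked example of the split `promptTemplateConverter` (earlier in this diff) performs, using the `[INST] {prompt} [/INST]` template that models.json registers for Mistral; this exercises the prompt-marker-only branch:

```ts
// The template contains '{prompt}' but no '{system_message}', so the
// function splits it into user_prompt and ai_prompt only.
const promptTemplate = '[INST] {prompt} [/INST]'
const promptMarker = '{prompt}'
const promptIndex = promptTemplate.indexOf(promptMarker)

const user_prompt = promptTemplate.substring(0, promptIndex) // '[INST] '
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length) // ' [/INST]'
console.log({ user_prompt, ai_prompt })
```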
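The reworked `appService.systemInformation` above now composes GPU and OS info from the monitoring extension. A hypothetical caller mirroring the compatibility checks used by the TensorRT-LLM extension; the supported-platform and supported-arch lists are the ones appearing in this diff:

```ts
import { SystemInformation } from '@janhq/core'
import { appService } from '@/services/appService'

// Hypothetical helper: true when the current machine matches the
// TensorRT-LLM requirements checked elsewhere in this diff.
const isTensorRtSupported = async (): Promise<boolean> => {
  const info: SystemInformation | undefined =
    await appService.systemInformation()
  if (!info?.osInfo || !info.gpuSetting?.gpus.length) return false
  return (
    ['win32', 'linux'].includes(info.osInfo.platform) &&
    ['ampere', 'ada'].includes(info.gpuSetting.gpus[0].arch ?? '')
  )
}
```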