Skip to content

Commit

Permalink
fix: better kill process tensorrt-llm (#2681)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan committed Apr 11, 2024
1 parent ebdaaa6 commit c0949b2
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 27 deletions.
6 changes: 4 additions & 2 deletions extensions/tensorrt-llm-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/decompress": "4.2.7",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
"@types/tcp-port-used": "^1.0.4",
"@types/decompress": "4.2.7",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
Expand All @@ -58,6 +58,7 @@
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"terminate": "^2.6.1",
"ulidx": "^2.3.0"
},
"engines": {
Expand All @@ -72,6 +73,7 @@
"tcp-port-used",
"fetch-retry",
"decompress",
"@janhq/core"
"@janhq/core",
"terminate"
]
}
78 changes: 53 additions & 25 deletions extensions/tensorrt-llm-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ import {
PromptTemplate,
} from '@janhq/core/node'
import decompress from 'decompress'
import terminate from 'terminate'

// Polyfill fetch with retry: wrap the global `fetch` with `fetchRT` and keep
// the wrapped function as `fetchRetry`.
// NOTE(review): `fetchRT` is imported outside this view — presumably the
// `fetch-retry` factory; confirm against the import block.
const fetchRetry = fetchRT(fetch)

/**
 * Platform identifiers this extension treats as supported.
 * A new array is built on every call, so callers may mutate the result.
 */
const supportedPlatform = (): string[] => {
  return ['win32', 'linux']
}

/**
 * GPU architecture names this extension treats as supported.
 * A new array is built on every call, so callers may mutate the result.
 */
const supportedGpuArch = (): string[] => {
  return ['ampere', 'ada']
}

/** Retry interval handed to the `tcpPortUsed` wait helpers when polling the engine port. */
const PORT_CHECK_INTERVAL = 100

/**
* The response object for model init operation.
Expand Down Expand Up @@ -64,28 +66,57 @@ async function loadModel(
/**
 * Stops the Engine subprocess and waits until the engine port is released.
 *
 * Strategy:
 * 1. If a subprocess with a PID is tracked, terminate it with `terminate`
 *    and wait for `ENGINE_PORT` to become free.
 * 2. If there is no tracked PID, or step 1 fails at any point, fall back to
 *    sending a DELETE request to `TERMINATE_ENGINE_URL` and wait for the
 *    port again.
 *
 * @returns A promise that resolves once the engine port is free. It rejects
 *   with the string `'PORT_NOT_AVAILABLE'` when the port is still in use
 *   after the fallback attempt.
 */
function unloadModel(): Promise<void> {
  debugLog(`Request to kill engine`)

  // Polls the engine port until nothing listens on it (gives up after 5s).
  const waitForPortFree = (): Promise<void> =>
    tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)

  // Fallback: ask the engine to shut itself down over HTTP, then wait for the port.
  const killRequest = (): Promise<void> => {
    // The abort timer is created per request. A single timer started when
    // unloadModel() is entered would already have fired by the time this
    // fallback runs after a 5s port-wait timeout, aborting the request
    // before it is ever sent.
    const controller = new AbortController()
    const abortTimer = setTimeout(() => controller.abort(), 5000)

    return fetch(TERMINATE_ENGINE_URL, {
      method: 'DELETE',
      signal: controller.signal,
    })
      .then(() => {
        subprocess = undefined
      })
      .catch(() => {}) // Do nothing with this attempt
      .then(() => {
        clearTimeout(abortTimer)
        return waitForPortFree() // Wait for port available
      })
      .then(() => {
        debugLog(`Engine process is terminated`)
      })
      .catch((err) => {
        debugLog(
          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
        )
        // Kept as a bare string so existing callers observe the same rejection value.
        throw 'PORT_NOT_AVAILABLE'
      })
  }

  const pid = subprocess?.pid
  if (!pid) {
    return killRequest()
  }

  log(`[NITRO]::Debug: Killing PID ${pid}`)

  // `terminate` is callback based, so adapt it to a promise here. Every
  // outcome must settle this promise; otherwise unloadModel() would hang.
  const terminated = new Promise<void>((resolve, reject) => {
    terminate(pid, (err) => {
      if (err) {
        reject(err)
      } else {
        resolve()
      }
    })
  })

  return terminated
    .then(() => waitForPortFree())
    .then(() => {
      log(`[NITRO]::Debug: Nitro process is terminated`)
    })
    // Termination failed or the port never freed: try the HTTP route. Its
    // result (including a 'PORT_NOT_AVAILABLE' rejection) reaches the caller.
    .catch(() => killRequest())
}
/**
* 1. Spawn engine process
Expand All @@ -97,11 +128,6 @@ async function runEngineAndLoadModel(
systemInfo: SystemInformation
) {
return unloadModel()
.then((res) => {
if (res?.error) {
throw new Error(res.error)
}
})
.then(() => runEngine(systemInfo))
.then(() => loadModelRequest(settings))
.catch((err) => {
Expand Down Expand Up @@ -220,10 +246,12 @@ async function runEngine(systemInfo: SystemInformation): Promise<void> {
reject(`child process exited with code ${code}`)
})

tcpPortUsed.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000).then(() => {
debugLog(`Engine is ready`)
resolve()
})
tcpPortUsed
.waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
.then(() => {
debugLog(`Engine is ready`)
resolve()
})
})
}

Expand Down

0 comments on commit c0949b2

Please sign in to comment.