{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":599547518,"defaultBranch":"main","name":"vllm","ownerLogin":"vllm-project","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-02-09T11:23:20.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/136984999?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1716312041.0","currentOid":""},"activityList":{"items":[{"before":"a36de682d4283c60777bc3022ed3ce71cd90b904","after":"ee3eea0a1b2c690557455d97074d8829d5a98320","ref":"refs/heads/main","pushedAt":"2024-05-22T22:55:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"rkooo567","name":"SangBin Cho","path":"/rkooo567","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/18510752?s=80&v=4"},"commit":{"message":"[Misc] Take user preference in attention selector (#4960)","shortMessageHtmlLink":"[Misc] Take user preference in attention selector (#4960)"}},{"before":"eb6d3c264d0cd8e44dec16bca7947fbe96415ce9","after":"a36de682d4283c60777bc3022ed3ce71cd90b904","ref":"refs/heads/main","pushedAt":"2024-05-22T22:26:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Minor] Fix small typo in llama.py: QKVParallelLinear -> QuantizationConfig (#4991)","shortMessageHtmlLink":"[Minor] Fix small typo in llama.py: QKVParallelLinear -> Quantization…"}},{"before":"97b030005c7f5cde7c1b97c718a8841db7d6220b","after":"eb6d3c264d0cd8e44dec16bca7947fbe96415ce9","ref":"refs/heads/main","pushedAt":"2024-05-22T21:17:27.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"rkooo567","name":"SangBin Cho","path":"/rkooo567","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/18510752?s=80&v=4"},"commit":{"message":"[Core] Eliminate parallel worker per-step task scheduling overhead (#4894)","shortMessageHtmlLink":"[Core] Eliminate parallel worker per-step task scheduling overhead (#…"}},{"before":"a3a73ab0696b6692f3eecf80271a01fa97bd001d","after":"97b030005c7f5cde7c1b97c718a8841db7d6220b","ref":"refs/heads/main","pushedAt":"2024-05-22T20:58:59.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"njhill","name":"Nick Hill","path":"/njhill","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16958488?s=80&v=4"},"commit":{"message":"[Model] LoRA gptbigcode implementation (#3949)","shortMessageHtmlLink":"[Model] LoRA gptbigcode implementation (#3949)"}},{"before":"8674f9880e2d8574c2adc759027e0f27dc9b95de","after":"a3a73ab0696b6692f3eecf80271a01fa97bd001d","ref":"refs/heads/main","pushedAt":"2024-05-22T20:28:20.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"pcmoritz","name":"Philipp Moritz","path":"/pcmoritz","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/113316?s=80&v=4"},"commit":{"message":"[Misc] Load FP8 kv-cache scaling factors from checkpoints (#4893)\n\nThe 2nd PR for #4532.\r\n\r\nThis PR supports loading FP8 kv-cache scaling factors from a FP8 checkpoint (with .kv_scale parameter).","shortMessageHtmlLink":"[Misc] Load FP8 kv-cache scaling factors from checkpoints (#4893)"}},{"before":"c74c913bfbefc5d7a1302557eb35cdcbecd91f67","after":"8674f9880e2d8574c2adc759027e0f27dc9b95de","ref":"refs/heads/main","pushedAt":"2024-05-22T14:10:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"pcmoritz","name":"Philipp 
Moritz","path":"/pcmoritz","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/113316?s=80&v=4"},"commit":{"message":"[Kernel] Fixup for CUTLASS kernels in CUDA graphs (#4954)\n\nPass the CUDA stream into the CUTLASS GEMMs, to avoid future issues with CUDA graphs","shortMessageHtmlLink":"[Kernel] Fixup for CUTLASS kernels in CUDA graphs (#4954)"}},{"before":"5f6d10c14c17122e6d711a4829ee0ca672e07f6f","after":"c74c913bfbefc5d7a1302557eb35cdcbecd91f67","ref":"refs/heads/main","pushedAt":"2024-05-22T13:02:59.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[misc] remove comments that were supposed to be removed (#4977)","shortMessageHtmlLink":"[misc] remove comments that were supposed to be removed (#4977)"}},{"before":"9b9a10d6cb89f18e054daa66f25cb8f17c723b2c","after":"5f6d10c14c17122e6d711a4829ee0ca672e07f6f","ref":"refs/heads/main","pushedAt":"2024-05-22T07:18:41.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"rkooo567","name":"SangBin Cho","path":"/rkooo567","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/18510752?s=80&v=4"},"commit":{"message":"[CI/Build] Enforce style for C++ and CUDA code with `clang-format` (#4722)","shortMessageHtmlLink":"[CI/Build] Enforce style for C++ and CUDA code with clang-format (#…"}},{"before":"99eff67ba9155b5fec9a9abd939e3a29a1b42dce","after":"9b9a10d6cb89f18e054daa66f25cb8f17c723b2c","ref":"refs/heads/main","pushedAt":"2024-05-22T05:32:35.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Frontend] Dynamic RoPE scaling (#4638)","shortMessageHtmlLink":"[Frontend] Dynamic RoPE scaling (#4638)"}},{"before":"14772eeb8e8ec76e5e70142d12a7332fcec28ccb","after":"99eff67ba9155b5fec9a9abd939e3a29a1b42dce","ref":"refs/heads/main","pushedAt":"2024-05-21T19:33:25.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Bugfix][Kernel] Add head size check for attention backend selection (#4944)","shortMessageHtmlLink":"[Bugfix][Kernel] Add head size check for attention backend selection (#…"}},{"before":"7d4a9c837d267666c539c2dbc954ac40fe7a2122","after":null,"ref":"refs/heads/curr_loras_fix","pushedAt":"2024-05-21T17:20:41.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"Yard1","name":"Antoni Baum","path":"/Yard1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10364161?s=80&v=4"}},{"before":"757b62c49560baa6f294310a53032348a0d95939","after":"14772eeb8e8ec76e5e70142d12a7332fcec28ccb","ref":"refs/heads/main","pushedAt":"2024-05-21T16:30:52.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Bugfix] Fix flag name for `max_seq_len_to_capture` (#4935)\n\nSigned-off-by: kerthcet ","shortMessageHtmlLink":"[Bugfix] Fix flag name for max_seq_len_to_capture 
(#4935)"}},{"before":"e941f885843d4bcd239f805a9267729e9631556f","after":"757b62c49560baa6f294310a53032348a0d95939","ref":"refs/heads/main","pushedAt":"2024-05-21T16:06:10.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[CI/Build] Codespell ignore `build/` directory (#4945)","shortMessageHtmlLink":"[CI/Build] Codespell ignore build/ directory (#4945)"}},{"before":"f12c3b5b3d076a67662b76d215fd875fd6cdf6d7","after":"e941f885843d4bcd239f805a9267729e9631556f","ref":"refs/heads/main","pushedAt":"2024-05-21T07:17:25.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Docs] Add acknowledgment for sponsors (#4925)","shortMessageHtmlLink":"[Docs] Add acknowledgment for sponsors (#4925)"}},{"before":"d130b573a0162173002b97e2112c6c1c10d0ca8e","after":"f12c3b5b3d076a67662b76d215fd875fd6cdf6d7","ref":"refs/heads/main","pushedAt":"2024-05-21T05:24:17.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"rkooo567","name":"SangBin Cho","path":"/rkooo567","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/18510752?s=80&v=4"},"commit":{"message":"[Model] Add Phi-2 LoRA support (#4886)","shortMessageHtmlLink":"[Model] Add Phi-2 LoRA support (#4886)"}},{"before":"65ae8c2c8f52e0c98e4e26ad1255772d888592a6","after":"d130b573a0162173002b97e2112c6c1c10d0ca8e","ref":"refs/heads/main","pushedAt":"2024-05-21T05:22:22.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Model] add rope_scaling support for qwen2 (#4930)","shortMessageHtmlLink":"[Model] add rope_scaling support for qwen2 (#4930)"}},{"before":"c3af44722cff56bba5fc912c8e16d9de02dfb532","after":"65ae8c2c8f52e0c98e4e26ad1255772d888592a6","ref":"refs/heads/main","pushedAt":"2024-05-21T00:48:32.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Core] Fix scheduler considering \"no LoRA\" as \"LoRA\" (#4897)","shortMessageHtmlLink":"[Core] Fix scheduler considering \"no LoRA\" as \"LoRA\" (#4897)"}},{"before":"1937e29848c8de8634c5421612d57863aa0e2a51","after":"c3af44722cff56bba5fc912c8e16d9de02dfb532","ref":"refs/heads/main","pushedAt":"2024-05-20T20:16:57.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"[Doc]Add documentation to benchmarking script when running TGI (#4920)","shortMessageHtmlLink":"[Doc]Add documentation to benchmarking script when running TGI (#4920)"}},{"before":"f0eecee6106774e1e0f9b31c7438cde77654df52","after":"1937e29848c8de8634c5421612d57863aa0e2a51","ref":"refs/heads/main","pushedAt":"2024-05-20T18:46:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"[Core] Sharded State Loader download from HF (#4889)","shortMessageHtmlLink":"[Core] Sharded State Loader 
download from HF (#4889)"}},{"before":"943e72ca56974b4d8b5a141182e717d2abd3a819","after":"f0eecee6106774e1e0f9b31c7438cde77654df52","ref":"refs/heads/main","pushedAt":"2024-05-20T18:44:25.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"pcmoritz","name":"Philipp Moritz","path":"/pcmoritz","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/113316?s=80&v=4"},"commit":{"message":"[Bugfix] Fix dummy weight for fp8 (#4916)\n\nAllow dummy load format for fp8,\r\ntorch.uniform_ doesn't support FP8 at the moment\r\n\r\nCo-authored-by: Mor Zusman ","shortMessageHtmlLink":"[Bugfix] Fix dummy weight for fp8 (#4916)"}},{"before":"546a97ef691f242c899a5e0906d0e75f42694e95","after":"943e72ca56974b4d8b5a141182e717d2abd3a819","ref":"refs/heads/main","pushedAt":"2024-05-20T18:29:28.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Build/CI] Enabling AMD Entrypoints Test (#4834)\n\nCo-authored-by: Alexey Kondratiev ","shortMessageHtmlLink":"[Build/CI] Enabling AMD Entrypoints Test (#4834)"}},{"before":null,"after":"717e8c784b1ffdccb6246ea483e307544bbf4cf4","ref":"refs/heads/bench-moe","pushedAt":"2024-05-20T17:54:52.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Delete benchmark_mixtral_moe","shortMessageHtmlLink":"Delete benchmark_mixtral_moe"}},{"before":"8c1c94ab4df2d7829100abd2cd08f7994988f483","after":"bc232fb1dba7429ecaae331042c6473fc0aba52e","ref":"refs/heads/woosuk-moe","pushedAt":"2024-05-20T17:50:00.000Z","pushType":"push","commitsCount":146,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Merge branch 'main' into woosuk-moe","shortMessageHtmlLink":"Merge branch 'main' into woosuk-moe"}},{"before":"da5a0b539d6a5fe0c0195513a797814d2c267540","after":"546a97ef691f242c899a5e0906d0e75f42694e95","ref":"refs/heads/main","pushedAt":"2024-05-20T17:45:06.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[Misc]: allow user to specify port in distributed setting (#4914)","shortMessageHtmlLink":"[Misc]: allow user to specify port in distributed setting (#4914)"}},{"before":"6287537a0c970bda1fc8b31f2bde1bcf2d26e151","after":"da5a0b539d6a5fe0c0195513a797814d2c267540","ref":"refs/heads/main","pushedAt":"2024-05-20T14:55:34.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"Remove marlin warning (#4918)","shortMessageHtmlLink":"Remove marlin warning (#4918)"}},{"before":"b57e6c59491ea7d60af413ad4a6455812b9c6c50","after":"6287537a0c970bda1fc8b31f2bde1bcf2d26e151","ref":"refs/heads/main","pushedAt":"2024-05-20T08:11:25.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"[Model] LLaVA model refactor 
(#4910)","shortMessageHtmlLink":"[Model] LLaVA model refactor (#4910)"}},{"before":"bb2624f5f6d0dfbeab9ab873bbf1b51ef5dee51c","after":null,"ref":"refs/heads/add-flash-attn","pushedAt":"2024-05-20T01:11:33.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"27ce85476e6b170c5c90c65ac5c3268911135766","after":"b57e6c59491ea7d60af413ad4a6455812b9c6c50","ref":"refs/heads/main","pushedAt":"2024-05-20T01:11:30.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[Kernel] Add flash-attn back (#4907)","shortMessageHtmlLink":"[Kernel] Add flash-attn back (#4907)"}},{"before":"555a499906795ae42cd4f7325843c585c9502b83","after":"bb2624f5f6d0dfbeab9ab873bbf1b51ef5dee51c","ref":"refs/heads/add-flash-attn","pushedAt":"2024-05-19T17:25:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Test with large num_blocks","shortMessageHtmlLink":"Test with large num_blocks"}},{"before":"f68470e803df575f294e67167b4b83adfe004cfa","after":"27ce85476e6b170c5c90c65ac5c3268911135766","ref":"refs/heads/main","pushedAt":"2024-05-19T15:37:34.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Kernel] Add marlin_24 unit tests (#4901)","shortMessageHtmlLink":"[Kernel] Add marlin_24 unit tests (#4901)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEUXB6NwA","startCursor":null,"endCursor":null}},"title":"Activity · vllm-project/vllm"}