| 0.40 |
roseinfer |
9.75/16.00/27.43 |
1.35/1.52/1.99 |
1.28/1.52/2.74 |
173.78/198.34/260.21 |
| 0.40 |
roseinfer (+batch16, +chunk1024) |
9.79/16.07/25.90 |
1.34/1.52/1.87 |
1.28/1.52/2.66 |
174.38/195.33/245.96 |
| 0.40 |
roseinfer (+batch16, +chunk1024, +fast BT sync) |
10.20/15.70/25.39 |
1.34/1.51/2.00 |
1.27/1.52/2.70 |
173.08/196.82/252.69 |
| 0.40 |
roseinfer (+batch16, +chunk1024, +no item) |
9.96/16.21/29.11 |
1.35/1.54/2.02 |
1.28/1.54/2.90 |
174.70/197.06/264.71 |
| 0.40 |
SGLang |
7.80/9.39/14.89 |
1.10/1.21/1.35 |
1.07/1.28/2.85 |
144.13/156.65/170.35 |
| 0.40 |
TensorRT-LLM |
5.82/6.30/8.03 |
1.40/1.44/1.90 |
1.40/1.53/2.62 |
182.62/187.42/193.03 |
| 0.40 |
vLLM |
9.20/10.15/14.54 |
1.59/1.83/1.99 |
1.53/1.87/3.41 |
201.93/234.64/253.63 |
| 0.80 |
roseinfer |
5.14/6.20/7.04 |
1.27/1.35/1.38 |
1.24/1.42/1.78 |
162.81/175.66/179.54 |
| 0.80 |
roseinfer (+batch16, +chunk1024) |
5.31/6.42/6.99 |
1.27/1.34/1.37 |
1.24/1.42/1.79 |
162.17/175.22/179.87 |
| 0.80 |
roseinfer (+batch16, +chunk1024, +fast BT sync) |
5.23/6.29/6.78 |
1.27/1.34/1.38 |
1.24/1.42/1.75 |
161.71/175.11/181.09 |
| 0.80 |
roseinfer (+batch16, +chunk1024, +no item) |
5.22/6.40/7.87 |
1.27/1.35/1.37 |
1.25/1.42/1.78 |
162.60/175.41/179.97 |
| 0.80 |
SGLang |
8.63/9.83/14.13 |
1.07/1.15/1.27 |
1.06/1.21/2.02 |
142.70/149.64/160.67 |
| 0.80 |
TensorRT-LLM |
5.71/6.28/6.90 |
1.39/1.41/1.51 |
1.38/1.50/2.02 |
181.14/184.33/188.56 |
| 0.80 |
vLLM |
9.19/10.36/10.99 |
1.45/1.66/1.78 |
1.42/1.69/2.38 |
186.06/211.76/232.81 |
| 1.60 |
roseinfer |
5.18/5.92/6.62 |
1.26/1.34/1.41 |
1.23/1.38/1.70 |
160.91/172.25/184.12 |
| 1.60 |
roseinfer (+batch16, +chunk1024) |
5.25/6.24/6.82 |
1.26/1.33/1.41 |
1.23/1.39/1.66 |
159.65/172.29/179.65 |
| 1.60 |
roseinfer (+batch16, +chunk1024, +fast BT sync) |
5.27/6.16/6.67 |
1.26/1.34/1.41 |
1.23/1.40/1.69 |
161.18/172.93/184.10 |
| 1.60 |
roseinfer (+batch16, +chunk1024, +no item) |
5.24/6.09/6.86 |
1.26/1.35/1.41 |
1.23/1.40/1.70 |
161.13/174.54/184.79 |
| 1.60 |
SGLang |
8.87/9.98/14.93 |
1.06/1.16/1.34 |
1.05/1.20/2.02 |
142.46/151.47/166.25 |
| 1.60 |
TensorRT-LLM |
5.88/6.43/7.02 |
1.38/1.41/1.53 |
1.38/1.50/1.86 |
180.76/184.17/199.04 |
| 1.60 |
vLLM |
9.44/10.73/11.37 |
1.37/1.56/1.78 |
1.38/1.60/2.02 |
182.80/201.53/230.43 |