| 0.40 |
roseinfer |
9.41/15.26/26.44 |
1.14/1.36/6.11 |
1.10/1.35/2.50 |
81.58/95.43/423.38 |
| 0.40 |
roseinfer (in-proc) |
9.83/16.10/90.88 |
1.32/1.54/8.56 |
1.26/1.51/3.31 |
91.71/108.07/658.39 |
| 0.40 |
roseinfer (no affinity split) |
9.49/16.02/28.00 |
1.31/1.53/6.28 |
1.25/1.50/3.08 |
91.42/106.16/434.39 |
| 0.40 |
roseinfer (no cmd budget) |
9.80/15.74/38.42 |
1.31/1.53/8.37 |
1.25/1.51/3.33 |
91.80/105.55/566.68 |
| 0.40 |
roseinfer (no fill target) |
9.63/15.87/28.45 |
1.31/1.53/6.65 |
1.25/1.50/3.05 |
91.66/106.15/457.95 |
| 0.40 |
roseinfer (no flat events) |
9.75/15.63/89.14 |
1.15/1.38/11.57 |
1.11/1.39/2.99 |
82.22/97.21/859.51 |
| 0.40 |
roseinfer (no thread cap) |
9.85/15.67/28.32 |
1.30/1.52/6.45 |
1.25/1.51/2.86 |
91.32/105.67/447.41 |
| 0.40 |
roseinfer (queue ipc) |
10.15/16.11/28.37 |
1.30/1.54/6.63 |
1.24/1.51/3.17 |
91.66/105.72/458.82 |
| 0.40 |
SGLang |
7.93/9.37/11.55 |
1.08/1.20/1.36 |
1.07/1.27/2.45 |
76.58/83.76/90.66 |
| 0.40 |
TensorRT-LLM |
5.62/6.25/7.68 |
1.37/1.40/1.48 |
1.36/1.49/2.08 |
92.16/93.77/95.13 |
| 0.40 |
vLLM |
9.23/10.46/12.32 |
1.48/1.76/1.93 |
1.42/1.74/3.30 |
101.65/118.27/126.76 |
| 0.80 |
roseinfer |
5.24/5.90/6.52 |
1.11/1.19/1.31 |
1.09/1.27/1.62 |
75.31/80.83/88.76 |
| 0.80 |
roseinfer (in-proc) |
3.99/4.91/5.63 |
1.28/1.38/1.47 |
1.23/1.43/2.34 |
84.86/91.43/97.36 |
| 0.80 |
roseinfer (no affinity split) |
5.17/6.11/6.71 |
1.26/1.35/1.43 |
1.22/1.43/1.80 |
84.00/90.91/96.98 |
| 0.80 |
roseinfer (no cmd budget) |
5.27/6.14/6.88 |
1.26/1.36/1.47 |
1.23/1.43/1.85 |
84.69/91.66/99.10 |
| 0.80 |
roseinfer (no fill target) |
5.30/6.16/8.20 |
1.25/1.35/1.46 |
1.23/1.43/1.79 |
84.63/91.54/98.21 |
| 0.80 |
roseinfer (no flat events) |
5.28/6.08/6.85 |
1.11/1.19/1.31 |
1.10/1.28/1.66 |
75.55/81.07/88.41 |
| 0.80 |
roseinfer (no thread cap) |
5.26/6.33/7.30 |
1.26/1.35/1.46 |
1.22/1.43/1.80 |
84.85/91.25/97.93 |
| 0.80 |
roseinfer (queue ipc) |
5.51/6.28/6.78 |
1.26/1.36/1.47 |
1.22/1.44/1.84 |
85.02/91.67/98.56 |
| 0.80 |
SGLang |
8.69/10.13/14.38 |
1.08/1.17/1.32 |
1.07/1.26/2.04 |
76.85/83.18/92.86 |
| 0.80 |
TensorRT-LLM |
5.66/6.27/7.30 |
1.37/1.39/1.52 |
1.36/1.49/1.93 |
91.89/93.61/100.43 |
| 0.80 |
vLLM |
9.55/10.84/11.54 |
1.39/1.65/1.95 |
1.40/1.65/2.91 |
97.19/111.61/126.35 |
| 1.60 |
roseinfer |
5.43/5.97/6.53 |
1.14/1.20/1.28 |
1.12/1.28/1.57 |
76.89/81.90/86.42 |
| 1.60 |
roseinfer (in-proc) |
4.26/4.91/5.89 |
1.29/1.39/1.46 |
1.24/1.43/2.34 |
85.56/92.29/97.41 |
| 1.60 |
roseinfer (no affinity split) |
5.40/5.99/7.10 |
1.28/1.38/1.45 |
1.24/1.44/1.74 |
85.70/92.73/97.55 |
| 1.60 |
roseinfer (no cmd budget) |
5.48/6.13/7.74 |
1.27/1.37/1.43 |
1.24/1.44/1.75 |
85.87/92.52/97.28 |
| 1.60 |
roseinfer (no fill target) |
5.47/6.19/6.77 |
1.28/1.37/1.45 |
1.24/1.44/1.76 |
86.06/93.20/97.73 |
| 1.60 |
roseinfer (no flat events) |
5.45/6.17/6.58 |
1.14/1.21/1.28 |
1.12/1.29/1.61 |
76.71/82.17/86.98 |
| 1.60 |
roseinfer (no thread cap) |
5.39/6.13/7.18 |
1.27/1.37/1.44 |
1.24/1.43/1.72 |
85.59/92.52/97.66 |
| 1.60 |
roseinfer (queue ipc) |
5.72/6.41/8.21 |
1.27/1.37/1.44 |
1.24/1.44/1.76 |
85.69/92.53/97.26 |
| 1.60 |
SGLang |
9.15/10.56/14.80 |
1.11/1.19/1.27 |
1.10/1.27/1.96 |
79.25/84.32/89.76 |
| 1.60 |
TensorRT-LLM |
5.82/6.60/9.21 |
1.37/1.50/1.82 |
1.38/1.54/1.81 |
92.55/100.52/105.58 |
| 1.60 |
vLLM |
10.23/11.43/12.36 |
1.42/1.56/1.79 |
1.40/1.59/2.07 |
98.70/109.15/123.86 |