| roseinfer |
201.13 |
12872.49 |
64362.44 |
0.636 |
| roseinfer (+idle keepalive) |
201.53 |
12897.97 |
64489.86 |
0.635 |
| roseinfer (in-proc) |
204.11 |
13062.83 |
65314.13 |
0.627 |
| roseinfer (in-proc, +pprio1, +eager prefill, +query decode) |
204.02 |
13057.59 |
65287.94 |
0.627 |
| roseinfer (+eager prefill) |
201.03 |
12865.71 |
64328.54 |
0.637 |
| roseinfer (+pprio1) |
203.70 |
13036.67 |
65183.33 |
0.628 |
| roseinfer (+pprio1, +eager prefill, +query decode, +idle keepalive) |
200.86 |
12854.83 |
64274.13 |
0.637 |
| SGLang |
243.20 |
15564.48 |
77822.40 |
0.526 |
| TensorRT-LLM |
248.69 |
15916.24 |
79581.21 |
0.515 |
| vLLM |
140.44 |
8988.14 |
44940.70 |
0.911 |