diff --git a/benchmark/scatter.R b/benchmark/scatter.R index 10e099e4308..1574987aebe 100644 --- a/benchmark/scatter.R +++ b/benchmark/scatter.R @@ -79,6 +79,7 @@ if (!is.null(plot.filename)) { width=.1, na.rm=TRUE ); p = p + geom_point(); + p = p + geom_line(); p = p + ylab("rate of operations (higher is better)"); p = p + ggtitle(dat[1, 1]); ggsave(plot.filename, p); diff --git a/doc/guides/doc_img/scatter-plot.png b/doc/guides/doc_img/scatter-plot.png index de5358d5750..726129e6334 100644 Binary files a/doc/guides/doc_img/scatter-plot.png and b/doc/guides/doc_img/scatter-plot.png differ diff --git a/doc/guides/writing-and-running-benchmarks.md b/doc/guides/writing-and-running-benchmarks.md index 7fa5400b4ae..b6f8984afff 100644 --- a/doc/guides/writing-and-running-benchmarks.md +++ b/doc/guides/writing-and-running-benchmarks.md @@ -270,56 +270,64 @@ After generating the csv, a comparison table can be created using the the `--plot filename` option. ```console -$ cat scatter.csv | Rscript benchmark/scatter.R --xaxis chunk --category encoding --plot scatter-plot.png --log +$ cat scatter.csv | Rscript benchmark/scatter.R --xaxis chunkLen --category encoding --plot scatter-plot.png --log -aggregating variable: inlen +aggregating variable: inLen -chunk encoding mean confidence.interval - 16 ascii 1111933.3 221502.48 - 16 base64-ascii 167508.4 33116.09 - 16 base64-utf8 122666.6 25037.65 - 16 utf8 783254.8 159601.79 - 64 ascii 2623462.9 399791.36 - 64 base64-ascii 462008.3 85369.45 - 64 base64-utf8 420108.4 85612.05 - 64 utf8 1358327.5 235152.03 - 256 ascii 3730343.4 371530.47 - 256 base64-ascii 663281.2 80302.73 - 256 base64-utf8 632911.7 81393.07 - 256 utf8 1554216.9 236066.53 - 1024 ascii 4399282.0 186436.46 - 1024 base64-ascii 730426.6 63806.12 - 1024 base64-utf8 680954.3 68076.33 - 1024 utf8 1554832.5 237532.07 +chunkLen encoding rate confidence.interval + 16 ascii 1515855.1 334492.68 + 16 base64-ascii 403527.2 89677.70 + 16 base64-utf8 322352.8 70792.93 + 16 
utf16le 1714567.5 388439.81 + 16 utf8 1100181.6 254141.32 + 64 ascii 3550402.0 661277.65 + 64 base64-ascii 1093660.3 229976.34 + 64 base64-utf8 997804.8 227238.04 + 64 utf16le 3372234.0 647274.88 + 64 utf8 1731941.2 360854.04 + 256 ascii 5033793.9 723354.30 + 256 base64-ascii 1447962.1 236625.96 + 256 base64-utf8 1357269.2 231045.70 + 256 utf16le 4039581.5 655483.16 + 256 utf8 1828672.9 360311.55 + 1024 ascii 5677592.7 624771.56 + 1024 base64-ascii 1494171.7 227302.34 + 1024 base64-utf8 1399218.9 224584.79 + 1024 utf16le 4157452.0 630416.28 + 1024 utf8 1824266.6 359628.52 ``` -Because the scatter plot can only show two variables (in this case _chunk_ and -_encoding_) the rest is aggregated. Sometimes aggregating is a problem, this +Because the scatter plot can only show two variables (in this case _chunkLen_ +and _encoding_) the rest is aggregated. Sometimes aggregating is a problem; this can be solved by filtering. This can be done while benchmarking using the `--set` parameter (e.g. `--set encoding=ascii`) or by filtering results afterwards using tools such as `sed` or `grep`. In the `sed` case be sure to keep the first line since that contains the header information. 
```console -$ cat scatter.csv | sed -E '1p;/([^,]+, ){3}128,/!d' | Rscript benchmark/scatter.R --xaxis chunk --category encoding --plot scatter-plot.png --log +$ cat scatter.csv | sed -E '1p;/([^,]+, ){3}128,/!d' | Rscript benchmark/scatter.R --xaxis chunkLen --category encoding --plot scatter-plot.png --log -chunk encoding mean confidence.interval - 16 ascii 701285.96 21233.982 - 16 base64-ascii 107719.07 3339.439 - 16 base64-utf8 72966.95 2438.448 - 16 utf8 475340.84 17685.450 - 64 ascii 2554105.08 87067.132 - 64 base64-ascii 330120.32 8551.707 - 64 base64-utf8 249693.19 8990.493 - 64 utf8 1128671.90 48433.862 - 256 ascii 4841070.04 181620.768 - 256 base64-ascii 849545.53 29931.656 - 256 base64-utf8 809629.89 33773.496 - 256 utf8 1489525.15 49616.334 - 1024 ascii 4931512.12 165402.805 - 1024 base64-ascii 863933.22 27766.982 - 1024 base64-utf8 827093.97 24376.522 - 1024 utf8 1487176.43 50128.721 +chunkLen encoding rate confidence.interval + 16 ascii 1302078.5 71692.27 + 16 base64-ascii 338669.1 15159.54 + 16 base64-utf8 281904.2 20326.75 + 16 utf16le 1381515.5 58533.61 + 16 utf8 831183.2 33631.01 + 64 ascii 4363402.8 224030.00 + 64 base64-ascii 1036825.9 48644.72 + 64 base64-utf8 780059.3 60994.98 + 64 utf16le 3900749.5 158366.84 + 64 utf8 1723710.6 80665.65 + 256 ascii 8472896.1 511822.51 + 256 base64-ascii 2215884.6 104347.53 + 256 base64-utf8 1996230.3 131778.47 + 256 utf16le 5824147.6 234550.82 + 256 utf8 2019428.8 100913.36 + 1024 ascii 8340189.4 598855.08 + 1024 base64-ascii 2201316.2 111777.68 + 1024 base64-utf8 2002272.9 128843.11 + 1024 utf16le 5789281.7 240642.77 + 1024 utf8 2025551.2 81770.69 ``` ![compare tool boxplot](doc_img/scatter-plot.png)