Skip to content

Commit

Permalink
Merge pull request #181 from adam-szymanski/updated_oxla
Browse files Browse the repository at this point in the history
Updated Oxla results
  • Loading branch information
melvynator authored May 28, 2024
2 parents c9ec927 + 0700588 commit b95b0e9
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 157 deletions.
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@
,{"system":"Motherduck","date":"2024-01-27","machine":"cloud","cluster_size":1,"tags":["C++","column-oriented","serverless"],"load_time":4122,"data_size":25332035584,"result":[[0.21968004433438182,0.3162029208615422,0.21604336006566882],[0.33489555725827813,0.29951094510033727,0.21411490021273494],[0.4648742200806737,0.3040045131929219,0.30952421203255653],[0.6499226209707558,0.22191201290115714,0.2888053017668426],[0.7704195859842002,0.921060211956501,0.819766622968018],[1.9447067943401635,1.2225632020272315,1.3308632392436266],[0.2486483482643962,0.22854855004698038,0.20795917278155684],[0.22081567207351327,0.2213178831152618,0.2327750907279551],[1.3735258071683347,1.2279518628492951,1.0233631990849972],[1.277890115045011,1.3313516019843519,1.1260047918185592],[0.6073150201700628,0.2593280617147684,0.3234165138565004],[0.34701970824971795,0.32168390695005655,0.3142702719196677],[1.0892055933363736,1.1165755479596555,0.8717481149360538],[1.5503880502656102,1.6257044719532132,1.464306827634573],[0.9123282572254539,0.7499142647720873,0.8189653856679797],[0.9769321829080582,0.8805147060193121,0.8598887426778674],[1.8943038629367948,2.4523583548143506,2.457844952121377],[2.4018522929400206,2.1315763420425355,2.188034536782652],[7.007666043005884,5.863597965799272,5.461284582037479],[0.24364612391218543,0.325953362043947,0.3064295807853341],[15.391365340910852,0.8924880139529705,0.8800020259805024],[1.0099374479614198,0.7060354948043823,0.7063306840136647],[8.954986380413175,4.095173366833478,3.279917892999947],[23.613256133161485,19.25096853263676,23.278139277826995],[1.5025351103395224,0.7977945390157402,0.6170151601545513],[0.46431142510846257,0.4349544760771096,0.36817489471286535],[0.5523030371405184,0.5110767548903823,0.3649540198966861],[4.98280710587278,2.0472186598926783,2.6902666091918945],[6.719612340908498,4.538661384955049,4.369533614721149],[3.358773229178041,3.1345412940718234,3.100874589756131],[2.000370934139937,1.3284917967393994,1.2289274740032852],[3
.7089375513605773,2.681138473097235,1.91789041319862],[7.777549963910133,7.958697326015681,7.435122138820589],[9.89957385417074,6.938831991981715,5.757995248306543],[5.393568014726043,5.632281670346856,5.551704770885408],[1.4525640471838415,0.8954971083439887,0.9103033309802413],[0.22383009875193238,0.2152935261838138,0.21353887394070625],[0.2124440912157297,0.20737810991704464,0.20923770079389215],[0.2175677828490734,0.23641109932214022,0.22010038886219263],[0.24250942608341575,0.21723396237939596,0.2134688631631434],[0.21473077218979597,0.2085639163851738,0.251109620090574],[0.21749612782150507,0.24898552894592285,0.20936914114281535],[0.2193519319407642,0.2155164610594511,0.20863541215658188]],"source":"motherduck/results/result.json"}
,{"system":"MySQL (MyISAM)","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL compatible"],"load_time":2512,"data_size":121588958061,"result":[[0,0,0],[283.32,276.83,274.52],[276.93,278.29,283.27],[28.83,23.63,21.55],[46.41,40.81,40.93],[467.04,467.39,469.08],[31.02,25.89,24.2],[277.89,275.3,277.3],[329.34,325.8,325.35],[342.86,338.43,336.95],[282.03,279.87,281.22],[277.74,282.68,282],[335.66,334.83,336.44],[305.24,310.39,307.3],[337.41,338.52,342.94],[308.66,307.34,306.27],[738.38,748.44,740.75],[738.75,734.01,738.25],[867.01,872.92,868.84],[25.65,20.61,18.46],[312.39,313.67,306.66],[301.66,305.12,308.01],[298.12,298.44,312.4],[311.34,309.9,311.85],[281.87,278.5,275],[277.46,277.46,277.46],[280.75,278.04,281.76],[263.9,417.39,406.88],[707.21,711.96,705],[668.1,668.33,665.96],[330.31,333.36,331.94],[506.57,506.18,500.53],[2604.49,2681.96,2703.12],[830.65,832.88,831.14],[831.98,830.46,833.41],[608.49,608.51,613.68],[4.56,4.13,4.16],[3.8,3.8,3.7],[1.65,1.45,1.46],[6.33,5.14,6.15],[1.6,1.41,1.41],[1.56,1.42,1.39],[7.04,1.17,1.13]],"source":"mysql-myisam/results/c6a.4xlarge.json"}
,{"system":"MySQL","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL compatible"],"load_time":9472,"data_size":171953585825,"result":[[339.77,339.88,339.77],[364.91,371.86,367.55],[366.2,368.91,389.66],[364.39,377.53,571.45],[377.69,390.02,384.86],[569.48,576.51,574.68],[367.4,368.23,370.41],[371.29,384.02,613.22],[478.85,683.22,495.68],[489.9,635.96,662.43],[386.07,396.49,640.15],[389.13,412.55,444.12],[447.97,455.54,448.06],[423.22,845.44,813.6],[452.48,460.07,453.98],[577.54,623.21,586.49],[852.07,856.36,862.66],[838.09,848.92,851.12],[1006.37,1011.16,1023.17],[369.76,375.61,415.28],[412.45,419.9,456.62],[411.65,432.88,482.2],[412.73,420.73,429.5],[551.16,577.62,545.45],[382.89,394.76,386.37],[380.9,391.4,385.05],[385.3,394.67,460.32],[388.95,394.7,387.21],[800.33,807.9,807.11],[706.03,745.27,718.9],[450.9,489.59,530.97],[625.5,651.93,647.32],[2721.13,2792.12,2819.26],[945.9,954.94,957.54],[945.42,953.78,965.16],[684.36,716.29,708.75],[10.01,3.79,3.77],[7.48,3.32,3.27],[5.09,0.98,0.96],[8.7,4.77,4.68],[4.82,0.76,0.74],[4.46,0.77,0.75],[7.04,1.17,1.13]],"source":"mysql/results/c6a.4xlarge.json"}
,{"system":"Oxla.com","date":"2024-01-31","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"ingests data only from non-compressed cvs. data should be ingested in chunks < ~5Gb","tags":["C","analytical","somewhat PostgreSQL compatible"],"load_time":584.88,"data_size":83948840996,"result":[[0.050129,0.022855,0.022903],[0.057503,0.015481,0.015516],[0.094277,0.025541,0.026299],[0.16258,0.024306,0.023829],[0.039942,0.021789,0.02218],[0.1374,0.101147,0.099119],[0.059388,0.025719,0.0264],[0.033712,0.017465,0.0184],[0.135859,0.096702,0.095512],[0.171016,0.155303,0.154772],[0.190669,0.182083,0.183751],[0.22111,0.18886,0.193252],[2.52142,2.55214,2.54007],[2.6807,2.5843,2.63534],[2.75055,2.68715,2.67322],[1.52986,1.47056,1.49706],[8.22161,7.84746,7.57008],[7.06027,7.33078,6.88351],[10.5416,10.2955,10.2149],[0.053765,0.02466,0.023807],[2.67538,2.54948,2.71025],[null,null,null],[null,null,null],[8.75207,8.09075,8.26788],[0.359349,0.31487,0.312975],[0.331405,0.339576,0.332647],[0.351841,0.340408,0.325682],[1.62115,1.67895,1.74872],[null,null,null],[1.58662,1.58356,1.58278],[0.632047,0.553994,0.586148],[1.15181,1.14935,1.10892],[13.1952,13.0677,12.6957],[15.9073,13.0187,12.6488],[12.3274,13.7521,14.7386],[1.27877,1.35705,1.24621],[1.011,0.955655,0.967898],[3.46346,0.841062,0.878294],[0.867865,0.885799,0.812358],[4.55556,1.79754,1.75293],[4.00301,0.064817,0.062893],[0.163954,0.063076,0.060033],[null,null,null]],"source":"oxla/results/c6a.4xlarge.json"}
,{"system":"Oxla","date":"2024-04-09","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"Ingests data only from non-compressed cvs.","tags":["C","analytical","somewhat PostgreSQL compatible"],"load_time":497.815,"data_size":17394972923,"result":[[3.112892,0.068225,0.049215],[1.476993,0.068502,0.01352],[1.532504,0.015794,0.01899],[1.541791,0.043208,0.090244],[1.424205,1.114138,1.079011],[1.546764,1.340306,1.339957],[0.202873,0.009787,0.008214],[1.017122,0.01247,0.010861],[1.790766,1.816432,1.681662],[2.06058,2.051205,2.05747],[0.166164,0.149605,0.147586],[0.33821,0.15334,0.15212],[0.968408,0.975795,0.932127],[1.641231,1.648973,1.69153],[1.039926,1.021776,1.015062],[1.059569,1.038191,1.016849],[2.930077,2.780725,2.786122],[2.7766,2.745188,2.827054],[5.474963,5.455883,5.462812],[0.069049,0.037876,0.030425],[5.294758,2.818725,2.803313],[null,null,null],[null,null,null],[21.034479,18.253271,6.146486],[0.17394,0.151798,0.146398],[0.180155,0.170271,0.177003],[0.22494,0.216158,0.216051],[0.978861,0.973059,0.964485],[null,null,null],[0.030928,0.02037,0.020366],[0.408601,0.412485,0.408602],[0.875709,0.743332,0.704842],[7.962516,7.867736,7.594272],[6.209667,5.892066,5.963681],[5.931634,5.947336,6.005506],[0.577314,0.583573,0.545736],[0.126127,0.090768,0.094307],[0.110712,0.04149,0.039939],[0.060824,0.043637,0.030213],[0.322545,0.204934,0.185178],[0.121207,0.011082,0.011699],[0.069138,0.012728,0.014108],[0.030538,0.028048,0.030625]],"source":"oxla/results/c6a.4xlarge.json"}
,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 1500gb gp2","cluster_size":1,"comment":"The results for (c6a.4xlarge, 500gb gp2) are also submitted here for easy comparison with Elasticsearch","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.1500gb.json"}
,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.json"}
,{"system":"Pinot","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"It successfully loaded only 94465149 out of 99997497 records. Some queries returned NullPointerException. The loading process is painful - splitting to 100 pieces required. It does not correctly report errors on data loading, the results may be incorrect.","tags":["Java","column-oriented"],"load_time":2032,"data_size":null,"result":[[0.002,0.001,0.001],[0.186,0.186,0.185],[0.251,0.276,0.258],[0.475,0.281,0.238],[3.907,3.655,3.633],[30.471,14.687,14.93],[null,null,null],[0.135,0.134,0.148],[3.039,2.902,2.938],[3.159,3.212,3.225],[4.217,4.197,4.384],[4.145,4.124,4.121],[2.989,3.145,3.18],[6.402,6.886,6.374],[3.245,3.35,3.129],[5.112,5.027,5.141],[5.509,5.279,5.257],[0.865,0.856,0.829],[null,null,null],[0.017,0.015,0.015],[54.348,19.562,19.128],[null,null,null],[76.596,74.719,14.228],[7.441,5.77,5.87],[0.376,0.327,0.286],[7.689,0.395,1.281],[3.434,0.499,0.5],[27.679,2.378,2.393],[null,null,null],[2.221,2.227,2.167],[4.941,4.639,4.565],[5.641,5.37,5.007],[5.295,5.006,5.357],[5.28,5.21,5.105],[6.231,6.238,6.385],[5.918,5.933,5.934],[0.26,0.202,0.21],[0.364,0.072,0.069],[0.042,0.034,0.035],[1.483,0.686,0.651],[0.113,0.071,0.079],[0.042,0.051,0.037],[null,null,null]],"source":"pinot/results/c6a.4xlarge.json"}
Expand Down
109 changes: 15 additions & 94 deletions oxla/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#!/bin/bash -e

# cleanup
sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# docker
sudo rm /usr/share/keyrings/docker-archive-keyring.gpg
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
Expand All @@ -15,109 +10,35 @@ sudo apt install -y docker-ce
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential

# ruby and fake S3
sudo apt install -y ruby-full
sudo gem install bundler fakes3 webrick sorted_set

# install aws cli tools
sudo rm /usr/local/bin/aws
sudo rm /usr/local/bin/aws_completer
sudo rm -rf /usr/local/aws-cli
sudo rm -rf ~/.aws/ aws

curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install --update
/usr/local/bin/aws --version
rm -f awscliv2.zip

# configure aws
mkdir -p ~/.aws
echo -e "[default]\nregion = none" > ~/.aws/config
echo -e "[default]\naws_access_key_id = none\naws_secret_access_key = none" > ~/.aws/credentials

# run fake S3
sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sudo mkdir -p /mnt/fakes3_root
sudo chmod a+rw /mnt/fakes3_root -R
fakes3 -r /mnt/fakes3_root -H 0.0.0.0 -p 4569 --license license.pdf > /dev/null 2>&1 &
sleep 10 # waiting for container start

# download dataset
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz
chmod 777 ~ hits.tsv

# convert dataset to csv
rm -f part_*.csv
curl https://clickhouse.com/ | sh
./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
rm hits.tsv

# prepare digestible parts (5m rows each) of hits.csv
split -l 5000000 hits.csv part_
for file in part_*; do mv "$file" "${file}.csv"; done

# upload dataset (prepared parts) to fake S3 bucket
aws s3 mb s3://my-new-bucket --endpoint-url http://localhost:4569

for file in part_*.csv; do
echo "Processing file: $file"

# copy the file to the S3 bucket
aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569 > /dev/null 2>&1

# clean-up tmp parts left after upload
TMPPARTS=$(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569)
echo $TMPPARTS | tr ' ' '\n' | grep . | parallel -j16 aws s3api delete-object --bucket my-new-bucket --key {} --endpoint-url http://localhost:4569
done
echo "Download dataset."
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
echo "Unpack dataset."
gzip -d hits.csv.gz
chmod 777 ~ hits.csv
mkdir data
mv hits.csv ~/data

# get and configure Oxla image
echo "Install and run Oxla."

sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

sudo docker run --rm -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)

sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
sudo docker rmi oxla-configured-image:latest > /dev/null 2>&1 || echo "" > /dev/null
sudo docker commit oxlacontainer oxla-configured-image

sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# run oxla
sudo docker run --rm --net=host --name oxlacontainer oxla-configured-image > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)
sudo docker run --rm -p 5432:5432 -v ~/data:/data --name oxlacontainer public.ecr.aws/oxla/release:1.20.0-beta > /dev/null 2>&1 &
sleep 30 # waiting for container start and db initialisation (leader election, etc.)

# create table and ingest data
export PGCLIENTENCODING=UTF8
psql -h localhost -p 5432 -U postgres -t -c 'CREATE SCHEMA test'
psql -h localhost -p 5432 -U postgres -d test -t < create.sql

for file in part_*.csv; do
echo "Processing file: $file"
psql -h localhost -p 5432 -U postgres -d test -t -c '\timing' -c "COPY hits FROM 's3://my-new-bucket/$file';"
aws s3api delete-object --bucket my-new-bucket --key "$file" --endpoint-url http://localhost:4569
done
psql -h localhost -t < create.sql
echo "Insert data."
psql -h localhost -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"

# get ingested data size
echo "data size after ingest:"
sudo docker exec oxlacontainer /bin/bash -c "du -s oxla/data"
psql -h localhost -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"

# wait for merges to finish
sleep 30

# kill fake S3 and remove its data
ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sleep 60

# run benchmark
echo "running benchmark..."
./run.sh 2>&1 | tee log.txt

# format results
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
./run.sh
1 change: 1 addition & 0 deletions oxla/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,4 @@ CREATE TABLE hits
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
CREATE INDEX hits ON hits(CounterID, EventDate, UserID, EventTime, WatchID);
Loading

0 comments on commit b95b0e9

Please sign in to comment.