benchmark: more features & fixes
- add support for a pre-existing file server instance (--file-server)
- add HTML endpoint benchmarks (--render-type html)
- make --sites-dir required
- dump output in proper JSON
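
For illustration, the new options above might be combined in an invocation like the following (hosts, ports, file names, and the exact module invocation are assumptions, not taken from this commit):

    python -m splash.benchmark.benchmark \
        --sites-dir sites \
        --render-type html \
        --file-server localhost:8806 \
        --splash-server localhost:8050 \
        --out-file results.json

When --file-server or --splash-server is omitted, the script starts the corresponding server itself, as shown in main() below.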
immerrr committed Apr 6, 2015
1 parent 4d072a6 commit f7a43da
Showing 3 changed files with 135 additions and 51 deletions.
106 changes: 82 additions & 24 deletions splash/benchmark/benchmark.py
@@ -8,6 +8,7 @@
"""

import json
import logging
import os
import random
@@ -20,26 +21,24 @@
import sys

import requests
from splash.benchmark.file_server import serve_files
from splash.tests.utils import SplashServer


def make_render_png_req(splash, params):
"""Prepare request for render.png endpoint."""
"""Make PNG render request via render.png endpoint."""
return {'url': splash.url('render.png'),
'params': params}


def make_render_json_req(splash, params):
"""Prepare request for render.json endpoint."""
"""Make PNG render request via JSON endpoint."""
json_params = params.copy()
json_params['png'] = 1
return {'url': splash.url('render.json'),
'params': json_params}


def make_render_png_lua_req(splash, params):
"""Prepare request for execute endpoint."""
"""Make PNG render request via Lua execute endpoint."""
lua_params = params.copy()
lua_params['lua_source'] = """
function main(splash)
@@ -57,11 +56,51 @@ def make_render_png_lua_req(splash, params):
'params': lua_params}


REQ_FACTORIES = [
make_render_png_req,
make_render_json_req,
make_render_png_lua_req,
]
def make_render_html_req(splash, params):
"""Make HTML render request via render.html endpoint."""
return {'url': splash.url('render.html'),
'params': params}


def make_render_html_json_req(splash, params):
"""Make HTML render request via JSON endpoint."""
json_params = params.copy()
json_params['html'] = 1
return {'url': splash.url('render.json'),
'params': json_params}


def make_render_html_lua_req(splash, params):
"""Make HTML render request via Lua execute endpoint."""
lua_params = params.copy()
lua_params['lua_source'] = """
function main(splash)
assert(splash:go(splash.args.url))
if splash.args.wait then
assert(splash:wait(splash.args.wait))
end
splash:set_result_content_type("text/html; charset=UTF-8")
return splash:html{}
end
"""
return {'url': splash.url('execute'),
'params': lua_params}


#: Same resource may be rendered by various endpoints with slightly varying
#: parameter combinations. Request factories set those combinations up.
REQ_FACTORIES = {
'png': [
make_render_png_req,
make_render_json_req,
make_render_png_lua_req,
],
'html': [
make_render_html_req,
make_render_html_json_req,
make_render_html_lua_req,
],
}
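
Each factory returns a plain dict of url and params that the worker side (invoke_request, whose body is mostly elided in this diff) can hand to requests.get more or less verbatim. A minimal sketch, assuming a splash wrapper exposing url() as in the classes below:

    # hypothetical page name; real names come from files under --sites-dir
    params = {'url': file_server.url('example.html'), 'render_all': 1,
              'wait': 0.1, 'width': 800, 'height': 600}
    req = make_render_html_req(splash, params)
    # the worker then issues the HTTP call, roughly:
    resp = requests.get(req['url'], params=req['params'])
    resp.raise_for_status()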


#: Port at which static pages will be served.
@@ -86,15 +125,20 @@ def make_render_png_lua_req(splash, params):
help='Request thread count')
parser.add_argument('--request-count', type=int, default=10,
help='Benchmark request count')
parser.add_argument('--sites-dir', type=str, default='sites',
parser.add_argument('--sites-dir', type=str, default='sites', required=True,
help='Directory with downloaded sites')
parser.add_argument('--file-server', metavar='HOST:PORT',
help='Use existing file server instance available at HOST:PORT')
parser.add_argument('--splash-server', metavar='HOST:PORT',
help='Use existing Splash instance available at HOST:PORT')
parser.add_argument('--out-file', type=FileType(mode='w'), default=sys.stdout,
help='Write detailed request information in this file')
parser.add_argument('--render-type', choices=('html', 'png'), default='png',
help=('Type of rendering to benchmark'
' (either "html" or "png")'))


def generate_requests(splash, args):
def generate_requests(splash, file_server, args):
log = logging.getLogger('generate_requests')
log.info("Using pRNG seed: %s", args.seed)

@@ -106,12 +150,14 @@ def generate_requests(splash, args):
for p in pages:
log.info("Using page for benchmark: %s", p)

request_factories = REQ_FACTORIES[args.render_type]

rng = random.Random(args.seed)
for i in xrange(args.request_count):
page = rng.choice(pages)
width, height = rng.choice(WIDTH_HEIGHT)
req_factory = rng.choice(REQ_FACTORIES)
url = 'http://localhost:%d/%s' % (PORT, page)
req_factory = rng.choice(request_factories)
url = file_server.url(page)
params = {'url': url, 'render_all': 1, 'wait': 0.1,
'width': width, 'height': height}
log.debug("Req factory: %s, params: %s", req_factory, params)
@@ -145,7 +191,7 @@ def invoke_request(invoke_args):
'height': kwargs['params']['height']}


class ExistingSplashWrapper(object):
class ExistingServerWrapper(object):
"""Wrapper for pre-existing Splash instance."""
def __init__(self, server):
self.server = server
@@ -165,37 +211,49 @@ def __exit__(self, *args):
def main():
log = logging.getLogger("benchmark")
args = parser.parse_args()
logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING)
(logging.getLogger('requests.packages.urllib3.connectionpool')
.setLevel(logging.WARNING))
logging.basicConfig(level=logging.DEBUG)

if args.splash_server:
splash = ExistingSplashWrapper(args.splash_server)
splash = ExistingServerWrapper(args.splash_server)
else:
from splash.tests.utils import SplashServer
splash = SplashServer(
logfile=SPLASH_LOG,
extra_args=['--disable-lua-sandbox',
'--disable-xvfb',
'--max-timeout=600'])

with splash, serve_files(port=PORT, directory=args.sites_dir, logfile=FILESERVER_LOG):
if args.file_server:
file_server = ExistingServerWrapper(args.file_server)
else:
from splash.benchmark.file_server import FileServerSubprocess
file_server = FileServerSubprocess(port=PORT,
path=args.sites_dir,
logfile=FILESERVER_LOG)

with splash, file_server:
log.info("Servers are up, starting benchmark...")
start_res = requests.get(
splash.url('execute'),
params={'lua_source': GET_PERF_STATS_SCRIPT}).json()
start_time = time()
results = parallel_map(invoke_request, generate_requests(splash, args),
results = parallel_map(invoke_request,
generate_requests(splash, file_server, args),
args.thread_count)
end_time = time()
end_res = requests.get(
splash.url('execute'),
params={'lua_source': GET_PERF_STATS_SCRIPT}).json()

log.info("Writing stats to %s", args.out_file.name)
args.out_file.write(pformat({
'maxrss': end_res['maxrss'],
'cputime': end_res['cputime'] - start_res['cputime'],
'walltime': end_time - start_time,
'requests': results}))
args.out_file.write(json.dumps(
{'maxrss': end_res['maxrss'],
'cputime': end_res['cputime'] - start_res['cputime'],
'walltime': end_time - start_time,
'requests': results},
indent=2))
log.info("Splash max RSS: %s B", end_res['maxrss'])
log.info("Splash CPU time elapsed: %.2f sec",
end_res['cputime'] - start_res['cputime'])
9 changes: 7 additions & 2 deletions splash/benchmark/download_sites.py
@@ -10,12 +10,13 @@
import os
import re
import subprocess
import logging
from urlparse import urlsplit

from lxml import html

import w3lib.html
from splash.benchmark.file_server import serve_files
from splash.benchmark.file_server import FileServerSubprocess
from splash.tests.stress import lua_runonce

SCRIPT_HTML = """
@@ -91,14 +92,18 @@ def download_sites(sites_dir, sites):

def main():
args = parser.parse_args()
(logging.getLogger('requests.packages.urllib3.connectionpool')
.setLevel(logging.WARNING))
logging.basicConfig(level=logging.DEBUG)
logging.info("Starting site download suite")
try:
os.makedirs(args.sites_dir)
except OSError as e:
if e.errno != errno.EEXIST:
raise
elif not os.path.isdir(args.sites_dir):
raise RuntimeError("Not a directory: %s" % args.sites_dir)
with serve_files(PORT, args.sites_dir):
with FileServerSubprocess(port=PORT, path=args.sites_dir):
download_sites(args.sites_dir, [
'http://www.wikipedia.org',
'http://www.google.com',
71 changes: 46 additions & 25 deletions splash/benchmark/file_server.py
@@ -7,6 +7,7 @@
import subprocess
import time
import sys
import logging
from contextlib import contextmanager

from twisted.internet import reactor
@@ -18,44 +19,64 @@

parser = argparse.ArgumentParser("")
parser.add_argument('--port', type=int, default=8806)
parser.add_argument('--directory', help='Directory to be served', default='.')
parser.add_argument('--logfile', default=sys.stderr, type=argparse.FileType(mode='w'),
parser.add_argument('--path', help='Path to be served', default='.')
parser.add_argument('--logfile', default=sys.stderr,
type=argparse.FileType(mode='w'),
help='File to write logs to')

@contextmanager
def serve_files(port, directory, logfile=None):

class FileServerSubprocess(object):
logger = logging.getLogger('file_server')

"""Serve files from specified directory statically in a subprocess."""
# command = ['twistd',
# '-n', # don't daemonize
# 'web', # start web component
# '--port', str(int(port)),
# '--path', os.path.abspath(directory), ]
# if logfile is not None:
# command += ['--logfile', logfile]
command = ['python', __file__,
'--port', str(int(port)),
'--directory', os.path.abspath(directory)]
if logfile is not None:
command += ['--logfile', logfile]
site_server = subprocess.Popen(command)
try:
def __init__(self, port, path, logfile=None):
self.port = port
self.path = path
self.logfile = logfile
self.server = 'http://localhost:%d' % port

def url(self, endpoint):
return self.server + '/' + endpoint

def __enter__(self):
# command = ['twistd',
# '-n', # don't daemonize
# 'web', # start web component
# '--port', str(int(port)),
# '--path', os.path.abspath(directory), ]
# if logfile is not None:
# command += ['--logfile', logfile]
command = ['python', __file__,
'--port', str(int(self.port)),
'--path', os.path.abspath(self.path)]
if self.logfile is not None:
command += ['--logfile', self.logfile]
self.logger.info("Starting file server subprocess: %s", command)
self._site_server = subprocess.Popen(command)
# It might take some time to bring up the server, wait for up to 10s.
for i in xrange(100):
try:
requests.get('http://localhost:%d' % port)
self.logger.info("Checking if file server is active")
requests.get(self.url(''))
break
except requests.ConnectionError:
time.sleep(0.1)
else:
break
yield
finally:
site_server.terminate()
else:
msg = "File server subprocess startup timed out"
if self.logfile:
with open(self.logfile, 'r') as log_f:
msg += ", logs:\n" + log_f.read()
raise RuntimeError(msg)

def __exit__(self, *args):
self._site_server.kill()
self._site_server.wait()
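
A minimal usage sketch of the new wrapper, mirroring how benchmark.py and download_sites.py drive it (port, path, and logfile are hypothetical):

    server = FileServerSubprocess(port=8806, path='sites',
                                  logfile='fileserver.log')
    with server:
        # the subprocess is up once __enter__ returns; files under ./sites
        # are now served over HTTP
        print(server.url('example.html'))  # http://localhost:8806/example.html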


def main():
args = parser.parse_args()
startLogging(args.logfile)
resource = File(os.path.abspath(args.directory))
resource = File(os.path.abspath(args.path))
site = Site(resource)
reactor.listenTCP(args.port, site)
reactor.run()
