From f7a43dada156274792047f0053e77a3082743f17 Mon Sep 17 00:00:00 2001 From: immerrr again Date: Mon, 9 Mar 2015 09:42:21 +0000 Subject: [PATCH] benchmark: more features & fixes - add support for preexisting file server instance (--file-server) - add HTML endpoint benchmarks (--render-type html) - make --sites-dir required - dump output in proper JSON --- splash/benchmark/benchmark.py | 106 ++++++++++++++++++++++------- splash/benchmark/download_sites.py | 9 ++- splash/benchmark/file_server.py | 71 ++++++++++++------- 3 files changed, 135 insertions(+), 51 deletions(-) diff --git a/splash/benchmark/benchmark.py b/splash/benchmark/benchmark.py index e99380008..ca22db518 100755 --- a/splash/benchmark/benchmark.py +++ b/splash/benchmark/benchmark.py @@ -8,6 +8,7 @@ """ +import json import logging import os import random @@ -20,18 +21,16 @@ import sys import requests -from splash.benchmark.file_server import serve_files -from splash.tests.utils import SplashServer def make_render_png_req(splash, params): - """Prepare request for render.png endpoint.""" + """Make PNG render request via render.png endpoint.""" return {'url': splash.url('render.png'), 'params': params} def make_render_json_req(splash, params): - """Prepare request for render.json endpoint.""" + """Make PNG render request via JSON endpoint.""" json_params = params.copy() json_params['png'] = 1 return {'url': splash.url('render.json'), @@ -39,7 +38,7 @@ def make_render_json_req(splash, params): def make_render_png_lua_req(splash, params): - """Prepare request for execute endpoint.""" + """Make PNG render request via Lua execute endpoint.""" lua_params = params.copy() lua_params['lua_source'] = """ function main(splash) @@ -57,11 +56,51 @@ def make_render_png_lua_req(splash, params): 'params': lua_params} -REQ_FACTORIES = [ - make_render_png_req, - make_render_json_req, - make_render_png_lua_req, -] +def make_render_html_req(splash, params): + """Make HTML render request via render.html endpoint.""" + return 
{'url': splash.url('render.html'), + 'params': params} + + +def make_render_html_json_req(splash, params): + """Make HTML render request via JSON endpoint.""" + json_params = params.copy() + json_params['html'] = 1 + return {'url': splash.url('render.json'), + 'params': json_params} + + +def make_render_html_lua_req(splash, params): + """Make HTML render request via Lua execute endpoint.""" + lua_params = params.copy() + lua_params['lua_source'] = """ +function main(splash) + assert(splash:go(splash.args.url)) + if splash.args.wait then + assert(splash:wait(splash.args.wait)) + end + splash:set_result_content_type("text/html; charset=UTF-8") + return splash:html{} +end +""" + return {'url': splash.url('execute'), + 'params': lua_params} + + +#: Same resource may be rendered by various endpoints with slightly varying +#: parameter combinations. Request factories set those combinations up. +REQ_FACTORIES = { + 'png': [ + make_render_png_req, + make_render_json_req, + make_render_png_lua_req, + ], + 'html': [ + make_render_html_req, + make_render_html_json_req, + make_render_html_lua_req, + ], +} #: Port at which static pages will be served. 
@@ -86,15 +125,20 @@ def make_render_png_lua_req(splash, params): help='Request thread count') parser.add_argument('--request-count', type=int, default=10, help='Benchmark request count') -parser.add_argument('--sites-dir', type=str, default='sites', +parser.add_argument('--sites-dir', type=str, default='sites', required=True, help='Directory with downloaded sites') +parser.add_argument('--file-server', metavar='HOST:PORT', + help='Use existing file server instance available at HOST:PORT') parser.add_argument('--splash-server', metavar='HOST:PORT', help='Use existing Splash instance available at HOST:PORT') parser.add_argument('--out-file', type=FileType(mode='w'), default=sys.stdout, help='Write detailed request information in this file') +parser.add_argument('--render-type', choices=('html', 'png'), default='png', + help=('Type of rendering to benchmark' + ' (either "html" or "png")')) -def generate_requests(splash, args): +def generate_requests(splash, file_server, args): log = logging.getLogger('generate_requests') log.info("Using pRNG seed: %s", args.seed) @@ -106,12 +150,14 @@ def generate_requests(splash, args): for p in pages: log.info("Using page for benchmark: %s", p) + request_factories = REQ_FACTORIES[args.render_type] + rng = random.Random(args.seed) for i in xrange(args.request_count): page = rng.choice(pages) width, height = rng.choice(WIDTH_HEIGHT) - req_factory = rng.choice(REQ_FACTORIES) - url = 'http://localhost:%d/%s' % (PORT, page) + req_factory = rng.choice(request_factories) + url = file_server.url(page) params = {'url': url, 'render_all': 1, 'wait': 0.1, 'width': width, 'height': height} log.debug("Req factory: %s, params: %s", req_factory, params) @@ -145,7 +191,7 @@ def invoke_request(invoke_args): 'height': kwargs['params']['height']} -class ExistingSplashWrapper(object): +class ExistingServerWrapper(object): """Wrapper for pre-existing Splash instance.""" def __init__(self, server): self.server = server @@ -165,25 +211,36 @@ def 
__exit__(self, *args): def main(): log = logging.getLogger("benchmark") args = parser.parse_args() - logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) + (logging.getLogger('requests.packages.urllib3.connectionpool') + .setLevel(logging.WARNING)) logging.basicConfig(level=logging.DEBUG) if args.splash_server: - splash = ExistingSplashWrapper(args.splash_server) + splash = ExistingServerWrapper(args.splash_server) else: + from splash.tests.utils import SplashServer splash = SplashServer( logfile=SPLASH_LOG, extra_args=['--disable-lua-sandbox', '--disable-xvfb', '--max-timeout=600']) - with splash, serve_files(port=PORT, directory=args.sites_dir, logfile=FILESERVER_LOG): + if args.file_server: + file_server = ExistingServerWrapper(args.file_server) + else: + from splash.benchmark.file_server import FileServerSubprocess + file_server = FileServerSubprocess(port=PORT, + path=args.sites_dir, + logfile=FILESERVER_LOG) + + with splash, file_server: log.info("Servers are up, starting benchmark...") start_res = requests.get( splash.url('execute'), params={'lua_source': GET_PERF_STATS_SCRIPT}).json() start_time = time() - results = parallel_map(invoke_request, generate_requests(splash, args), + results = parallel_map(invoke_request, + generate_requests(splash, file_server, args), args.thread_count) end_time = time() end_res = requests.get( @@ -191,11 +248,12 @@ def main(): params={'lua_source': GET_PERF_STATS_SCRIPT}).json() log.info("Writing stats to %s", args.out_file.name) - args.out_file.write(pformat({ - 'maxrss': end_res['maxrss'], - 'cputime': end_res['cputime'] - start_res['cputime'], - 'walltime': end_time - start_time, - 'requests': results})) + args.out_file.write(json.dumps( + {'maxrss': end_res['maxrss'], + 'cputime': end_res['cputime'] - start_res['cputime'], + 'walltime': end_time - start_time, + 'requests': results}, + indent=2)) log.info("Splash max RSS: %s B", end_res['maxrss']) log.info("Splash CPU time elapsed: %.2f 
sec", end_res['cputime'] - start_res['cputime']) diff --git a/splash/benchmark/download_sites.py b/splash/benchmark/download_sites.py index e50fee33f..07a9577de 100755 --- a/splash/benchmark/download_sites.py +++ b/splash/benchmark/download_sites.py @@ -10,12 +10,13 @@ import os import re import subprocess +import logging from urlparse import urlsplit from lxml import html import w3lib.html -from splash.benchmark.file_server import serve_files +from splash.benchmark.file_server import FileServerSubprocess from splash.tests.stress import lua_runonce SCRIPT_HTML = """ @@ -91,6 +92,10 @@ def download_sites(sites_dir, sites): def main(): args = parser.parse_args() + (logging.getLogger('requests.packages.urllib3.connectionpool') + .setLevel(logging.WARNING)) + logging.basicConfig(level=logging.DEBUG) + logging.info("Starting site download suite") try: os.makedirs(args.sites_dir) except OSError as e: @@ -98,7 +103,7 @@ def main(): raise elif not os.path.isdir(args.sites_dir): raise RuntimeError("Not a directory: %s" % args.sites_dir) - with serve_files(PORT, args.sites_dir): + with FileServerSubprocess(port=PORT, path=args.sites_dir): download_sites(args.sites_dir, [ 'http://www.wikipedia.org', 'http://www.google.com', diff --git a/splash/benchmark/file_server.py b/splash/benchmark/file_server.py index 5e4cb3acf..bb0549ce8 100755 --- a/splash/benchmark/file_server.py +++ b/splash/benchmark/file_server.py @@ -7,6 +7,7 @@ import subprocess import time import sys +import logging from contextlib import contextmanager from twisted.internet import reactor @@ -18,44 +19,64 @@ parser = argparse.ArgumentParser("") parser.add_argument('--port', type=int, default=8806) -parser.add_argument('--directory', help='Directory to be served', default='.') -parser.add_argument('--logfile', default=sys.stderr, type=argparse.FileType(mode='w'), +parser.add_argument('--path', help='Path to be served', default='.') +parser.add_argument('--logfile', default=sys.stderr, + 
type=argparse.FileType(mode='w'), help='File to write logs to') -@contextmanager -def serve_files(port, directory, logfile=None): + +class FileServerSubprocess(object): + logger = logging.getLogger('file_server') + """Serve files from specified directory statically in a subprocess.""" - # command = ['twistd', - # '-n', # don't daemonize - # 'web', # start web component - # '--port', str(int(port)), - # '--path', os.path.abspath(directory), ] - # if logfile is not None: - # command += ['--logfile', logfile] - command = ['python', __file__, - '--port', str(int(port)), - '--directory', os.path.abspath(directory)] - if logfile is not None: - command += ['--logfile', logfile] - site_server = subprocess.Popen(command) - try: + def __init__(self, port, path, logfile=None): + self.port = port + self.path = path + self.logfile = logfile + self.server = 'http://localhost:%d' % port + + def url(self, endpoint): + return self.server + '/' + endpoint + + def __enter__(self): + # command = ['twistd', + # '-n', # don't daemonize + # 'web', # start web component + # '--port', str(int(port)), + # '--path', os.path.abspath(directory), ] + # if logfile is not None: + # command += ['--logfile', logfile] + command = ['python', __file__, + '--port', str(int(self.port)), + '--path', os.path.abspath(self.path)] + if self.logfile is not None: + command += ['--logfile', self.logfile] + self.logger.info("Starting file server subprocess: %s", command) + self._site_server = subprocess.Popen(command) # It might take some time to bring up the server, wait for up to 10s. 
for i in xrange(100): try: - requests.get('http://localhost:%d' % port) + self.logger.info("Checking if file server is active") + requests.get(self.url('')) + break except requests.ConnectionError: time.sleep(0.1) - else: - break - yield - finally: - site_server.terminate() + else: + msg = "File server subprocess startup timed out" + if self.logfile: + with open(self.logfile, 'r') as log_f: + msg += ", logs:\n" + log_f.read() + raise RuntimeError(msg) + + def __exit__(self, *args): + self._site_server.kill() + self._site_server.wait() def main(): args = parser.parse_args() startLogging(args.logfile) - resource = File(os.path.abspath(args.directory)) + resource = File(os.path.abspath(args.path)) site = Site(resource) reactor.listenTCP(args.port, site) reactor.run()