Diff
Modified: trunk/Tools/ChangeLog (90418 => 90419)
--- trunk/Tools/ChangeLog 2011-07-06 00:08:36 UTC (rev 90418)
+++ trunk/Tools/ChangeLog 2011-07-06 00:12:06 UTC (rev 90419)
@@ -7,6 +7,20 @@
2011-07-05 Dirk Pranke <dpra...@chromium.org>
+ nrwt: allow for multiple http shards
+ https://bugs.webkit.org/show_bug.cgi?id=63116
+
+ Reviewed by Tony Chang.
+
+ This modifies the sharding logic to support multiple http
+ shards, but for now we clamp to one shard until we can measure
+ the impact on performance and flakiness.
+
+ * Scripts/webkitpy/layout_tests/layout_package/manager.py:
+ * Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py:
+
+2011-07-05 Dirk Pranke <dpra...@chromium.org>
+
Re-land nrwt: make sharding tests needing locks less hard-coded
https://bugs.webkit.org/show_bug.cgi?id=63112
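The ChangeLog's "clamp to one shard" shows up below in Manager._max_locked_shards, whose FIXME suggests an eventual cap of roughly a quarter of the workers. As a minimal sketch (not part of the commit, which simply returns 1), here is what that proposed expression would yield:

    import math

    def max_locked_shards(num_workers):
        # Proposed ceiling from the patch's FIXME; not yet enabled in the
        # commit, pending perf and flakiness measurements.
        return max(int(math.ceil(num_workers / 4.0)), 1)

    for n in (1, 2, 4, 8, 16):
        print('%d -> %d' % (n, max_locked_shards(n)))  # 1, 1, 1, 2, 4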
Modified: trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager.py (90418 => 90419)
--- trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager.py 2011-07-06 00:08:36 UTC (rev 90418)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager.py 2011-07-06 00:12:06 UTC (rev 90419)
@@ -237,6 +237,21 @@
pass
+class TestShard(object):
+ """A test shard is a named list of TestInputs."""
+
+ # FIXME: Make this class visible; it is used by workers as well.
+ def __init__(self, name, test_inputs):
+ self.name = name
+ self.test_inputs = test_inputs
+
+ def __repr__(self):
+ return "TestShard(name='%s', test_inputs=%s'" % (self.name, self.test_inputs)
+
+ def __eq__(self, other):
+ return self.name == other.name and self.test_inputs == other.test_inputs
+
+
class Manager(object):
"""A class for managing running a series of tests on a series of layout
test files."""
@@ -535,68 +550,145 @@
return self._expectations.has_modifier(test_file,
test_expectations.SLOW)
- def _shard_tests(self, test_files, use_real_shards):
+ def _shard_tests(self, test_files, num_workers, fully_parallel):
"""Groups tests into batches.
This helps ensure that tests that depend on each other (aka bad tests!)
continue to run together, as most cross-test dependencies tend to
- occur within the same directory. If use_real_shards is False, we
- put each (non-HTTP/websocket) test into its own shard for maximum
- concurrency instead of trying to do any sort of real sharding.
-
+ occur within the same directory.
Return:
- Two lists of lists of TestInput objects. The first list should
- only be run under the server lock, the second can be run whenever.
+ Two lists of TestShards. The first contains tests that must only be
+ run under the server lock; the second can be run whenever.
"""
- # FIXME: We still need to support multiple locked shards.
+
+ # FIXME: Move all of the sharding logic out of manager into its
+ # own class or module. Consider grouping it with the chunking logic
+ # in prepare_lists as well.
+ if num_workers == 1:
+ return self._shard_in_two(test_files)
+ elif fully_parallel:
+ return self._shard_every_file(test_files)
+ return self._shard_by_directory(test_files, num_workers)
+
+ def _shard_in_two(self, test_files):
+ """Returns two lists of shards, one with all the tests requiring a lock and one with the rest.
+
+ This is used when there's only one worker, to minimize the per-shard overhead."""
+ locked_inputs = []
+ unlocked_inputs = []
+ for test_file in test_files:
+ test_input = self._get_test_input_for_file(test_file)
+ if self._test_requires_lock(test_file):
+ locked_inputs.append(test_input)
+ else:
+ unlocked_inputs.append(test_input)
+ return [TestShard('locked_tests', locked_inputs)], [TestShard('unlocked_tests', unlocked_inputs)]
+
+ def _shard_every_file(self, test_files):
+ """Returns two lists of shards, each shard containing a single test file.
+
+ This mode gets maximal parallelism at the cost of much higher flakiness."""
locked_shards = []
unlocked_shards = []
- tests_to_http_lock = []
- if not use_real_shards:
- for test_file in test_files:
- test_input = self._get_test_input_for_file(test_file)
- if self._test_requires_lock(test_file):
- tests_to_http_lock.append(test_input)
- else:
- unlocked_shards.append((".", [test_input]))
- else:
- tests_by_dir = {}
- for test_file in test_files:
- directory = self._get_dir_for_test_file(test_file)
- test_input = self._get_test_input_for_file(test_file)
- if self._test_requires_lock(test_file):
- tests_to_http_lock.append(test_input)
- else:
- tests_by_dir.setdefault(directory, [])
- tests_by_dir[directory].append(test_input)
- for directory in tests_by_dir:
- test_list = tests_by_dir[directory]
- test_list_tuple = (directory, test_list)
- unlocked_shards.append(test_list_tuple)
+ for test_file in test_files:
+ test_input = self._get_test_input_for_file(test_file)
- # Sort the shards by directory name.
- unlocked_shards.sort(lambda a, b: cmp(a[0], b[0]))
+ # Note that we use a '.' for the shard name; the name doesn't really
+ # matter, and the only other meaningful value would be the filename,
+ # which would be really redundant.
+ if self._test_requires_lock(test_file):
+ locked_shards.append(TestShard('.', [test_input]))
+ else:
+ unlocked_shards.append(TestShard('.', [test_input]))
- if tests_to_http_lock:
- locked_shards = [("tests_to_http_lock", tests_to_http_lock)]
+ return locked_shards, unlocked_shards
- return (locked_shards, unlocked_shards)
+ def _shard_by_directory(self, test_files, num_workers):
+ """Returns two lists of shards, each shard containing all the files in a directory.
+ This is the default mode, and gets as much parallelism as we can while
+ minimizing flakiness caused by inter-test dependencies."""
+ locked_shards = []
+ unlocked_shards = []
+ tests_by_dir = {}
+ # FIXME: Given that the tests are already sorted by directory,
+ # we can probably rewrite this to be clearer and faster.
+ for test_file in test_files:
+ directory = self._get_dir_for_test_file(test_file)
+ test_input = self._get_test_input_for_file(test_file)
+ tests_by_dir.setdefault(directory, [])
+ tests_by_dir[directory].append(test_input)
+
+ for directory, test_inputs in tests_by_dir.iteritems():
+ shard = TestShard(directory, test_inputs)
+ if self._test_requires_lock(directory):
+ locked_shards.append(shard)
+ else:
+ unlocked_shards.append(shard)
+
+ # Sort the shards by directory name.
+ locked_shards.sort(key=lambda shard: shard.name)
+ unlocked_shards.sort(key=lambda shard: shard.name)
+
+ return (self._resize_shards(locked_shards, self._max_locked_shards(num_workers),
+ 'locked_shard'),
+ unlocked_shards)
+
+ def _max_locked_shards(self, num_workers):
+ # Put a ceiling on the number of locked shards, so that we
+ # don't hammer the servers too badly.
+
+ # FIXME: For now, limit to one shard. After testing to make sure we
+ # can handle multiple shards, we should probably do something like
+ # limit this to no more than a quarter of all workers, e.g.:
+ # return max(math.ceil(num_workers / 4.0), 1)
+ return 1
+
+ def _resize_shards(self, old_shards, max_new_shards, shard_name_prefix):
+ """Takes a list of shards and redistributes the tests into no more
+ than |max_new_shards| new shards."""
+
+ # This implementation assumes that each input shard only contains tests from a
+ # single directory, and that tests in each shard must remain together; as a
+ # result, a given input shard is never split between output shards.
+ #
+ # Each output shard contains the tests from one or more input shards and
+ # hence may contain tests from multiple directories.
+
+ def divide_and_round_up(numerator, divisor):
+ return int(math.ceil(float(numerator) / divisor))
+
+ def extract_and_flatten(shards):
+ test_inputs = []
+ for shard in shards:
+ test_inputs.extend(shard.test_inputs)
+ return test_inputs
+
+ def split_at(seq, index):
+ return (seq[:index], seq[index:])
+
+ num_old_per_new = divide_and_round_up(len(old_shards), max_new_shards)
+ new_shards = []
+ remaining_shards = old_shards
+ while remaining_shards:
+ some_shards, remaining_shards = split_at(remaining_shards, num_old_per_new)
+ new_shards.append(TestShard('%s_%d' % (shard_name_prefix, len(new_shards) + 1),
+ extract_and_flatten(some_shards)))
+ return new_shards
+
def _contains_tests(self, subdir):
for test_file in self._test_files:
if test_file.find(subdir) >= 0:
return True
return False
- def _num_workers(self, num_shards):
- num_workers = min(int(self._options.child_processes), num_shards)
+ def _log_num_workers(self, num_workers, num_shards, num_locked_shards):
driver_name = self._port.driver_name()
if num_workers == 1:
self._printer.print_config("Running 1 %s over %s" %
(driver_name, grammar.pluralize('shard', num_shards)))
else:
- self._printer.print_config("Running %d %ss in parallel over %d shards" %
- (num_workers, driver_name, num_shards))
- return num_workers
+ self._printer.print_config("Running %d %ss in parallel over %d shards (%d locked)" %
+ (num_workers, driver_name, num_shards, num_locked_shards))
def _run_tests(self, file_list, result_summary):
"""Runs the tests in the file_list.
@@ -625,7 +717,7 @@
self._printer.print_update('Sharding tests ...')
locked_shards, unlocked_shards = self._shard_tests(file_list,
- int(self._options.child_processes) > 1 and not self._options.experimental_fully_parallel)
+ int(self._options.child_processes), self._options.experimental_fully_parallel)
# FIXME: We don't have a good way to coordinate the workers so that
# they don't try to run the shards that need a lock if we don't actually
@@ -640,7 +732,9 @@
if locked_shards:
self.start_servers_with_lock()
- num_workers = self._num_workers(len(all_shards))
+ num_workers = min(int(self._options.child_processes), len(all_shards))
+ self._log_num_workers(num_workers, len(all_shards), len(locked_shards))
+
manager_connection = manager_worker_broker.get(self._port, self._options,
self, worker.Worker)
@@ -663,7 +757,8 @@
self._printer.print_update("Starting testing ...")
for shard in all_shards:
- manager_connection.post_message('test_list', shard[0], shard[1])
+ # FIXME: Change 'test_list' to 'shard', make sharding public.
+ manager_connection.post_message('test_list', shard.name, shard.test_inputs)
We post one 'stop' message for each worker. Because the stop messages
# are sent after all of the tests, and because each worker will stop
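The hunk above is the manager's entire handoff to the workers: one 'test_list' message per shard, then, per the comment, one 'stop' per worker. A rough illustration of that flow; FakeBroker and the recorded tuple format are hypothetical stand-ins, not the real manager_worker_broker API:

    class FakeBroker(object):
        """Stand-in connection that just records posted messages."""
        def __init__(self):
            self.posted = []

        def post_message(self, name, *args):
            self.posted.append((name,) + args)

    broker = FakeBroker()
    shards = [('locked_shard_1', ['http/tests/a.html']),
              ('fast/css', ['fast/css/b.html'])]
    for name, test_inputs in shards:
        broker.post_message('test_list', name, test_inputs)
    # 'stop' messages queue after every 'test_list', so each worker keeps
    # pulling shards until it dequeues a 'stop'.
    for _ in range(2):  # one per worker
        broker.post_message('stop')
    assert broker.posted[-1] == ('stop',)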
@@ -1346,7 +1441,7 @@
def find(name, test_lists):
for i in range(len(test_lists)):
- if test_lists[i][0] == name:
+ if test_lists[i].name == name:
return i
return -1
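TestShard is a plain value object: __eq__ compares the name and test inputs by value, which is what lets the unit tests below compare expected and actual shard lists directly. A short usage sketch, with the class repeated from the patch so it runs standalone (paths illustrative):

    class TestShard(object):
        """A test shard is a named list of test inputs (as in the patch)."""
        def __init__(self, name, test_inputs):
            self.name = name
            self.test_inputs = test_inputs

        def __repr__(self):
            return "TestShard(name='%s', test_inputs=%s)" % (self.name, self.test_inputs)

        def __eq__(self, other):
            return self.name == other.name and self.test_inputs == other.test_inputs

    a = TestShard('fast/css', ['LayoutTests/fast/css/b.html'])
    b = TestShard('fast/css', ['LayoutTests/fast/css/b.html'])
    assert a == b   # value equality, relied on by assertEquals in the tests
    print(repr(a))  # TestShard(name='fast/css', test_inputs=['LayoutTests/fast/css/b.html'])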
Modified: trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py (90418 => 90419)
--- trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py 2011-07-06 00:08:36 UTC (rev 90418)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py 2011-07-06 00:12:06 UTC (rev 90419)
@@ -39,7 +39,7 @@
from webkitpy import layout_tests
from webkitpy.layout_tests import run_webkit_tests
-from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException
+from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException, TestShard
from webkitpy.layout_tests.layout_package import printing
from webkitpy.layout_tests.layout_package.result_summary import ResultSummary
from webkitpy.tool.mocktool import MockOptions
@@ -50,40 +50,79 @@
return test_file
-class ManagerTest(unittest.TestCase):
- def test_shard_tests(self):
- # Test that _shard_tests in test_runner.TestRunner really
- # put the http tests first in the queue.
+class ShardingTests(unittest.TestCase):
+ # FIXME: Remove "LayoutTests" from this if we can ever convert the generic
+ # code from filenames to test names.
+ test_list = [
+ "LayoutTests/http/tests/websocket/tests/unicode.htm",
+ "LayoutTests/animations/keyframes.html",
+ "LayoutTests/http/tests/security/view-source-no-refresh.html",
+ "LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html",
+ "LayoutTests/fast/css/display-none-inline-style-change-crash.html",
+ "LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html",
+ "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html",
+ "LayoutTests/ietestcenter/_javascript_/11.1.5_4-4-c-1.html",
+ "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html",
+ ]
+
+ def get_shards(self, num_workers, fully_parallel):
port = Mock()
port._filesystem = filesystem_mock.MockFileSystem()
- manager = ManagerWrapper(port=port, options=Mock(), printer=Mock())
+ self.manager = ManagerWrapper(port=port, options=Mock(), printer=Mock())
+ return self.manager._shard_tests(self.test_list, num_workers, fully_parallel)
- test_list = [
- "LayoutTests/websocket/tests/unicode.htm",
- "LayoutTests/animations/keyframes.html",
- "LayoutTests/http/tests/security/view-source-no-refresh.html",
- "LayoutTests/websocket/tests/websocket-protocol-ignored.html",
- "LayoutTests/fast/css/display-none-inline-style-change-crash.html",
- "LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html",
- "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html",
- "LayoutTests/ietestcenter/_javascript_/11.1.5_4-4-c-1.html",
- "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html",
- ]
+ def test_shard_by_dir(self):
+ locked, unlocked = self.get_shards(num_workers=2, fully_parallel=False)
- expected_tests_to_http_lock = set([
- 'LayoutTests/websocket/tests/unicode.htm',
- 'LayoutTests/http/tests/security/view-source-no-refresh.html',
- 'LayoutTests/websocket/tests/websocket-protocol-ignored.html',
- 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html',
- ])
+ # Note that although there are tests in multiple dirs that need locks,
+ # they are crammed into a single shard in order to reduce the # of
+ # workers hitting the server at once.
+ self.assertEquals(locked,
+ [TestShard('locked_shard_1',
+ ['LayoutTests/http/tests/security/view-source-no-refresh.html',
+ 'LayoutTests/http/tests/websocket/tests/unicode.htm',
+ 'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html',
+ 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
+ self.assertEquals(unlocked,
+ [TestShard('animations',
+ ['LayoutTests/animations/keyframes.html']),
+ TestShard('dom/html/level2/html',
+ ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html',
+ 'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html']),
+ TestShard('fast/css',
+ ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']),
+ TestShard('ietestcenter/Javascript',
+ ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html'])])
- single_locked, single_unlocked = manager._shard_tests(test_list, False)
- multi_locked, multi_unlocked = manager._shard_tests(test_list, True)
+ def test_shard_every_file(self):
+ locked, unlocked = self.get_shards(num_workers=2, fully_parallel=True)
+ self.assertEquals(locked,
+ [TestShard('.', ['LayoutTests/http/tests/websocket/tests/unicode.htm']),
+ TestShard('.', ['LayoutTests/http/tests/security/view-source-no-refresh.html']),
+ TestShard('.', ['LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html']),
+ TestShard('.', ['LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
+ self.assertEquals(unlocked,
+ [TestShard('.', ['LayoutTests/animations/keyframes.html']),
+ TestShard('.', ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']),
+ TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html']),
+ TestShard('.', ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html']),
+ TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])])
- self.assertEqual("tests_to_http_lock", single_locked[0][0])
- self.assertEqual(expected_tests_to_http_lock, set(single_locked[0][1]))
- self.assertEqual("tests_to_http_lock", multi_locked[0][0])
- self.assertEqual(expected_tests_to_http_lock, set(multi_locked[0][1]))
+ def test_shard_in_two(self):
+ locked, unlocked = self.get_shards(num_workers=1, fully_parallel=False)
+ self.assertEquals(locked,
+ [TestShard('locked_tests',
+ ['LayoutTests/http/tests/websocket/tests/unicode.htm',
+ 'LayoutTests/http/tests/security/view-source-no-refresh.html',
+ 'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html',
+ 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
+ self.assertEquals(unlocked,
+ [TestShard('unlocked_tests',
+ ['LayoutTests/animations/keyframes.html',
+ 'LayoutTests/fast/css/display-none-inline-style-change-crash.html',
+ 'LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html',
+ ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html',
+ 'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])])
def test_http_locking(tester):
class LockCheckingManager(Manager):
@@ -93,7 +132,7 @@
def handle_finished_list(self, source, list_name, num_tests, elapsed_time):
if not self._finished_list_called:
- tester.assertEquals(list_name, 'tests_to_http_lock')
+ tester.assertEquals(list_name, 'locked_tests')
tester.assertTrue(self._remaining_locked_shards)
tester.assertTrue(self._has_http_lock)