diff options
Diffstat (limited to 'poky/bitbake/lib/hashserv')
-rw-r--r-- | poky/bitbake/lib/hashserv/__init__.py | 52 | ||||
-rw-r--r-- | poky/bitbake/lib/hashserv/client.py | 243 | ||||
-rw-r--r-- | poky/bitbake/lib/hashserv/server.py | 149 | ||||
-rw-r--r-- | poky/bitbake/lib/hashserv/tests.py | 147 |
4 files changed, 404 insertions, 187 deletions
diff --git a/poky/bitbake/lib/hashserv/__init__.py b/poky/bitbake/lib/hashserv/__init__.py index f95e8f43f..55f48410d 100644 --- a/poky/bitbake/lib/hashserv/__init__.py +++ b/poky/bitbake/lib/hashserv/__init__.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: GPL-2.0-only # +import asyncio from contextlib import closing import re import sqlite3 @@ -21,6 +22,24 @@ ADDR_TYPE_TCP = 1 # is necessary DEFAULT_MAX_CHUNK = 32 * 1024 +TABLE_DEFINITION = ( + ("method", "TEXT NOT NULL"), + ("outhash", "TEXT NOT NULL"), + ("taskhash", "TEXT NOT NULL"), + ("unihash", "TEXT NOT NULL"), + ("created", "DATETIME"), + + # Optional fields + ("owner", "TEXT"), + ("PN", "TEXT"), + ("PV", "TEXT"), + ("PR", "TEXT"), + ("task", "TEXT"), + ("outhash_siginfo", "TEXT"), +) + +TABLE_COLUMNS = tuple(name for name, _ in TABLE_DEFINITION) + def setup_database(database, sync=True): db = sqlite3.connect(database) db.row_factory = sqlite3.Row @@ -29,23 +48,10 @@ def setup_database(database, sync=True): cursor.execute(''' CREATE TABLE IF NOT EXISTS tasks_v2 ( id INTEGER PRIMARY KEY AUTOINCREMENT, - method TEXT NOT NULL, - outhash TEXT NOT NULL, - taskhash TEXT NOT NULL, - unihash TEXT NOT NULL, - created DATETIME, - - -- Optional fields - owner TEXT, - PN TEXT, - PV TEXT, - PR TEXT, - task TEXT, - outhash_siginfo TEXT, - + %s UNIQUE(method, outhash, taskhash) ) - ''') + ''' % " ".join("%s %s," % (name, typ) for name, typ in TABLE_DEFINITION)) cursor.execute('PRAGMA journal_mode = WAL') cursor.execute('PRAGMA synchronous = %s' % ('NORMAL' if sync else 'OFF')) @@ -88,10 +94,10 @@ def chunkify(msg, max_chunk): yield "\n" -def create_server(addr, dbname, *, sync=True): +def create_server(addr, dbname, *, sync=True, upstream=None): from . import server db = setup_database(dbname, sync=sync) - s = server.Server(db) + s = server.Server(db, upstream=upstream) (typ, a) = parse_address(addr) if typ == ADDR_TYPE_UNIX: @@ -113,3 +119,15 @@ def create_client(addr): c.connect_tcp(*a) return c + +async def create_async_client(addr): + from . import client + c = client.AsyncClient() + + (typ, a) = parse_address(addr) + if typ == ADDR_TYPE_UNIX: + await c.connect_unix(*a) + else: + await c.connect_tcp(*a) + + return c diff --git a/poky/bitbake/lib/hashserv/client.py b/poky/bitbake/lib/hashserv/client.py index a29af836d..ae5875d1b 100644 --- a/poky/bitbake/lib/hashserv/client.py +++ b/poky/bitbake/lib/hashserv/client.py @@ -3,189 +3,216 @@ # SPDX-License-Identifier: GPL-2.0-only # +import asyncio import json import logging import socket import os -from . import chunkify, DEFAULT_MAX_CHUNK +from . import chunkify, DEFAULT_MAX_CHUNK, create_async_client -logger = logging.getLogger('hashserv.client') +logger = logging.getLogger("hashserv.client") class HashConnectionError(Exception): pass -class Client(object): +class AsyncClient(object): MODE_NORMAL = 0 MODE_GET_STREAM = 1 def __init__(self): - self._socket = None self.reader = None self.writer = None self.mode = self.MODE_NORMAL self.max_chunk = DEFAULT_MAX_CHUNK - def connect_tcp(self, address, port): - def connect_sock(): - s = socket.create_connection((address, port)) - - s.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) - s.setsockopt(socket.SOL_TCP, socket.TCP_QUICKACK, 1) - s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - return s + async def connect_tcp(self, address, port): + async def connect_sock(): + return await asyncio.open_connection(address, port) self._connect_sock = connect_sock - def connect_unix(self, path): - def connect_sock(): - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - # AF_UNIX has path length issues so chdir here to workaround - cwd = os.getcwd() - try: - os.chdir(os.path.dirname(path)) - s.connect(os.path.basename(path)) - finally: - os.chdir(cwd) - return s + async def connect_unix(self, path): + async def connect_sock(): + return await asyncio.open_unix_connection(path) self._connect_sock = connect_sock - def connect(self): - if self._socket is None: - self._socket = self._connect_sock() - - self.reader = self._socket.makefile('r', encoding='utf-8') - self.writer = self._socket.makefile('w', encoding='utf-8') + async def _connect(self): + if self.reader is None or self.writer is None: + (self.reader, self.writer) = await self._connect_sock() - self.writer.write('OEHASHEQUIV 1.1\n\n') - self.writer.flush() + self.writer.write("OEHASHEQUIV 1.1\n\n".encode("utf-8")) + await self.writer.drain() - # Restore mode if the socket is being re-created cur_mode = self.mode self.mode = self.MODE_NORMAL - self._set_mode(cur_mode) + await self._set_mode(cur_mode) - return self._socket + async def close(self): + self.reader = None - def close(self): - if self._socket is not None: - self._socket.close() - self._socket = None - self.reader = None + if self.writer is not None: + self.writer.close() self.writer = None - def _send_wrapper(self, proc): + async def _send_wrapper(self, proc): count = 0 while True: try: - self.connect() - return proc() - except (OSError, HashConnectionError, json.JSONDecodeError, UnicodeDecodeError) as e: - logger.warning('Error talking to server: %s' % e) + await self._connect() + return await proc() + except ( + OSError, + HashConnectionError, + json.JSONDecodeError, + UnicodeDecodeError, + ) as e: + logger.warning("Error talking to server: %s" % e) if count >= 3: if not isinstance(e, HashConnectionError): raise HashConnectionError(str(e)) raise e - self.close() + await self.close() count += 1 - def send_message(self, msg): - def get_line(): - line = self.reader.readline() + async def send_message(self, msg): + async def get_line(): + line = await self.reader.readline() if not line: - raise HashConnectionError('Connection closed') + raise HashConnectionError("Connection closed") + + line = line.decode("utf-8") - if not line.endswith('\n'): - raise HashConnectionError('Bad message %r' % message) + if not line.endswith("\n"): + raise HashConnectionError("Bad message %r" % message) return line - def proc(): + async def proc(): for c in chunkify(json.dumps(msg), self.max_chunk): - self.writer.write(c) - self.writer.flush() + self.writer.write(c.encode("utf-8")) + await self.writer.drain() - l = get_line() + l = await get_line() m = json.loads(l) - if 'chunk-stream' in m: + if "chunk-stream" in m: lines = [] while True: - l = get_line().rstrip('\n') + l = (await get_line()).rstrip("\n") if not l: break lines.append(l) - m = json.loads(''.join(lines)) + m = json.loads("".join(lines)) return m - return self._send_wrapper(proc) + return await self._send_wrapper(proc) - def send_stream(self, msg): - def proc(): - self.writer.write("%s\n" % msg) - self.writer.flush() - l = self.reader.readline() + async def send_stream(self, msg): + async def proc(): + self.writer.write(("%s\n" % msg).encode("utf-8")) + await self.writer.drain() + l = await self.reader.readline() if not l: - raise HashConnectionError('Connection closed') - return l.rstrip() + raise HashConnectionError("Connection closed") + return l.decode("utf-8").rstrip() - return self._send_wrapper(proc) + return await self._send_wrapper(proc) - def _set_mode(self, new_mode): + async def _set_mode(self, new_mode): if new_mode == self.MODE_NORMAL and self.mode == self.MODE_GET_STREAM: - r = self.send_stream('END') - if r != 'ok': - raise HashConnectionError('Bad response from server %r' % r) + r = await self.send_stream("END") + if r != "ok": + raise HashConnectionError("Bad response from server %r" % r) elif new_mode == self.MODE_GET_STREAM and self.mode == self.MODE_NORMAL: - r = self.send_message({'get-stream': None}) - if r != 'ok': - raise HashConnectionError('Bad response from server %r' % r) + r = await self.send_message({"get-stream": None}) + if r != "ok": + raise HashConnectionError("Bad response from server %r" % r) elif new_mode != self.mode: - raise Exception('Undefined mode transition %r -> %r' % (self.mode, new_mode)) + raise Exception( + "Undefined mode transition %r -> %r" % (self.mode, new_mode) + ) self.mode = new_mode - def get_unihash(self, method, taskhash): - self._set_mode(self.MODE_GET_STREAM) - r = self.send_stream('%s %s' % (method, taskhash)) + async def get_unihash(self, method, taskhash): + await self._set_mode(self.MODE_GET_STREAM) + r = await self.send_stream("%s %s" % (method, taskhash)) if not r: return None return r - def report_unihash(self, taskhash, method, outhash, unihash, extra={}): - self._set_mode(self.MODE_NORMAL) + async def report_unihash(self, taskhash, method, outhash, unihash, extra={}): + await self._set_mode(self.MODE_NORMAL) m = extra.copy() - m['taskhash'] = taskhash - m['method'] = method - m['outhash'] = outhash - m['unihash'] = unihash - return self.send_message({'report': m}) - - def report_unihash_equiv(self, taskhash, method, unihash, extra={}): - self._set_mode(self.MODE_NORMAL) + m["taskhash"] = taskhash + m["method"] = method + m["outhash"] = outhash + m["unihash"] = unihash + return await self.send_message({"report": m}) + + async def report_unihash_equiv(self, taskhash, method, unihash, extra={}): + await self._set_mode(self.MODE_NORMAL) m = extra.copy() - m['taskhash'] = taskhash - m['method'] = method - m['unihash'] = unihash - return self.send_message({'report-equiv': m}) - - def get_taskhash(self, method, taskhash, all_properties=False): - self._set_mode(self.MODE_NORMAL) - return self.send_message({'get': { - 'taskhash': taskhash, - 'method': method, - 'all': all_properties - }}) - - def get_stats(self): - self._set_mode(self.MODE_NORMAL) - return self.send_message({'get-stats': None}) - - def reset_stats(self): - self._set_mode(self.MODE_NORMAL) - return self.send_message({'reset-stats': None}) + m["taskhash"] = taskhash + m["method"] = method + m["unihash"] = unihash + return await self.send_message({"report-equiv": m}) + + async def get_taskhash(self, method, taskhash, all_properties=False): + await self._set_mode(self.MODE_NORMAL) + return await self.send_message( + {"get": {"taskhash": taskhash, "method": method, "all": all_properties}} + ) + + async def get_stats(self): + await self._set_mode(self.MODE_NORMAL) + return await self.send_message({"get-stats": None}) + + async def reset_stats(self): + await self._set_mode(self.MODE_NORMAL) + return await self.send_message({"reset-stats": None}) + + async def backfill_wait(self): + await self._set_mode(self.MODE_NORMAL) + return (await self.send_message({"backfill-wait": None}))["tasks"] + + +class Client(object): + def __init__(self): + self.client = AsyncClient() + self.loop = asyncio.new_event_loop() + + for call in ( + "connect_tcp", + "connect_unix", + "close", + "get_unihash", + "report_unihash", + "report_unihash_equiv", + "get_taskhash", + "get_stats", + "reset_stats", + "backfill_wait", + ): + downcall = getattr(self.client, call) + setattr(self, call, self._get_downcall_wrapper(downcall)) + + def _get_downcall_wrapper(self, downcall): + def wrapper(*args, **kwargs): + return self.loop.run_until_complete(downcall(*args, **kwargs)) + + return wrapper + + @property + def max_chunk(self): + return self.client.max_chunk + + @max_chunk.setter + def max_chunk(self, value): + self.client.max_chunk = value diff --git a/poky/bitbake/lib/hashserv/server.py b/poky/bitbake/lib/hashserv/server.py index 81050715e..3ff4c51cc 100644 --- a/poky/bitbake/lib/hashserv/server.py +++ b/poky/bitbake/lib/hashserv/server.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-2.0-only # -from contextlib import closing +from contextlib import closing, contextmanager from datetime import datetime import asyncio import json @@ -12,8 +12,9 @@ import math import os import signal import socket +import sys import time -from . import chunkify, DEFAULT_MAX_CHUNK +from . import chunkify, DEFAULT_MAX_CHUNK, create_async_client, TABLE_COLUMNS logger = logging.getLogger('hashserv.server') @@ -111,16 +112,40 @@ class Stats(object): class ClientError(Exception): pass +def insert_task(cursor, data, ignore=False): + keys = sorted(data.keys()) + query = '''INSERT%s INTO tasks_v2 (%s) VALUES (%s)''' % ( + " OR IGNORE" if ignore else "", + ', '.join(keys), + ', '.join(':' + k for k in keys)) + cursor.execute(query, data) + +async def copy_from_upstream(client, db, method, taskhash): + d = await client.get_taskhash(method, taskhash, True) + if d is not None: + # Filter out unknown columns + d = {k: v for k, v in d.items() if k in TABLE_COLUMNS} + keys = sorted(d.keys()) + + + with closing(db.cursor()) as cursor: + insert_task(cursor, d) + db.commit() + + return d + class ServerClient(object): FAST_QUERY = 'SELECT taskhash, method, unihash FROM tasks_v2 WHERE method=:method AND taskhash=:taskhash ORDER BY created ASC LIMIT 1' ALL_QUERY = 'SELECT * FROM tasks_v2 WHERE method=:method AND taskhash=:taskhash ORDER BY created ASC LIMIT 1' - def __init__(self, reader, writer, db, request_stats): + def __init__(self, reader, writer, db, request_stats, backfill_queue, upstream): self.reader = reader self.writer = writer self.db = db self.request_stats = request_stats self.max_chunk = DEFAULT_MAX_CHUNK + self.backfill_queue = backfill_queue + self.upstream = upstream self.handlers = { 'get': self.handle_get, @@ -130,10 +155,18 @@ class ServerClient(object): 'get-stats': self.handle_get_stats, 'reset-stats': self.handle_reset_stats, 'chunk-stream': self.handle_chunk, + 'backfill-wait': self.handle_backfill_wait, } async def process_requests(self): + if self.upstream is not None: + self.upstream_client = await create_async_client(self.upstream) + else: + self.upstream_client = None + try: + + self.addr = self.writer.get_extra_info('peername') logger.debug('Client %r connected' % (self.addr,)) @@ -171,6 +204,9 @@ class ServerClient(object): except ClientError as e: logger.error(str(e)) finally: + if self.upstream_client is not None: + await self.upstream_client.close() + self.writer.close() async def dispatch_message(self, msg): @@ -239,15 +275,19 @@ class ServerClient(object): if row is not None: logger.debug('Found equivalent task %s -> %s', (row['taskhash'], row['unihash'])) d = {k: row[k] for k in row.keys()} - - self.write_message(d) + elif self.upstream_client is not None: + d = await copy_from_upstream(self.upstream_client, self.db, method, taskhash) else: - self.write_message(None) + d = None + + self.write_message(d) async def handle_get_stream(self, request): self.write_message('ok') while True: + upstream = None + l = await self.reader.readline() if not l: return @@ -272,6 +312,12 @@ class ServerClient(object): if row is not None: msg = ('%s\n' % row['unihash']).encode('utf-8') #logger.debug('Found equivalent task %s -> %s', (row['taskhash'], row['unihash'])) + elif self.upstream_client is not None: + upstream = await self.upstream_client.get_unihash(method, taskhash) + if upstream: + msg = ("%s\n" % upstream).encode("utf-8") + else: + msg = "\n".encode("utf-8") else: msg = '\n'.encode('utf-8') @@ -282,6 +328,11 @@ class ServerClient(object): await self.writer.drain() + # Post to the backfill queue after writing the result to minimize + # the turn around time on a request + if upstream is not None: + await self.backfill_queue.put((method, taskhash)) + async def handle_report(self, data): with closing(self.db.cursor()) as cursor: cursor.execute(''' @@ -324,11 +375,7 @@ class ServerClient(object): if k in data: insert_data[k] = data[k] - cursor.execute('''INSERT INTO tasks_v2 (%s) VALUES (%s)''' % ( - ', '.join(sorted(insert_data.keys())), - ', '.join(':' + k for k in sorted(insert_data.keys()))), - insert_data) - + insert_task(cursor, insert_data) self.db.commit() logger.info('Adding taskhash %s with unihash %s', @@ -358,11 +405,7 @@ class ServerClient(object): if k in data: insert_data[k] = data[k] - cursor.execute('''INSERT OR IGNORE INTO tasks_v2 (%s) VALUES (%s)''' % ( - ', '.join(sorted(insert_data.keys())), - ', '.join(':' + k for k in sorted(insert_data.keys()))), - insert_data) - + insert_task(cursor, insert_data, ignore=True) self.db.commit() # Fetch the unihash that will be reported for the taskhash. If the @@ -394,6 +437,13 @@ class ServerClient(object): self.request_stats.reset() self.write_message(d) + async def handle_backfill_wait(self, request): + d = { + 'tasks': self.backfill_queue.qsize(), + } + await self.backfill_queue.join() + self.write_message(d) + def query_equivalent(self, method, taskhash, query): # This is part of the inner loop and must be as fast as possible try: @@ -405,7 +455,7 @@ class ServerClient(object): class Server(object): - def __init__(self, db, loop=None): + def __init__(self, db, loop=None, upstream=None): self.request_stats = Stats() self.db = db @@ -416,6 +466,8 @@ class Server(object): self.loop = loop self.close_loop = False + self.upstream = upstream + self._cleanup_socket = None def start_tcp_server(self, host, port): @@ -458,7 +510,7 @@ class Server(object): async def handle_client(self, reader, writer): # writer.transport.set_write_buffer_limits(0) try: - client = ServerClient(reader, writer, self.db, self.request_stats) + client = ServerClient(reader, writer, self.db, self.request_stats, self.backfill_queue, self.upstream) await client.process_requests() except Exception as e: import traceback @@ -467,23 +519,60 @@ class Server(object): writer.close() logger.info('Client disconnected') + @contextmanager + def _backfill_worker(self): + async def backfill_worker_task(): + client = await create_async_client(self.upstream) + try: + while True: + item = await self.backfill_queue.get() + if item is None: + self.backfill_queue.task_done() + break + method, taskhash = item + await copy_from_upstream(client, self.db, method, taskhash) + self.backfill_queue.task_done() + finally: + await client.close() + + async def join_worker(worker): + await self.backfill_queue.put(None) + await worker + + if self.upstream is not None: + worker = asyncio.ensure_future(backfill_worker_task()) + try: + yield + finally: + self.loop.run_until_complete(join_worker(worker)) + else: + yield + def serve_forever(self): def signal_handler(): self.loop.stop() - self.loop.add_signal_handler(signal.SIGTERM, signal_handler) - + asyncio.set_event_loop(self.loop) try: - self.loop.run_forever() - except KeyboardInterrupt: - pass + self.backfill_queue = asyncio.Queue() + + self.loop.add_signal_handler(signal.SIGTERM, signal_handler) - self.server.close() - self.loop.run_until_complete(self.server.wait_closed()) - logger.info('Server shutting down') + with self._backfill_worker(): + try: + self.loop.run_forever() + except KeyboardInterrupt: + pass - if self.close_loop: - self.loop.close() + self.server.close() + + self.loop.run_until_complete(self.server.wait_closed()) + logger.info('Server shutting down') + finally: + if self.close_loop: + if sys.version_info >= (3, 6): + self.loop.run_until_complete(self.loop.shutdown_asyncgens()) + self.loop.close() - if self._cleanup_socket is not None: - self._cleanup_socket() + if self._cleanup_socket is not None: + self._cleanup_socket() diff --git a/poky/bitbake/lib/hashserv/tests.py b/poky/bitbake/lib/hashserv/tests.py index 4566f2473..3dd9a31be 100644 --- a/poky/bitbake/lib/hashserv/tests.py +++ b/poky/bitbake/lib/hashserv/tests.py @@ -16,35 +16,54 @@ import threading import unittest import socket +def _run_server(server, idx): + # logging.basicConfig(level=logging.DEBUG, filename='bbhashserv.log', filemode='w', + # format='%(levelname)s %(filename)s:%(lineno)d %(message)s') + sys.stdout = open('bbhashserv-%d.log' % idx, 'w') + sys.stderr = sys.stdout + server.serve_forever() class TestHashEquivalenceServer(object): METHOD = 'TestMethod' - def _run_server(self): - # logging.basicConfig(level=logging.DEBUG, filename='bbhashserv.log', filemode='w', - # format='%(levelname)s %(filename)s:%(lineno)d %(message)s') - self.server.serve_forever() + server_index = 0 + + def start_server(self, dbpath=None, upstream=None): + self.server_index += 1 + if dbpath is None: + dbpath = os.path.join(self.temp_dir.name, "db%d.sqlite" % self.server_index) + + def cleanup_thread(thread): + thread.terminate() + thread.join() + + server = create_server(self.get_server_addr(self.server_index), dbpath, upstream=upstream) + server.dbpath = dbpath + + server.thread = multiprocessing.Process(target=_run_server, args=(server, self.server_index)) + server.thread.start() + self.addCleanup(cleanup_thread, server.thread) + + def cleanup_client(client): + client.close() + + client = create_client(server.address) + self.addCleanup(cleanup_client, client) + + return (client, server) def setUp(self): if sys.version_info < (3, 5, 0): self.skipTest('Python 3.5 or later required') self.temp_dir = tempfile.TemporaryDirectory(prefix='bb-hashserv') - self.dbfile = os.path.join(self.temp_dir.name, 'db.sqlite') - - self.server = create_server(self.get_server_addr(), self.dbfile) - self.server_thread = multiprocessing.Process(target=self._run_server) - self.server_thread.start() - self.client = create_client(self.server.address) - - def tearDown(self): - # Shutdown server - s = getattr(self, 'server', None) - if s is not None: - self.server_thread.terminate() - self.server_thread.join() - self.client.close() - self.temp_dir.cleanup() + self.addCleanup(self.temp_dir.cleanup) + + (self.client, self.server) = self.start_server() + + def assertClientGetHash(self, client, taskhash, unihash): + result = client.get_unihash(self.METHOD, taskhash) + self.assertEqual(result, unihash) def test_create_hash(self): # Simple test that hashes can be created @@ -52,8 +71,7 @@ class TestHashEquivalenceServer(object): outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f' unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd' - result = self.client.get_unihash(self.METHOD, taskhash) - self.assertIsNone(result, msg='Found unexpected task, %r' % result) + self.assertClientGetHash(self.client, taskhash, None) result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash) self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash') @@ -84,22 +102,19 @@ class TestHashEquivalenceServer(object): unihash = '218e57509998197d570e2c98512d0105985dffc9' self.client.report_unihash(taskhash, self.METHOD, outhash, unihash) - result = self.client.get_unihash(self.METHOD, taskhash) - self.assertEqual(result, unihash) + self.assertClientGetHash(self.client, taskhash, unihash) outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d' unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c' self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2) - result = self.client.get_unihash(self.METHOD, taskhash) - self.assertEqual(result, unihash) + self.assertClientGetHash(self.client, taskhash, unihash) outhash3 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4' unihash3 = '9217a7d6398518e5dc002ed58f2cbbbc78696603' self.client.report_unihash(taskhash, self.METHOD, outhash3, unihash3) - result = self.client.get_unihash(self.METHOD, taskhash) - self.assertEqual(result, unihash) + self.assertClientGetHash(self.client, taskhash, unihash) def test_huge_message(self): # Simple test that hashes can be created @@ -107,8 +122,7 @@ class TestHashEquivalenceServer(object): outhash = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44' unihash = '90e9bc1d1f094c51824adca7f8ea79a048d68824' - result = self.client.get_unihash(self.METHOD, taskhash) - self.assertIsNone(result, msg='Found unexpected task, %r' % result) + self.assertClientGetHash(self.client, taskhash, None) siginfo = "0" * (self.client.max_chunk * 4) @@ -156,14 +170,83 @@ class TestHashEquivalenceServer(object): self.assertFalse(failures) + def test_upstream_server(self): + # Tests upstream server support. This is done by creating two servers + # that share a database file. The downstream server has it upstream + # set to the test server, whereas the side server doesn't. This allows + # verification that the hash requests are being proxied to the upstream + # server by verifying that they appear on the downstream client, but not + # the side client. It also verifies that the results are pulled into + # the downstream database by checking that the downstream and side servers + # match after the downstream is done waiting for all backfill tasks + (down_client, down_server) = self.start_server(upstream=self.server.address) + (side_client, side_server) = self.start_server(dbpath=down_server.dbpath) + + def check_hash(taskhash, unihash, old_sidehash): + nonlocal down_client + nonlocal side_client + + # check upstream server + self.assertClientGetHash(self.client, taskhash, unihash) + + # Hash should *not* be present on the side server + self.assertClientGetHash(side_client, taskhash, old_sidehash) + + # Hash should be present on the downstream server, since it + # will defer to the upstream server. This will trigger + # the backfill in the downstream server + self.assertClientGetHash(down_client, taskhash, unihash) + + # After waiting for the downstream client to finish backfilling the + # task from the upstream server, it should appear in the side server + # since the database is populated + down_client.backfill_wait() + self.assertClientGetHash(side_client, taskhash, unihash) + + # Basic report + taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a' + outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e' + unihash = '218e57509998197d570e2c98512d0105985dffc9' + self.client.report_unihash(taskhash, self.METHOD, outhash, unihash) + + check_hash(taskhash, unihash, None) + + # Duplicated taskhash with multiple output hashes and unihashes. + # All servers should agree with the originally reported hash + outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d' + unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c' + self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2) + + check_hash(taskhash, unihash, unihash) + + # Report an equivalent task. The sideload will originally report + # no unihash until backfilled + taskhash3 = "044c2ec8aaf480685a00ff6ff49e6162e6ad34e1" + unihash3 = "def64766090d28f627e816454ed46894bb3aab36" + self.client.report_unihash(taskhash3, self.METHOD, outhash, unihash3) + + check_hash(taskhash3, unihash, None) + + # Test that reporting a unihash in the downstream client isn't + # propagating to the upstream server + taskhash4 = "e3da00593d6a7fb435c7e2114976c59c5fd6d561" + outhash4 = "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a" + unihash4 = "3b5d3d83f07f259e9086fcb422c855286e18a57d" + down_client.report_unihash(taskhash4, self.METHOD, outhash4, unihash4) + down_client.backfill_wait() + + self.assertClientGetHash(down_client, taskhash4, unihash4) + self.assertClientGetHash(side_client, taskhash4, unihash4) + self.assertClientGetHash(self.client, taskhash4, None) + class TestHashEquivalenceUnixServer(TestHashEquivalenceServer, unittest.TestCase): - def get_server_addr(self): - return "unix://" + os.path.join(self.temp_dir.name, 'sock') + def get_server_addr(self, server_idx): + return "unix://" + os.path.join(self.temp_dir.name, 'sock%d' % server_idx) class TestHashEquivalenceTCPServer(TestHashEquivalenceServer, unittest.TestCase): - def get_server_addr(self): + def get_server_addr(self, server_idx): # Some hosts cause asyncio module to misbehave, when IPv6 is not enabled. # If IPv6 is enabled, it should be safe to use localhost directly, in general # case it is more reliable to resolve the IP address explicitly. |