Skip to content

Commit

Permalink
Merge pull request #19 from jbigatti/refactor-init-StatsManager
Browse files Browse the repository at this point in the history
Refactor init stats manager; allow forever booking
  • Loading branch information
jmansilla authored Oct 20, 2016
2 parents 046576b + c803abc commit e7f97cb
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 20 deletions.
2 changes: 1 addition & 1 deletion docs/experimentation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ database.

Tips:
- Monitor the memory usage of each experiment. Running several in parallel may use all the memory available, slowing down the entire experimentation.
- Bookings are not forever. By default they last 10 minutes, but you can set it to whatever you want. Once that an experiment booking expires, it may be booked again and re-run by anyone.
- Pay attention to booking time. Default booking time is set to 10 minutes, but it can be set to whatever time you want, even forever if you change booking time to 'None'. Once the experiment booking expires, that slot may be booked again, or re-run by anyone.


Dynamic experiment configuration
Expand Down
4 changes: 2 additions & 2 deletions featureforge/experimentation/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def main(single_runner,

opts = docopt(custom__doc__, version=version)

stats = StatsManager(booking_duration=booking_duration,
db_name=opts[u"<dbname>"],
stats = StatsManager(db_name=opts[u"<dbname>"],
booking_duration=booking_duration,
db_uri=opts[u"--dbserver"])

experiment_configurations = json.load(open(opts[u"<configs.json>"]))
Expand Down
56 changes: 39 additions & 17 deletions featureforge/experimentation/stats_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import hashlib
import logging
import warnings

from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
Expand All @@ -12,7 +13,6 @@

logger = logging.getLogger(__name__)


EXPERIMENTS_COLLECTION_NAME = 'experiment_data'


Expand Down Expand Up @@ -44,22 +44,41 @@ class StatsManager(object):
STATUS_BOOKED = 'status_booked'
STATUS_SOLVED = 'status_solved'

def __init__(self, booking_duration, db_name, db_uri=None,
def __init__(self, db_name, booking_duration=None, db_uri=None,
keep_running_on_errors=True):
"""
Creates new instance of Stats Manager.
Parameters:
- booking_duration,
- db_name: Name of the mongo database to use (will be created if needed)
- booking_duration, Default None. Means that booking time will not be take
in count, so booking_ticket cannot be stolen.
- db_uri: Default is None, which will be treated as localhost and the default
dbserver port.
- keep_running_on_errors: Default True. Indicates if errors shall be raised,
or if we shall attempt to recover from issues and keep running (errors
will be always logged to stderr)
"""
if isinstance(db_name, int) or db_name is None or isinstance(booking_duration, str):
with warnings.catch_warnings():
# We only want to show this warning message
warnings.simplefilter('always', DeprecationWarning)
message = (
'Init arguments will change. '
'Take a look to http://feature-forge.readthedocs.io/en/latest/experimentation.html' # NOQA
'#exploring-the-finished-experiments')
warnings.warn(message, DeprecationWarning)
# swap the values of db_name and booking_duration to ensure the previous behaviour
aux = db_name
db_name = booking_duration
booking_duration = aux

self.keep_running_on_errors = keep_running_on_errors
self._db_config = {'uri': db_uri, 'name': db_name}
self.booking_delta = timedelta(seconds=booking_duration)

booking_delta = None
if booking_duration is not None:
booking_delta = timedelta(seconds=booking_duration)
self.booking_delta = booking_delta
self.setup_database_connection()
self.normalizer = DictNormalizer()

Expand All @@ -73,7 +92,7 @@ def _db_connect(self):
def setup_database_connection(self):
self.db = self._db_connect()
self.data = self.db[EXPERIMENTS_COLLECTION_NAME]
self.data.ensure_index(self.marshalled_key, unique=True)
self.data.create_index(self.marshalled_key, unique=True)

def get_normalized_and_key(self, config):
normalized = self.normalizer(deepcopy(config))
Expand Down Expand Up @@ -112,18 +131,21 @@ def book_if_available(self, experiment_configuration):
except DuplicateKeyError:
# Ok, experiment is already registered. Let's see if it was already solved or
# not. If not, and if it was booked "long time ago", we'll steal the booking
query = {self.marshalled_key: key,
self.experiment_status: self.STATUS_BOOKED,
self.booking_at_key: {'$lte': now - self.booking_delta}
}
update = {'$set': {self.booking_at_key: now}}
experiment = self.data.find_and_modify(
query, update=update,
new=True # So the modified object is returned
)
if experiment:
logger.info("Stolen booking ticket %s" % key)
ticket = experiment[u'_id']
# depends on booking_delta value.
if self.booking_delta is not None:
query = {self.marshalled_key: key,
self.experiment_status: self.STATUS_BOOKED,
self.booking_at_key: {'$lte': now - self.booking_delta}
}
update = {'$set': {self.booking_at_key: now}}
experiment = self.data.find_and_modify(
query, update=update,
new=True # So the modified object is returned
)
if experiment:
logger.info("Stolen booking ticket %s" % key)
ticket = experiment[u'_id']

return ticket

def store_results(self, booking_ticket, results):
Expand Down
49 changes: 49 additions & 0 deletions tests/test_stats_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from datetime import timedelta
import mock
from unittest import TestCase
import warnings

from featureforge.experimentation.stats_manager import StatsManager

DEPRECATION_MSG = (
'Init arguments will change. '
'Take a look to http://feature-forge.readthedocs.io/en/latest/experimentation.html'
'#exploring-the-finished-experiments'
)

DB_CONNECTION_PATH = 'featureforge.experimentation.stats_manager.StatsManager.setup_database_connection' # NOQA


class TestStatsManager(TestCase):

def setUp(self):
self.db_name = 'a_db_name'
self.booking_duration = 10

def test_init_with_db_name_as_first_parameter_and_booking_duration_as_second(self):
with mock.patch(DB_CONNECTION_PATH):
st = StatsManager(db_name=self.db_name, booking_duration=self.booking_duration)
self.assertEqual(st._db_config['name'], self.db_name)
self.assertEqual(st.booking_delta, timedelta(seconds=self.booking_duration))

def test_if_init_with_db_name_as_second_argument_will_warning(self):
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always", DeprecationWarning)
# Trigger a warning.
with mock.patch(DB_CONNECTION_PATH):
StatsManager(self.booking_duration, self.db_name)
# Verify some things
self.assertEqual(len(w), 1)
self.assertTrue(issubclass(w[-1].category, DeprecationWarning))
self.assertEqual(str(w[-1].message), DEPRECATION_MSG)

def test_if_use_db_name_as_second_argument_warnings_but_can_continue(self):
with warnings.catch_warnings(record=True):
# Cause all warnings to always be triggered.
warnings.simplefilter("always", DeprecationWarning)
# Trigger a warning.
with mock.patch(DB_CONNECTION_PATH):
st = StatsManager(self.booking_duration, self.db_name)
self.assertEqual(st._db_config['name'], self.db_name)
self.assertEqual(st.booking_delta, timedelta(seconds=self.booking_duration))

0 comments on commit e7f97cb

Please sign in to comment.