Strong RL

Example Application

Because Strong-RL takes care of the overall framework for your application and provides a host of default, configurable components, you can develop an entire, highly scalable reinforcement learning application in just a single Python script:

import datetime
import tempfile
import shutil
import os
import numpy as np
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, TimestampType, DoubleType, FloatType, BooleanType

from import Application, AppConfig
from strong_rl.base.environment import Environment
from strong_rl.actions.actionspace import ActionSpace
from strong_rl.actions.set import ActionSet
from strong_rl.actions.action import NullAction
from import Event
from strong_rl.actions.action import Action
from import EventSet
from strong_rl.models.model import Model
from strong_rl.models.set import ModelSet
from import LocalStorage
from strong_rl.cache.file import FileCache
from strong_rl.batch.datalog import BatchDataLog
from strong_rl.batch.datamodeler import BatchDataModeler
from strong_rl.batch.targeter import BatchTargeter
from import BatchActor
from strong_rl.internals.base_data import DataField

from strong_rl.algorithms.agents.set import AgentSet
from strong_rl.algorithms.states.state import AgentState
from strong_rl.algorithms.memories.experience_replay import ExperienceReplay
from strong_rl.algorithms.preprocessors.scaling_preprocessor import ScalingPreprocessor
from strong_rl.algorithms.agents.traart_dqn import DQN
from strong_rl.algorithms.models.nn import MultiActionDQN
from strong_rl.algorithms.policies.epsilon_greedy import EpsilonGreedy

Events, Models, and Actions
Describe the data your application collects (events),
the data models you will build (models), and the
actions your agent can take (actions).

class Impression(Event):
    """
    Register a single ad impression to a user, and whether
    or not they clicked on it (our behavior of interest).
    """
    name = "impression"

    # NOTE(review): this DataField rebinds the class attribute `name` that was
    # set to "impression" just above; in an ordinary class body the later
    # binding wins. Confirm against the Event base class that both
    # declarations are meant to coexist like this.
    name = DataField(StringType(), False)
    happened_at = DataField(TimestampType(), False)
    created_at = DataField(TimestampType(), False)
    user_id = DataField(LongType(), False)
    product_id = DataField(LongType(), False)
    click = DataField(BooleanType(), False)

    def validate(self):
        """
        Reject impressions whose product ID is above 100.

        Raises:
            ValueError: if ``product_id`` is greater than 100.
        """
        if self.product_id > 100:
            # ValueError is a subclass of Exception, so any existing
            # `except Exception` handler still catches it, while the type now
            # says what went wrong: a field value is out of range.
            raise ValueError("Impossible product ID received. We only have 100 products.")

class User(Model):
    """
    Build our target model (of users), summarising their most recent
    10 impressions.
    """
    name = "user"

    key = "user_id"

    # The CTE numbers each user's impressions twice: n_back counts from the
    # newest impression backwards, cum_num_impressions from the oldest
    # forwards. The outer query keeps the 10 newest rows per user and
    # collapses them into one row.
    #
    # Changes relative to the previous text of this query:
    #   * the CTE's closing parenthesis and the string's closing quotes are
    #     restored;
    #   * `click` is projected by the CTE, because the outer SELECT averages
    #     it and can only see columns the CTE exposes;
    #   * LAST() is replaced by deterministic aggregates. LAST() depends on
    #     row order within the group, which is not guaranteed after a
    #     shuffle. The highest cum_num_impressions among the kept rows is the
    #     running total, and the row with n_back = 1 is the newest impression.
    query = """
        WITH ranked_impressions AS (
          SELECT user_id,
                 product_id AS recent_product,
                 click,
                 ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY happened_at DESC) as n_back,
                 ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY happened_at ASC) as cum_num_impressions
          FROM impressions
        )

        SELECT user_id,
               MAX(cum_num_impressions) AS num_impressions,
               MAX(CASE WHEN n_back = 1 THEN recent_product END) AS recent_product,
               AVG(CAST(click AS INTEGER)) AS click_rate
        FROM ranked_impressions
        WHERE n_back <= 10
        GROUP BY user_id
    """

    # One field per column returned by `query`.
    user_id = DataField(LongType(), False)
    num_impressions = DataField(IntegerType(), False)
    recent_product = DataField(LongType(), False)
    click_rate = DataField(DoubleType(), False)

class Coupon(Action):
    """
    Describe the only kind of action we can take:
    sending a coupon of a particular value.
    """
    name = "Offer"

    # Face value of the coupon. Elsewhere in this example the value is
    # compared against 50, which the accompanying comments describe as $50.
    value = DataField(DoubleType(), False)

class CouponActionSpace(ActionSpace):
    """
    Describe our complete action space: what coupon values we can send,
    and whether we can choose to do nothing at all (a NullAction).
    """
    # NOTE(review): the entries of this list and its closing bracket are
    # missing from this copy of the example; restore them from the upstream
    # source. `constrain` indexes every entry with [0], so each entry is
    # presumably a tuple of actions -- confirm.
    actions = [

    def constrain(self, target):
        """
        Narrow ``self.actions`` to what is allowed for ``target``.

        An entry is kept when its first action is null, when the target has
        at least 5 impressions, or when its first action's value is below 50.
        In other words, entries valued at 50 or more are dropped for targets
        with fewer than 5 impressions.

        NOTE(review): the result is assigned to ``self.actions``, so calling
        this again on the same instance filters the already-filtered list.
        Confirm that the framework uses a fresh instance per target.
        """
        # constrain our actions so we only send $50 coupons
        # if the user has seen 5+ ads
        self.actions = [a for a in self.actions if a[0].null or target.num_impressions >= 5 or a[0].value < 50]

class SimulationEnvironment(Environment):
    """
    Implement a simulation environment.

    In this simulation, we instantiate 100
    users who receive impressions. If we send them a $50 coupon,
    they have an 80% chance of clicking on the next ad (our target behavior, thus our reward).
    If we don't send them a $50 coupon, they have only a 20% chance of clicking on the next ad.
    """
    def __init__(self, app):
        # NOTE(review): `app` is not used by any surviving line; the blank
        # line below presumably held a statement that consumed it (lost in
        # this copy) -- restore from the upstream source.

        self.num_users = 100

        # Per-user simulation state, keyed by 0..num_users-1. Every user
        # starts with a single NullAction as their last actions.
        # NOTE(review): the closing brace of this dict comprehension is
        # missing from this copy.
        self.users = {
            i: {
                'last_actions': (NullAction(),)
            } for i in range(self.num_users)

    def current_reward(self):
        # Draws one fresh reward sample per user via self.reward() and returns
        # the mean of the resulting 1.0/0.0 values. Because each call samples
        # again, repeated calls can return different numbers.
        return np.mean([1.0 if self.reward(state) else 0.0 for state in self.users.values()])

    def reward(self, state):
        # Sample a single True/False reward for one user's state dict.
        # The "correct" action is a Coupon with value 50 appearing in
        # state['last_actions'].
        # NOTE(review): the `in` test relies on how Coupon instances compare
        # for equality, which is defined outside this file -- confirm.
        correct_action = True if Coupon(value=50) in state['last_actions'] else False
        # Success probability: 0.8 after the correct action, 0.2 otherwise.
        p = .8 if correct_action else .2
        reward = bool(np.random.binomial(1, p=p))

        return reward

    def simulate_events(self):
        # NOTE(review): the body of this list comprehension is truncated in
        # this copy. What survives shows one element built per entry of
        # self.users, with some argument offset back by one second. Restore
        # the constructor call and the closing bracket from the upstream
        # source.
        return [
       - datetime.timedelta(seconds=1),
            ) for i, state in self.users.items()

    def act(self, recommended_actions):
        # Record each recommendation's actions as the 'last_actions' of the
        # matching simulated user; reward() reads them back later.
        # NOTE(review): the key expression inside `self.users[...]` is missing
        # from this copy -- restore from the upstream source.
        for ra in recommended_actions:
            self.users[]['last_actions'] = ra.actions

class ExampleRLApplication:
    """
    Build our application. To allow for simple and side-effect free testing,
    we build the application in a context and then tear it down (i.e.,
    delete all data) when the context is exited.
    """
    def __init__(self):
        # NOTE(review): this line is two assignments fused together; the
        # target of the second `= None` was lost in this copy. It is
        # presumably the attribute that later holds the Application built in
        # __enter__ -- confirm against the upstream source.
        self.temp_dir = None = None

    def __enter__(self):
        # Builds every component under a fresh temporary directory.
        # NOTE(review): several calls below are truncated in this copy
        # (ModelSet, AppConfig, DQN, the wiring chain) and the return
        # statement is missing. The script's `with ... as app` block reads
        # `app.config`, so this presumably returns the wired Application --
        # confirm.

        # create temporary folder
        self.temp_dir = tempfile.mkdtemp()

        # configure our data store
        storage = LocalStorage(base_path=os.path.join(self.temp_dir, 'app'))

        # configure the application
        eventset = EventSet(custom_events=(Impression,))

        # NOTE(review): the custom_models tuple contents and the closing
        # parenthesis are missing from this copy.
        modelset = ModelSet(custom_models=(),

        actionset = ActionSet(actions=(Coupon,))

        # NOTE(review): `modelset` and `actionset` are built above but not
        # passed in the surviving text; the arguments between `eventset` and
        # `earliest_time` were presumably lost -- confirm.
        config = AppConfig(eventset=eventset,
                           earliest_time=datetime.datetime(year=2018, month=1, day=1))

        # import a default Strong-RL agent (normally, you would extend a default Strong-RL agent to build your own)
        # NOTE(review): the remaining DQN arguments and the closing
        # parenthesis are missing from this copy.
        agent = DQN(
            state=AgentState(name="my_agent_v1", cache=FileCache(storage=LocalStorage(base_path=os.path.join(self.temp_dir, 'agent')))),
            policy=EpsilonGreedy(epsilon_start=.5, epsilon_step=.05, epsilon_stop=.1),

        agentset = AgentSet(agent)

        # NOTE(review): in the next line the assignment target of
        # `= Application(config)` was lost and the statement was fused onto
        # the end of the comment, so the chained set_* calls that follow are
        # orphaned. The final link of the chain (after the last backslash) is
        # also missing.
        # wire all of the application components together = Application(config). \
                   set_datalog(BatchDataLog(storage=storage)). \
                   set_datamodeler(BatchDataModeler()). \
                   set_targeter(BatchTargeter(query="SELECT * FROM users")). \
                   set_actor(BatchActor(agentset=agentset, batch_size=32)). \


    def __exit__(self, *args):
        # NOTE(review): the body is missing from this copy. Per the class
        # docstring, all data is deleted on exit, so this presumably removes
        # self.temp_dir -- restore from the upstream source.

if __name__ == "__main__":
    # Run our application for 90 days and watch the agent learn which coupons to send!
    with ExampleRLApplication() as app:
        current_date = app.config.earliest_time

        while current_date <= current_date + datetime.timedelta(days=90):

   - datetime.timedelta(days=1)))

            current_date += datetime.timedelta(days=1)