From f3667ab71161c7a3621705935309aecc05aad25b Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Thu, 26 Oct 2023 17:17:20 +0800 Subject: [PATCH 1/2] update test --- .gitignore | 1 - examples/nlp/nlp_ppo.yaml | 1 + openrl/envs/__init__.py | 3 - openrl/envs/nlp/daily_dialog_env.py | 2 +- openrl/envs/nlp/rewards/intent.py | 38 ++++- openrl/envs/nlp/rewards/kl_penalty.py | 11 +- openrl/envs/nlp/rewards/meteor.py | 16 +- openrl/envs/toy_envs/__init__.py | 20 +-- openrl/envs/toy_envs/identity_env.py | 111 ------------- openrl/envs/toy_envs/multi_input_envs.py | 185 ---------------------- openrl/envs/vec_env/async_venv.py | 1 - openrl/rewards/nlp_reward.py | 7 +- openrl/utils/logger.py | 6 +- setup.py | 9 +- tests/test_dataset/test_expert_dataset.py | 89 +++++++++++ tests/test_rewards/test_nlp_reward.py | 73 +++++++++ 16 files changed, 235 insertions(+), 338 deletions(-) delete mode 100644 openrl/envs/toy_envs/multi_input_envs.py create mode 100644 tests/test_dataset/test_expert_dataset.py create mode 100644 tests/test_rewards/test_nlp_reward.py diff --git a/.gitignore b/.gitignore index c92a6657..80ced1f6 100644 --- a/.gitignore +++ b/.gitignore @@ -153,7 +153,6 @@ run_results/ api_docs .vscode *.pkl -api_docs *.json opponent_pool !/examples/selfplay/opponent_templates/tictactoe_opponent/info.json diff --git a/examples/nlp/nlp_ppo.yaml b/examples/nlp/nlp_ppo.yaml index 0b4e0f56..1ea1cc5b 100644 --- a/examples/nlp/nlp_ppo.yaml +++ b/examples/nlp/nlp_ppo.yaml @@ -28,5 +28,6 @@ reward_class: args: { "intent_model": "rajkumarrrk/roberta-daily-dialog-intent-classifier", "ref_model": "rajkumarrrk/gpt2-fine-tuned-on-daily-dialog", + "use_deepspeed": true, } \ No newline at end of file diff --git a/openrl/envs/__init__.py b/openrl/envs/__init__.py index a2eb835f..d12c493a 100644 --- a/openrl/envs/__init__.py +++ b/openrl/envs/__init__.py @@ -16,12 +16,9 @@ toy_all_envs = [ "BitFlippingEnv", - "FakeImageEnv", "IdentityEnv", "IdentityEnvcontinuous", "IdentityEnvBox", - "IdentityEnvMultiBinary", - "IdentityEnvMultiDiscrete", "SimpleMultiObsEnv", "SimpleMultiObsEnv", ] diff --git a/openrl/envs/nlp/daily_dialog_env.py b/openrl/envs/nlp/daily_dialog_env.py index 4cb49df1..61e68946 100644 --- a/openrl/envs/nlp/daily_dialog_env.py +++ b/openrl/envs/nlp/daily_dialog_env.py @@ -43,7 +43,7 @@ def __init__( self.env_name = "daily_dialog" tokenizer_name = cfg.env.args["tokenizer_path"] if tokenizer_name == "builtin_BPE": - from tokenizers import AddedToken, Tokenizer, models + from tokenizers import Tokenizer, models self.tokenizer = Tokenizer(models.BPE()) diff --git a/openrl/envs/nlp/rewards/intent.py b/openrl/envs/nlp/rewards/intent.py index f2f9bf11..812cc5f4 100644 --- a/openrl/envs/nlp/rewards/intent.py +++ b/openrl/envs/nlp/rewards/intent.py @@ -25,15 +25,42 @@ def get_eval_ds_config(offload, stage=0): class Intent: - def __init__(self, intent_model: str, intent_coeff: float = 1.0) -> None: + def __init__( + self, intent_model: str, intent_coeff: float = 1.0, use_deepspeed: bool = True + ) -> None: super().__init__() self._intent_coeff = intent_coeff - self.use_deepspeed = True # TODO + self.use_deepspeed = use_deepspeed + if intent_model == "builtin_intent": + from transformers import GPT2Config, GPT2LMHeadModel + + class TestTokenizer: + def __call__( + self, + input_texts, + return_tensors="pt", + truncation=True, + padding=True, + max_length=None, + ): + class EncodedOutput: + def __init__(self, input_ids, attention_mask): + self.input_ids = input_ids + self.attention_mask = attention_mask + + input_ids = torch.zeros((32), dtype=torch.long) + attention_masks = torch.zeros((32), dtype=torch.long) + return EncodedOutput(input_ids, attention_masks) + + self._tokenizer = TestTokenizer() + config = GPT2Config() + self._model = GPT2LMHeadModel(config) - model_path = data_abs_path(intent_model) - self._tokenizer = AutoTokenizer.from_pretrained(intent_model) - self._model = AutoModelForSequenceClassification.from_pretrained(model_path) + else: + model_path = data_abs_path(intent_model) + self._tokenizer = AutoTokenizer.from_pretrained(intent_model) + self._model = AutoModelForSequenceClassification.from_pretrained(model_path) if self.use_deepspeed: import deepspeed @@ -87,6 +114,7 @@ def get_input_for_classifier(prompt, generated_text): input_ids=encoded.input_ids.to(self._device), attention_mask=encoded.attention_mask.to(self._device), ) + pred_labels = torch.argmax(outputs.logits, dim=1).tolist() score = (np.array(pred_labels) == np.array(target_intents)) * 1.0 diff --git a/openrl/envs/nlp/rewards/kl_penalty.py b/openrl/envs/nlp/rewards/kl_penalty.py index ea109c45..643d263d 100644 --- a/openrl/envs/nlp/rewards/kl_penalty.py +++ b/openrl/envs/nlp/rewards/kl_penalty.py @@ -31,14 +31,21 @@ def __init__( action_space: gym.Space, ref_model: str, apply_model_parallel: bool = True, + use_deepspeed: bool = True, ): super().__init__() - self.use_deepspeed = True + self.use_deepspeed = use_deepspeed self.use_fp16 = True # reference model self._apply_model_parallel = apply_model_parallel - self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model) + if ref_model == "builtin_ref": + from transformers import GPT2Config, GPT2LMHeadModel + + config = GPT2Config() + self._ref_net = GPT2LMHeadModel(config) + else: + self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model) self._ref_net = self._ref_net.eval() if self.use_deepspeed: import deepspeed diff --git a/openrl/envs/nlp/rewards/meteor.py b/openrl/envs/nlp/rewards/meteor.py index c9acd16f..5bd169ad 100644 --- a/openrl/envs/nlp/rewards/meteor.py +++ b/openrl/envs/nlp/rewards/meteor.py @@ -6,13 +6,21 @@ import openrl.envs.nlp as nlp +class VirtualMetric: + def compute(self, predictions: Any, references: Any) -> Dict[str, float]: + return {"meteor": 0.0} + + class Meteor: - def __init__(self, meteor_coeff: int) -> None: + def __init__(self, meteor_coeff: int, test: bool = False) -> None: super().__init__() self._meteor_coeff = meteor_coeff - self._metric = evaluate.load( - str(Path(nlp.__file__).parent / "utils/metrics/meteor.py") - ) + if test: + self._metric = VirtualMetric() + else: + self._metric = evaluate.load( + str(Path(nlp.__file__).parent / "utils/metrics/meteor.py") + ) def __call__( self, diff --git a/openrl/envs/toy_envs/__init__.py b/openrl/envs/toy_envs/__init__.py index 4e6588ef..cf785cc5 100644 --- a/openrl/envs/toy_envs/__init__.py +++ b/openrl/envs/toy_envs/__init__.py @@ -18,25 +18,12 @@ from typing import Any from openrl.envs.toy_envs.bit_flipping_env import BitFlippingEnv -from openrl.envs.toy_envs.identity_env import ( - FakeImageEnv, - IdentityEnv, - IdentityEnvBox, - IdentityEnvcontinuous, - IdentityEnvMultiBinary, - IdentityEnvMultiDiscrete, -) -from openrl.envs.toy_envs.multi_input_envs import SimpleMultiObsEnv +from openrl.envs.toy_envs.identity_env import IdentityEnv, IdentityEnvcontinuous __all__ = [ "BitFlippingEnv", - "FakeImageEnv", "IdentityEnv", "IdentityEnvcontinuous", - "IdentityEnvBox", - "IdentityEnvMultiBinary", - "IdentityEnvMultiDiscrete", - "SimpleMultiObsEnv", ] @@ -49,13 +36,8 @@ env_dict = { "BitFlippingEnv": BitFlippingEnv, - "FakeImageEnv": FakeImageEnv, "IdentityEnv": IdentityEnv, "IdentityEnvcontinuous": IdentityEnvcontinuous, - "IdentityEnvBox": IdentityEnvBox, - "IdentityEnvMultiBinary": IdentityEnvMultiBinary, - "IdentityEnvMultiDiscrete": IdentityEnvMultiDiscrete, - "SimpleMultiObsEnv": SimpleMultiObsEnv, } diff --git a/openrl/envs/toy_envs/identity_env.py b/openrl/envs/toy_envs/identity_env.py index c3d4caa2..c3867756 100644 --- a/openrl/envs/toy_envs/identity_env.py +++ b/openrl/envs/toy_envs/identity_env.py @@ -157,114 +157,3 @@ def _get_reward(self, action: T) -> float: def render(self, mode: str = "human") -> None: pass - - -# Not Work Yet -class IdentityEnvBox(IdentityEnv[np.ndarray]): - def __init__( - self, - low: float = -1.0, - high: float = 1.0, - eps: float = 0.05, - ep_length: int = 100, - ): - """ - Identity environment for testing purposes - - :param low: the lower bound of the box dim - :param high: the upper bound of the box dim - :param eps: the epsilon bound for correct value - :param ep_length: the length of each episode in timesteps - """ - space = spaces.Box(low=low, high=high, shape=(1,), dtype=np.float32) - super().__init__(ep_length=ep_length, space=space) - self.eps = eps - - def step( - self, action: np.ndarray - ) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]: - reward = self._get_reward(action) - self._choose_next_state() - self.current_step += 1 - done = self.current_step >= self.ep_length - return self.state, reward, done, {} - - def _get_reward(self, action: np.ndarray) -> float: - return ( - 1.0 if (self.state - self.eps) <= action <= (self.state + self.eps) else 0.0 - ) - - -# Not Work Yet -class IdentityEnvMultiDiscrete(IdentityEnv[np.ndarray]): - def __init__(self, dim: int = 1, ep_length: int = 100) -> None: - """ - Identity environment for testing purposes - - :param dim: the size of the dimensions you want to learn - :param ep_length: the length of each episode in timesteps - """ - space = spaces.MultiDiscrete([dim, dim]) - super().__init__(ep_length=ep_length, space=space) - - -# Not Work Yet -class IdentityEnvMultiBinary(IdentityEnv[np.ndarray]): - def __init__(self, dim: int = 1, ep_length: int = 100) -> None: - """ - Identity environment for testing purposes - - :param dim: the size of the dimensions you want to learn - :param ep_length: the length of each episode in timesteps - """ - space = spaces.MultiBinary(dim) - super().__init__(ep_length=ep_length, space=space) - - -# Not Work Yet -class FakeImageEnv(gym.Env): - """ - Fake image environment for testing purposes, it mimics Atari games. - - :param action_dim: Number of discrete actions - :param screen_height: Height of the image - :param screen_width: Width of the image - :param n_channels: Number of color channels - :param discrete: Create discrete action space instead of continuous - :param channel_first: Put channels on first axis instead of last - """ - - def __init__( - self, - action_dim: int = 6, - screen_height: int = 84, - screen_width: int = 84, - n_channels: int = 1, - discrete: bool = True, - channel_first: bool = False, - ) -> None: - self.observation_shape = (screen_height, screen_width, n_channels) - if channel_first: - self.observation_shape = (n_channels, screen_height, screen_width) - self.observation_space = spaces.Box( - low=0, high=255, shape=self.observation_shape, dtype=np.uint8 - ) - if discrete: - self.action_space = spaces.Discrete(action_dim) - else: - self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32) - self.ep_length = 10 - self.current_step = 0 - - def reset(self) -> np.ndarray: - self.current_step = 0 - return self.observation_space.sample() - - def step(self, action: Union[np.ndarray, int]): - reward = 0.0 - self.current_step += 1 - done = self.current_step >= self.ep_length - return self.observation_space.sample(), reward, done, {} - - def render(self, mode: str = "human") -> None: - pass diff --git a/openrl/envs/toy_envs/multi_input_envs.py b/openrl/envs/toy_envs/multi_input_envs.py deleted file mode 100644 index eccb1f6f..00000000 --- a/openrl/envs/toy_envs/multi_input_envs.py +++ /dev/null @@ -1,185 +0,0 @@ -from typing import Dict, Union - -import gymnasium as gym -import numpy as np -from gymnasium import spaces - - -# Not Work Yet -class SimpleMultiObsEnv(gym.Env): - """ - Base class for GridWorld-based MultiObs Environments 4x4 grid world. - - .. code-block:: text - - ____________ - | 0 1 2 3| - | 4|¯5¯¯6¯| 7| - | 8|_9_10_|11| - |12 13 14 15| - ¯¯¯¯¯¯¯¯¯¯¯¯¯¯ - - start is 0 - states 5, 6, 9, and 10 are blocked - goal is 15 - actions are = [left, down, right, up] - - simple linear state env of 15 states but encoded with a vector and an image observation: - each column is represented by a random vector and each row is - represented by a random image, both sampled once at creation time. - - :param num_col: Number of columns in the grid - :param num_row: Number of rows in the grid - :param random_start: If true, agent starts in random position - :param channel_last: If true, the image will be channel last, else it will be channel first - """ - - def __init__( - self, - num_col: int = 4, - num_row: int = 4, - random_start: bool = True, - discrete_actions: bool = True, - channel_last: bool = True, - ): - super().__init__() - - self.vector_size = 5 - if channel_last: - self.img_size = [64, 64, 1] - else: - self.img_size = [1, 64, 64] - - self.random_start = random_start - self.discrete_actions = discrete_actions - if discrete_actions: - self.action_space = spaces.Discrete(4) - else: - self.action_space = spaces.Box(0, 1, (4,)) - - self.observation_space = spaces.Dict( - spaces={ - "vec": spaces.Box(0, 1, (self.vector_size,), dtype=np.float64), - "img": spaces.Box(0, 255, self.img_size, dtype=np.uint8), - } - ) - self.count = 0 - # Timeout - self.max_count = 100 - self.log = "" - self.state = 0 - self.action2str = ["left", "down", "right", "up"] - self.init_possible_transitions() - - self.num_col = num_col - self.state_mapping = [] - self.init_state_mapping(num_col, num_row) - - self.max_state = len(self.state_mapping) - 1 - - def init_state_mapping(self, num_col: int, num_row: int) -> None: - """ - Initializes the state_mapping array which holds the observation values for each state - - :param num_col: Number of columns. - :param num_row: Number of rows. - """ - # Each column is represented by a random vector - col_vecs = np.random.random((num_col, self.vector_size)) - # Each row is represented by a random image - row_imgs = np.random.randint(0, 255, (num_row, 64, 64), dtype=np.uint8) - - for i in range(num_col): - for j in range(num_row): - self.state_mapping.append( - {"vec": col_vecs[i], "img": row_imgs[j].reshape(self.img_size)} - ) - - def get_state_mapping(self) -> Dict[str, np.ndarray]: - """ - Uses the state to get the observation mapping. - - :return: observation dict {'vec': ..., 'img': ...} - """ - return self.state_mapping[self.state] - - def init_possible_transitions(self) -> None: - """ - Initializes the transitions of the environment - The environment exploits the cardinal directions of the grid by noting that - they correspond to simple addition and subtraction from the cell id within the grid - - - up => means moving up a row => means subtracting the length of a column - - down => means moving down a row => means adding the length of a column - - left => means moving left by one => means subtracting 1 - - right => means moving right by one => means adding 1 - - Thus one only needs to specify in which states each action is possible - in order to define the transitions of the environment - """ - self.left_possible = [1, 2, 3, 13, 14, 15] - self.down_possible = [0, 4, 8, 3, 7, 11] - self.right_possible = [0, 1, 2, 12, 13, 14] - self.up_possible = [4, 8, 12, 7, 11, 15] - - def step(self, action: Union[float, np.ndarray]): - """ - Run one timestep of the environment's dynamics. When end of - episode is reached, you are responsible for calling `reset()` - to reset this environment's state. - Accepts an action and returns a tuple (observation, reward, done, info). - - :param action: - :return: tuple (observation, reward, done, info). - """ - if not self.discrete_actions: - action = np.argmax(action) - else: - action = int(action) - - self.count += 1 - - prev_state = self.state - - reward = -0.1 - # define state transition - if self.state in self.left_possible and action == 0: # left - self.state -= 1 - elif self.state in self.down_possible and action == 1: # down - self.state += self.num_col - elif self.state in self.right_possible and action == 2: # right - self.state += 1 - elif self.state in self.up_possible and action == 3: # up - self.state -= self.num_col - - got_to_end = self.state == self.max_state - reward = 1 if got_to_end else reward - done = self.count > self.max_count or got_to_end - - self.log = ( - f"Went {self.action2str[action]} in state {prev_state}, got to state" - f" {self.state}" - ) - - return self.get_state_mapping(), reward, done, {"got_to_end": got_to_end} - - def render(self, mode: str = "human") -> None: - """ - Prints the log of the environment. - - :param mode: - """ - print(self.log) - - def reset(self) -> Dict[str, np.ndarray]: - """ - Resets the environment state and step count and returns reset observation. - - :return: observation dict {'vec': ..., 'img': ...} - """ - self.count = 0 - if not self.random_start: - self.state = 0 - else: - self.state = np.random.randint(0, self.max_state) - return self.state_mapping[self.state] diff --git a/openrl/envs/vec_env/async_venv.py b/openrl/envs/vec_env/async_venv.py index dd654599..141532ba 100644 --- a/openrl/envs/vec_env/async_venv.py +++ b/openrl/envs/vec_env/async_venv.py @@ -751,7 +751,6 @@ def prepare_obs(observation): try: while True: command, data = pipe.recv() - print(command) if command == "reset": result = env.reset(**data) diff --git a/openrl/rewards/nlp_reward.py b/openrl/rewards/nlp_reward.py index c653c7c8..467f2a16 100644 --- a/openrl/rewards/nlp_reward.py +++ b/openrl/rewards/nlp_reward.py @@ -10,12 +10,15 @@ class NLPReward(BaseReward): - def __init__(self, env: Env, ref_model: str, intent_model: str): + def __init__( + self, env: Env, ref_model: str, intent_model: str, use_deepspeed: bool = True + ): self.rew_infos = [] self.env_infos = [] meteor_config = { "meteor_coeff": 0.5, + "test": ref_model == "builtin_ref", } self.inner_rew_funcs = { "meteor": Meteor(**meteor_config), @@ -24,6 +27,7 @@ def __init__(self, env: Env, ref_model: str, intent_model: str): kl_config = { "action_space": env.action_space, "ref_model": ref_model, + "use_deepspeed": use_deepspeed, } self.step_rew_funcs = { "kl_pen": KLPenalty(**kl_config), @@ -32,6 +36,7 @@ def __init__(self, env: Env, ref_model: str, intent_model: str): intent_config = { "intent_model": intent_model, "intent_coeff": 0.5, + "use_deepspeed": use_deepspeed, } self.batch_rew_funcs = { "intent_acc": Intent(**intent_config), diff --git a/openrl/utils/logger.py b/openrl/utils/logger.py index 0f2f0e2e..3fe61b53 100644 --- a/openrl/utils/logger.py +++ b/openrl/utils/logger.py @@ -32,9 +32,9 @@ class Logger: def __init__( self, cfg, - project_name: str, - scenario_name: str, - wandb_entity: str, + project_name: str = "openrl", + scenario_name: str = "openrl", + wandb_entity: str = "openrl", exp_name: Optional[str] = None, log_path: Optional[str] = None, use_wandb: bool = False, diff --git a/setup.py b/setup.py index 172343bf..89c839a5 100644 --- a/setup.py +++ b/setup.py @@ -60,16 +60,20 @@ def get_extra_requires() -> dict: "mpe": ["pyglet==1.5.27"], "nlp": [ "transformers==4.18.0", - "datasets", + "datasets==2.13", "nltk", "evaluate", "icetk", ], "nlp_test": [ "transformers", - "datasets", + "datasets==2.13", "evaluate", ], + "deep_speed_test": [ + "deepspeed", + "mpi4py", + ], "selfplay": ["ray[default]", "ray[serve]", "pettingzoo[classic]", "trueskill"], "selfplay_test": ["pettingzoo[mpe]", "pettingzoo[butterfly]"], "retro": ["gym-retro"], @@ -80,6 +84,7 @@ def get_extra_requires() -> dict: req["test"].extend(req["selfplay_test"]) req["test"].extend(req["atari"]) req["test"].extend(req["nlp_test"]) + req["test"].extend(req["deep_speed_test"]) return req diff --git a/tests/test_dataset/test_expert_dataset.py b/tests/test_dataset/test_expert_dataset.py new file mode 100644 index 00000000..1eed9125 --- /dev/null +++ b/tests/test_dataset/test_expert_dataset.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" +import os +import sys + +import pytest +import torch + +from openrl.datasets.expert_dataset import ExpertDataset +from openrl.envs.common import make +from openrl.envs.vec_env.wrappers.gen_data import GenDataWrapper +from openrl.envs.wrappers.monitor import Monitor + +env_wrappers = [ + Monitor, +] + + +def gen_data(total_episode, data_save_path): + # begin to test + # Create an environment for testing and set the number of environments to interact with to 9. Set rendering mode to group_human. + + env = make( + "IdentityEnv", + env_num=1, + asynchronous=True, + env_wrappers=env_wrappers, + ) + + env = GenDataWrapper( + env, data_save_path=data_save_path, total_episode=total_episode + ) + env.reset() + done = False + ep_length = 0 + while not done: + obs, r, done, info = env.step(env.random_action()) + ep_length += 1 + env.close() + return ep_length + + +@pytest.mark.unittest +def test_expert_dataset(tmp_path): + total_episode = 1 + data_save_path = tmp_path / "data.pkl" + ep_length = gen_data(total_episode, data_save_path) + + dataset = ExpertDataset( + data_save_path, + num_trajectories=None, + subsample_frequency=1, + seed=None, + env_id=0, + env_num=1, + ) + assert len(dataset) == ep_length, "len(dataset)={},data_length={}".format( + len(dataset), ep_length + ) + assert len(dataset[0]) == 2, "len(dataset[0])={}".format(len(dataset[0])) + + data_loader = torch.utils.data.DataLoader( + dataset=dataset, batch_size=1, shuffle=False, drop_last=True + ) + + step = 0 + for batch_data in data_loader: + assert len(batch_data) == 2, "len(batch_data)={}".format(len(batch_data)) + step += 1 + assert step == ep_length, "step={},ep_length={}".format(step, ep_length) + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) diff --git a/tests/test_rewards/test_nlp_reward.py b/tests/test_rewards/test_nlp_reward.py new file mode 100644 index 00000000..739943ef --- /dev/null +++ b/tests/test_rewards/test_nlp_reward.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" +import os +import sys + +import numpy as np +import pytest + +from openrl.buffers.normal_buffer import NormalReplayBuffer +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.rewards import RewardFactory + + +@pytest.fixture( + scope="module", + params=[ + "--reward_class.id NLPReward --reward_class.args" + " {'intent_model':'builtin_intent','ref_model':'builtin_ref','use_deepspeed':False}" + ], +) +def config(request): + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(request.param.split()) + return cfg + + +@pytest.mark.unittest +def test_nlp_reward(config): + env = make("fake_dialog_data", env_num=1) + reward = RewardFactory.get_reward_class(config.reward_class, env) + data = {} + data["rewards"] = np.zeros(32) + env_info = {} + env_info["final_info"] = { + "prompt_texts": "hello", + "generated_texts": "hello", + "meta_infos": {"intent": [1]}, + } + data["infos"] = [env_info] * 32 + data["step"] = 0 + data["actions"] = [0] + data["action_log_probs"] = np.zeros(32) + buffer = NormalReplayBuffer( + config, + num_agents=env.agent_num, + obs_space=env.observation_space, + act_space=env.action_space, + data_client=None, + episode_length=1, + ) + data["buffer"] = buffer + reward.step_reward(data=data) + reward.batch_rewards(buffer=buffer) + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) From 0707ba6a5aeb4650e7f445e2a8e9b063b54b436d Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Thu, 26 Oct 2023 17:25:31 +0800 Subject: [PATCH 2/2] update test --- setup.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/setup.py b/setup.py index 89c839a5..84da342e 100644 --- a/setup.py +++ b/setup.py @@ -70,10 +70,6 @@ def get_extra_requires() -> dict: "datasets==2.13", "evaluate", ], - "deep_speed_test": [ - "deepspeed", - "mpi4py", - ], "selfplay": ["ray[default]", "ray[serve]", "pettingzoo[classic]", "trueskill"], "selfplay_test": ["pettingzoo[mpe]", "pettingzoo[butterfly]"], "retro": ["gym-retro"], @@ -84,7 +80,6 @@ def get_extra_requires() -> dict: req["test"].extend(req["selfplay_test"]) req["test"].extend(req["atari"]) req["test"].extend(req["nlp_test"]) - req["test"].extend(req["deep_speed_test"]) return req