想要源码可以点赞关注收藏后评论区留下QQ邮箱
本次利用gym搭建一个扫地机器人环境,描述如下:
在一个5×5的扫地机器人环境中,有一个垃圾和一个充电桩,到达[5,4]即图标19处机器人捡到垃圾,并结束游戏,同时获得+5的奖赏。左下角[1,1]处有一个充电桩,机器人到达充电桩可以充电且不再行走,获得+1的奖赏。环境中间[3,3]处有一个障碍物,机器人无法通过。
扫地机器人具体流程如下
1:每局游戏开始 机器人初始位置位于左上角 即[1,5]处
2:游戏进行过程中 机器人将在地图上不断进行探索
3:机器人遇到障碍物时无法通过 保持原地不动 获得-10的奖赏
4:地图上有两个终止状态,一个为捡到垃圾,获得+5的奖赏;另一个为到达充电桩进行充电,获得+1的奖赏
5:扫地机器人到达终止状态 即一个情节结束 机器人回到初始位置
代码运行效果如下
扫地机器人环境搭建
部分代码如下
- # 深度强化学习——原理、算法与PyTorch实战,代码名称:代02-搭建扫地机器人的Gym环境.py
-
- import gym
- from gym import spaces
- from gym.utils import seeding
- import sys
- from time import sleep
- import signal
-
-
class Grid(object):
    """A single cell of the grid world.

    Attributes:
        x, y: coordinates of the cell.
        grid_type: category value (0: empty; 1: obstacle or boundary).
        enter_reward: immediate reward granted when the cell is entered.
        name: label of the form ``"X{x}-Y{y}"``, computed once at
            construction from the initial coordinates.
    """

    def __init__(
            self,
            x: int = None,  # x coordinate
            y: int = None,  # y coordinate
            grid_type: int = 0,  # category value (0: empty; 1: obstacle or boundary)
            enter_reward: float = 0.0):  # immediate reward for entering this cell
        self.x = x
        self.y = y
        self.grid_type = grid_type
        self.enter_reward = enter_reward
        self.name = "X{0}-Y{1}".format(self.x, self.y)

    def __str__(self):
        """Return a readable one-line description of the cell."""
        # The literal braces must be doubled: with single braces str.format()
        # parses "{name:...}" as a keyword field called "name" and raises
        # KeyError because no such keyword argument is supplied.
        return "Grid: {{name:{3}, x:{0}, y:{1}, grid_type:{2}}}".format(
            self.x, self.y, self.grid_type, self.name)
-
-
class GridMatrix(object):
    """A rectangular grid of ``Grid`` cells kept in a flat, row-major list.

    The cell with coordinates ``(x, y)`` lives at list position
    ``y * n_width + x``.
    """

    def __init__(
            self,
            n_width: int,  # number of cells horizontally
            n_height: int,  # number of cells vertically
            default_type: int = 0,  # default cell type, 0 = empty
            default_reward: float = 0.0,  # default immediate reward
    ):
        self.n_height = n_height
        self.n_width = n_width
        self.default_reward = default_reward
        self.default_type = default_type
        self.grids = None  # list of Grid: the 2-D world flattened into 1-D
        self.len = n_width * n_height  # total number of cells
        self.reset()

    def reset(self):
        """Recreate every cell with the default type and reward."""
        # y is the outer loop and x the inner one so that the cell stored at
        # position y * n_width + x really carries coordinates (x, y); this is
        # the layout get_grid() assumes when it computes the index.
        self.grids = [
            Grid(x, y, self.default_type, self.default_reward)
            for y in range(self.n_height)
            for x in range(self.n_width)
        ]

    def get_grid(self, x, y=None):
        """Return the cell at the given coordinates.

        Args:
            x: either the integer x coordinate (then ``y`` must be given), or
                a tuple whose first two items are ``(x, y)``.
            y: the integer y coordinate when ``x`` is an int.

        Returns:
            The matching ``Grid`` object.

        Raises:
            AssertionError: if the coordinates fall outside the matrix.
        """
        xx, yy = None, None
        if isinstance(x, int):
            xx, yy = x, y
        elif isinstance(x, tuple):
            xx, yy = x[0], x[1]
        # Every coordinate must lie inside the matrix.
        assert (0 <= xx < self.n_width and 0 <= yy < self.n_height)
        index = yy * self.n_width + xx  # flatten the 2-D coordinate
        return self.grids[index]

    def set_reward(self, x, y, reward):
        """Set the immediate reward of the cell at ``(x, y)``."""
        grid = self.get_grid(x, y)
        if grid is None:
            # Defensive only: get_grid() as written never returns None.
            # A real exception is raised because raising a plain string is
            # itself a TypeError in Python 3.
            raise ValueError("grid doesn't exist")
        grid.enter_reward = reward

    def set_type(self, x, y, grid_type):
        """Set the category value of the cell at ``(x, y)``."""
        grid = self.get_grid(x, y)
        if grid is None:
            # Defensive only; see set_reward().
            raise ValueError("grid doesn't exist")
        grid.grid_type = grid_type

    def get_reward(self, x, y):
        """Return the immediate reward of the cell at ``(x, y)``."""
        grid = self.get_grid(x, y)
        if grid is None:
            return None
        return grid.enter_reward

    def get_type(self, x, y):
        """Return the category value of the cell at ``(x, y)``."""
        grid = self.get_grid(x, y)
        if grid is None:
            return None
        return grid.grid_type
-
-
class GridWorldEnv(gym.Env):
    """Gym environment for a rectangular grid world.

    The action space has four discrete actions and the observation space has
    one discrete value per cell. Terminal cells, the start cell, special cell
    types and special rewards are stored as coordinate tuples on the instance.
    """
    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30}

    def __init__(
            self,
            n_width: int = 5,  # width of the grid world, in cells
            n_height: int = 5,  # height of the grid world, in cells
            u_size=40,  # drawing size of a single cell
            default_reward: float = 0,
            default_type=0):
        # Plain configuration taken straight from the arguments.
        self.n_width, self.n_height = n_width, n_height
        self.default_reward, self.default_type = default_reward, default_type
        self.u_size = u_size

        # Overall scene dimensions derived from the per-cell drawing size.
        self.screen_width = u_size * n_width
        self.screen_height = u_size * n_height

        self.grids = GridMatrix(
            n_width=n_width,
            n_height=n_height,
            default_reward=default_reward,
            default_type=default_type,
        )

        # Last reward and action, kept for rendering.
        self.reward = 0
        self.action = None

        # Actions 0, 1, 2, 3 represent left, right, up, down.
        self.action_space = spaces.Discrete(4)
        # One discrete observation per cell of the grid.
        self.observation_space = spaces.Discrete(n_height * n_width)

        self.state = None  # current state of the grid world
        self.ends = [(0, 0), (4, 3)]  # terminal cell coordinates (may be several)
        self.start = (0, 4)  # start cell coordinates (exactly one)
        # NOTE(review): these look like (x, y, type) and (x, y, reward) triples
        # consumed by refresh_setting(), which is not visible here - confirm.
        self.types = [(2, 2, 1)]
        self.rewards = [(0, 0, 1), (4, 3, 5), (2, 2, -10)]

        # Order matters below: the layout is applied first, then the viewer
        # handle is cleared, then the RNG is seeded and the episode is reset.
        self.refresh_setting()
        self.viewer = None  # graphics interface object
        self.seed()  # create a random seed
        self.reset()
-