add updated Laikago envs_v2, with PMTG and MPC examples.

python -m pybullet_envs.minitaur.envs_v2.locomotion_gym_env_test python -m pybullet_envs.minitaur.envs_v2.examples.laikago_mpc_wrapper_example python -m pybullet_envs.minitaur.envs_v2.examples.laikago_static_gait_example python -m pybullet_envs.minitaur.agents.baseline_controller.locomotion_controller_example PMTG example, not well tuned: python -m pybullet_envs.minitaur.envs_v2.examples.laikago_pmtg_example
2026-06-08 08:13:55 +00:00 · 2021-04-05 11:35:19 -07:00
parent d1c4c41b9a
commit 4d34ba7310
170 changed files with 27618 additions and 2 deletions
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_gym_config.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_gym_config.gin
@@ -0,0 +1,80 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.sensors.robot_sensors
+
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+NUM_MOTORS = 12
+NOISY_READING = True
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+robot_sensors.IMUSensor.channels = ["R", "P", "dR", "dP"]
+robot_sensors.IMUSensor.noisy_reading = %NOISY_READING
+robot_sensors.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+robot_sensors.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+robot_sensors.MotorAngleSensor.num_motors = %NUM_MOTORS
+robot_sensors.MotorAngleSensor.noisy_reading = %NOISY_READING
+robot_sensors.MotorAngleSensor.lower_bound = -6.28318548203
+robot_sensors.MotorAngleSensor.upper_bound = 6.28318548203
+
+sensors = [@robot_sensors.IMUSensor(), @robot_sensors.MotorAngleSensor()]
+
+Act0/locomotion_gym_config.ScalarField.name = "motor_angle_0"
+Act0/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act0/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act1/locomotion_gym_config.ScalarField.name = "motor_angle_1"
+Act1/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act1/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act2/locomotion_gym_config.ScalarField.name = "motor_angle_2"
+Act2/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act2/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act3/locomotion_gym_config.ScalarField.name = "motor_angle_3"
+Act3/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act3/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act4/locomotion_gym_config.ScalarField.name = "motor_angle_4"
+Act4/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act4/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act5/locomotion_gym_config.ScalarField.name = "motor_angle_5"
+Act5/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act5/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act6/locomotion_gym_config.ScalarField.name = "motor_angle_6"
+Act6/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act6/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act7/locomotion_gym_config.ScalarField.name = "motor_angle_7"
+Act7/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act7/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act8/locomotion_gym_config.ScalarField.name = "motor_angle_8"
+Act8/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act8/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act9/locomotion_gym_config.ScalarField.name = "motor_angle_9"
+Act9/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act9/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act10/locomotion_gym_config.ScalarField.name = "motor_angle_10"
+Act10/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act10/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act11/locomotion_gym_config.ScalarField.name = "motor_angle_11"
+Act11/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act11/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+
+
+locomotion_gym_config.LocomotionGymConfig.actions = [
+    @Act0/locomotion_gym_config.ScalarField(),
+    @Act1/locomotion_gym_config.ScalarField(),
+    @Act2/locomotion_gym_config.ScalarField(),
+    @Act3/locomotion_gym_config.ScalarField(),
+    @Act4/locomotion_gym_config.ScalarField(),
+    @Act5/locomotion_gym_config.ScalarField(),
+    @Act6/locomotion_gym_config.ScalarField(),
+    @Act7/locomotion_gym_config.ScalarField(),
+    @Act8/locomotion_gym_config.ScalarField(),
+    @Act9/locomotion_gym_config.ScalarField(),
+    @Act10/locomotion_gym_config.ScalarField(),
+    @Act11/locomotion_gym_config.ScalarField()]
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_gym_env.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_gym_env.gin
@@ -0,0 +1,116 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.robots.laikago
+
+URDF_ROOT = "urdf/"
+ABDUCTION_P_GAIN = 220.0
+ABDUCTION_D_GAIN = 0.3
+HIP_P_GAIN = 220.0
+HIP_D_GAIN = 2.0
+KNEE_P_GAIN = 220.0
+KNEE_D_GAIN = 2.0
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.sensors.robot_sensors
+
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+NUM_MOTORS = 12
+NOISY_READING = True
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+robot_sensors.IMUSensor.channels = ["R", "P", "dR", "dP"]
+robot_sensors.IMUSensor.noisy_reading = %NOISY_READING
+robot_sensors.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+robot_sensors.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+robot_sensors.MotorAngleSensor.num_motors = %NUM_MOTORS
+robot_sensors.MotorAngleSensor.noisy_reading = %NOISY_READING
+robot_sensors.MotorAngleSensor.lower_bound = -6.28318548203
+robot_sensors.MotorAngleSensor.upper_bound = 6.28318548203
+
+sensors = [@robot_sensors.IMUSensor(), @robot_sensors.MotorAngleSensor()]
+
+Act0/locomotion_gym_config.ScalarField.name = "motor_angle_0"
+Act0/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act0/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act1/locomotion_gym_config.ScalarField.name = "motor_angle_1"
+Act1/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act1/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act2/locomotion_gym_config.ScalarField.name = "motor_angle_2"
+Act2/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act2/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act3/locomotion_gym_config.ScalarField.name = "motor_angle_3"
+Act3/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act3/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act4/locomotion_gym_config.ScalarField.name = "motor_angle_4"
+Act4/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act4/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act5/locomotion_gym_config.ScalarField.name = "motor_angle_5"
+Act5/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act5/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act6/locomotion_gym_config.ScalarField.name = "motor_angle_6"
+Act6/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act6/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act7/locomotion_gym_config.ScalarField.name = "motor_angle_7"
+Act7/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act7/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act8/locomotion_gym_config.ScalarField.name = "motor_angle_8"
+Act8/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act8/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act9/locomotion_gym_config.ScalarField.name = "motor_angle_9"
+Act9/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act9/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act10/locomotion_gym_config.ScalarField.name = "motor_angle_10"
+Act10/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act10/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act11/locomotion_gym_config.ScalarField.name = "motor_angle_11"
+Act11/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act11/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+
+
+locomotion_gym_config.LocomotionGymConfig.actions = [
+    @Act0/locomotion_gym_config.ScalarField(),
+    @Act1/locomotion_gym_config.ScalarField(),
+    @Act2/locomotion_gym_config.ScalarField(),
+    @Act3/locomotion_gym_config.ScalarField(),
+    @Act4/locomotion_gym_config.ScalarField(),
+    @Act5/locomotion_gym_config.ScalarField(),
+    @Act6/locomotion_gym_config.ScalarField(),
+    @Act7/locomotion_gym_config.ScalarField(),
+    @Act8/locomotion_gym_config.ScalarField(),
+    @Act9/locomotion_gym_config.ScalarField(),
+    @Act10/locomotion_gym_config.ScalarField(),
+    @Act11/locomotion_gym_config.ScalarField()]
+
+
+
+laikago.Laikago.urdf_root = %URDF_ROOT
+laikago.Laikago.time_step = %SIM_TIME_STEP
+laikago.Laikago.action_repeat = %NUM_ACTION_REPEAT
+laikago.Laikago.self_collision_enabled = False
+laikago.Laikago.control_latency = 0.002
+laikago.Laikago.pd_latency = 0.0
+laikago.Laikago.motor_kp = [%ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN]
+laikago.Laikago.motor_kd = [%ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN]
+laikago.Laikago.sensors = %sensors
+
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago.Laikago
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_mpc_example_flat.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_mpc_example_flat.gin
@@ -0,0 +1,149 @@
+#-*-Python-*-
+
+# NOTE: Should be run with >=10CPU for decent performance.
+
+import pybullet_envs.minitaur.agents.baseline_controller.torque_stance_leg_controller
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.mpc_locomotion_wrapper
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.sensors.camera_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.last_action_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.toe_position_sensor
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.envs_v2.tasks.terminal_conditions
+import pybullet_envs.minitaur.envs_v2.utilities.noise_generators
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+
+# Configure the dynamic robot
+
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 4 # Control frequency will be 100 Hz
+
+
+########################################
+# Configure the sensors
+########################################
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203, -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203, 6283.18554688, 6283.18554688]
+
+# Add noise to the IMU sensor and toe position sensor
+IMUNoise/noise_generators.NormalNoise.scale = (0.025, 0.025, 0.1, 0.1)
+TOENoise/noise_generators.NormalNoise.scale = (0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005)
+imu_sensor.IMUSensor.noise_generator = @IMUNoise/noise_generators.NormalNoise()
+toe_position_sensor.ToePositionSensor.noise_generator = @TOENoise/noise_generators.NormalNoise()
+
+frontCamera/camera_sensor.CameraSensor.camera_translation_from_base = (0.197, 0.0, -0.115)
+frontCamera/camera_sensor.CameraSensor.camera_rotation_from_base = (-0.4996018, 0.4999998, 0.4999998, 0.5003982)
+frontCamera/camera_sensor.CameraSensor.parent_link_id = -1
+frontCamera/camera_sensor.CameraSensor.resolution = (32, 24)
+frontCamera/camera_sensor.CameraSensor.sensor_latency = 0.03
+frontCamera/camera_sensor.CameraSensor.name = "frontCam"
+frontCamera/camera_sensor.CameraSensor.fov_degree = 75
+frontCamera/camera_sensor.CameraSensor.camera_mode = %sim_camera.CameraMode.DEPTH
+frontCamera/camera_sensor.CameraSensor.camera_update_frequency_hz = 30.0
+
+rearCamera/camera_sensor.CameraSensor.camera_translation_from_base = (-0.092, 0.0, -0.105)
+rearCamera/camera_sensor.CameraSensor.camera_rotation_from_base = (-0.4996018, 0.4999998, 0.4999998, 0.5003982)
+rearCamera/camera_sensor.CameraSensor.parent_link_id = -1
+rearCamera/camera_sensor.CameraSensor.resolution = (32, 24)
+rearCamera/camera_sensor.CameraSensor.sensor_latency = 0.03
+rearCamera/camera_sensor.CameraSensor.name = "rearCam"
+rearCamera/camera_sensor.CameraSensor.fov_degree = 75
+rearCamera/camera_sensor.CameraSensor.camera_mode = %sim_camera.CameraMode.DEPTH
+rearCamera/camera_sensor.CameraSensor.camera_update_frequency_hz = 30.0
+
+sensors = [@imu_sensor.IMUSensor(), @last_action_sensor.LastActionSensor(), @toe_position_sensor.ToePositionSensor(), @frontCamera/camera_sensor.CameraSensor(), @rearCamera/camera_sensor.CameraSensor()]
+laikago_v2.Laikago.sensors = %sensors
+
+
+########################################
+# Specify the motor model and its parameters
+########################################
+LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS = 6.28318548203
+LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS = -6.28318548203
+laikago/robot_config.MotorLimits.angle_lower_limits = %LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS
+laikago/robot_config.MotorLimits.angle_upper_limits = %LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS
+laikago/robot_config.MotorLimits.torque_lower_limits = -30
+laikago/robot_config.MotorLimits.torque_upper_limits = 30
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.HYBRID
+laikago_v2.Laikago.motor_model_class = @hybrid_motor_model.HybridMotorModel
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+hybrid_motor_model.HybridMotorModel.kp = 250
+hybrid_motor_model.HybridMotorModel.kd = (0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0)
+
+
+########################################
+# Setup the terrain randomization and simulation parameters
+########################################
+
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+
+
+########################################
+# Setup the task and terminal condition parameters
+########################################
+terminal_conditions.maxstep_terminal_condition.max_step = 2000
+terminal_conditions.default_terminal_condition_for_laikago_v2.max_roll = 0.25
+terminal_conditions.default_terminal_condition_for_laikago_v2.max_pitch = 1.0
+terminal_conditions.default_terminal_condition_for_laikago_v2.min_height = 0.15
+terminal_conditions.default_terminal_condition_for_laikago_v2.enforce_foot_contacts = True
+
+# Setup the terminal condition
+terminal_conditions.logical_any_terminal_condition.conditions = [
+    @terminal_conditions.default_terminal_condition_for_laikago_v2,
+    @terminal_conditions.maxstep_terminal_condition,
+    ]
+
+simple_locomotion_task.SimpleForwardTask.terminal_condition = @terminal_conditions.logical_any_terminal_condition
+
+env_loader.load.wrapper_classes = [
+    @mpc_locomotion_wrapper.MPCLocomotionWrapper,
+]
+
+########################################
+# Configure the MPC-related parameters
+########################################
+torque_stance_leg_controller.TorqueStanceLegController.qp_weights = (5, 5, 0.2, 0, 0, 10, 0.5, 0.5, 0.2, 0.2, 0.2, 0.1, 0)
+torque_stance_leg_controller.TorqueStanceLegController.body_inertia = (0.183375, 0, 0, 0, 0.6267, 0, 0, 0, 0.636175)
+torque_stance_leg_controller.TorqueStanceLegController.friction_coeffs = (0.45, 0.45, 0.45, 0.45)
+
+########################################
+# Configure the foothold wrapper parameters and action space
+########################################
+mpc_locomotion_wrapper.MPCLocomotionWrapper.foot_friction_coeff = 1.0
+mpc_locomotion_wrapper.MPCLocomotionWrapper.locomotion_gait = %mpc_locomotion_wrapper.Gait.TROT
+mpc_locomotion_wrapper.MPCLocomotionWrapper.control_frequency=20
+mpc_locomotion_wrapper.MPCLocomotionWrapper.target_horizontal_com_velocity_heuristic = @mpc_locomotion_wrapper.InverseRaibertTargetHorizontalComVelocityHeuristic()
+mpc_locomotion_wrapper.InverseRaibertTargetHorizontalComVelocityHeuristic.gains = (-0.25, -0.1)
+
+mpc_locomotion_wrapper.MPCLocomotionWrapper.swing_target_action_range = ((-0.05, -0.05, -0.03), (0.1, 0.05, 0.03))
+mpc_locomotion_wrapper.MPCLocomotionWrapper.pitch_action_range = (-0.2, 0.2)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.roll_action_range = (-0.05, 0.05)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_velocity_action_range = ((-0.05, -0.05), (0.05, 0.05))
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_twist_action_range = (-0.2, 0.2)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_height_action_range = (0.42, 0.48)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.swing_clearance_action_range = (0.05, 0.1)
+
+imu_based_com_velocity_estimator.IMUBasedCOMVelocityEstimator.use_sensor_interface = False
+
+
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_mpc_stepstone.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_mpc_stepstone.gin
@@ -0,0 +1,142 @@
+#-*-Python-*-
+
+# NOTE: Should be run with >=10CPU for decent performance.
+
+import pybullet_envs.minitaur.agents.baseline_controller.torque_stance_leg_controller
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.mpc_locomotion_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.random_stepstone_scene
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.last_action_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.toe_position_sensor
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.envs_v2.tasks.terminal_conditions
+import pybullet_envs.minitaur.envs_v2.utilities.noise_generators
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+
+# Configure the dynamic robot
+
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 4 # Control frequency will be 100 Hz
+
+
+########################################
+# Configure the sensors
+########################################
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203, -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203, 6283.18554688, 6283.18554688]
+
+# Add noise to the IMU sensor and toe position sensor
+# IMUNoise/noise_generators.NormalNoise.scale = (0.025, 0.025, 0.1, 0.1)
+# TOENoise/noise_generators.NormalNoise.scale = (0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005, 0.0025, 0.0025, 0.005)
+# imu_sensor.IMUSensor.noise_generator = @IMUNoise/noise_generators.NormalNoise()
+# toe_position_sensor.ToePositionSensor.noise_generator = @TOENoise/noise_generators.NormalNoise()
+
+
+sensors = [@imu_sensor.IMUSensor(), @last_action_sensor.LastActionSensor(), @toe_position_sensor.ToePositionSensor()]
+laikago_v2.Laikago.sensors = %sensors
+
+
+########################################
+# Specify the motor model and its parameters
+########################################
+LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS = 6.28318548203
+LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS = -6.28318548203
+laikago/robot_config.MotorLimits.angle_lower_limits = %LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS
+laikago/robot_config.MotorLimits.angle_upper_limits = %LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS
+laikago/robot_config.MotorLimits.torque_lower_limits = -30
+laikago/robot_config.MotorLimits.torque_upper_limits = 30
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.HYBRID
+laikago_v2.Laikago.motor_model_class = @hybrid_motor_model.HybridMotorModel
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+hybrid_motor_model.HybridMotorModel.kp = 250
+hybrid_motor_model.HybridMotorModel.kd = (0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0)
+
+
+########################################
+# Setup the terrain randomization and simulation parameters
+########################################
+
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+locomotion_gym_env.LocomotionGymEnv.scene = @random_stepstone_scene.RandomStepstoneScene()
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+
+random_stepstone_scene.RandomStepstoneScene.random_seed = 13
+random_stepstone_scene.RandomStepstoneScene.gap_length_lower_bound = 0.07
+random_stepstone_scene.RandomStepstoneScene.gap_length_upper_bound = 0.15
+random_stepstone_scene.RandomStepstoneScene.stone_width = 0.6
+
+########################################
+# Setup the task and terminal condition parameters
+########################################
+terminal_conditions.maxstep_terminal_condition.max_step = 2000
+terminal_conditions.default_terminal_condition_for_laikago_v2.max_roll = 0.25
+terminal_conditions.default_terminal_condition_for_laikago_v2.max_pitch = 1.0
+terminal_conditions.default_terminal_condition_for_laikago_v2.min_height = 0.15
+terminal_conditions.default_terminal_condition_for_laikago_v2.enforce_foot_contacts = True
+
+# Setup the terminal condition
+terminal_conditions.logical_any_terminal_condition.conditions = [
+    @terminal_conditions.default_terminal_condition_for_laikago_v2,
+    @terminal_conditions.maxstep_terminal_condition,
+    ]
+
+    simple_locomotion_task.SimpleForwardTask.terminal_condition = @terminal_conditions.logical_any_terminal_condition
+simple_locomotion_task.SimpleForwardTask.clip_velocity = 0.0015
+
+time_ordered_buffer.TimeOrderedBuffer.error_on_duplicate_timestamp = False
+time_ordered_buffer.TimeOrderedBuffer.replace_value_on_duplicate_timestamp = True
+time_ordered_buffer.TimeOrderedBuffer.error_on_timestamp_reversal = False
+
+
+observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper.observation_excluded = ('frontCam', 'rearCam')
+
+env_loader.load.wrapper_classes = [
+    @mpc_locomotion_wrapper.MPCLocomotionWrapper,
+    @observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper,
+]
+
+########################################
+# Configure the MPC-related parameters
+########################################
+torque_stance_leg_controller.TorqueStanceLegController.qp_weights = (5, 5, 0.2, 0, 0, 10, 0.5, 0.5, 0.2, 0.2, 0.2, 0.1, 0)
+torque_stance_leg_controller.TorqueStanceLegController.body_inertia = (0.183375, 0, 0, 0, 0.6267, 0, 0, 0, 0.636175)
+torque_stance_leg_controller.TorqueStanceLegController.friction_coeffs = (0.45, 0.45, 0.45, 0.45)
+
+########################################
+# Configure the foothold wrapper parameters and action space
+########################################
+mpc_locomotion_wrapper.MPCLocomotionWrapper.foot_friction_coeff = 1.0
+mpc_locomotion_wrapper.MPCLocomotionWrapper.locomotion_gait = %mpc_locomotion_wrapper.Gait.TROT
+mpc_locomotion_wrapper.MPCLocomotionWrapper.control_frequency=20
+mpc_locomotion_wrapper.MPCLocomotionWrapper.target_horizontal_com_velocity_heuristic = @mpc_locomotion_wrapper.InverseRaibertTargetHorizontalComVelocityHeuristic()
+mpc_locomotion_wrapper.InverseRaibertTargetHorizontalComVelocityHeuristic.gains = (-0.25, -0.1)
+
+mpc_locomotion_wrapper.MPCLocomotionWrapper.swing_target_action_range = ((-0.05, -0.05, -0.03), (0.1, 0.05, 0.03))
+mpc_locomotion_wrapper.MPCLocomotionWrapper.pitch_action_range = (-0.2, 0.2)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.roll_action_range = (-0.05, 0.05)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_velocity_action_range = ((-0.05, -0.05), (0.05, 0.05))
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_twist_action_range = (-0.2, 0.2)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.base_height_action_range = (0.42, 0.48)
+mpc_locomotion_wrapper.MPCLocomotionWrapper.swing_clearance_action_range = (0.05, 0.1)
+
+imu_based_com_velocity_estimator.IMUBasedCOMVelocityEstimator.use_sensor_interface = False
+
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_reactive.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_reactive.gin
@@ -0,0 +1,133 @@
+#-*-Python-*-
+
+# NOTE: Should be run with >=10CPU for decent performance.
+
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.simple_openloop
+import pybullet_envs.minitaur.envs_v2.env_wrappers.trajectory_generator_wrapper_env
+
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.robots.laikago
+
+URDF_ROOT = "urdf/"
+ABDUCTION_P_GAIN = 220.0
+ABDUCTION_D_GAIN = 0.3
+HIP_P_GAIN = 220.0
+HIP_D_GAIN = 2.0
+KNEE_P_GAIN = 220.0
+KNEE_D_GAIN = 2.0
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.sensors.robot_sensors
+
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+NUM_MOTORS = 12
+NOISY_READING = True
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+robot_sensors.IMUSensor.channels = ["R", "P", "dR", "dP"]
+robot_sensors.IMUSensor.noisy_reading = %NOISY_READING
+robot_sensors.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+robot_sensors.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+robot_sensors.MotorAngleSensor.num_motors = %NUM_MOTORS
+robot_sensors.MotorAngleSensor.noisy_reading = %NOISY_READING
+robot_sensors.MotorAngleSensor.lower_bound = -6.28318548203
+robot_sensors.MotorAngleSensor.upper_bound = 6.28318548203
+
+sensors = [@robot_sensors.IMUSensor(), @robot_sensors.MotorAngleSensor()]
+
+Act0/locomotion_gym_config.ScalarField.name = "motor_angle_0"
+Act0/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act0/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act1/locomotion_gym_config.ScalarField.name = "motor_angle_1"
+Act1/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act1/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act2/locomotion_gym_config.ScalarField.name = "motor_angle_2"
+Act2/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act2/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act3/locomotion_gym_config.ScalarField.name = "motor_angle_3"
+Act3/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act3/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act4/locomotion_gym_config.ScalarField.name = "motor_angle_4"
+Act4/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act4/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act5/locomotion_gym_config.ScalarField.name = "motor_angle_5"
+Act5/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act5/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act6/locomotion_gym_config.ScalarField.name = "motor_angle_6"
+Act6/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act6/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act7/locomotion_gym_config.ScalarField.name = "motor_angle_7"
+Act7/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act7/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act8/locomotion_gym_config.ScalarField.name = "motor_angle_8"
+Act8/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act8/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act9/locomotion_gym_config.ScalarField.name = "motor_angle_9"
+Act9/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act9/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act10/locomotion_gym_config.ScalarField.name = "motor_angle_10"
+Act10/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act10/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act11/locomotion_gym_config.ScalarField.name = "motor_angle_11"
+Act11/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act11/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+
+
+locomotion_gym_config.LocomotionGymConfig.actions = [
+    @Act0/locomotion_gym_config.ScalarField(),
+    @Act1/locomotion_gym_config.ScalarField(),
+    @Act2/locomotion_gym_config.ScalarField(),
+    @Act3/locomotion_gym_config.ScalarField(),
+    @Act4/locomotion_gym_config.ScalarField(),
+    @Act5/locomotion_gym_config.ScalarField(),
+    @Act6/locomotion_gym_config.ScalarField(),
+    @Act7/locomotion_gym_config.ScalarField(),
+    @Act8/locomotion_gym_config.ScalarField(),
+    @Act9/locomotion_gym_config.ScalarField(),
+    @Act10/locomotion_gym_config.ScalarField(),
+    @Act11/locomotion_gym_config.ScalarField()]
+
+
+
+laikago.Laikago.urdf_root = %URDF_ROOT
+laikago.Laikago.time_step = %SIM_TIME_STEP
+laikago.Laikago.action_repeat = %NUM_ACTION_REPEAT
+laikago.Laikago.self_collision_enabled = False
+laikago.Laikago.control_latency = 0.002
+laikago.Laikago.pd_latency = 0.0
+laikago.Laikago.motor_kp = [%ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN]
+laikago.Laikago.motor_kd = [%ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN]
+laikago.Laikago.sensors = %sensors
+
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago.Laikago
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+
+
+
+trajectory_generator_wrapper_env.TrajectoryGeneratorWrapperEnv.trajectory_generator = @simple_openloop.LaikagoPoseOffsetGenerator()
+env_loader.load.wrapper_classes = [
+    @observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper,
+    @trajectory_generator_wrapper_env.TrajectoryGeneratorWrapperEnv]
--- a/examples/pybullet/gym/pybullet_data/configs/laikago_walk_static_gait.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/laikago_walk_static_gait.gin
@@ -0,0 +1,65 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.simple_openloop
+import pybullet_envs.minitaur.envs_v2.env_wrappers.trajectory_generator_wrapper_env
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+
+# Specify the gym env parameters
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 4
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+# Specify the world.
+URDF_ROOT = ""
+scene_base.SceneBase.data_root = %URDF_ROOT
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+
+# Specify the task.
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+
+# Specify the sensors. Sensors determine the observation space.
+# Sensors can either be mounted on robots (see below), or passed to envs
+# i.e. like ambient sensors, or provided by tasks (task specific measures).
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE,
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+# We use the default confirugration for MotorAngleSensor, which reads limits from the robot.
+SENSORS = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor()]
+
+# Specify the motor limits, and motor control mode.
+LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS = 6.28318548203
+LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS = -6.28318548203
+laikago/robot_config.MotorLimits.angle_lower_limits = %LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS
+laikago/robot_config.MotorLimits.angle_upper_limits = %LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.POSITION
+laikago_v2.Laikago.motor_model_class = @hybrid_motor_model.HybridMotorModel
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+
+# Specify the motor model parameters. Notice that we don't need to specify the control mode or motor limits here.
+hybrid_motor_model.HybridMotorModel.kp = 2400
+hybrid_motor_model.HybridMotorModel.kd = 5
+
+# Finally, mount sensors specified above to the Laikago.
+laikago_v2.Laikago.sensors = %SENSORS
--- a/examples/pybullet/gym/pybullet_data/configs/minitaur_gym_config.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/minitaur_gym_config.gin
@@ -0,0 +1,48 @@
+#-*-Python-*-
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.robots.minitaur_motor_model_v2
+import pybullet_envs.minitaur.robots.minitaur_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+
+UPPER_BOUND = 1.0
+LOWER_BOUND = -1.0
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 20
+NUM_MOTORS = 8
+NOISY_READING = True
+SENSOR_LATENCY = 0.02
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+robot_config.MotorLimits.angle_lower_limits = %LOWER_BOUND
+robot_config.MotorLimits.angle_upper_limits = %UPPER_BOUND
+
+minitaur_v2.Minitaur.motor_control_mode = %robot_config.MotorControlMode.POSITION
+minitaur_v2.Minitaur.motor_limits = @robot_config.MotorLimits()
+minitaur_v2.Minitaur.motor_model_class = @minitaur_motor_model_v2.MinitaurMotorModel
+minitaur_motor_model_v2.MinitaurMotorModel.pd_latency = 0.003
+minitaur_motor_model_v2.MinitaurMotorModel.kp = 1.0
+minitaur_motor_model_v2.MinitaurMotorModel.kd = 0.015
+
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE
+]
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                    -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                    6283.18554688, 6283.18554688]
+
+motor_angle_sensor.MotorAngleSensor.sensor_latency = %SENSOR_LATENCY
+motor_angle_sensor.MotorAngleSensor.lower_bound = -6.28318548203
+motor_angle_sensor.MotorAngleSensor.upper_bound = 6.28318548203
+minitaur_v2.Minitaur.sensors = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor()]
--- a/examples/pybullet/gym/pybullet_data/configs/minitaur_gym_env.gin
+++ b/examples/pybullet/gym/pybullet_data/configs/minitaur_gym_env.gin
@@ -0,0 +1,63 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.robots.minitaur_v2
+import pybullet_envs.minitaur.robots.robot_config
+import pybullet_envs.minitaur.robots.robot_urdf_loader
+
+URDF_ROOT = "urdf/"
+#-*-Python-*-
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.robots.minitaur_motor_model_v2
+import pybullet_envs.minitaur.robots.minitaur_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+
+UPPER_BOUND = 1.0
+LOWER_BOUND = -1.0
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 20
+NUM_MOTORS = 8
+NOISY_READING = True
+SENSOR_LATENCY = 0.02
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+robot_config.MotorLimits.angle_lower_limits = %LOWER_BOUND
+robot_config.MotorLimits.angle_upper_limits = %UPPER_BOUND
+
+minitaur_v2.Minitaur.motor_control_mode = %robot_config.MotorControlMode.POSITION
+minitaur_v2.Minitaur.motor_limits = @robot_config.MotorLimits()
+minitaur_v2.Minitaur.motor_model_class = @minitaur_motor_model_v2.MinitaurMotorModel
+minitaur_motor_model_v2.MinitaurMotorModel.pd_latency = 0.003
+minitaur_motor_model_v2.MinitaurMotorModel.kp = 1.0
+minitaur_motor_model_v2.MinitaurMotorModel.kd = 0.015
+
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE
+]
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                    -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                    6283.18554688, 6283.18554688]
+
+motor_angle_sensor.MotorAngleSensor.sensor_latency = %SENSOR_LATENCY
+motor_angle_sensor.MotorAngleSensor.lower_bound = -6.28318548203
+motor_angle_sensor.MotorAngleSensor.upper_bound = 6.28318548203
+minitaur_v2.Minitaur.sensors = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor()]
+
+
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+locomotion_gym_env.LocomotionGymEnv.robot_class = @minitaur_v2.Minitaur
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+
--- a/examples/pybullet/gym/pybullet_data/configs_v2/base/laikago_reactive.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/base/laikago_reactive.gin
@@ -0,0 +1,80 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.simple_openloop
+import pybullet_envs.minitaur.envs_v2.env_wrappers.trajectory_generator_wrapper_env
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.sensors.accelerometer_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.envs_v2.tasks.terminal_conditions
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.time_ordered_buffer
+
+
+# Specify the gym env parameters
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+
+# Specify the robot
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS = 6.28318548203
+LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS = -6.28318548203
+laikago/robot_config.MotorLimits.angle_lower_limits = %LAIKAGO_MOTOR_ANGLE_LOWER_LIMITS
+laikago/robot_config.MotorLimits.angle_upper_limits = %LAIKAGO_MOTOR_ANGLE_UPPER_LIMITS
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.POSITION
+
+# Specify the motor model parameters. Notice that we don't need to specify the control mode
+# and motor limits here, as they will be passed from the robot interface.
+hybrid_motor_model.HybridMotorModel.kp = 220
+hybrid_motor_model.HybridMotorModel.kd = (0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0, 0.3, 2.0, 2.0)
+
+# This will make sure the hybrid motor model does not throw error during reset, when the timestamp is alwasy zero.
+time_ordered_buffer.TimeOrderedBuffer.error_on_duplicate_timestamp = False
+time_ordered_buffer.TimeOrderedBuffer.replace_value_on_duplicate_timestamp = True
+
+# Add the sensors
+laikago_v2.Laikago.sensors = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor(), @accelerometer_sensor.AccelerometerSensor()]
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE,
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+# Specify the scene
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+simple_scene.SimpleScene.data_root = "third_party/bullet/data"
+
+# Specify the task.
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+simple_locomotion_task.SimpleForwardTask.terminal_condition = @terminal_conditions.default_terminal_condition_for_laikago
+
+accelerometer_sensor.AccelerometerSensor.lower_bound = [-50.0, -50.0, -50.0]
+accelerometer_sensor.AccelerometerSensor.upper_bound = [50.0, 50.0, 50.0]
+accelerometer_sensor.AccelerometerSensor.sensor_latency = 0.01
+imu_sensor.IMUSensor.sensor_latency = 0.1
+
+
+# Define the wrappers needed
+env_loader.load.wrapper_classes = [
+    @trajectory_generator_wrapper_env.TrajectoryGeneratorWrapperEnv,
+]
+trajectory_generator_wrapper_env.TrajectoryGeneratorWrapperEnv.trajectory_generator = @simple_openloop.LaikagoPoseOffsetGenerator()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/base/laikago_with_imu.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/base/laikago_with_imu.gin
@@ -0,0 +1,66 @@
+#import pybullet_data
+#MYPATH = pybullet_data.getDataPath()+'/configs_v2/robots/laikago.gin'
+#MYPATH = 'D:/dev/bullet3/examples/pybullet\gym/pybullet_data/configs_v2/robots/laikago.gin'
+#include '$MYPATH/ambiguous.gin'
+
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.scene_base
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+URDF_ROOT = ""
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE,
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+# We use the default confirugration for MotorAngleSensor, which reads limits from the robot.
+SENSORS = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor()]
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+# Specify the scene.
+scene_base.SceneBase.data_root = %URDF_ROOT
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+
+# Specify the motor limits, and motor control mode.
+laikago/robot_config.MotorLimits.angle_lower_limits = %LOWER_BOUND
+laikago/robot_config.MotorLimits.angle_upper_limits = %UPPER_BOUND
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.POSITION
+laikago_v2.Laikago.motor_model_class = @hybrid_motor_model.HybridMotorModel
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+
+# Specify the motor model parameters. Notice that we don't need to specify the control mode or motor limits here.
+hybrid_motor_model.HybridMotorModel.kp = 220
+hybrid_motor_model.HybridMotorModel.kd = 2
+
+laikago_v2.Laikago.sensors = %SENSORS
+
+
+laikago_v2.Laikago.sensors = [@imu_sensor.IMUSensor()]
--- a/examples/pybullet/gym/pybullet_data/configs_v2/base/mini_cheetah_with_imu.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/base/mini_cheetah_with_imu.gin
@@ -0,0 +1,9 @@
+import pybullet_data as pd
+
+include pd.getDataPath()+'/configs_v2/robots/mini_cheetah.gin'
+include pd.getDataPath()+'/configs_v2/sensors/imu.gin'
+
+#include 'robotics/reinforcement_learning/minitaur/envs_v2/configs_v2/robots/mini_cheetah.gin'
+#include 'robotics/reinforcement_learning/minitaur/envs_v2/configs_v2/sensors/imu.gin'
+
+mini_cheetah.MiniCheetah.sensors = [@robot_sensors.IMUSensor()]
--- a/examples/pybullet/gym/pybullet_data/configs_v2/robots/laikago.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/robots/laikago.gin
@@ -0,0 +1,58 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.scene_base
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+import pybullet_envs.minitaur.envs_v2.sensors.imu_sensor
+import pybullet_envs.minitaur.envs_v2.sensors.motor_angle_sensor
+import pybullet_envs.minitaur.robots.hybrid_motor_model
+import pybullet_envs.minitaur.robots.laikago_v2
+import pybullet_envs.minitaur.robots.robot_config
+
+URDF_ROOT = ""
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+imu_sensor.IMUSensor.channels = [
+    %imu_sensor.IMUChannel.ROLL,
+    %imu_sensor.IMUChannel.PITCH,
+    %imu_sensor.IMUChannel.ROLL_RATE,
+    %imu_sensor.IMUChannel.PITCH_RATE,
+]
+
+imu_sensor.IMUSensor.lower_bound = [-6.28318548203, -6.28318548203,
+                                       -6283.18554688, -6283.18554688]
+imu_sensor.IMUSensor.upper_bound = [6.28318548203, 6.28318548203,
+                                       6283.18554688, 6283.18554688]
+
+# We use the default confirugration for MotorAngleSensor, which reads limits from the robot.
+SENSORS = [@imu_sensor.IMUSensor(), @motor_angle_sensor.MotorAngleSensor()]
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+# Specify the scene.
+scene_base.SceneBase.data_root = %URDF_ROOT
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
+
+# Specify the motor limits, and motor control mode.
+laikago/robot_config.MotorLimits.angle_lower_limits = %LOWER_BOUND
+laikago/robot_config.MotorLimits.angle_upper_limits = %UPPER_BOUND
+laikago_v2.Laikago.motor_limits = @laikago/robot_config.MotorLimits()
+laikago_v2.Laikago.motor_control_mode = %robot_config.MotorControlMode.POSITION
+laikago_v2.Laikago.motor_model_class = @hybrid_motor_model.HybridMotorModel
+locomotion_gym_env.LocomotionGymEnv.robot_class = @laikago_v2.Laikago
+
+# Specify the motor model parameters. Notice that we don't need to specify the control mode or motor limits here.
+hybrid_motor_model.HybridMotorModel.kp = 220
+hybrid_motor_model.HybridMotorModel.kd = 2
+
+laikago_v2.Laikago.sensors = %SENSORS
--- a/examples/pybullet/gym/pybullet_data/configs_v2/robots/mini_cheetah.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/robots/mini_cheetah.gin
@@ -0,0 +1,98 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.robots.mini_cheetah
+import pybullet_data as pd
+
+URDF_ROOT = pd.getDataPath()+"/urdf/"
+
+ABDUCTION_P_GAIN = 100.0
+ABDUCTION_D_GAIN = 1.0
+HIP_P_GAIN = 30
+HIP_D_GAIN = 2.0
+KNEE_P_GAIN = 50
+KNEE_D_GAIN = 2.0
+
+
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 2
+NUM_MOTORS = 12
+NOISY_READING = True
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+
+Act0/locomotion_gym_config.ScalarField.name = "motor_angle_0"
+Act0/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act0/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act1/locomotion_gym_config.ScalarField.name = "motor_angle_1"
+Act1/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act1/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act2/locomotion_gym_config.ScalarField.name = "motor_angle_2"
+Act2/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act2/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act3/locomotion_gym_config.ScalarField.name = "motor_angle_3"
+Act3/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act3/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act4/locomotion_gym_config.ScalarField.name = "motor_angle_4"
+Act4/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act4/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act5/locomotion_gym_config.ScalarField.name = "motor_angle_5"
+Act5/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act5/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act6/locomotion_gym_config.ScalarField.name = "motor_angle_6"
+Act6/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act6/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act7/locomotion_gym_config.ScalarField.name = "motor_angle_7"
+Act7/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act7/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act8/locomotion_gym_config.ScalarField.name = "motor_angle_8"
+Act8/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act8/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act9/locomotion_gym_config.ScalarField.name = "motor_angle_9"
+Act9/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act9/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act10/locomotion_gym_config.ScalarField.name = "motor_angle_10"
+Act10/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act10/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+Act11/locomotion_gym_config.ScalarField.name = "motor_angle_11"
+Act11/locomotion_gym_config.ScalarField.upper_bound = %UPPER_BOUND
+Act11/locomotion_gym_config.ScalarField.lower_bound = %LOWER_BOUND
+
+
+locomotion_gym_config.LocomotionGymConfig.actions = [
+    @Act0/locomotion_gym_config.ScalarField(),
+    @Act1/locomotion_gym_config.ScalarField(),
+    @Act2/locomotion_gym_config.ScalarField(),
+    @Act3/locomotion_gym_config.ScalarField(),
+    @Act4/locomotion_gym_config.ScalarField(),
+    @Act5/locomotion_gym_config.ScalarField(),
+    @Act6/locomotion_gym_config.ScalarField(),
+    @Act7/locomotion_gym_config.ScalarField(),
+    @Act8/locomotion_gym_config.ScalarField(),
+    @Act9/locomotion_gym_config.ScalarField(),
+    @Act10/locomotion_gym_config.ScalarField(),
+    @Act11/locomotion_gym_config.ScalarField()]
+
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+mini_cheetah.MiniCheetah.urdf_root = %URDF_ROOT
+mini_cheetah.MiniCheetah.time_step = %SIM_TIME_STEP
+mini_cheetah.MiniCheetah.action_repeat = %NUM_ACTION_REPEAT
+mini_cheetah.MiniCheetah.self_collision_enabled = False
+mini_cheetah.MiniCheetah.control_latency = 0.002
+mini_cheetah.MiniCheetah.pd_latency = 0.0
+mini_cheetah.MiniCheetah.motor_kp = [%ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN,
+                            %ABDUCTION_P_GAIN, %HIP_P_GAIN, %KNEE_P_GAIN]
+mini_cheetah.MiniCheetah.motor_kd = [%ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN,
+                            %ABDUCTION_D_GAIN, %HIP_D_GAIN, %KNEE_D_GAIN]
+
+locomotion_gym_env.LocomotionGymEnv.robot_class = @mini_cheetah.MiniCheetah
+
--- a/examples/pybullet/gym/pybullet_data/configs_v2/robots/minitaur.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/robots/minitaur.gin
@@ -0,0 +1,31 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_config
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.robots.minitaur_motor_model_v2
+import pybullet_envs.minitaur.robots.minitaur_v2
+
+URDF_ROOT = "robotics/reinforcement_learning/minitaur/robots/data/urdf/"
+
+UPPER_BOUND = 6.28318548203
+LOWER_BOUND = -6.28318548203
+SIM_TIME_STEP = 0.001
+NUM_ACTION_REPEAT = 6
+NUM_MOTORS = 8
+NOISY_READING = True
+
+locomotion_gym_config.SimulationParameters.sim_time_step_s = %SIM_TIME_STEP
+locomotion_gym_config.SimulationParameters.num_action_repeat = %NUM_ACTION_REPEAT
+locomotion_gym_config.SimulationParameters.enable_rendering = False
+locomotion_gym_config.LocomotionGymConfig.simulation_parameters = @locomotion_gym_config.SimulationParameters()
+locomotion_gym_env.LocomotionGymEnv.gym_config = @locomotion_gym_config.LocomotionGymConfig()
+
+minitaur_v2.Minitaur.motor_control_mode = %robot_config.MotorControlMode.POSITION
+minitaur_v2.Minitaur.motor_limits = @robot_config.MotorLimits()
+minitaur_v2.Minitaur.motor_model_class = @minitaur_motor_model_v2.MinitaurMotorModel
+minitaur_motor_model_v2.MinitaurMotorModel.pd_latency = 0.003
+minitaur_motor_model_v2.MinitaurMotorModel.kp = 1.0
+minitaur_motor_model_v2.MinitaurMotorModel.kd = 0.015
+
+locomotion_gym_env.LocomotionGymEnv.robot_class = @minitaur_v2.Minitaur
+
+robot_config.MotorLimits.angle_lower_limits = %LOWER_BOUND
+robot_config.MotorLimits.angle_upper_limits = %UPPER_BOUND
--- a/examples/pybullet/gym/pybullet_data/configs_v2/scenes/simple_scene.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/scenes/simple_scene.gin
@@ -0,0 +1,4 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.scenes.simple_scene
+
+locomotion_gym_env.LocomotionGymEnv.scene = @simple_scene.SimpleScene()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/scenes/stair_scene.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/scenes/stair_scene.gin
@@ -0,0 +1,3 @@
+import pybullet_envs.minitaur.envs_v2.scenes.stair_scene
+# Specify the scene (overwrite the setting from laikago_reactive.gin)
+locomotion_gym_env.LocomotionGymEnv.scene = @stair_scene.StairScene()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task.gin
@@ -0,0 +1,4 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_laikago.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_laikago.gin
@@ -0,0 +1,8 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
+
+simple_locomotion_task.SimpleForwardTask.energy_penalty_coef = 0.002
+simple_locomotion_task.SimpleForwardTask.min_com_height = 0.3
+simple_locomotion_task.SimpleForwardTask.clip_velocity = 0.002
--- a/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_no_termination.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_no_termination.gin
@@ -0,0 +1,11 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.envs_v2.tasks.terminal_conditions
+
+terminal_conditions.maxstep_terminal_condition.max_step = 1500
+# Setup the terminal condition to not to terminate early when the robot falls.
+terminal_conditions.logical_any_terminal_condition.conditions = [
+    @terminal_conditions.maxstep_terminal_condition,
+    ]
+simple_locomotion_task.SimpleForwardTask.terminal_condition = @terminal_conditions.logical_any_terminal_condition
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_no_termination_simplified.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/tasks/fwd_task_no_termination_simplified.gin
@@ -0,0 +1,12 @@
+import pybullet_envs.minitaur.envs_v2.locomotion_gym_env
+import pybullet_envs.minitaur.envs_v2.tasks.simple_locomotion_task
+import pybullet_envs.minitaur.envs_v2.tasks.terminal_conditions
+
+terminal_conditions.maxstep_terminal_condition.max_step = 1500
+simple_locomotion_task.SimpleForwardTask.terminal_condition = @terminal_conditions.default_terminal_condition_for_laikago_v2
+
+simple_locomotion_task.SimpleForwardTask.divide_with_dt = True
+simple_locomotion_task.SimpleForwardTask.clip_velocity = 0.4
+simple_locomotion_task.SimpleForwardTask.energy_penalty_coef = 0
+
+locomotion_gym_env.LocomotionGymEnv.task = @simple_locomotion_task.SimpleForwardTask()
--- a/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper.gin
@@ -0,0 +1,18 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.pmtg_wrapper_env
+
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_enable = True
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_enable = 1
+pmtg_wrapper_env.PmtgWrapperEnv.intensity_upper_bound = 1.0
+pmtg_wrapper_env.PmtgWrapperEnv.max_delta_time = 4.0
+pmtg_wrapper_env.PmtgWrapperEnv.min_delta_time = 2.0
+pmtg_wrapper_env.PmtgWrapperEnv.residual_range = 0.35
+pmtg_wrapper_env.PmtgWrapperEnv.integrator_coupling_mode = "all coupled"
+pmtg_wrapper_env.PmtgWrapperEnv.walk_height_coupling_mode = "all coupled"
+pmtg_wrapper_env.PmtgWrapperEnv.variable_swing_stance_ratio = 1
+pmtg_wrapper_env.PmtgWrapperEnv.init_gait = "walk"
+
+env_loader.load.wrapper_classes = [
+    @pmtg_wrapper_env.PmtgWrapperEnv,
+    @observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper]
--- a/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_dict.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_dict.gin
@@ -0,0 +1,6 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.pmtg_wrapper_env
+
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_enable = True
+
+env_loader.load.wrapper_classes = [@pmtg_wrapper_env.PmtgWrapperEnv]
--- a/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_laikago.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_laikago.gin
@@ -0,0 +1,10 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.pmtg_wrapper_env
+
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_enable = True
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_high_cut = 0.5
+
+env_loader.load.wrapper_classes = [
+    @pmtg_wrapper_env.PmtgWrapperEnv,
+    @observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper]
--- a/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_simplified_env.gin
+++ b/examples/pybullet/gym/pybullet_data/configs_v2/wrappers/pmtg_wrapper_simplified_env.gin
@@ -0,0 +1,18 @@
+import pybullet_envs.minitaur.envs_v2.env_loader
+import pybullet_envs.minitaur.envs_v2.env_wrappers.observation_dictionary_to_array_wrapper
+import pybullet_envs.minitaur.envs_v2.env_wrappers.pmtg_wrapper_env
+
+
+pmtg_wrapper_env.PmtgWrapperEnv.init_leg_phase_offsets = [0, 0.5, 0.5, 0]
+pmtg_wrapper_env.PmtgWrapperEnv.integrator_coupling_mode = 'all coupled'
+pmtg_wrapper_env.PmtgWrapperEnv.intensity_upper_bound = 0.5
+pmtg_wrapper_env.PmtgWrapperEnv.max_delta_time = 4
+pmtg_wrapper_env.PmtgWrapperEnv.min_delta_time = 1
+pmtg_wrapper_env.PmtgWrapperEnv.residual_range = 0.2
+pmtg_wrapper_env.PmtgWrapperEnv.variable_swing_stance_ratio = False
+pmtg_wrapper_env.PmtgWrapperEnv.walk_height_coupling_mode = 'null'
+pmtg_wrapper_env.PmtgWrapperEnv.action_filter_high_cut = 0.5
+
+env_loader.load.wrapper_classes = [
+    @pmtg_wrapper_env.PmtgWrapperEnv,
+    @observation_dictionary_to_array_wrapper.ObservationDictionaryToArrayWrapper]
--- a/examples/pybullet/gym/pybullet_data/testdata/init.py
+++ b/examples/pybullet/gym/pybullet_data/testdata/init.py
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/init.py
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/init.py
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/accelerometer.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/accelerometer.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/estimated_velocities.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/estimated_velocities.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/feet_contact_forces.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/feet_contact_forces.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/gyroscope.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/gyroscope.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/jacobians.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/jacobians.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/motor_velocities.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/motor_velocities.npy
--- a/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/timestamp.npy
+++ b/examples/pybullet/gym/pybullet_data/testdata/test_imu_state_estimator/timestamp.npy
--- a/examples/pybullet/gym/pybullet_data/urdf/mug.obj
+++ b/examples/pybullet/gym/pybullet_data/urdf/mug.obj
--- a/examples/pybullet/gym/pybullet_data/urdf/mug.urdf
+++ b/examples/pybullet/gym/pybullet_data/urdf/mug.urdf
@@ -0,0 +1,28 @@
+<robot name="mug">
+    <link name="base_link">
+        <contact>
+            <lateral_friction value="1.0"/>
+        </contact>
+        <inertial>
+            <origin xyz="0 0 0"/>
+            <mass value="1"/>
+            <inertia ixx="1" ixy="0" ixz="0" iyy="1" iyz="0" izz="1"/>
+        </inertial>
+        <visual>
+            <origin xyz="0 0 0"/>
+            <geometry>
+                <mesh filename="mug.obj"/>
+            </geometry>
+      <material name="mug">
+        <color rgba="1 0.2 0.2 1"/>
+        <specular rgb="1 1 1"/>
+      </material>
+        </visual>
+        <collision>
+            <origin xyz="0 0 0"/>
+            <geometry>
+                <mesh filename="mug_col.obj"/>
+            </geometry>
+        </collision>
+    </link>
+</robot>
--- a/examples/pybullet/gym/pybullet_data/urdf/mug_col.obj
+++ b/examples/pybullet/gym/pybullet_data/urdf/mug_col.obj
--- a/examples/pybullet/gym/pybullet_envs/minitaur/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/init.py
@@ -1 +0,0 @@
-
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/init.py
@@ -1 +0,0 @@
-
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/init.py
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/com_height_estimator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/com_height_estimator.py
@@ -0,0 +1,76 @@
+"""State estimator for robot height."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+from typing import Any, Sequence
+
+import gin
+
+from pybullet_envs.minitaur.agents.baseline_controller import state_estimator
+
+
+@gin.configurable
+class COMHeightEstimator(state_estimator.StateEstimatorBase):
+  """Estimate the CoM height using base orientation and local toe positions."""
+
+  def __init__(
+      self,
+      robot: Any,
+      com_estimate_leg_indices: Sequence[int] = (0, 1, 2, 3),
+      initial_com_height: float = 0.45,
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A quadruped robot.
+      com_estimate_leg_indices: Leg indices used in estimating the CoM height.
+      initial_com_height: CoM height used during reset.
+    """
+    self._robot = robot
+    self._com_estimate_leg_indices = com_estimate_leg_indices
+    self._initial_com_estimate_leg_indices = copy.copy(com_estimate_leg_indices)
+    self._initial_com_height = initial_com_height
+    self.reset(0)
+
+  @property
+  def estimated_com_height(self):
+    return self._com_height
+
+  def reset(self, current_time):
+    del current_time
+    self._com_height = self._initial_com_height
+    self._com_estimate_leg_indices = copy.copy(
+        self._initial_com_estimate_leg_indices)
+
+  def update(self, current_time):
+    del current_time
+    local_toe_poses = self._robot.foot_positions(
+        position_in_world_frame=False)
+    # We rotate the local toe positions into the world orientation centered
+    # at the robot base to estimate the height of the robot.
+    world_toe_poses = []
+    for toe_p in local_toe_poses:
+      world_toe_poses.append(
+          self._robot.pybullet_client.multiplyTransforms(
+              positionA=(0, 0, 0),
+              orientationA=self._robot.base_orientation_quaternion,
+              positionB=toe_p,
+              orientationB=(0, 0, 0, 1))[0])
+    mean_height = 0.0
+    num_toe_in_contact = 0
+    for leg_id in self._com_estimate_leg_indices:
+      mean_height += world_toe_poses[leg_id][2]
+      num_toe_in_contact += 1
+    mean_height /= num_toe_in_contact
+    self._com_height = abs(mean_height)
+
+  @property
+  def com_estimate_leg_indices(self):
+    return self._com_estimate_leg_indices
+
+  @com_estimate_leg_indices.setter
+  def com_estimate_leg_indices(self, leg_indices):
+    self._com_estimate_leg_indices = leg_indices
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/com_velocity_estimator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/com_velocity_estimator.py
@@ -0,0 +1,83 @@
+"""State estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from typing import Any, Sequence
+
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import state_estimator
+from pybullet_envs.minitaur.robots.safety.python import moving_window_filter
+
+_DEFAULT_WINDOW_SIZE = 20
+
+
+@gin.configurable
+class COMVelocityEstimator(state_estimator.StateEstimatorBase):
+  """Estimate the CoM velocity using on board sensors.
+
+
+  Requires knowledge about the base velocity in world frame, which for example
+  can be obtained from a MoCap system. This estimator will filter out the high
+  frequency noises in the velocity so the results can be used with controllers
+  reliably.
+
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      window_size: int = _DEFAULT_WINDOW_SIZE,
+  ):
+    self._robot = robot
+    self._window_size = window_size
+    self.reset(0)
+
+  @property
+  def com_velocity_body_yaw_aligned_frame(self) -> Sequence[float]:
+    """The base velocity projected in the body aligned inertial frame.
+
+    The body aligned frame is a intertia frame that coincides with the body
+    frame, but has a zero relative velocity/angular velocity to the world frame.
+
+    Returns:
+      The com velocity in body aligned frame.
+    """
+    return self._com_velocity_body_yaw_aligned_frame
+
+  @property
+  def com_velocity_world_frame(self) -> Sequence[float]:
+    return self._com_velocity_world_frame
+
+  def reset(self, current_time):
+    del current_time
+    # We use a moving window filter to reduce the noise in velocity estimation.
+    self._velocity_filter_x = moving_window_filter.MovingWindowFilter(
+        window_size=self._window_size)
+    self._velocity_filter_y = moving_window_filter.MovingWindowFilter(
+        window_size=self._window_size)
+    self._velocity_filter_z = moving_window_filter.MovingWindowFilter(
+        window_size=self._window_size)
+    self._com_velocity_world_frame = np.array((0, 0, 0))
+    self._com_velocity_body_yaw_aligned_frame = np.array((0, 0, 0))
+
+  def update(self, current_time):
+    del current_time
+    velocity = self._robot.base_velocity
+
+    vx = self._velocity_filter_x.CalculateAverage(velocity[0])
+    vy = self._velocity_filter_y.CalculateAverage(velocity[1])
+    vz = self._velocity_filter_z.CalculateAverage(velocity[2])
+    self._com_velocity_world_frame = np.array((vx, vy, vz))
+
+    base_orientation = self._robot.base_orientation_quaternion
+    _, inverse_rotation = self._robot.pybullet_client.invertTransform(
+        (0, 0, 0), base_orientation)
+
+    self._com_velocity_body_yaw_aligned_frame, _ = (
+        self._robot.pybullet_client.multiplyTransforms(
+            (0, 0, 0), inverse_rotation, self._com_velocity_world_frame,
+            (0, 0, 0, 1)))
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/dummy_gait_generator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/dummy_gait_generator.py
@@ -0,0 +1,87 @@
+"""A dummy gait generator module for storing gait patterns from higher-level controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import google_type_annotations
+from __future__ import print_function
+
+import copy
+from typing import Any, Sequence
+
+import gin
+
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator
+
+LAIKAGO_STANDING = (
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+)
+
+
+@gin.configurable
+class DummyGaitGenerator(gait_generator.GaitGenerator):
+  """A module for storing quadruped gait patterns from high-level controller.
+
+  This module stores the state for each leg of a quadruped robot. The data is
+  used by the stance leg controller to determine the appropriate contact forces.
+  A high-level controller, such as a neural network policy, can be used to
+  control the gait pattern and set corresponding leg states.
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      initial_leg_state: Sequence[gait_generator.LegState] = LAIKAGO_STANDING,
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A quadruped robot.
+      initial_leg_state: The desired initial swing/stance state of legs indexed
+        by their id.
+    """
+    self._robot = robot
+    if len(initial_leg_state) != len(
+        list(self._robot.urdf_loader.get_end_effector_id_dict().values())):
+      raise ValueError(
+          "The number of leg states should be the same of number of legs.")
+    self._initial_leg_state = initial_leg_state
+    self._leg_state = list(initial_leg_state)
+    self._desired_leg_state = list(initial_leg_state)
+
+    self.reset(0)
+
+  def reset(self, current_time):
+    del current_time
+    self._leg_state = list(self._initial_leg_state)
+    self._desired_leg_state = list(self._initial_leg_state)
+
+  @property
+  def desired_leg_state(self) -> Sequence[gait_generator.LegState]:
+    """The desired leg SWING/STANCE states.
+
+    Returns:
+      The SWING/STANCE states for all legs.
+
+    """
+    return self._desired_leg_state
+
+  @desired_leg_state.setter
+  def desired_leg_state(self, state):
+    self._desired_leg_state = copy.deepcopy(state)
+
+  @property
+  def leg_state(self) -> Sequence[gait_generator.LegState]:
+    """The leg state after considering contact with ground.
+
+    Returns:
+      The actual state of each leg after accounting for contacts.
+    """
+    return self._leg_state
+
+  @leg_state.setter
+  def leg_state(self, state):
+    self._leg_state = copy.deepcopy(state)
+
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/foot_stepper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/foot_stepper.py
@@ -0,0 +1,199 @@
+# Lint as: python3
+"""A state machine that steps each foot for a static gait. Experimental code."""
+
+import copy
+import math
+
+import numpy as np
+
+
+class StepInput(object):
+
+  def __init__(self):
+    self.base_com_pos = np.array([0, 0, 0])
+    self.base_com_orn = np.array([0, 0, 0, 1])
+    self.toe_pos_world = np.array([0, 0, 0] * 4)
+    self.new_pos_world = np.array([0, 0, 0])
+
+
+class StepOutput(object):
+
+  def __init__(self, new_toe_pos_world):
+    self.new_toe_pos_world = new_toe_pos_world
+
+
+class FootStepper(object):
+  """This class computes desired foot placement for a quadruped robot."""
+
+  def __init__(self, bullet_client, toe_ids, toe_pos_local_ref):
+    self.bullet_client = bullet_client
+    self.state_time = 0.
+    self.toe_ids = toe_ids
+    self.toe_pos_local_ref = toe_pos_local_ref
+    self.sphere_uid = self.bullet_client.loadURDF(
+        "sphere_small.urdf", [0, 0, 0], useFixedBase=True)
+    self.is_far = True
+    self.max_shift = 0.0008
+    self.far_bound = 0.005
+    self.close_bound = 0.03
+
+    self.move_swing_foot = False
+    self.amp = 0.2
+    alpha = 1
+
+    # Loads/draws spheres for debugging purpose. The spheres visualize the
+    # target COM, the current COM and the target foothold location.
+    self.sphere_uid_centroid = self.bullet_client.loadURDF(
+        "sphere_small.urdf", [0, 0, 0], useFixedBase=True)
+    self.bullet_client.changeVisualShape(
+        self.sphere_uid_centroid, -1, rgbaColor=[1, 1, 0, alpha])
+
+    # Disable collision since visualization spheres should not collide with the
+    # robot.
+    self.bullet_client.setCollisionFilterGroupMask(self.sphere_uid_centroid, -1,
+                                                   0, 0)
+
+    self.sphere_uid_com = self.bullet_client.loadURDF(
+        "sphere_small.urdf", [0, 0, 0], useFixedBase=True)
+    self.bullet_client.changeVisualShape(
+        self.sphere_uid_com, -1, rgbaColor=[1, 0, 1, alpha])
+    self.bullet_client.setCollisionFilterGroupMask(self.sphere_uid_com, -1, 0,
+                                                   0)
+
+    self.bullet_client.setCollisionFilterGroupMask(self.sphere_uid, -1, 0, 0)
+    self.feetindices = [1, 3, 0, 2]
+    self.swing_foot_index1 = 0
+    self.swing_foot_index = self.feetindices[self.swing_foot_index1]
+    self.colors = [[1, 0, 0, 1], [0, 1, 0, 1], [0, 0, 1, 1], [1, 1, 1, 1]]
+    self.support_vertices = [[1, 2, 3], [0, 2, 3], [0, 1, 3], [0, 1, 2]]
+    self.local_diff_y_threshold = 0.05
+    self.local_diff_y = 100
+    self.is_far = True
+    self.get_reference_pos_swing_foot()
+
+  def next_foot(self):
+    self.swing_foot_index1 = (self.swing_foot_index1 + 1) % 4
+    self.swing_foot_index = self.feetindices[self.swing_foot_index1]
+
+  def swing_foot(self):
+    self.move_swing_foot = True
+
+  def get_reference_pos_swing_foot(self):
+    self.new_pos_local = np.array(
+        self.toe_pos_local_ref[self.swing_foot_index])
+    return self.new_pos_local
+
+  def set_reference_pos_swing_foot(self, new_pos_local):
+    self.new_pos_local = new_pos_local
+
+  def is_com_stable(self):
+    ld2 = self.local_diff_y * self.local_diff_y
+    yaw_ok = ld2 < (self.local_diff_y_threshold * self.local_diff_y_threshold)
+    com_ok = not self.is_far
+    return com_ok and yaw_ok
+
+  def update(self, step_input):
+    """Updates the state machine and toe movements per state."""
+    base_com_pos = step_input.base_com_pos
+    base_com_orn = step_input.base_com_orn
+    base_com_pos_inv, base_com_orn_inv = self.bullet_client.invertTransform(
+        base_com_pos, base_com_orn)
+
+    dt = step_input.dt
+    self.bullet_client.resetBasePositionAndOrientation(self.sphere_uid,
+                                                       step_input.new_pos_world,
+                                                       [0, 0, 0, 1])
+    self.bullet_client.changeVisualShape(
+        self.sphere_uid, -1, rgbaColor=self.colors[self.swing_foot_index])
+
+    all_toes_pos_locals = []
+    for toe_pos_world in step_input.toe_pos_world:
+      toe_pos_local, _ = self.bullet_client.multiplyTransforms(
+          base_com_pos_inv, base_com_orn_inv, toe_pos_world, [0, 0, 0, 1])
+      all_toes_pos_locals.append(toe_pos_local)
+    all_toes_pos_locals = np.array(all_toes_pos_locals)
+    centroid_world = np.zeros(3)
+    for v in self.support_vertices[self.swing_foot_index]:
+      vtx_pos_world = step_input.toe_pos_world[v]
+      centroid_world += vtx_pos_world
+    centroid_world /= 3.
+
+    sphere_z_offset = 0.05
+    self.diff_world = base_com_pos - centroid_world
+    self.diff_world[2] = 0.
+    self.bullet_client.resetBasePositionAndOrientation(self.sphere_uid_centroid,
+                                                       centroid_world,
+                                                       [0, 0, 0, 1])
+    self.bullet_client.resetBasePositionAndOrientation(
+        self.sphere_uid_com,
+        [base_com_pos[0], base_com_pos[1], sphere_z_offset], [0, 0, 0, 1])
+
+    l = np.linalg.norm(self.diff_world)
+    if self.is_far:
+      bound = self.far_bound
+    else:
+      bound = self.close_bound
+
+    if l > bound:
+      self.diff_world *= self.max_shift * 0.5 / l
+      if not self.is_far:
+        self.is_far = True
+    else:
+      if self.is_far:
+        self.is_far = False
+
+    if not self.is_far:
+      self.diff_world = np.zeros(3)
+    for i in range(len(self.toe_pos_local_ref)):
+      toe = self.toe_pos_local_ref[i]
+      toe = [
+          toe[0] + self.diff_world[0], toe[1] + self.diff_world[1],
+          toe[2] + self.diff_world[2]
+      ]
+      self.toe_pos_local_ref[i] = toe
+
+    self.local_diff_y = self.toe_pos_local_ref[0][
+        1] + self.toe_pos_local_ref[1][1] - self.toe_pos_local_ref[
+            2][1] - self.toe_pos_local_ref[3][1]
+
+    self.yaw = 0
+    if self.local_diff_y < -self.local_diff_y_threshold:
+      self.yaw = 0.001
+    if self.local_diff_y > self.local_diff_y_threshold:
+      self.yaw = -0.001
+
+    yaw_trans = self.bullet_client.getQuaternionFromEuler([0, 0, self.yaw])
+
+    if not self.is_far:
+      for i in range(len(self.toe_pos_local_ref)):
+        toe = self.toe_pos_local_ref[i]
+        toe, _ = self.bullet_client.multiplyTransforms([0, 0, 0], yaw_trans,
+                                                       toe, [0, 0, 0, 1])
+        self.toe_pos_local_ref[i] = toe
+
+    new_toe_pos_world = []
+
+    # Moves the swing foot to the target location.
+    if self.move_swing_foot:
+      if self.state_time <= 1:
+        self.state_time += 4 * dt
+    if self.state_time >= 1:
+      self.move_swing_foot = False
+      self.state_time = 0
+      self.toe_pos_local_ref[self.swing_foot_index] = self.new_pos_local
+    toe_pos_local_ref_copy = copy.deepcopy(self.toe_pos_local_ref)
+    old_pos = self.toe_pos_local_ref[self.swing_foot_index]
+    new_pos = [
+        old_pos[0] * (1 - self.state_time) + self.new_pos_local[0] *
+        (self.state_time), old_pos[1] * (1 - self.state_time) +
+        self.new_pos_local[1] * (self.state_time),
+        old_pos[2] * (1 - self.state_time) + self.new_pos_local[2] *
+        (self.state_time) + self.amp * math.sin(self.state_time * math.pi)
+    ]
+    toe_pos_local_ref_copy[self.swing_foot_index] = new_pos
+    for toe_pos_local in toe_pos_local_ref_copy:
+      new_toe_pos_world.append(self.bullet_client.multiplyTransforms(
+          base_com_pos, base_com_orn, toe_pos_local, [0, 0, 0, 1])[0])
+
+    step_output = StepOutput(new_toe_pos_world)
+    return step_output
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/gait_generator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/gait_generator.py
@@ -0,0 +1,32 @@
+"""Gait pattern planning module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import enum
+
+
+class LegState(enum.Enum):
+  """The state of a leg during locomotion."""
+  SWING = 0
+  STANCE = 1
+  # A swing leg that collides with the ground.
+  EARLY_CONTACT = 2
+  # A stance leg that loses contact.
+  LOSE_CONTACT = 3
+
+
+class GaitGenerator(object):  # pytype: disable=ignored-metaclass
+  """Generates the leg swing/stance pattern for the robot."""
+
+  __metaclass__ = abc.ABCMeta
+
+  @abc.abstractmethod
+  def reset(self, current_time):
+    pass
+
+  @abc.abstractmethod
+  def update(self, current_time):
+    pass
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/imu_based_com_velocity_estimator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/imu_based_com_velocity_estimator.py
@@ -0,0 +1,216 @@
+"""State estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from typing import Any, Sequence
+
+from filterpy import kalman
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import state_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import time_based_moving_window_filter
+from pybullet_envs.minitaur.envs_v2.sensors import accelerometer_sensor
+from pybullet_envs.minitaur.envs_v2.sensors import imu_sensor
+
+_DEFAULT_VELOCITY_FILTER_WINDOW = 0.2
+_DEFAULT_GYRO_FILTER_WINDOW = 0.1
+_DEFAULT_VELOCITY_CLIPPING = 0.8
+_STATE_DIMENSION = 3
+_GRAVITY = (0.0, 0.0, -9.8)
+
+
+@gin.configurable
+class IMUBasedCOMVelocityEstimator(state_estimator.StateEstimatorBase):
+  """Estimate the CoM velocity using IMU sensors and velocities of stance feet.
+
+
+  Estimates the com velocity of the robot using IMU data and stance feet
+  velocities fused by a Kalman Filter. Kalman Filter assumes the true state x
+  follows a linear dynamics: x'=Fx+Bu+w, where x' and x denots the
+  current and previous state of the robot, F is the state transition matrix,
+  B is the control-input matrix, and w is the noise in the dynamics, assuming to
+  follow a zero-mean multivariate normal distribution with covariance Q. It also
+  assumes that we can obtain a noisy observation of the state with the model
+  z=Hx+v, where z is the observed state, H is the observation matrix, and v is
+  a noise model, assuming to follow a zero-mean multivariate normal distribution
+  with covariance R. In our case, x is the CoM velocity of the robot,
+  F=B=H=eye(3), u=dt * CoM acceleration, which is obtained from accelerometer,
+  and the noisy observation z is obtained from the negated average velocities at
+  the end-effectors in contact with the ground.
+
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      use_sensor_interface: bool = True,
+      accelerometer_variance=0.1,
+      observation_variance=0.1,
+      initial_variance=0.1,
+      velocity_filter_window: float = _DEFAULT_VELOCITY_FILTER_WINDOW,
+      gyroscope_filter_window: float = _DEFAULT_GYRO_FILTER_WINDOW,
+      contact_detection_threshold: float = 0.0,
+      velocity_clipping: float = _DEFAULT_VELOCITY_CLIPPING,
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A quadruped robot.
+      use_sensor_interface: Whether to use the sensor interface to obtain the
+        IMU readings or directly get them from the robot class. Former should
+        be used in simulation to enable added latency and noise while latter
+        should be used on real robot for better performance.
+      accelerometer_variance: The estimated variance in the accelerometer
+        readings, used in the Kalman Filter.
+      observation_variance: The estimated variance in the observed CoM velocity
+        from the stance feet velocities, used in the Kalman Filter.
+      initial_variance: The variance of the initial distribution for the
+        estimated CoM variance.
+      velocity_filter_window: The filtering window (in time) used to smooth the
+        estimated CoM velocity.
+      gyroscope_filter_window: The filtering window (in time) used to smooth the
+        input gyroscope readings.
+      contact_detection_threshold: Threshold on the contact sensor readings to
+        determine whether the foot is in contact with the ground.
+      velocity_clipping: Clipping value for the estimated velocity to prevent
+        unrealistically large velocity estimations.
+    """
+    self._robot = robot
+    self._contact_detection_threshold = contact_detection_threshold
+    self._velocity_clipping = velocity_clipping
+    self._use_sensor_interface = use_sensor_interface
+
+    # Use the accelerometer and gyroscope sensor from the robot
+    if self._use_sensor_interface:
+      for sensor in self._robot.sensors:
+        if isinstance(sensor, accelerometer_sensor.AccelerometerSensor):
+          self._accelerometer = sensor
+        if isinstance(sensor, imu_sensor.IMUSensor):
+          self._gyroscope = sensor
+      assert hasattr(self, "_accelerometer") and self._accelerometer is not None
+      assert hasattr(self, "_gyroscope") and self._gyroscope is not None
+
+    # x is the underlying CoM velocity we want to estimate, z is the observed
+    # CoM velocity from the stance feet velocities, and u is the accelerometer
+    # readings.
+    self._filter = kalman.KalmanFilter(
+        dim_x=_STATE_DIMENSION,
+        dim_z=_STATE_DIMENSION,
+        dim_u=_STATE_DIMENSION)
+    # Initialize the state distribution to be a zero-mean multi-variate normal
+    # distribution with initial variance.
+    self._filter.x = np.zeros(_STATE_DIMENSION)
+    self._initial_variance = initial_variance
+    self._filter.P = np.eye(_STATE_DIMENSION) * self._initial_variance
+    # Covariance matrix for the control variable.
+    self._filter.Q = np.eye(_STATE_DIMENSION) * accelerometer_variance
+    # Covariance matrix for the observed states.
+    self._filter.R = np.eye(_STATE_DIMENSION) * observation_variance
+
+    # observation function (z=H*x+N(0,R))
+    self._filter.H = np.eye(_STATE_DIMENSION)
+    # state transition matrix (x'=F*x+B*u+N(0,Q))
+    self._filter.F = np.eye(_STATE_DIMENSION)
+    # Control transition matrix
+    self._filter.B = np.eye(_STATE_DIMENSION)
+
+    self._velocity_filter = time_based_moving_window_filter.TimeBasedMovingWindowFilter(
+        velocity_filter_window)
+    self._gyroscope_filter = time_based_moving_window_filter.TimeBasedMovingWindowFilter(
+        gyroscope_filter_window)
+
+    self.reset(0)
+
+  @property
+  def com_velocity_body_yaw_aligned_frame(self) -> Sequence[float]:
+    """The base velocity projected in the body yaw aligned inertial frame.
+
+    The body yaw aligned frame is a intertia frame where the z axis coincides
+    with the yaw of the robot base and the x and y axis coincides with the world
+    frame. It has a zero relative velocity/angular velocity
+    to the world frame.
+
+    Returns:
+      The com velocity in body yaw aligned frame.
+    """
+    clipped_velocity = np.clip(self._com_velocity_body_yaw_aligned_frame,
+                               -self._velocity_clipping,
+                               self._velocity_clipping)
+
+    return clipped_velocity
+
+  def reset(self, current_time):
+    del current_time
+    self._filter.x = np.zeros(_STATE_DIMENSION)
+    self._filter.P = np.eye(_STATE_DIMENSION) * self._initial_variance
+
+    self._com_velocity_body_yaw_aligned_frame = np.zeros(_STATE_DIMENSION)
+
+    self._velocity_filter.reset()
+    self._gyroscope_filter.reset()
+
+    # Use None instead of 0 in case of a big gap between reset and first step.
+    self._last_timestamp = None
+
+  def update(self, current_time):
+    del current_time
+    current_timestamp = self._robot.timestamp
+
+    # First time step
+    if self._last_timestamp is None:
+      delta_time_s = 0.0
+    else:
+      delta_time_s = current_timestamp - self._last_timestamp
+    self._last_timestamp = current_timestamp
+
+    if self._use_sensor_interface:
+      sensor_acc = np.array(self._accelerometer.get_observation())
+      gyroscope_reading = self._gyroscope.get_observation()
+    else:
+      sensor_acc = np.array(self._robot.base_acceleration_accelerometer)
+      gyroscope_reading = self._robot.base_roll_pitch_yaw
+
+    filtered_gyroscope_reading = self._gyroscope_filter.calculate_average(
+        gyroscope_reading, current_timestamp)
+    # The yaw angle is not used here because reliably estimating the yaw angle
+    # of the robot is in general difficult. This leads to a body yaw aligned
+    # inertia frame for the estimated velocity.
+    yaw_aligned_base_orientation = self._robot.pybullet_client.getQuaternionFromEuler(
+        (filtered_gyroscope_reading[0], filtered_gyroscope_reading[1], 0.0))
+
+    rot_mat = self._robot.pybullet_client.getMatrixFromQuaternion(
+        yaw_aligned_base_orientation)
+    rot_mat = np.array(rot_mat).reshape((_STATE_DIMENSION, _STATE_DIMENSION))
+    calibrated_acc = rot_mat.dot(sensor_acc) + np.array(_GRAVITY)
+    self._filter.predict(u=calibrated_acc * delta_time_s)
+    observed_velocities = []
+
+    foot_contact = [
+        np.linalg.norm(contact_force) > self._contact_detection_threshold
+        for contact_force in self._robot.feet_contact_forces()
+    ]
+    for leg_id in range(4):
+      if foot_contact[leg_id]:
+        jacobian = self._robot.compute_jacobian_for_one_leg(leg_id)
+        # Only pick the jacobian related to joint motors
+        # TODO(magicmelon): standardize the process of picking out relevant dofs
+        com_dof = self._robot.urdf_loader.com_dof
+        jacobian = jacobian[:,
+                            com_dof + leg_id * 3:com_dof + (leg_id + 1) * 3]
+        joint_velocities = self._robot.motor_velocities[leg_id *
+                                                        3:(leg_id + 1) * 3]
+        leg_velocity_in_base_frame = jacobian.dot(joint_velocities)
+        base_velocity_in_base_frame = -leg_velocity_in_base_frame
+        observed_velocities.append(rot_mat.dot(base_velocity_in_base_frame))
+
+    if observed_velocities:
+      observed_velocities = np.mean(observed_velocities, axis=0)
+      self._filter.update(observed_velocities)
+
+    velocity = self._filter.x.copy()
+
+    self._com_velocity_body_yaw_aligned_frame = self._velocity_filter.calculate_average(
+        velocity, current_timestamp)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/leg_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/leg_controller.py
@@ -0,0 +1,29 @@
+"""The leg controller class interface."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+from typing import Any
+
+
+class LegController(object):  # pytype: disable=ignored-metaclass
+  """Generates the leg control signal."""
+
+  __metaclass__ = abc.ABCMeta
+
+  @abc.abstractmethod
+  def reset(self, current_time: float):
+    """Resets the controller's internal state."""
+    pass
+
+  @abc.abstractmethod
+  def update(self, current_time: float):
+    """Updates the controller's internal state."""
+    pass
+
+  @abc.abstractmethod
+  def get_action(self) -> Any:
+    """Gets the control signal e.g. torques/positions for the leg."""
+    pass
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller.py
@@ -0,0 +1,96 @@
+"""A model based controller framework."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+from typing import Any, Callable
+
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator as gait_generator_lib
+from pybullet_envs.minitaur.agents.baseline_controller import leg_controller as leg_controller_lib
+from pybullet_envs.minitaur.agents.baseline_controller import state_estimator as state_estimator_lib
+
+
+class LocomotionController(object):
+  """Generates the quadruped locomotion.
+
+  The actual effect of this controller depends on the composition of each
+  individual subcomponent.
+
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      gait_generator: gait_generator_lib.GaitGenerator,
+      state_estimator: state_estimator_lib.StateEstimatorBase,
+      swing_leg_controller: leg_controller_lib.LegController,
+      stance_leg_controller: leg_controller_lib.LegController,
+      clock: Callable[[], float] = None,
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A robot instance.
+      gait_generator: Generates the leg swing/stance pattern.
+      state_estimator: Estimates the state of the robot (e.g. center of mass
+        position or velocity that may not be observable from sensors).
+      swing_leg_controller: Generates motor actions for swing legs.
+      stance_leg_controller: Generates motor actions for stance legs.
+      clock: A real or fake clock source.
+    """
+    self._robot = robot
+    self._clock = clock
+    if clock is None:
+      self._clock = time.time
+    self._reset_time = self._clock()
+    self._time_since_reset = 0
+    self._gait_generator = gait_generator
+    self._state_estimator = state_estimator
+    self._swing_leg_controller = swing_leg_controller
+    self._stance_leg_controller = stance_leg_controller
+
+  @property
+  def swing_leg_controller(self):
+    return self._swing_leg_controller
+
+  @property
+  def stance_leg_controller(self):
+    return self._stance_leg_controller
+
+  @property
+  def gait_generator(self):
+    return self._gait_generator
+
+  @property
+  def state_estimator(self):
+    return self._state_estimator
+
+  def reset(self):
+    self._reset_time = self._clock()
+    self._time_since_reset = 0
+    self._gait_generator.reset(self._time_since_reset)
+    self._state_estimator.reset(self._time_since_reset)
+    self._swing_leg_controller.reset(self._time_since_reset)
+    self._stance_leg_controller.reset(self._time_since_reset)
+
+  def update(self):
+    self._time_since_reset = self._clock() - self._reset_time
+    self._gait_generator.update(self._time_since_reset)
+    self._state_estimator.update(self._time_since_reset)
+    self._swing_leg_controller.update(self._time_since_reset)
+    self._stance_leg_controller.update(self._time_since_reset)
+
+  def get_action(self):
+    """Returns the control ouputs (e.g. positions/torques) for all motors."""
+    swing_action = self._swing_leg_controller.get_action()
+    stance_action = self._stance_leg_controller.get_action()
+    action = []
+    for joint_id in range(self._robot.num_motors):
+      if joint_id in swing_action:
+        action.extend(swing_action[joint_id])
+      else:
+        assert joint_id in stance_action
+        action.extend(stance_action[joint_id])
+    return action
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_example.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_example.py
@@ -0,0 +1,190 @@
+r"""Laikago walking example using the locomotion controller framework.
+
+"""
+
+import gc
+import pickle
+
+from absl import app
+from absl import flags
+import numpy as np
+import scipy.interpolate
+
+
+from pybullet_envs.minitaur.agents.baseline_controller import locomotion_controller_setup
+from pybullet_envs.minitaur.envs_v2 import env_loader
+from pybullet_envs.minitaur.robots import robot_config
+
+FLAGS = flags.FLAGS
+flags.DEFINE_boolean("run_on_robot", False,
+                     "whether to run in sim or on real hardware")
+flags.DEFINE_boolean(
+    "use_ground_truth_velocity", False,
+    "whether to use a ground truth velocity estimator (available in sim)")
+flags.DEFINE_enum("gait", "fast_trot",
+                  ["fast_trot", "slow_trot", "walk", "stand"],
+                  "The gait pattern to use")
+flags.DEFINE_boolean(
+    "use_keyboard_control", False,
+    "whether to use a keyboard to control or demo speed profile.")
+flags.DEFINE_string("log_path", None, "Path to save robot logs")
+flags.DEFINE_boolean("add_random_push", False,
+                     "whether to add random push to the robot in simulation")
+
+_MAX_TIME_SECONDS = 100
+
+
+def _load_config(render=True, run_on_robot=False):
+  """Builds the environment for the quadruped robot.
+
+  Args:
+    render: Enable/disable rendering.
+    run_on_robot: Whether deploy to robot or run in sim.
+  """
+  if run_on_robot:
+    locomotion_controller_setup.load_real_config()
+  else:
+    locomotion_controller_setup.load_sim_config(render)
+    if FLAGS.add_random_push:
+      locomotion_controller_setup.add_random_push_config()
+
+
+def _generate_example_linear_angular_speed(t):
+  """Creates an example speed profile based on time for demo purpose."""
+  vx = 0.1
+  vy = 0.1
+  wz = 0.3
+  time_points = (0, 4, 7, 11, 13, 15, 17, 19, 100)
+  speed_points = ((0, 0, 0, 0), (vx, 0, 0, 0), (-vx, 0, 0, 0), (0, -vy, 0, 0),
+                  (0, vy, 0, 0), (0, 0, 0, wz), (0, 0, 0, -wz), (0, 0, 0, 0),
+                  (0, 0, 0, 0))
+
+  speed = scipy.interpolate.interp1d(
+      time_points,
+      speed_points,
+      kind="previous",
+      fill_value="extrapolate",
+      axis=0)(
+          t)
+
+  return speed[0:3], speed[3]
+
+
+def _update_speed_from_kb(kb, lin_speed, ang_speed):
+  """Updates the controller behavior parameters."""
+  if kb.is_keyboard_hit():
+    c = kb.get_input_character()
+    if c == "w":
+      lin_speed += np.array((0.05, 0, 0))
+    if c == "s":
+      lin_speed += np.array((-0.05, 0, 0))
+    if c == "q":
+      ang_speed += 0.1
+    if c == "e":
+      ang_speed += -0.1
+    if c == "a":
+      lin_speed += np.array((0, 0.05, 0))
+    if c == "d":
+      lin_speed += np.array((0, -0.05, 0))
+    if c == "r":
+      lin_speed = np.array([0.0, 0.0, 0.0])
+      ang_speed = 0.0
+
+    lin_speed[0] = np.clip(lin_speed[0], -0.2, 0.4)
+    lin_speed[1] = np.clip(lin_speed[1], -0.2, 0.2)
+    ang_speed = np.clip(ang_speed, -0.3, 0.3)
+    print("desired speed: ", lin_speed, ang_speed)
+
+  return lin_speed, ang_speed
+
+
+def _update_controller_params(controller, lin_speed, ang_speed):
+  controller.swing_leg_controller.desired_speed = lin_speed
+  controller.swing_leg_controller.desired_twisting_speed = ang_speed
+  controller.stance_leg_controller.desired_speed = lin_speed
+  controller.stance_leg_controller.desired_twisting_speed = ang_speed
+
+
+def _run_example(max_time=_MAX_TIME_SECONDS,
+                 run_on_robot=False,
+                 use_keyboard=False):
+  """Runs the locomotion controller example."""
+  if use_keyboard:
+    kb = keyboard_utils.KeyboardInput()
+
+  env = env_loader.load()
+  env.reset()
+
+  # To mitigate jittering from the python
+  gc.collect()
+
+  # Wait for the robot to be placed properly.
+  if run_on_robot:
+    input("Press Enter to continue when robot is ready.")
+
+  lin_speed = np.array([0.0, 0.0, 0.0])
+  ang_speed = 0.0
+
+  controller = locomotion_controller_setup.setup_controller(
+      env.robot, FLAGS.gait, run_on_robot, FLAGS.use_ground_truth_velocity)
+  controller.reset()
+
+  loop_start_time = env.get_time_since_reset()
+  loop_elapsed_time = 0
+  robot_log = {
+      "timestamps": [],
+      "motor_angles": [],
+      "motor_velocities": [],
+      "base_velocities": [],
+      "foot_positions": [],
+      "base_rollpitchyaw": [],
+      "base_angular_velocities": [],
+      "actions": []
+  }
+  try:
+    while loop_elapsed_time < max_time:
+      #if use_keyboard:
+      #  lin_speed, ang_speed = _update_speed_from_kb(kb, lin_speed, ang_speed)
+      #else:
+      lin_speed, ang_speed = _generate_example_linear_angular_speed(
+            loop_elapsed_time)
+
+      # Needed before every call to get_action().
+      _update_controller_params(controller, lin_speed, ang_speed)
+      controller.update()
+      hybrid_action = controller.get_action()
+
+      # Log the robot data.
+      robot_log["timestamps"].append(env.robot.GetTimeSinceReset())
+      robot_log["motor_angles"].append(env.robot.motor_angles)
+      robot_log["motor_velocities"].append(env.robot.motor_velocities)
+      robot_log["base_velocities"].append(
+          controller.state_estimator.com_velocity_body_yaw_aligned_frame)
+      robot_log["foot_positions"].append(env.robot.foot_positions())
+      robot_log["base_rollpitchyaw"].append(env.robot.base_roll_pitch_yaw)
+      robot_log["base_angular_velocities"].append(
+          env.robot.base_roll_pitch_yaw_rate)
+      robot_log["actions"].append(hybrid_action)
+
+      env.step(hybrid_action)
+      loop_elapsed_time = env.get_time_since_reset() - loop_start_time
+
+  finally:
+    if FLAGS.run_on_robot:
+      # Apply zero torques to the robot.
+      env.robot.apply_action(
+          [0] * env.robot.num_motors,
+          motor_control_mode=robot_config.MotorControlMode.TORQUE)
+    if FLAGS.log_path:
+      pickle.dump(robot_log, gfile.Open(FLAGS.log_path + "/robot.log", "wb"))
+
+
+def main(argv):
+  del argv
+  _load_config(render=True, run_on_robot=FLAGS.run_on_robot)
+  _run_example(
+      run_on_robot=FLAGS.run_on_robot, use_keyboard=FLAGS.use_keyboard_control)
+
+
+if __name__ == "__main__":
+  app.run(main)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_in_scenario_set_example.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_in_scenario_set_example.py
@@ -0,0 +1,180 @@
+r"""ScenarioSet example for Laikago MPC controller.
+
+blaze run -c opt \
+//robotics/reinforcement_learning/minitaur/agents/baseline_controller\
+:locomotion_controller_in_scenario_set_example -- --gait=slow_trot \
+--add_random_push=True
+"""
+
+from absl import app
+from absl import flags
+import gin
+import numpy as np
+import scipy.interpolate
+
+from pybullet_envs.minitaur.agents.baseline_controller import locomotion_controller_setup
+from pybullet_envs.minitaur.envs_v2 import env_loader
+
+FLAGS = flags.FLAGS
+
+SCENARIO_SET_CONFIG = """
+import pybullet_envs.minitaur.envs_v2.scenarios.locomotion_simple_scenario_set
+
+include "google3/robotics/reinforcement_learning/minitaur/envs_v2/scenarios/default_scenario_set.gin"
+
+default_scenario_set/singleton.constructor = @locomotion_simple_scenario_set.LocomotionSimpleScenarioSet
+
+
+locomotion_simple_scenario_set.LocomotionSimpleScenarioSet.selector = "flat_ground"
+locomotion_gym_env.LocomotionGymEnv.task = @scenario_set.task()
+locomotion_gym_env.LocomotionGymEnv.scene = @scenario_set.scene()
+locomotion_gym_env.LocomotionGymEnv.env_randomizers = [
+  @scenario_set.env_randomizer()
+]
+"""
+
+_MAX_TIME_SECONDS = 30
+
+flags.DEFINE_enum("gait", "fast_trot",
+                  ["fast_trot", "slow_trot", "walk", "stand"],
+                  "The gait pattern to use")
+
+flags.DEFINE_boolean("add_random_push", False,
+                     "whether to add random push to the robot in simulation")
+
+
+def _start_stop_profile(max_speed=0.5, axis=0, duration=3):
+  speed_profile = np.zeros((3, 4))
+
+  speed_profile[1, axis] = max_speed
+
+  return (0, 0.5, duration + 0.5), speed_profile.tolist()
+
+
+def _random_speed_profile(max_speed=1, axis=0, time_interval=1.0):
+  num_pts = 11
+  time_points = np.arange(num_pts) * time_interval
+
+  speed_profile = np.zeros((num_pts, 4))
+  speed_profile[:, axis] = np.random.uniform(0, max_speed, num_pts)
+  speed_profile[-1, :] = 0
+  return time_points.tolist(), speed_profile.tolist()
+
+
+def _body_height_profile(z_range=(0.3, 0.55)):
+  del z_range
+  # TODO(tingnan): Implement this.
+
+
+def _generate_linear_angular_speed(t, time_points, speed_points):
+  """Creates an example speed profile based on time for demo purpose."""
+
+  speed = scipy.interpolate.interp1d(
+      time_points,
+      speed_points,
+      kind="previous",
+      fill_value="extrapolate",
+      axis=0)(
+          t)
+
+  return speed[0:3], speed[3]
+
+
+def _update_controller_params(controller, lin_speed, ang_speed):
+  controller.swing_leg_controller.desired_speed = lin_speed
+  controller.swing_leg_controller.desired_twisting_speed = ang_speed
+  controller.stance_leg_controller.desired_speed = lin_speed
+  controller.stance_leg_controller.desired_twisting_speed = ang_speed
+
+
+def _gen_stability_test_start_stop():
+  """Generates the speed profile for start/stop tests."""
+  axis_to_name = {
+      0: "velocity x",
+      1: "velocity y",
+      3: "angular velocity z",
+  }
+
+  axis_to_max_speed = {
+      0: 1.0,
+      1: 0.5,
+      3: 2.5,
+  }
+
+  gait_multiplier = {
+      "slow_trot": 0.7,
+      "walk": 0.3,
+      "fast_trot": 1.0,
+  }
+
+  for axis in (0, 1, 3):
+    yield axis_to_name[axis], _start_stop_profile(
+        axis_to_max_speed[axis] * gait_multiplier[FLAGS.gait], axis)
+
+
+def _gen_stability_test_random():
+  """Generates the speed profile for random walking tests."""
+  axis_to_name = {
+      0: "velocity x",
+      1: "velocity y",
+      3: "angular velocity z",
+  }
+
+  axis_to_max_speed = {
+      0: 1.0,
+      1: 0.5,
+      3: 2.5,
+  }
+
+  gait_multiplier = {
+      "slow_trot": 0.7,
+      "walk": 0.3,
+      "fast_trot": 1.0,
+  }
+
+  for axis in (0, 1, 3):
+    yield axis_to_name[axis], _random_speed_profile(
+        axis_to_max_speed[axis] * gait_multiplier[FLAGS.gait], axis)
+
+
+def _test_stability(max_time=5, render=False, test_generator=None):
+  """Tests the stability of the controller using speed profiles."""
+  locomotion_controller_setup.load_sim_config(render=render)
+  gin.parse_config(SCENARIO_SET_CONFIG)
+  if FLAGS.add_random_push:
+    locomotion_controller_setup.add_random_push_config()
+
+  env = env_loader.load()
+  controller = locomotion_controller_setup.setup_controller(
+      env.robot, gait=FLAGS.gait)
+
+  for name, speed_profile in test_generator():
+    env.reset()
+    controller.reset()
+    current_time = 0
+    while current_time < max_time:
+      current_time = env.get_time_since_reset()
+      lin_speed, ang_speed = _generate_linear_angular_speed(
+          current_time, speed_profile[0], speed_profile[1])
+      _update_controller_params(controller, lin_speed, ang_speed)
+
+      # Needed before every call to get_action().
+      controller.update()
+      hybrid_action = controller.get_action()
+
+      _, _, done, _ = env.step(hybrid_action)
+      if done:
+        break
+    print(f"Scene name: flat ground. Random push: {FLAGS.add_random_push}. "
+          f"Survival time for {name} = {speed_profile[1]} is {current_time}")
+
+
+def main(argv):
+  del argv
+  _test_stability(render=True, test_generator=_gen_stability_test_start_stop)
+  _test_stability(
+      max_time=15, render=True, test_generator=_gen_stability_test_random)
+
+
+if __name__ == "__main__":
+  app.run(main)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_setup.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/locomotion_controller_setup.py
@@ -0,0 +1,175 @@
+"""The common setups for MPC based locoomtion controller environments."""
+import time
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import com_velocity_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import imu_based_com_velocity_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import locomotion_controller
+from pybullet_envs.minitaur.agents.baseline_controller import openloop_gait_generator
+from pybullet_envs.minitaur.agents.baseline_controller import raibert_swing_leg_controller
+from pybullet_envs.minitaur.agents.baseline_controller import torque_stance_leg_controller
+#from pybullet_envs.minitaur.envs.env_randomizers import minitaur_push_randomizer
+from pybullet_envs.minitaur.envs.env_randomizers import minitaur_push_randomizer
+from pybullet_envs.minitaur.robots import robot_config
+import pybullet_data as pd
+
+CONFIG_FILE = (pd.getDataPath()+"/configs_v2/base/laikago_reactive.gin")
+
+_MOTOR_KD = [1.0, 2.0, 2.0] * 4
+_BODY_HEIGHT = 0.45
+_MAX_TIME_SECONDS = 1000000000
+_MOTOR_KD = [1.0, 2.0, 2.0] * 4
+# TODO(tingnan): This is for tunining the moments of inertia of the model.
+# Once we identified the correct value we can remove this.
+_SCALE = 4
+_INERTIA = (0.07335 * _SCALE, 0, 0, 0, 0.25068 * _SCALE, 0, 0, 0,
+            0.25447 * _SCALE)
+
+
+def load_sim_config(render=True):
+  """Builds the environment for the quadruped robot.
+
+  Args:
+    render: Enable/disable rendering.
+  """
+  gin.clear_config(clear_constants=False)
+  config_file = CONFIG_FILE
+  gin.parse_config_file(config_file)
+
+  # Sim bindings
+  # Overwrite a few parameters.
+
+  action_repeat = 4
+  gin.bind_parameter("SimulationParameters.num_action_repeat", action_repeat)
+  gin.bind_parameter("laikago_v2.Laikago.action_repeat", action_repeat)
+
+  # Control latency is NOT modeled properly for inverse kinematics and
+  # jacobians, as we are directly calling the pybullet API. We will try to fix
+  # this by loading a separate pybullet instance, set the pose and joint
+  # angles which has latency in them, and then run the jacobian/IK.
+  gin.bind_parameter("laikago_v2.Laikago.motor_control_mode",
+                     robot_config.MotorControlMode.HYBRID)
+  # Bump up a bit the adduction/abduction motor d gain for a better tracking.
+  gin.bind_parameter("hybrid_motor_model.HybridMotorModel.kd", _MOTOR_KD)
+  gin.bind_parameter("SimulationParameters.enable_rendering", render)
+  gin.bind_parameter("env_loader.load.wrapper_classes", [])
+
+
+
+def add_random_push_config():
+  """Adds a random push randomizers to the config."""
+  try:
+    current_env_randomizers = gin.query_parameter(
+        "locomotion_gym_env.LocomotionGymEnv.env_randomizers")
+
+    current_env_randomizers.append(
+        minitaur_push_randomizer.MinitaurPushRandomizer(
+            horizontal_force_bound=(500, 900),
+            vertical_force_bound=(50, 100),
+            visualize_perturbation_force=True))
+    gin.bind_parameter("locomotion_gym_env.LocomotionGymEnv.env_randomizers",
+                       current_env_randomizers)
+  except ValueError:
+    # No randoimzers bind so far
+    gin.bind_parameter("locomotion_gym_env.LocomotionGymEnv.env_randomizers", [
+        minitaur_push_randomizer.MinitaurPushRandomizer(
+            horizontal_force_bound=(500, 900),
+            vertical_force_bound=(50, 100),
+            visualize_perturbation_force=True)
+    ])
+
+
+def select_gait(gait_type="fast_trot"):
+  """Selects a gait pattern.
+
+  Args:
+    gait_type: which gait to use.
+
+  Returns:
+    A tuple of (stance_duration, duty_factor, initial_phase)
+  """
+  # Each gait is composed of stance_duration, duty_factor, and
+  # init_phase_full_cycle.
+  if gait_type == "fast_trot":
+    return [0.25] * 4, [0.6] * 4, [0, 0.5, 0.5, 0]
+  elif gait_type == "slow_trot":
+    return [0.4] * 4, [0.65] * 4, [0, 0.5, 0.5, 0]
+  elif gait_type == "walk":
+    return [0.75] * 4, [0.8] * 4, [0.25, 0.75, 0.5, 0]
+  else:
+    # Means four leg stand for as long as possible.
+    return [_MAX_TIME_SECONDS] * 4, [0.99] * 4, [0, 0, 0, 0]
+
+
+def setup_controller(robot,
+                     gait="fast_trot",
+                     run_on_robot=False,
+                     use_ground_truth_velocity=False):
+  """Demonstrates how to create a locomotion controller.
+
+  Args:
+    robot: A robot instance.
+    gait: The type of gait to use.
+    run_on_robot: Whether this controller is running on the real robot or not.
+    use_ground_truth_velocity: Whether to use ground truth velocity or velocity
+      estimator.
+
+  Returns:
+    A locomotion controller.
+  """
+  desired_speed = (0, 0)
+  desired_twisting_speed = 0
+
+  feet_positions = np.array(robot.foot_positions())
+  feet_positions[:, 2] = 0
+
+  # Sim and real robots have different mass and contact detection parameters.
+  body_weight, contact_force_threshold = (200, 20) if run_on_robot else (215, 0)
+
+  stance_duration, duty_factor, init_phase = select_gait(gait)
+  gait_generator = openloop_gait_generator.OpenloopGaitGenerator(
+      robot,
+      stance_duration=stance_duration,
+      duty_factor=duty_factor,
+      initial_leg_phase=init_phase,
+      contact_detection_force_threshold=contact_force_threshold,
+  )
+  state_estimator = (
+      imu_based_com_velocity_estimator.IMUBasedCOMVelocityEstimator(
+          robot,
+          contact_detection_threshold=contact_force_threshold,
+      ))
+
+  # Use this in sim to test ground truth velocity estimation.
+  if use_ground_truth_velocity:
+    state_estimator = com_velocity_estimator.COMVelocityEstimator(robot)
+
+  sw_controller = raibert_swing_leg_controller.RaibertSwingLegController(
+      robot,
+      gait_generator,
+      state_estimator,
+      desired_speed=desired_speed,
+      desired_twisting_speed=desired_twisting_speed,
+      desired_height=_BODY_HEIGHT,
+      local_hip_positions=feet_positions,
+  )
+  st_controller = torque_stance_leg_controller.TorqueStanceLegController(
+      robot,
+      gait_generator,
+      state_estimator,
+      desired_speed=desired_speed,
+      desired_twisting_speed=desired_twisting_speed,
+      desired_body_height=_BODY_HEIGHT,
+      body_mass=body_weight / 9.8,
+      body_inertia=_INERTIA,
+  )
+
+  controller = locomotion_controller.LocomotionController(
+      robot=robot,
+      gait_generator=gait_generator,
+      state_estimator=state_estimator,
+      swing_leg_controller=sw_controller,
+      stance_leg_controller=st_controller,
+      clock=robot.GetTimeSinceReset)
+  return controller
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/minitaur_raibert_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/minitaur_raibert_controller.py
@@ -0,0 +1,448 @@
+"""A Raibert style controller for Minitaur."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import attr
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import minitaur_raibert_controller_utils
+from pybullet_envs.minitaur.envs.utilities import minitaur_pose_utils
+
+LEFT_FRONT_LEG_ID = 0
+LEFT_HIND_LEG_ID = 1
+RIGHT_FRONT_LEG_ID = 2
+RIGHT_HIND_LEG_ID = 3
+
+DIAGONAL_LEG_PAIR_1 = (LEFT_FRONT_LEG_ID, RIGHT_HIND_LEG_ID)
+DIAGONAL_LEG_PAIR_2 = (LEFT_HIND_LEG_ID, RIGHT_FRONT_LEG_ID)
+
+LEFT_LEG_IDS = (LEFT_FRONT_LEG_ID, LEFT_HIND_LEG_ID)
+RIGHT_LEG_IDS = (RIGHT_FRONT_LEG_ID, RIGHT_HIND_LEG_ID)
+
+# The max horizontal foot offset (in meters) for turning.
+_FOOT_HORIZONTAL_OFFSET_FOR_TURNING = 0.1
+
+_STANCE_TG_PHASE_COMPRESSION = 1.5
+_STANCE_TG_DELTA_EXTENSION = 0.2
+_STANCE_HORIZONTAL_SCALING_FACTOR = 1.2
+
+_DEFAULT_SWING_SPEED_GAIN = 0.015
+
+_DEFAULT_SWING_FOOT_CLEARANCE = 0.005
+
+_PITCH_SWING_FEEDBACK_SCALING_FACTOR = 1.2
+
+
+# A POD container to describe the controller's high level behavior.
+@attr.s
+class BehaviorParameters(object):
+  """Highlevel parameters for Raibert style controller."""
+  stance_duration = attr.ib(type=float, default=0.25)
+  desired_forward_speed = attr.ib(type=float, default=0.)
+  desired_turning_speed = attr.ib(type=float, default=0.)
+  standing_height = attr.ib(type=float, default=0.2)
+  desired_incline_angle = attr.ib(type=float, default=0.)
+
+
+def generate_default_swing_trajectory(phase, init_pose, end_pose):
+  """A swing trajectory generator.
+
+  Args:
+    phase: Float. Between [0, 1].
+    init_pose: A tuple. The leg pose (swing, extension) at phase == 0 the
+      beginning of swing.
+    end_pose: A tuple. The leg pose at phase == 1 the end of swing.
+
+  Returns:
+    The desired leg pose for the current phase.
+  """
+  # Try phase compression
+  normalized_phase = math.sqrt(min(phase * 1.5, 1))
+
+  # For swing, we use a linear interpolation:
+  swing = (end_pose[0] - init_pose[0]) * normalized_phase + init_pose[0]
+
+  # For extension, we can fit a second order polynomial:
+  min_ext = (init_pose[1] + end_pose[1]) / 2 - 0.8
+  min_ext = max(min_ext, 0.5)
+
+  # The phase value at which the extension reaches the minimal value min_ext.
+  # phi is small, the swing leg will try to lift higher in the first half of
+  # swing.
+  phi = 0.1
+
+  # We convert the extension back into the cartesion space. In this way we can
+  # guarantee a lift-up trajectory. The ankle to hip distance is easier to
+  # compute than a full forward-kinematics.
+  min_ankle_dist = minitaur_raibert_controller_utils.extension_to_ankle_dist(
+      min_ext)
+  init_ankle_dist = minitaur_raibert_controller_utils.extension_to_ankle_dist(
+      init_pose[1])
+  end_ankle_dist = minitaur_raibert_controller_utils.extension_to_ankle_dist(
+      end_pose[1])
+
+  # The polynomial is: a * phi^2 + b * phi + c
+  delta_1 = min_ankle_dist - init_ankle_dist
+  delta_2 = end_ankle_dist - init_ankle_dist
+  delta_p = phi * phi - phi
+
+  a = (delta_1 - phi * delta_2) / delta_p
+
+  b = (phi * phi * delta_2 - delta_1) / delta_p
+
+  ankle_dist = (
+      a * normalized_phase * normalized_phase + b * normalized_phase +
+      init_ankle_dist)
+
+  l1 = minitaur_raibert_controller_utils.UPPER_LEG_LEN
+  l2 = minitaur_raibert_controller_utils.LOWER_SHORT_LEG_LEN
+
+  ankle_dist = min(max(ankle_dist, l2 - l1 + 0.01), l2 + l1 - 0.01)
+
+  extension = minitaur_raibert_controller_utils.ankle_dist_to_extension(
+      ankle_dist)
+
+  return (swing, extension)
+
+
+@gin.configurable
+def generate_default_stance_trajectory(phase,
+                                       init_pose,
+                                       end_pose,
+                                       use_constant_extension=False):
+  """A stance strajectory generator.
+
+  Args:
+    phase: Float. Between [0, 1].
+    init_pose: A tuple. The leg pose (swing, extension) at phase == 0, i.e. the
+      beginning of stance.
+    end_pose: A tuple. The leg pose at phase == 1, i.e. the end of stance.
+    use_constant_extension: Boolean. Whether or not to fix the extension during
+      stance.
+
+  Returns:
+    The desired leg pose for the current phase.
+  """
+  normalized_phase = min(_STANCE_TG_PHASE_COMPRESSION * math.sqrt(phase), 1)
+  swing = (end_pose[0] - init_pose[0]) * normalized_phase + init_pose[0]
+
+  # The extension evolves nonlinearly according to the parabola equation.
+  if use_constant_extension:
+    extension = end_pose[1]
+  else:
+    extension = end_pose[1] - 4 * _STANCE_TG_DELTA_EXTENSION * (
+        normalized_phase**2 - normalized_phase)
+  return (swing, extension)
+
+
+def get_stance_foot_offset_for_turning(leg_id, steering_signal):
+  """Modify the stance foot position to achieve turning.
+
+  The strategy works for trotting gaits.
+
+  Args:
+    leg_id: Integer. The leg index.
+    steering_signal: Float. The desired turning signal in [-1, 1]. Because we
+      don't have an accurate mapping from angular velocity to the foot offset,
+      the steering signal should be treated as a reference and only its relative
+      magnitude matters.
+
+  Returns:
+    The stance foot's horizontal offset.
+
+  """
+  clipped_steering_signal = np.clip(steering_signal, -1, 1)
+
+  if leg_id in LEFT_LEG_IDS:
+    return _FOOT_HORIZONTAL_OFFSET_FOR_TURNING * clipped_steering_signal
+  else:
+    return -(_FOOT_HORIZONTAL_OFFSET_FOR_TURNING * clipped_steering_signal)
+
+
+def get_leg_swing_offset_for_pitching(body_pitch, desired_incline_angle):
+  """Get the leg swing zero point when the body is tilted.
+
+  For example, when climbing up or down stairs/slopes, the robot body will tilt
+  up or down. By compensating the body pitch, the leg's trajectory will be
+  centered around the vertical direction (not perpendicular to the surface).
+  This helps the robot to generate thrust when going upwards, and braking when
+  going downwards.
+
+  Args:
+    body_pitch: Float. Current body pitch angle.
+    desired_incline_angle: Float. The desired body pitch angle.
+
+  Returns:
+    The stance and swing leg swing offset.
+
+  """
+  kp = 0.2
+  return -((1 - kp) * body_pitch + kp * desired_incline_angle)
+
+
+@gin.configurable
+class RaibertSwingLegController(object):
+  """The swing leg controller."""
+
+  def __init__(self,
+               speed_gain=_DEFAULT_SWING_SPEED_GAIN,
+               foot_clearance=_DEFAULT_SWING_FOOT_CLEARANCE,
+               leg_trajectory_generator=generate_default_swing_trajectory):
+    """Initializes the controller.
+
+    Args:
+      speed_gain: Float. The speed feedback gain to modulate the target foot
+        position.
+      foot_clearance: Float. The foot clearance (at the end of the swing phase)
+        in meters.
+      leg_trajectory_generator: A trajectory generator function.
+    """
+    self._speed_gain = speed_gain
+    self._foot_clearance = foot_clearance
+    self._leg_trajectory_generator = leg_trajectory_generator
+
+  def get_action(self, raibert_controller):
+    """Get the swing legs' desired pose."""
+    current_speed = raibert_controller.estimate_base_velocity()
+    phase = raibert_controller.get_phase()
+    rpy = raibert_controller.robot.base_roll_pitch_yaw
+
+    leg_pose_set = {}
+    for i in raibert_controller.swing_set:
+      # Target foot horizontal position is calculated according to Raibert's
+      # original formula in "Legged robots that balance".
+      target_foot_horizontal_position = (
+          raibert_controller.behavior_parameters.stance_duration / 2 *
+          current_speed + self._speed_gain *
+          (current_speed -
+           raibert_controller.behavior_parameters.desired_forward_speed))
+
+      # 1) Convert the target foot position to leg pose space.
+      # Lift the foot a little bit.
+      target_foot_vertical_position = -(
+          raibert_controller.behavior_parameters.standing_height -
+          self._foot_clearance)
+      target_foot_position = (target_foot_horizontal_position,
+                              target_foot_vertical_position)
+      target_leg_pose = minitaur_raibert_controller_utils.foot_position_to_leg_pose(
+          target_foot_position)
+
+      # 2) Generates the curve from the swing start leg pose to the target leg
+      # pose and find the next leg pose on the curve based on current swing
+      # phase.
+
+      desired_leg_pose = self._leg_trajectory_generator(
+          phase, raibert_controller.swing_start_leg_pose, target_leg_pose)
+
+      swing_offset = get_leg_swing_offset_for_pitching(
+          rpy[1], raibert_controller.behavior_parameters.desired_incline_angle)
+
+      leg_pose_set[i] = (desired_leg_pose[0] + swing_offset,
+                         desired_leg_pose[1])
+
+    return leg_pose_set
+
+
+@gin.configurable
+class RaibertStanceLegController(object):
+  """The controller that modulates the behavior of the stance legs."""
+
+  def __init__(self,
+               speed_gain=0.1,
+               leg_trajectory_generator=generate_default_stance_trajectory):
+    """Initializes the controller.
+
+    Args:
+      speed_gain: Float. The speed feedback gain to modulate the target stance
+        foot position.
+      leg_trajectory_generator: A trajectory generator function. Generates the
+        desired leg pose during the stance phase.
+    """
+    self._speed_gain = speed_gain
+    self._leg_trajectory_generator = leg_trajectory_generator
+
+  def get_action(self, raibert_controller):
+    """Get the desired leg pose for the stance legs."""
+
+    phase = raibert_controller.get_phase()
+    current_speed = raibert_controller.estimate_base_velocity()
+    rpy = raibert_controller.robot.base_roll_pitch_yaw
+
+    leg_pose_set = {}
+    for i in raibert_controller.stance_set:
+      desired_forward_speed = (
+          raibert_controller.behavior_parameters.desired_forward_speed)
+
+      target_foot_horizontal_position = -_STANCE_HORIZONTAL_SCALING_FACTOR * (
+          raibert_controller.behavior_parameters.stance_duration / 2 *
+          current_speed - self._speed_gain *
+          (current_speed - desired_forward_speed))
+
+      target_foot_horizontal_position += get_stance_foot_offset_for_turning(
+          i, raibert_controller.behavior_parameters.desired_turning_speed)
+
+      target_foot_position = (
+          target_foot_horizontal_position,
+          -raibert_controller.behavior_parameters.standing_height)
+      target_leg_pose = minitaur_raibert_controller_utils.foot_position_to_leg_pose(
+          target_foot_position)
+
+      desired_leg_pose = (
+          self._leg_trajectory_generator(
+              phase, raibert_controller.stance_start_leg_pose, target_leg_pose))
+
+      swing_offset = _PITCH_SWING_FEEDBACK_SCALING_FACTOR * get_leg_swing_offset_for_pitching(
+          rpy[1], raibert_controller.behavior_parameters.desired_incline_angle)
+
+      leg_pose_set[i] = (desired_leg_pose[0] + swing_offset,
+                         desired_leg_pose[1])
+
+    return leg_pose_set
+
+
+@gin.configurable
+class MinitaurRaibertController(object):
+  """A Raibert style controller for trotting gait."""
+
+  def __init__(self,
+               robot,
+               behavior_parameters=BehaviorParameters(),
+               swing_leg_controller=RaibertSwingLegController(),
+               stance_leg_controller=RaibertStanceLegController(),
+               pose_feedback_controller=None):
+    self._time = 0
+    self._robot = robot
+    self.behavior_parameters = behavior_parameters
+
+    self._last_recorded_speed = 0
+
+    self._swing_leg_controller = swing_leg_controller
+    self._stance_leg_controller = stance_leg_controller
+    self._pose_feeback_controller = pose_feedback_controller
+
+    # The leg order is FL, RL, FR, RR -> [0, 1, 2, 3]
+    self._swing_set = DIAGONAL_LEG_PAIR_1
+    self._stance_set = DIAGONAL_LEG_PAIR_2
+
+    # Compute the initial leg pose.
+    self._swing_start_leg_pose = self.get_swing_leg_pose()
+    self._stance_start_leg_pose = self.get_stance_leg_pose()
+
+  @property
+  def robot(self):
+    return self._robot
+
+  @property
+  def swing_set(self):
+    return self._swing_set
+
+  @property
+  def stance_set(self):
+    return self._stance_set
+
+  @property
+  def swing_start_leg_pose(self):
+    return self._swing_start_leg_pose
+
+  @property
+  def stance_start_leg_pose(self):
+    return self._stance_start_leg_pose
+
+  def _get_average_leg_pose(self, leg_indices):
+    """Get the average leg pose."""
+    current_leg_pose = minitaur_pose_utils.motor_angles_to_leg_pose(
+        self._robot.motor_angles)
+
+    # extract the swing leg pose from the current_leg_pose
+    leg_pose = []
+    for index in leg_indices:
+      leg_pose.append([
+          current_leg_pose[index],
+          current_leg_pose[index + minitaur_pose_utils.NUM_LEGS]
+      ])
+
+    leg_pose = np.array(leg_pose)
+    return np.mean(leg_pose, axis=0)
+
+  def get_swing_leg_pose(self):
+    """Get the current swing legs' average pose."""
+    return self._get_average_leg_pose(self._swing_set)
+
+  def get_stance_leg_pose(self):
+    """Get the current stance legs' average pose."""
+    return self._get_average_leg_pose(self._stance_set)
+
+  def get_phase(self):
+    """Compute the current stance/swing phase."""
+    return math.fmod(self._time, self.behavior_parameters.stance_duration
+                    ) / self.behavior_parameters.stance_duration
+
+  def _get_new_swing_stance_set(self):
+    """Switch the set of swing/stance legs based on timing."""
+    swing_stance_phase = math.fmod(self._time,
+                                   2 * self.behavior_parameters.stance_duration)
+    if swing_stance_phase < self.behavior_parameters.stance_duration:
+      return (DIAGONAL_LEG_PAIR_1, DIAGONAL_LEG_PAIR_2)
+    return (DIAGONAL_LEG_PAIR_2, DIAGONAL_LEG_PAIR_1)
+
+  def update(self, t):
+    """Update the internal status of the controller.
+
+    Args:
+      t: Float. The current time after reset in seconds.
+    """
+    self._time = t
+    new_swing_set, new_stance_set = self._get_new_swing_stance_set()
+
+    # If there is a stance/swing switch.
+    if new_swing_set[0] is not self._swing_set[0]:
+      self._swing_set = new_swing_set
+      self._stance_set = new_stance_set
+
+      # Also records the starting pose.
+      self._swing_start_leg_pose = self.get_swing_leg_pose()
+      self._stance_start_leg_pose = self.get_stance_leg_pose()
+
+  def estimate_base_velocity(self):
+    """Estimate the current CoM speed."""
+    # It is best to use a sensor fusion approach.
+    stance_leg_pose = self.get_stance_leg_pose()
+
+    delta_sw = stance_leg_pose[0] - self._stance_start_leg_pose[0]
+
+    x, y = minitaur_raibert_controller_utils.leg_pose_to_foot_position(
+        stance_leg_pose)
+    toe_hip_len = math.sqrt(x**2 + y**2)
+    horizontal_dist = toe_hip_len * delta_sw
+    phase = self.get_phase()
+    speed = self._last_recorded_speed
+    if phase > 0.1:
+      speed = horizontal_dist / (
+          self.behavior_parameters.stance_duration * phase)
+      self._last_recorded_speed = speed
+    return speed
+
+  def get_swing_leg_action(self):
+    return self._swing_leg_controller.get_action(self)
+
+  def get_stance_leg_action(self):
+    return self._stance_leg_controller.get_action(self)
+
+  def get_action(self):
+    """Gets the desired motor angles."""
+    leg_pose = [0] * minitaur_pose_utils.NUM_MOTORS
+    swing_leg_pose = self.get_swing_leg_action()
+    for i in self._swing_set:
+      leg_pose[i] = swing_leg_pose[i][0]
+      leg_pose[i + minitaur_pose_utils.NUM_LEGS] = swing_leg_pose[i][1]
+
+    stance_leg_pose = self.get_stance_leg_action()
+    for i in self._stance_set:
+      leg_pose[i] = stance_leg_pose[i][0]
+      leg_pose[i + minitaur_pose_utils.NUM_LEGS] = stance_leg_pose[i][1]
+
+    return minitaur_pose_utils.leg_pose_to_motor_angles(leg_pose)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/minitaur_raibert_controller_utils.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/minitaur_raibert_controller_utils.py
@@ -0,0 +1,82 @@
+"""Utility functions for the Minitaur Raibert controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+UPPER_LEG_LEN = 0.112
+LOWER_SHORT_LEG_LEN = 0.199
+LOWER_LONG_LEG_LEN = 0.2315
+
+
+def leg_pose_to_foot_position(leg_pose):
+  """The forward kinematics."""
+  l1 = UPPER_LEG_LEN
+  l2 = LOWER_SHORT_LEG_LEN
+  l3 = LOWER_LONG_LEG_LEN
+
+  ext = leg_pose[1]
+  alpha = math.asin(l1 * math.sin(ext) / l2)
+
+  sw = -leg_pose[0]
+  x = l3 * math.sin(alpha + sw) - l1 * math.sin(ext + sw)
+  y = l3 * math.cos(alpha + sw) - l1 * math.cos(ext + sw)
+
+  return (x, -y)
+
+
+def foot_position_to_leg_pose(foot_position):
+  """The inverse kinematics."""
+  l1 = UPPER_LEG_LEN
+  l2 = LOWER_SHORT_LEG_LEN
+  l3 = LOWER_LONG_LEG_LEN
+
+  x = foot_position[0]
+  y = foot_position[1]
+
+  assert y < 0
+  hip_toe_sqr = x**2 + y**2
+  cos_beta = (l1 * l1 + l3 * l3 - hip_toe_sqr) / (2 * l1 * l3)
+  assert -1 <= cos_beta <= 1
+  hip_ankle_sqr = l1 * l1 + l2 * l2 - 2 * l1 * l2 * cos_beta
+  hip_ankle = math.sqrt(hip_ankle_sqr)
+  cos_ext = -(l1 * l1 + hip_ankle_sqr - l2 * l2) / (2 * l1 * hip_ankle)
+  ext = math.acos(cos_ext)
+
+  hip_toe = math.sqrt(hip_toe_sqr)
+  cos_theta = (hip_toe_sqr + hip_ankle_sqr -
+               (l3 - l2)**2) / (2 * hip_ankle * hip_toe)
+
+  assert cos_theta > 0
+  theta = math.acos(cos_theta)
+  sw = math.asin(x / hip_toe) - theta
+  return (-sw, ext)
+
+
+def extension_to_ankle_dist(extension):
+  """Converts leg extension to ankle-hip distance in meters.
+
+  The ankle is defined as the joint of the two lower links, which is different
+  from the toe which is the tip of the longer lower limb.
+
+  Args:
+    extension: Float. the leg extension.
+
+  Returns:
+    Float. The hip to ankle distance in meters.
+
+  """
+  l1 = UPPER_LEG_LEN
+  l2 = LOWER_SHORT_LEG_LEN
+  alpha = math.asin(l1 / l2 * math.sin(extension))
+  return l2 * math.cos(alpha) - l1 * math.cos(extension)
+
+
+def ankle_dist_to_extension(dist):
+  """Converts ankle-hip distance (meters) to extension."""
+  l1 = UPPER_LEG_LEN
+  l2 = LOWER_SHORT_LEG_LEN
+  cos_extension = -(l1**2 + dist**2 - l2**2) / (2 * l1 * dist)
+  return math.acos(cos_extension)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/model_predictive_control.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/model_predictive_control.py
@@ -0,0 +1,149 @@
+# Lint as: python3
+"""Classic model predictive control methods."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+from typing import Sequence
+import numpy as np
+
+_MAX_ABS_RPY = 0.3
+_MAX_ABS_ANGULAR_VELOCITY = math.pi
+
+# The center of mass torque is computed using a simple PD control: tau = -KP *
+# delta_rotation - KD * delta_angular_velocity
+_TORQUE_KP = 2000
+_TORQUE_KD = 150
+
+# For center of mass force, we only need to track position in the z direction
+# (i.e. maintain the body height), and speed in x-y plane.
+_FORCE_KP = 500
+_FORCE_KD = 200
+
+
+def compute_contact_force_projection_matrix(
+    foot_positions_in_com_frame: Sequence[Sequence[float]],
+    stance_foot_ids: Sequence[int],
+) -> np.ndarray:
+  r"""Computes the 6 x 3n matrix to map contact force to com dynamics.
+
+  This is essentially the vectorized rhs of com dynamics equation:
+      ma = \sum f
+      I\omega_dot = \sum r \cross f
+
+  where the summation if over all feet in contact with ground.
+
+  Caveats: Current we have not taken the com rotation into account as we assume
+  the com rotation would be small. Ideally we should rotate the foot_positions
+  to a world frame centered at com. Also, since absolute yaw angles are not
+  accurate dute to drifting, we should use (roll, pitch, 0) to do the foot
+  position projection. This feature will be quite useful for MPC.
+  TODO(b/143378213): Fix this.
+
+  Args:
+    foot_positions_in_com_frame: the local position of each foot.
+    stance_foot_ids: The stance foot to be used to assemble the matrix.
+
+  Returns:
+    The contact force projection matrix.
+
+  """
+  jacobians = []
+  for foot_id in stance_foot_ids:
+    jv = np.identity(3)
+    foot_position = foot_positions_in_com_frame[foot_id]
+    x, y, z = foot_position[:3]
+    jw = np.array(((0, -z, y), (z, 0, -x), (-y, x, 0)))
+    jacobians.append(np.vstack((jv, jw)))
+
+  return np.hstack(jacobians)
+
+
+def plan_foot_contact_force(
+    mass: float,
+    inertia: np.ndarray,
+    com_position: np.ndarray,
+    com_velocity: np.ndarray,
+    com_roll_pitch_yaw: np.ndarray,
+    com_angular_velocity: np.ndarray,
+    foot_positions_in_com_frame: Sequence[Sequence[float]],
+    foot_contact_state: Sequence[bool],
+    desired_com_position: np.ndarray,
+    desired_com_velocity: np.ndarray,
+    desired_com_roll_pitch_yaw: np.ndarray,
+    desired_com_angular_velocity: np.ndarray,
+):
+  """Plan the foot contact forces using robot states.
+
+  TODO(b/143382305): Wrap this interface in a MPC class so we can use other
+  planning algorithms.
+
+  Args:
+    mass: The total mass of the robot.
+    inertia: The diagnal elements [Ixx, Iyy, Izz] of the robot.
+    com_position: Center of mass position in world frame. Usually we cannot
+      accurrately obtain this without motion capture.
+    com_velocity: Center of mass velocity in world frame.
+    com_roll_pitch_yaw: Center of mass rotation wrt world frame in euler angles.
+    com_angular_velocity: The angular velocity (roll_dot, pitch_dot, yaw_dot).
+    foot_positions_in_com_frame: The position of all feet/toe joints in the body
+      frame.
+    foot_contact_state: Indicates if a foot is in contact with the ground.
+    desired_com_position: We usually just care about the body height.
+    desired_com_velocity: In world frame.
+    desired_com_roll_pitch_yaw: We usually care about roll and pitch, since yaw
+      measurement can be unreliable.
+    desired_com_angular_velocity: Roll and pitch change rate are usually zero.
+      Yaw rate is the turning speed of the robot.
+
+  Returns:
+    The desired stance foot contact forces.
+  """
+  del inertia
+  del com_position
+  body_height = []
+  stance_foot_ids = []
+  for foot_id, foot_position in enumerate(foot_positions_in_com_frame):
+    if not foot_contact_state[foot_id]:
+      continue
+    stance_foot_ids.append(foot_id)
+    body_height.append(foot_position[2])
+
+  avg_bogy_height = abs(sum(body_height) / len(body_height))
+
+  rpy = com_roll_pitch_yaw
+  rpy[:2] = np.clip(rpy[:2], -_MAX_ABS_RPY, _MAX_ABS_RPY)
+  rpy_dot = com_angular_velocity
+  rpy_dot = np.clip(rpy_dot, -_MAX_ABS_ANGULAR_VELOCITY,
+                    _MAX_ABS_ANGULAR_VELOCITY)
+
+  com_torque = -avg_bogy_height * (
+      _TORQUE_KP * (rpy - desired_com_roll_pitch_yaw) + _TORQUE_KD * rpy_dot)
+
+  # We don't care about the absolute yaw angle in the low level controller.
+  # Instead, we stabialize the angular velocity in the z direction.
+  com_torque[2] = -avg_bogy_height * _TORQUE_KD * (
+      rpy_dot[2] - desired_com_angular_velocity[2])
+
+  # Track a desired com velocity.
+  com_force = -_FORCE_KD * (com_velocity - desired_com_velocity)
+
+  # In the z-direction we also want to track the body height.
+  com_force[2] += mass * 9.8 - _FORCE_KP * (
+      avg_bogy_height - desired_com_position[2])
+
+  com_force_torque = np.concatenate((com_force, com_torque)).transpose()
+
+  # Map the com force torque to foot contact forces.
+  foot_force_to_com = compute_contact_force_projection_matrix(
+      foot_positions_in_com_frame, stance_foot_ids)
+  all_contact_force = -np.matmul(
+      np.linalg.pinv(foot_force_to_com), com_force_torque).transpose()
+  contact_force = {}
+
+  for i, foot_id in enumerate(stance_foot_ids):
+    contact_force[foot_id] = all_contact_force[3 * i:3 * i + 3]
+
+  return contact_force
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/multi_state_estimator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/multi_state_estimator.py
@@ -0,0 +1,48 @@
+"""A class for combining multiple state estimators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from typing import Any, Sequence
+
+import gin
+
+from pybullet_envs.minitaur.agents.baseline_controller import state_estimator
+
+
+@gin.configurable
+class MultiStateEstimator(state_estimator.StateEstimatorBase):
+  """Combine multiple state estimators.
+
+
+  This class can be used to combine multiple state estimators into one. For
+  example, one can use the COMVelocityEstimator to estimate the com velocity
+  and COMHeightEstimator to estimate the com height.
+
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      state_estimators: Sequence[state_estimator.StateEstimatorBase],
+  ):
+    self._robot = robot
+    self._state_estimators = state_estimators
+    self.reset(0)
+
+  def reset(self, current_time):
+    for single_state_estimator in self._state_estimators:
+      single_state_estimator.reset(current_time)
+
+  def update(self, current_time):
+    for single_state_estimator in self._state_estimators:
+      single_state_estimator.update(current_time)
+
+  def __getattr__(self, attr):
+    for single_state_estimator in self._state_estimators:
+      if hasattr(single_state_estimator, attr):
+        return getattr(single_state_estimator, attr)
+    raise ValueError(
+        "{} is not found in any of the state estimators".format(attr))
+
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/openloop_gait_generator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/openloop_gait_generator.py
@@ -0,0 +1,194 @@
+"""Gait pattern planning module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import math
+from typing import Any, Sequence
+
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator
+
+_DEFAULT_INITIAL_LEG_STATE = (
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+    gait_generator.LegState.STANCE,
+)
+
+_NOMINAL_STANCE_DURATION = (0.25, 0.25, 0.25, 0.25)
+_NOMINAL_DUTY_FACTOR = (0.6, 0.6, 0.6, 0.6)
+_TROTTING_LEG_PHASE = (0, 0.5, 0.5, 0)
+_NOMINAL_CONTACT_DETECTION_PHASE = 0.4
+
+
+@gin.configurable
+class OpenloopGaitGenerator(gait_generator.GaitGenerator):
+  """Generates openloop gaits for quadruped robots.
+
+  A flexible open-loop gait generator. Each leg has its own cycle and duty
+  factor. And the state of each leg alternates between stance and swing. One can
+  easily formuate a set of common quadruped gaits like trotting, pacing,
+  pronking, bounding, etc by tweaking the input parameters.
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      stance_duration: Sequence[float] = _NOMINAL_STANCE_DURATION,
+      duty_factor: Sequence[float] = _NOMINAL_DUTY_FACTOR,
+      initial_leg_phase: Sequence[float] = _TROTTING_LEG_PHASE,
+      contact_detection_force_threshold: float = 0,
+      contact_detection_phase_threshold:
+      float = _NOMINAL_CONTACT_DETECTION_PHASE,
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A quadruped robot that at least implements the GetFootContacts API
+        and num_legs property.
+      stance_duration: The desired stance duration.
+      duty_factor: The ratio  stance_duration / total_gait_cycle.
+      initial_leg_phase: The desired initial phase [0, 1] of the legs within the
+        full swing + stance cycle.
+      contact_detection_force_threshold: The minimal contact force required to
+        detect if a foot is in contact with the ground. For real robots this
+        needs to be larger (i.e. 25 for Laikago).
+      contact_detection_phase_threshold: Updates the state of each leg based on
+        contact info, when the current normalized phase is greater than this
+        threshold. This is essential to remove false positives in contact
+        detection when phase switches. For example, a swing foot at at the
+        beginning of the gait cycle might be still on the ground.
+    """
+    self._robot = robot
+    self._stance_duration = stance_duration
+    self._duty_factor = duty_factor
+    self._swing_duration = np.array(stance_duration) / np.array(
+        duty_factor) - np.array(stance_duration)
+    if len(initial_leg_phase) != len(
+        list(self._robot.urdf_loader.get_end_effector_id_dict().values())):
+      raise ValueError(
+          "The number of leg phases should be the same as number of legs.")
+    self._initial_leg_phase = initial_leg_phase
+
+    self._initial_leg_state = _DEFAULT_INITIAL_LEG_STATE
+    self._next_leg_state = []
+    # The ratio in cycle is duty factor if initial state of the leg is STANCE,
+    # and 1 - duty_factory if the initial state of the leg is SWING.
+    self._initial_state_ratio_in_cycle = []
+    for state, duty in zip(self._initial_leg_state, duty_factor):
+      assert state == gait_generator.LegState.STANCE
+      self._initial_state_ratio_in_cycle.append(duty)
+      self._next_leg_state.append(gait_generator.LegState.SWING)
+
+    self._contact_detection_force_threshold = contact_detection_force_threshold
+    self._contact_detection_phase_threshold = contact_detection_phase_threshold
+
+    # The normalized phase within swing or stance duration.
+    self._normalized_phase = None
+
+    # The current leg state, when contact is considered.
+    self._leg_state = None
+
+    # The desired leg state (i.e. SWING or STANCE).
+    self._desired_leg_state = None
+
+    self.reset(0)
+
+  def reset(self, current_time):
+    # The normalized phase within swing or stance duration.
+    self._normalized_phase = np.zeros(
+        len(list(self._robot.urdf_loader.get_end_effector_id_dict().values())))
+    self._leg_state = list(self._initial_leg_state)
+    self._desired_leg_state = list(self._initial_leg_state)
+
+  @property
+  def desired_leg_state(self) -> Sequence[gait_generator.LegState]:
+    """The desired leg SWING/STANCE states.
+
+    Returns:
+      The SWING/STANCE states for all legs.
+
+    """
+    return self._desired_leg_state
+
+  @property
+  def leg_state(self) -> Sequence[gait_generator.LegState]:
+    """The leg state after considering contact with ground.
+
+    Returns:
+      The actual state of each leg after accounting for contacts.
+    """
+    return self._leg_state
+
+  @property
+  def swing_duration(self) -> Sequence[float]:
+    return self._swing_duration
+
+  @property
+  def stance_duration(self) -> Sequence[float]:
+    return self._stance_duration
+
+  @property
+  def normalized_phase(self) -> Sequence[float]:
+    """The phase within the current swing or stance cycle.
+
+    Reflects the leg's phase within the curren swing or stance stage. For
+    example, at the end of the current swing duration, the phase will
+    be set to 1 for all swing legs. Same for stance legs.
+
+    Returns:
+      Normalized leg phase for all legs.
+
+    """
+    return self._normalized_phase
+
+  def update(self, current_time):
+    contact_state = [
+        np.linalg.norm(contact_force) > self._contact_detection_force_threshold
+        for contact_force in self._robot.feet_contact_forces()
+    ]
+
+    for leg_id in range(
+        len(list(self._robot.urdf_loader.get_end_effector_id_dict().values()))):
+      # Here is the explanation behind this logic: We use the phase within the
+      # full swing/stance cycle to determine if a swing/stance switch occurs
+      # for a leg. The threshold value is the "initial_state_ratio_in_cycle" as
+      # explained before. If the current phase is less than the initial state
+      # ratio, the leg is either in the initial state or has switched back after
+      # one or more full cycles.
+      full_cycle_period = (
+          self._stance_duration[leg_id] / self._duty_factor[leg_id])
+      # To account for the non-zero initial phase, we offset the time duration
+      # with the effect time contribution from the initial leg phase.
+      augmented_time = current_time + self._initial_leg_phase[
+          leg_id] * full_cycle_period
+      phase_in_full_cycle = math.fmod(augmented_time,
+                                      full_cycle_period) / full_cycle_period
+      ratio = self._initial_state_ratio_in_cycle[leg_id]
+      if phase_in_full_cycle < ratio:
+        self._desired_leg_state[leg_id] = self._initial_leg_state[leg_id]
+        self._normalized_phase[leg_id] = phase_in_full_cycle / ratio
+      else:
+        # A phase switch happens for this leg.
+        self._desired_leg_state[leg_id] = self._next_leg_state[leg_id]
+        self._normalized_phase[leg_id] = (phase_in_full_cycle - ratio) / (1 -
+                                                                          ratio)
+
+      self._leg_state[leg_id] = self._desired_leg_state[leg_id]
+
+      # No contact detection at the beginning of each SWING/STANCE phase.
+      if (self._normalized_phase[leg_id] <
+          self._contact_detection_phase_threshold):
+        continue
+      if (self._leg_state[leg_id] == gait_generator.LegState.SWING and
+          contact_state[leg_id]):
+        logging.info("early touch down detected")
+        self._leg_state[leg_id] = gait_generator.LegState.EARLY_CONTACT
+      if (self._leg_state[leg_id] == gait_generator.LegState.STANCE and
+          not contact_state[leg_id]):
+        self._leg_state[leg_id] = gait_generator.LegState.LOSE_CONTACT
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/raibert_swing_leg_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/raibert_swing_leg_controller.py
@@ -0,0 +1,242 @@
+"""The swing leg controller class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import math
+from typing import Any, Mapping, Sequence, Tuple
+
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator as gait_generator_lib
+from pybullet_envs.minitaur.agents.baseline_controller import leg_controller
+
+# The position correction coefficients in Raibert's formula.
+_KP = 0.025
+
+# At the end of swing, we leave a small clearance to prevent unexpected foot
+# collision.
+_FOOT_CLEARANCE_M = 0.0
+_DEFAULT_LOCAL_HIP_POSITIONS = ((0.21, -0.1157, 0), (0.21, 0.1157, 0),
+                                (-0.21, -0.1157, 0), (-0.21, 0.1157, 0))
+_DEFAULT_SWING_EASE_UP_PHASE = 1.0
+_DEFAULT_SWING_EASE_UP_PERCENT = 1.0
+_FEED_FORWARD_TORQUES = (-0.7, 0, 0, 0.7, 0, 0, -0.7, 0, 0, 0.7, 0, 0)
+
+
+def _gen_parabola(phase: float, start: float, mid: float, end: float) -> float:
+  """Gets a point on a parabola y = a x^2 + b x + c.
+
+  The Parabola is determined by three points (0, start), (0.5, mid), (1, end) in
+  the plane.
+
+  Args:
+    phase: Normalized to [0, 1]. A point on the x-axis of the parabola.
+    start: The y value at x == 0.
+    mid: The y value at x == 0.5.
+    end: The y value at x == 1.
+
+  Returns:
+    The y value at x == phase.
+  """
+  mid_phase = 0.5
+  delta_1 = mid - start
+  delta_2 = end - start
+  delta_3 = mid_phase**2 - mid_phase
+  coef_a = (delta_1 - delta_2 * mid_phase) / delta_3
+  coef_b = (delta_2 * mid_phase**2 - delta_1) / delta_3
+  coef_c = start
+
+  return coef_a * phase**2 + coef_b * phase + coef_c
+
+
+def _gen_swing_foot_trajectory(input_phase: float, start_pos: Sequence[float],
+                               end_pos: Sequence[float], ease_up_phase: float,
+                               ease_up_percent: float) -> Tuple[float]:
+  """Generates the swing trajectory using a parabola.
+
+  Args:
+    input_phase: the swing/stance phase value between [0, 1].
+    start_pos: The foot's position at the beginning of swing cycle.
+    end_pos: The foot's desired position at the end of swing cycle.
+    ease_up_phase: Time length for the initial ease up phase dueing swing cycle.
+    ease_up_percent: Percentage of the swing cycle completed after
+      ease_up_phase.
+
+  Returns:
+    The desired foot position at the current phase.
+  """
+  # We augment the swing speed using the below formula. For the first portion of
+  # the swing cycle (ease_up_phase), the swing leg moves faster and finishes
+  # ease_up_percent% of the full swing trajectory. The rest of trajectory takes
+  # the rest of the swing cycle. Intuitely, we want to move the swing foot
+  # quickly to the target landing location and stay above the ground, in this
+  # way the control is more robust to perturbations to the body that may cause
+  # the swing foot to drop onto the ground earlier than expected. This is a
+  # common practice similar to the MIT cheetah and Marc Raibert's original
+  # controllers.
+  assert 0 <= ease_up_percent <= 1
+  assert 0 <= ease_up_phase <= 1
+  phase = input_phase
+  if input_phase <= ease_up_phase:
+    phase = ease_up_percent * math.sin(input_phase /
+                                       (2 * ease_up_phase) * math.pi)
+  else:
+    phase = ease_up_percent + (input_phase - ease_up_phase) * (
+        1 - ease_up_percent) / (1 - ease_up_phase)
+
+  x = (1 - phase) * start_pos[0] + phase * end_pos[0]
+  y = (1 - phase) * start_pos[1] + phase * end_pos[1]
+  max_clearance = 0.1
+  mid = max(end_pos[2], start_pos[2]) + max_clearance
+  z = _gen_parabola(phase, start_pos[2], mid, end_pos[2])
+
+  # PyType detects the wrong return type here.
+  return (x, y, z)  # pytype: disable=bad-return-type
+
+
+@gin.configurable
+class RaibertSwingLegController(leg_controller.LegController):
+  """Controls the swing leg position using Raibert's formula.
+
+  For details, please refer to chapter 2 in "Legged robbots that balance" by
+  Marc Raibert. The key idea is to stablize the swing foot's location based on
+  the CoM moving speed.
+
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      gait_generator: Any,
+      state_estimator: Any,
+      desired_speed: Tuple[float] = (0, 0),
+      desired_twisting_speed: float = 0,
+      desired_height: float = 0.45,
+      foot_clearance: float = _FOOT_CLEARANCE_M,
+      local_hip_positions: Tuple[Tuple[float]] = _DEFAULT_LOCAL_HIP_POSITIONS,
+      ease_up_phase: float = _DEFAULT_SWING_EASE_UP_PHASE,
+      ease_up_percent: float = _DEFAULT_SWING_EASE_UP_PERCENT,
+      feed_forward_torques: Sequence[float] = _FEED_FORWARD_TORQUES
+  ):
+    """Initializes the class.
+
+    Args:
+      robot: A robot instance.
+      gait_generator: Generates the stance/swing pattern.
+      state_estimator: Estiamtes the CoM speeds.
+      desired_speed: Behavior parameters. X-Y speed.
+      desired_twisting_speed: Behavior control parameters.
+      desired_height: Desired standing height.
+      foot_clearance: The foot clearance on the ground at the end of the swing
+        cycle.
+      local_hip_positions: Positions of the robot's hips in local frames.
+      ease_up_phase: Time length for the initial ease up phase dueing swing
+        cycle.
+      ease_up_percent: Percentage of the swing cycle completed after
+        ease_up_phase.
+      feed_forward_torques: A feed-forward torque applied to the actuators on
+        the swing legs (e.g. for gravity compensation).
+    """
+    self._robot = robot
+    self._state_estimator = state_estimator
+    self._gait_generator = gait_generator
+    self._last_leg_state = gait_generator.desired_leg_state
+    self.desired_speed = np.array((desired_speed[0], desired_speed[1], 0))
+    self.desired_twisting_speed = desired_twisting_speed
+    self._desired_height = np.array((0, 0, desired_height - foot_clearance))
+    self._local_hip_positions = local_hip_positions
+    self._ease_up_phase = ease_up_phase
+    self._ease_up_percent = ease_up_percent
+    self._feed_forward_torques = feed_forward_torques
+
+    self._joint_angles = None
+    self._phase_switch_foot_local_position = None
+    self.reset(0)
+
+  def reset(self, current_time: float) -> None:
+    """Called during the start of a swing cycle.
+
+    Args:
+      current_time: The wall time in seconds.
+    """
+    del current_time
+    self._last_leg_state = self._gait_generator.desired_leg_state
+    self._phase_switch_foot_local_position = (self._robot.foot_positions())
+    self._joint_angles = {}
+
+  def update(self, current_time: float) -> None:
+    """Called at each control step.
+
+    Args:
+      current_time: The wall time in seconds.
+    """
+    del current_time
+    new_leg_state = self._gait_generator.desired_leg_state
+
+    # Detects phase switch for each leg so we can remember the feet position at
+    # the beginning of the swing phase.
+    for leg_id, state in enumerate(new_leg_state):
+      if (state == gait_generator_lib.LegState.SWING and
+          state != self._last_leg_state[leg_id]):
+        self._phase_switch_foot_local_position[leg_id] = (
+            self._robot.foot_positions()[leg_id])
+
+    self._last_leg_state = copy.deepcopy(new_leg_state)
+
+  def get_action(self) -> Mapping[Any, Any]:
+    com_velocity = self._state_estimator.com_velocity_body_yaw_aligned_frame
+    com_velocity = np.array((com_velocity[0], com_velocity[1], 0))
+
+    _, _, yaw_dot = self._robot.base_roll_pitch_yaw_rate
+
+    local_toe_positions = np.array(self._robot.foot_positions())
+
+    for leg_id, leg_state in enumerate(self._gait_generator.leg_state):
+      if (leg_state == gait_generator_lib.LegState.STANCE or
+          leg_state == gait_generator_lib.LegState.EARLY_CONTACT):
+        continue
+
+      # For now we did not consider the body pitch/roll and all calculation is
+      # in the body frame. TODO(b/143378213): Calculate the foot_target_position
+      # in world farme and then project back to calculate the joint angles.
+      hip_offset = self._local_hip_positions[leg_id]
+      twisting_vector = np.array((-hip_offset[1], hip_offset[0], 0))
+      hip_horizontal_velocity = com_velocity + yaw_dot * twisting_vector
+      target_hip_horizontal_velocity = (
+          self.desired_speed + self.desired_twisting_speed * twisting_vector)
+
+      foot_target_position = (
+          hip_horizontal_velocity *
+          self._gait_generator.swing_duration[leg_id] / 2 - _KP *
+          (target_hip_horizontal_velocity - hip_horizontal_velocity)
+      ) - self._desired_height + np.array((hip_offset[0], hip_offset[1], 0))
+
+      foot_position = _gen_swing_foot_trajectory(
+          self._gait_generator.normalized_phase[leg_id],
+          self._phase_switch_foot_local_position[leg_id], foot_target_position,
+          self._ease_up_phase, self._ease_up_percent)
+
+      local_toe_positions[leg_id] = foot_position
+    joint_ids, joint_angles = self._robot.motor_angles_from_foot_positions(
+        local_toe_positions, position_in_world_frame=False)
+    # Update the stored joint angles as needed.
+    motors_per_leg = len(joint_ids) // len(local_toe_positions)
+    for joint_id, joint_angle in zip(joint_ids, joint_angles):
+      self._joint_angles[joint_id] = (joint_angle, joint_id // motors_per_leg)
+
+    action = {}
+    kps, kds = self._robot.motor_model.get_motor_gains()
+
+    for joint_id, joint_angle_leg_id in self._joint_angles.items():
+      leg_id = joint_angle_leg_id[1]
+      if self._gait_generator.leg_state[
+          leg_id] == gait_generator_lib.LegState.SWING:
+        # This is a hybrid action for PD control.
+        action[joint_id] = (joint_angle_leg_id[0], kps[joint_id], 0,
+                            kds[joint_id], self._feed_forward_torques[joint_id])
+
+    return action
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/state_estimator.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/state_estimator.py
@@ -0,0 +1,21 @@
+"""State estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+
+
+class StateEstimatorBase(object):  # pytype: disable=ignored-metaclass
+  """Estimates the unmeasurable state of the robot."""
+
+  __metaclass__ = abc.ABCMeta
+
+  @abc.abstractmethod
+  def reset(self, current_time):
+    pass
+
+  @abc.abstractmethod
+  def update(self, current_time):
+    pass
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/static_gait_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/static_gait_controller.py
@@ -0,0 +1,63 @@
+# Lint as: python3
+"""A static gait controller for a quadruped robot. Experimental code."""
+
+import gin
+import numpy as np
+from pybullet_envs.minitaur.agents.baseline_controller import foot_stepper
+
+toe_pos_local_ref = np.array([[0.1478, -0.11459, -0.45576],
+                              [0.1478, 0.11688, -0.45576],
+                              [-0.2895, -0.11459, -0.45576],
+                              [-0.2895, 0.11688, -0.45576]])
+
+
+@gin.configurable
+class StaticGaitController(object):
+  """A static gait controller for a quadruped robot."""
+
+  def __init__(self, robot):
+    self._robot = robot
+    self._toe_ids = tuple(robot.urdf_loader.get_end_effector_id_dict().values())
+    self._wait_count = 0
+    self._stepper = foot_stepper.FootStepper(self._robot.pybullet_client,
+                                             self._toe_ids, toe_pos_local_ref)
+
+  def act(self, observation):
+    """Computes actions based on observations."""
+    del observation
+    p = self._robot.pybullet_client
+    quadruped = self._robot.robot_id
+    step_input = foot_stepper.StepInput()
+    ls = p.getLinkStates(
+        quadruped, self._toe_ids, computeForwardKinematics=True)
+    toe_pos_world = np.array([ls[0][0], ls[1][0], ls[2][0], ls[3][0]])
+    base_com_pos, base_com_orn = p.getBasePositionAndOrientation(quadruped)
+    new_pos_world = np.array([0, 0, 0])
+
+    if self._stepper.is_com_stable() and not self._stepper.move_swing_foot:
+      self._wait_count += 1
+      if self._wait_count == 20:
+        self._stepper.next_foot()
+      if self._wait_count > 50:
+        self._wait_count = 0
+        step_dist = 0.15
+        print("time {}, make a step of {}".format(
+            self._robot.GetTimeSinceReset(), step_dist))
+        new_pos_local = self._stepper.get_reference_pos_swing_foot()
+        new_pos_local[0] += step_dist
+        new_pos_world, _ = p.multiplyTransforms(base_com_pos, base_com_orn,
+                                                new_pos_local, [0, 0, 0, 1])
+        self._stepper.swing_foot()
+
+    step_input.new_pos_world = new_pos_world
+    step_input.base_com_pos = base_com_pos
+    step_input.base_com_orn = base_com_orn
+    step_input.toe_pos_world = toe_pos_world
+    step_input.dt = 1.0 / 250
+    step_output = self._stepper.update(step_input)
+
+    # Finds joint poses to achieve toePosWorld
+    desired_joint_angles = self._robot.motor_angles_from_foot_positions(
+        foot_positions=step_output.new_toe_pos_world,
+        position_in_world_frame=True)[1]
+    return desired_joint_angles
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/time_based_moving_window_filter.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/time_based_moving_window_filter.py
@@ -0,0 +1,45 @@
+"""A moving-window filter for smoothing the signals within certain time interval."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import gin
+import numpy as np
+
+
+@gin.configurable
+class TimeBasedMovingWindowFilter:
+  """A moving-window filter for smoothing the signals within certain time interval."""
+
+  def __init__(
+      self,
+      filter_window: float = 0.1,
+  ):
+    """Initializes the class.
+
+    Args:
+      filter_window: The filtering window (in time) used to smooth the input
+        signal.
+    """
+    self._filter_window = filter_window
+    self.reset()
+
+  def reset(self):
+    self._timestamp_buffer = []
+    self._value_buffer = []
+
+  def calculate_average(self, new_value, timestamp):
+    """Compute the filtered signals based on the time-based moving window."""
+    self._timestamp_buffer.append(timestamp)
+    self._value_buffer.append(new_value)
+
+    while len(self._value_buffer) > 1:
+      if self._timestamp_buffer[
+          0] < timestamp - self._filter_window:
+        self._timestamp_buffer.pop(0)
+        self._value_buffer.pop(0)
+      else:
+        break
+    return np.mean(self._value_buffer, axis=0)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/torque_stance_leg_controller.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/baseline_controller/torque_stance_leg_controller.py
@@ -0,0 +1,229 @@
+# Lint as: python3
+"""A torque based stance controller framework."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import logging
+from typing import Any, Sequence, Tuple
+
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator as gait_generator_lib
+from pybullet_envs.minitaur.agents.baseline_controller import leg_controller
+#from pybullet_envs.minitaur.agents.baseline_controller.convex_mpc.python import convex_mpc
+#from google3.util.task.python import error
+
+try:
+  import mpc_osqp as convex_mpc  # pytype: disable=import-error
+except:  #pylint: disable=W0702
+  print("You need to install motion_imitation")
+  print("or use pip3 install motion_imitation --user")
+  print("see also https://github.com/google-research/motion_imitation")
+  import sys
+  sys.exit()
+  
+
+_FORCE_DIMENSION = 3
+# The QP weights in the convex MPC formulation. See the MIT paper for details:
+#   https://ieeexplore.ieee.org/document/8594448/
+# Intuitively, this is the weights of each state dimension when tracking a
+# desired CoM trajectory. The full CoM state is represented by
+# (roll_pitch_yaw, position, angular_velocity, velocity, gravity_place_holder).
+_MPC_WEIGHTS = (5, 5, 0.2, 0, 0, 10, 0.5, 0.5, 0.2, 0.2, 0.2, 0.1, 0)
+_PLANNING_HORIZON_STEPS = 10
+_PLANNING_TIMESTEP = 0.025
+#_MPC_CONSTRUCTOR = functools.partial(
+#    convex_mpc.ConvexMpc, qp_solver_name=convex_mpc.QPSolverName.QPOASES)
+
+
+@gin.configurable
+class TorqueStanceLegController(leg_controller.LegController):
+  """A torque based stance leg controller framework.
+
+  Takes in high level parameters like walking speed and turning speed, and
+  generates necessary the torques for stance legs.
+  """
+
+  def __init__(
+      self,
+      robot: Any,
+      gait_generator: Any,
+      state_estimator: Any,
+      desired_speed: Tuple[float] = (0, 0),
+      desired_twisting_speed: float = 0,
+      desired_roll_pitch: Tuple[float] = (0, 0),
+      desired_body_height: float = 0.45,
+      body_mass: float = 220 / 9.8,
+      body_inertia: Tuple[float] = (0.183375, 0, 0, 0, 0.6267, 0, 0, 0,
+                                    0.636175),
+      num_legs: int = 4,
+      friction_coeffs: Sequence[float] = (0.5, 0.5, 0.5, 0.5),
+      qp_weights: Sequence[float] = _MPC_WEIGHTS,
+      planning_horizon: int = _PLANNING_HORIZON_STEPS,
+      planning_timestep: int = _PLANNING_TIMESTEP,
+  ):
+    """Initializes the class.
+
+    Tracks the desired position/velocity of the robot by computing proper joint
+    torques using MPC module.
+
+    Args:
+      robot: A robot instance.
+      gait_generator: Used to query the locomotion phase and leg states.
+      state_estimator: Estimate the robot states (e.g. CoM velocity).
+      desired_speed: desired CoM speed in x-y plane.
+      desired_twisting_speed: desired CoM rotating speed in z direction.
+      desired_roll_pitch: desired CoM roll and pitch.
+      desired_body_height: The standing height of the robot.
+      body_mass: The total mass of the robot.
+      body_inertia: The inertia matrix in the body principle frame. We assume
+        the body principle coordinate frame has x-forward and z-up.
+      num_legs: The number of legs used for force planning.
+      friction_coeffs: The friction coeffs on the contact surfaces.
+      qp_weights: The weights used in solving the QP problem.
+      planning_horizon: Number of steps to roll-out in the QP formulation.
+      planning_timestep: Timestep between each step in the QP formulation.
+    """
+
+    self._robot = robot
+    self._gait_generator = gait_generator
+    self._state_estimator = state_estimator
+    self._desired_speed = desired_speed
+    self._desired_twisting_speed = desired_twisting_speed
+    self._desired_roll_pitch = desired_roll_pitch
+    self._desired_body_height = desired_body_height
+    self._body_mass = body_mass
+    self._num_legs = num_legs
+    self._friction_coeffs = np.array(friction_coeffs)
+    self._qp_solver_fail = False
+    self._com_estimate_leg_indices = None
+    qp_solver = convex_mpc.QPOASES #convex_mpc.OSQP #
+    
+    body_inertia_list = list(body_inertia)
+    weights_list = list(qp_weights)
+
+    self._mpc = convex_mpc.ConvexMpc(
+        body_mass,
+        body_inertia_list,
+        self._num_legs,
+        planning_horizon,
+        planning_timestep,
+        weights_list,
+        1e-6,
+        qp_solver
+    )
+    
+
+  def reset(self, current_time):
+    del current_time
+    self._qp_solver_fail = False
+    self._com_estimate_leg_indices = None
+
+  def update(self, current_time):
+    del current_time
+
+  def get_action(self):
+    """Computes the torque for stance legs."""
+    desired_com_position = np.array((0., 0., self._desired_body_height),
+                                    dtype=np.float64)
+    desired_com_velocity = np.array(
+        (self.desired_speed[0], self.desired_speed[1], 0.), dtype=np.float64)
+    desired_com_roll_pitch_yaw = np.array(
+        (self.desired_roll_pitch[0], self.desired_roll_pitch[1], 0.),
+        dtype=np.float64)
+    desired_com_angular_velocity = np.array(
+        (0., 0., self.desired_twisting_speed), dtype=np.float64)
+    foot_contact_state = np.array(
+        [(leg_state == gait_generator_lib.LegState.STANCE or
+          leg_state == gait_generator_lib.LegState.EARLY_CONTACT)
+         for leg_state in self._gait_generator.desired_leg_state],
+        dtype=np.int32)
+
+    # We use the body yaw aligned world frame for MPC computation.
+    com_roll_pitch_yaw = np.array(
+        self._robot.base_roll_pitch_yaw, dtype=np.float64)
+    com_roll_pitch_yaw[2] = 0
+    #try:
+    estimated_com_position = np.array(())
+    if hasattr(self._state_estimator, "estimated_com_height"):
+        estimated_com_position = np.array(
+            (0, 0, self._state_estimator.estimated_com_height))
+    try:
+      predicted_contact_forces = self._mpc.compute_contact_forces(
+        estimated_com_position,  #com_position
+        np.asarray(self._state_estimator.com_velocity_body_yaw_aligned_frame,
+                   dtype=np.float64),  #com_velocity
+        np.array(com_roll_pitch_yaw, dtype=np.float64),  #com_roll_pitch_yaw
+        # Angular velocity in the yaw aligned world frame is actually different
+        # from rpy rate. We use it here as a simple approximation.
+        np.asarray(self._robot.base_roll_pitch_yaw_rate,
+                   dtype=np.float64),  #com_angular_velocity
+        foot_contact_state,  #foot_contact_states
+        np.array(self._robot.foot_positions(
+              position_in_world_frame=False).flatten(),
+                 dtype=np.float64),  #foot_positions_base_frame
+        self._friction_coeffs,  #foot_friction_coeffs
+        desired_com_position,  #desired_com_position
+        desired_com_velocity,  #desired_com_velocity
+        desired_com_roll_pitch_yaw,  #desired_com_roll_pitch_yaw
+        desired_com_angular_velocity  #desired_com_angular_velocity
+    )
+    except:# error.StatusNotOk as e:
+      logging.error("Error in Torque Stance Leg")#e.message)
+      self._qp_solver_fail = True
+      predicted_contact_forces = np.zeros(self._num_legs * _FORCE_DIMENSION)
+
+    contact_forces = {}
+    for i in range(self._num_legs):
+      contact_forces[i] = np.array(
+          predicted_contact_forces[i * _FORCE_DIMENSION:(i + 1) *
+                                   _FORCE_DIMENSION])
+
+    _, kds = self._robot.motor_model.get_motor_gains()
+    action = {}
+    for leg_id, force in contact_forces.items():
+      motor_torques = self._robot.map_contact_force_to_joint_torques(
+          leg_id, force)
+      for joint_id, torque in motor_torques.items():
+        action[joint_id] = (0, 0, 0, kds[joint_id], torque)
+    return action
+
+  @property
+  def qp_solver_fail(self):
+    return self._qp_solver_fail
+
+  @property
+  def desired_speed(self):
+    return self._desired_speed
+
+  @desired_speed.setter
+  def desired_speed(self, speed):
+    self._desired_speed = speed
+
+  @property
+  def desired_twisting_speed(self):
+    return self._desired_twisting_speed
+
+  @desired_twisting_speed.setter
+  def desired_twisting_speed(self, twisting_speed):
+    self._desired_twisting_speed = twisting_speed
+
+  @property
+  def desired_roll_pitch(self):
+    return self._desired_roll_pitch
+
+  @desired_roll_pitch.setter
+  def desired_roll_pitch(self, roll_pitch):
+    self._desired_roll_pitch = roll_pitch
+
+  @property
+  def desired_body_height(self):
+    return self._desired_body_height
+
+  @desired_body_height.setter
+  def desired_body_height(self, body_height):
+    self._desired_body_height = body_height
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/controller_simple.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/controller_simple.py
@@ -0,0 +1,87 @@
+"""Asymmetric sine controller for quadruped locomotion.
+
+Asymmetric sine uses cosine and sine waves to generate swinging and extension
+for leg motion. It's asymmetric because sine waves are split into two phases
+(swing forward and stance) and these phases have different frequencies according
+to what proportion of a period will be spend on swinging forward forward
+swinging backwards. In addition, the sine wave for extension has different
+amplitudes during these two phases.
+"""
+import math
+
+TWO_PI = 2 * math.pi
+_DEFAULT_LEG_AMPLITUDE_EXTENSION = -0.02
+_DEFAULT_LEG_AMPLITUDE_SWING = 0.5
+_DEFAULT_LEG_AMPLITUDE_LIFT = 0.9
+_DEFAULT_WALKING_HEIGHT = 0.0
+_DEFAULT_LEG_CENTER_SWING = 0.0
+_DELTA_CENTER_EXTENSION_CAP = 1
+_DELTA_INTENSITY_CAP = 0.1
+
+
+class SimpleLegController(object):
+  """Controller that gives swing and extension based on phase and parameters.
+
+
+  The controller returns the swing-extend pair based on a parameterized
+  ellipsoid trajectory that depends on center of motion, amplitude and phase.
+  The parameters are
+    amplitude_extension: Amplitude for extension during stance (phase < pi).
+    amplitude_lift: Amplitude for extension during swing (phase > pi).
+    amplitude_swing: Amplitude for swing.
+    center_extension: The value extension signal oscillates around.
+    center_swing: The value swing signal oscillates around.
+    intensity: A coefficient that scales the motion of the legs.
+  The formula to calculate motion and more detailed information about these
+  parameters can be found at go/pmtg-refactored.
+  """
+
+  def __init__(self, init_phase=0):
+    self.amplitude_extension = _DEFAULT_LEG_AMPLITUDE_EXTENSION
+    self.amplitude_swing = _DEFAULT_LEG_AMPLITUDE_SWING
+    self.amplitude_lift = _DEFAULT_LEG_AMPLITUDE_LIFT
+    self.center_extension = _DEFAULT_WALKING_HEIGHT
+    self.center_swing = _DEFAULT_LEG_CENTER_SWING
+    self.intensity = 1.0
+    self._init_phase = init_phase
+    self.phase = init_phase
+    self.phase_offset = 0
+
+  def reset(self):
+    self.phase = self._init_phase
+
+  def get_swing_extend(self):
+    """Returns the swing and extend parameters for the leg.
+
+    Returns:
+      swing: Desired swing of the leg.
+      extend: Desired extension amount of the leg.
+    """
+
+    # Increase default extension by the extra extension scaled by intensity.
+    # Extend reduces to default center extension when intensity goes to 0,
+    # because we prefer the legs to stay at walking height when intensity is
+    # 0.
+    amplitude_extension = self.amplitude_extension
+    # The leg is in swing phase when phase > pi.
+    if self.phase > math.pi:
+      amplitude_extension = self.amplitude_lift
+    extend = self.center_extension + (
+        amplitude_extension * math.sin(self.phase)) * self.intensity
+    # Calculate the swing based on the signal and scale it with intensity.
+    # Swing reduces to 0 when intensity goes to 0, because we would prefer the
+    # legs to stay neutral (standing position instead of center swing) when
+    # intensity is 0.
+    swing = self.center_swing + self.amplitude_swing * math.cos(self.phase)
+    swing *= self.intensity
+
+    return swing, extend
+
+  def adjust_center_extension(self, target_center_extension):
+    delta = min(_DELTA_CENTER_EXTENSION_CAP,
+                target_center_extension - self.center_extension)
+    self.center_extension += delta
+
+  def adjust_intensity(self, target_intensity):
+    delta = min(_DELTA_INTENSITY_CAP, target_intensity - self.intensity)
+    self.intensity += delta
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/tg_inplace.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/tg_inplace.py
@@ -0,0 +1,57 @@
+"""Trajectory Generator for in-place stepping motion for quadruped robot."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+
+TWO_PI = 2 * math.pi
+
+
+def _get_actions_asymmetric_sine(phase, tg_params):
+  """Returns the leg extension given current phase of TG and parameters.
+
+  Args:
+    phase: a number in [0, 2pi) representing current leg phase
+    tg_params: a dictionary of tg parameters:
+      stance_lift_cutoff -- switches the TG between stance (phase < cutoff) and
+                            lift (phase > cutoff) phase
+      amplitude_swing -- amplitude in swing phase
+      amplitude_lift -- amplitude in lift phase
+      center_extension -- center of leg extension
+  """
+  stance_lift_cutoff = tg_params['stance_lift_cutoff']
+  a_prime = np.where(phase < stance_lift_cutoff, tg_params['amplitude_stance'],
+                     tg_params['amplitude_lift'])
+  scaled_phase = np.where(
+      phase > stance_lift_cutoff, np.pi + (phase - stance_lift_cutoff) /
+      (TWO_PI - stance_lift_cutoff) * np.pi, phase / stance_lift_cutoff * np.pi)
+  return tg_params['center_extension'] + a_prime * np.sin(scaled_phase)
+
+
+def step(current_phases, leg_frequencies, dt, tg_params):
+  """Steps forward the in-place trajectory generator.
+
+  Args:
+    current_phases: phases of each leg.
+    leg_frequencies: the frequency to proceed the phase of each leg.
+    dt: amount of time (sec) between consecutive time steps.
+    tg_params: a set of parameters for trajectory generator, see the docstring
+      of "_get_actions_asymmetric_sine" for details.
+
+  Returns:
+    actions: leg swing/extensions as output by the trajectory generator.
+    new_state: new swing/extension.
+  """
+  new_phases = np.fmod(current_phases + TWO_PI * leg_frequencies * dt, TWO_PI)
+  extensions = []
+  for leg_id in range(4):
+    extensions.append(
+        _get_actions_asymmetric_sine(new_phases[..., leg_id], tg_params))
+  return new_phases, extensions
+
+
+def reset():
+  return np.array([0, np.pi * 0.5, np.pi, np.pi * 1.5])
--- a/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/tg_simple.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/agents/trajectory_generator/tg_simple.py
@@ -0,0 +1,400 @@
+"""Trajectory Generator generates walking leg motions for a quadruped robot.
+
+Trajectory Generator (TG) has an internal state (phase) and generates
+walking-like motion for 8 motors of minitaur quadruped robot based on
+parameters
+such as:
+ - delta time to progress the TG's internal state.
+ - intensity to control amount of movement (stride length and lift of the legs).
+ - waking height to control the average extension of the legs.
+
+Each time step() is called, the internal state is progressed and 8 motor
+positions are generated. This TG uses the open-loop SineController class to
+provide leg positions. It is mainly a wrapper for ability to modulating the
+time
+and other parameters of the SineController.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+import gin
+from pybullet_envs.minitaur.agents.trajectory_generator import controller_simple
+
+PHASE_LOWER_BOUND = 0.0
+PHASE_UPPER_BOUND = 1.0
+WALK_HEIGHT_LOWER_BOUND = -0.5
+WALK_HEIGHT_UPPER_BOUND = 1.0
+INTENSITY_LOWER_BOUND = 0.0
+INTENSITY_UPPER_BOUND = 1.5
+_SWING_STANCE_LOWER_BOUND = 0.2
+_SWING_STANCE_UPPER_BOUND = 5.0
+_DELTA_SWING_STANCE_CAP = 0.4
+_TWO_PI = math.pi * 2.0
+_LEG_COUPLING_DICT = {
+    "null": [],
+    # All the legs are coupled.
+    "all coupled": [0, 0, 0, 0],
+    # Front legs and back legs are coupled separately.
+    "front back": [0, 1, 0, 1],
+    # Left legs and right legs are coupled separately.
+    "left right": [0, 0, 1, 1],
+    # Diagonal legs are coupled (i.e. trottting).
+    "diagonal": [0, 1, 1, 0],
+    # Each leg is indepenent.
+    "decoupled": [0, 1, 2, 3]
+}
+
+
+@gin.configurable
+class TgSimple(object):
+  """TgSimple class is a simplified trajectory generator for quadruped walking.
+
+  It returns 8 actions for quadruped slow walking behavior
+  based on the parameters provided such as intensity, walking height and delta
+  time. It returns its internal phase as information.
+  """
+
+  def __init__(self,
+               walk_height_lower_bound=WALK_HEIGHT_LOWER_BOUND,
+               walk_height_upper_bound=WALK_HEIGHT_UPPER_BOUND,
+               intensity_lower_bound=INTENSITY_LOWER_BOUND,
+               intensity_upper_bound=INTENSITY_UPPER_BOUND,
+               swing_stance_lower_bound=_SWING_STANCE_LOWER_BOUND,
+               swing_stance_upper_bound=_SWING_STANCE_UPPER_BOUND,
+               integrator_coupling_mode="all coupled",
+               walk_height_coupling_mode="all coupled",
+               variable_swing_stance_ratio=False,
+               swing_stance_ratio=1.0,
+               init_leg_phase_offsets=None):
+    """Initialize the trajectory generator with a controller.
+
+    For trajectory generator, we create an asymmetric sine controller with
+    parameters that was previously optimized as an open-loop controller.
+
+    Args:
+      walk_height_lower_bound: Lower bound for walking height which sets the
+        default leg extension of the gait. Unit is rad, -0.5 by default.
+      walk_height_upper_bound: Lower bound for walking height which sets the
+        default leg extension of the gait. Unit is rad, 1.0 by default.
+      intensity_lower_bound: The upper bound for intensity of the trajectory
+        generator. It can be used to limit the leg movement.
+      intensity_upper_bound: The upper bound for intensity of the trajectory
+        generator. It can be used to limit the leg movement.
+      swing_stance_lower_bound: Lower bound for the swing vs stance ratio
+        parameter. Default value is 0.2.
+      swing_stance_upper_bound: Upper bound for the swing vs stance ratio
+        parameter. Default value is 0.2.
+      integrator_coupling_mode: How the legs should be coupled for integrators.
+      walk_height_coupling_mode: The same coupling mode used for walking
+        heights for the legs.
+      variable_swing_stance_ratio: A boolean to indicate if the swing stance
+        ratio can change per time step or not.
+      swing_stance_ratio: Time taken by swing phase vs stance phase. This is
+        only relevant if variable_swing_stance_ratio is False.
+      init_leg_phase_offsets: The initial phases of the legs. A list of 4
+        variables within [0,1). The order is front-left, rear-left, front-right
+        and rear-right.
+
+    Raises:
+      ValueError: If parameters are not valid values.
+    """
+    self._walk_height_lower_bound = walk_height_lower_bound
+    self._walk_height_upper_bound = walk_height_upper_bound
+    self._intensity_lower_bound = intensity_lower_bound
+    self._intensity_upper_bound = intensity_upper_bound
+    self._swing_stance_lower_bound = swing_stance_lower_bound
+    self._swing_stance_upper_bound = swing_stance_upper_bound
+    if not init_leg_phase_offsets:
+      init_leg_phase_offsets = [0, 0.25, 0.5, 0.75]
+    if len(init_leg_phase_offsets) != 4:
+      raise ValueError("The number of leg phase offsets is not equal to 4.")
+    if min(init_leg_phase_offsets) < 0 or max(init_leg_phase_offsets) >= 1:
+      raise ValueError("Leg phase offsets are not within [0,1)")
+    self._legs = []
+    for period in init_leg_phase_offsets:
+      init_phase = period * 2 * math.pi
+      self._legs.append(controller_simple.SimpleLegController(init_phase))
+
+    if integrator_coupling_mode not in _LEG_COUPLING_DICT:
+      raise ValueError("Invalid integrator_coupling_mode.")
+    if walk_height_coupling_mode not in _LEG_COUPLING_DICT:
+      raise ValueError("Invalid walk_height_coupling_mode.")
+
+    # Set the phase couplings and build a list of legs per phase coupling.
+    self._integrator_id_per_leg = _LEG_COUPLING_DICT[integrator_coupling_mode]
+    self._num_integrators = max(
+        self._integrator_id_per_leg) + 1 if self._integrator_id_per_leg else 0
+    self._legs_per_integrator_id = [[], [], [], []]
+    for idx, phase_id in enumerate(self._integrator_id_per_leg):
+      self._legs_per_integrator_id[phase_id].append(self._legs[idx])
+
+    # For each integrator coupling, create a integrator unit.
+    # For each leg controlled by that phase generator, mark the phase offset.
+    self._integrator_units = []
+    for legs_per_integrator in self._legs_per_integrator_id:
+      if legs_per_integrator:
+        circular_integrator = CircularAsymmetricalIntegratorUnit(
+            legs_per_integrator[0].phase)
+        self._integrator_units.append(circular_integrator)
+        for leg in legs_per_integrator:
+          leg.phase_offset = leg.phase - circular_integrator.phase
+
+    # Set the walking heights couplings.
+    self._walk_height_id_per_leg = _LEG_COUPLING_DICT[walk_height_coupling_mode]
+    self._num_walk_heights = max(
+        self._walk_height_id_per_leg) + 1 if self._walk_height_id_per_leg else 0
+    self._variable_swing_stance_ratio = variable_swing_stance_ratio
+    self._swing_stance_ratio = swing_stance_ratio
+
+  def reset(self):
+    """Resets leg phase offsets to their initial values."""
+    for leg in self._legs:
+      leg.reset()
+    for circular_integrator in self._integrator_units:
+      circular_integrator.reset()
+
+  def get_parameter_bounds(self):
+    """Lower and upper bounds for the parameters generator's parameters.
+
+    Returns:
+      2-tuple of:
+      - Lower bounds for the parameters such as intensity, walking height and
+      lift fraction.
+      - Upper bounds for the same parameters.
+    """
+    lower_bounds = [self._intensity_lower_bound]
+    upper_bounds = [self._intensity_upper_bound]
+    lower_bounds += [self._walk_height_lower_bound] * self._num_walk_heights
+    upper_bounds += [self._walk_height_upper_bound] * self._num_walk_heights
+    lower_bounds += [self._swing_stance_lower_bound
+                    ] * self._variable_swing_stance_ratio
+    upper_bounds += [self._swing_stance_upper_bound
+                    ] * self._variable_swing_stance_ratio
+
+    return lower_bounds, upper_bounds
+
+  def get_actions(self, delta_real_time, tg_params):
+    """Get actions for 8 motors after increasing the phase delta_time.
+
+    Args:
+      delta_real_time: Time in seconds that have actually passed since the last
+        step of the trajectory generator.
+      tg_params: An ndarray of the parameters for generating the trajectory. The
+        parameters must be in the correct order (time_scale, intensity,
+        walking_height, and swing vs stance)
+
+    Raises:
+      ValueError: In case the input dimension does not match expected.
+    Returns:
+      The rotations for all the 8 motors for this time step
+      returned in an array [front_left_motor_1, front_left_motor_2, etc].
+    """
+    speeds, intensity, heights, swing_stance_ratio = self._process_tg_params(
+        tg_params)
+    # Adjust the swing stance ratio of the controller (used for all four legs).
+    if swing_stance_ratio:
+      self.adjust_swing_stance_ratio(swing_stance_ratio)
+    # Adjust the walking height, intensity and swing vs stance of the legs.
+    for idx, leg in enumerate(self._legs):
+      leg.adjust_intensity(intensity)
+      if heights:
+        leg.adjust_center_extension(heights[self._walk_height_id_per_leg[idx]])
+
+    # Progress all the phase generators based on delta time.
+    for idx, integrator_unit in enumerate(self._integrator_units):
+      integrator_unit.progress_phase(speeds[idx] * delta_real_time,
+                                     self._swing_stance_ratio)
+
+    # Set the phases for the legs based on their offsets with phase generators.
+    for phase_id, leg_list in enumerate(self._legs_per_integrator_id):
+      for leg in leg_list:
+        delta_period = leg.phase_offset / (2.0 * math.pi)
+        leg.phase = self._integrator_units[phase_id].calculate_progressed_phase(
+            delta_period, self._swing_stance_ratio)
+
+    # Calculate swingextend and convert it to the motor rotations.
+    actions = []
+    for idx, leg in enumerate(self._legs):
+      swing, extend = leg.get_swing_extend()
+      actions.extend([swing, extend])
+    return actions
+
+  def _process_tg_params(self, tg_params):
+    """Process the trajectory generator parameters and split them.
+
+    Args:
+      tg_params: A list consisting of time_scales, intensity, walking_heights,
+        swing_stance_ratio. The size depends on the configuration and inital
+        flags.
+
+    Returns:
+      time_scales: A list of multipliers of delta time (one per integrator).
+      intensity: Intensity of the trajectory generator (one variable).
+      walking_heights: Walking heights used for the legs. The length depends on
+        the coupling between the legs selected at the initialization.
+      swing_stance_ratio: The ratio of the speed of the leg during swing stance
+      vs stance phase.
+    """
+
+    # Check if the given input's dimension matches the expectation considering
+    # the number of parameters the trajectory generator uses.
+    if isinstance(tg_params, np.ndarray):
+      tg_params = tg_params.tolist()
+    expected_action_dim = 1 + self._num_integrators + self._num_walk_heights
+    if self._variable_swing_stance_ratio:
+      expected_action_dim += 1
+    if len(tg_params) != expected_action_dim:
+      raise ValueError(
+          "Action dimension does not match the expectation {} vs {}".format(
+              len(tg_params), expected_action_dim))
+    # Split input into different parts based on type. The order must match the
+    # order given by the order in get_parameter_bounds
+    time_scales = tg_params[0:self._num_integrators]
+    intensity = tg_params[self._num_integrators]
+    walking_heights = tg_params[(self._num_integrators + 1):(
+        1 + self._num_integrators + self._num_walk_heights)]
+    swing_stance_ratio = None
+    if self._variable_swing_stance_ratio:
+      swing_stance_ratio = tg_params[1 + self._num_integrators +
+                                     self._num_walk_heights]
+
+    return time_scales, intensity, walking_heights, swing_stance_ratio
+
+  def get_state(self):
+    """Returns a list of floats representing the phase of the controller.
+
+    The phase of the controller is composed of the phases of the integrators.
+    For each integrator, the phase is composed of 2 floats that represents the
+    sine and cosine of the phase of that integrator.
+
+    Returns:
+      List containing sine and cosine of the phases of all the integrators.
+    """
+    return [x for y in self._integrator_units for x in y.get_state()]
+
+  def get_state_lower_bounds(self):
+    """Lower bounds for the internal state.
+
+    Returns:
+      The list containing the lower bounds.
+    """
+    return [PHASE_LOWER_BOUND] * 2 * self._num_integrators
+
+  def get_state_upper_bounds(self):
+    """Upper bounds for the internal state.
+
+    Returns:
+      The list containing the upper bounds.
+    """
+    return [PHASE_UPPER_BOUND] * 2 * self._num_integrators
+
+  def adjust_swing_stance_ratio(self, target_swing_stance_ratio):
+    """Adjust the parameter swing_stance_ratio towards a given target value.
+
+    Args:
+      target_swing_stance_ratio: The target value for the ratio between swing
+        and stance phases.
+    """
+    delta = min(_DELTA_SWING_STANCE_CAP,
+                target_swing_stance_ratio - self._swing_stance_ratio)
+    self._swing_stance_ratio += delta
+
+  @property
+  def num_integrators(self):
+    """Gets the number of integrators used based on coupling mode."""
+    return self._num_integrators
+
+
+class CircularAsymmetricalIntegratorUnit(object):
+  """A circular integrator with asymmetry between first and second half.
+
+  An integrator is a memory unit that accumulates the given parameter at every
+  time step.
+  A circular integrator is when the integrator cycles within [0,2pi].
+  The phase of a circular integrator indicates the accumulated number and it is
+  stored as fmod of 2Pi.
+  Asymmetrical circular integrator has a further characteristic where it
+  distinguishes between the first half of the period vs the second half. It
+  allows the integrator to move at different speeds during these two periods.
+  From a locomotion perspective these two halves of the period can be considered
+  as swing and stance phases. The speed difference is calculated using the
+  variable swing_stance_ratio provided at every time step.
+  CircularAsymmetricalIntegratorUnit can be used to control one or multiple legs
+  depending on the preference. If more than one leg is assigned to a single unit
+  the other legs are calculated based on their initial phase difference.
+  """
+
+  def __init__(self, init_phase=0):
+    self._init_phase = init_phase
+    self.reset()
+
+  def reset(self):
+    self.phase = self._init_phase
+
+  def calculate_progressed_phase(self, delta_period, swing_stance_speed_ratio):
+    """Calculate a hypotethical phase based on the current phase and args.
+
+    This is used to both calculate the new phase, as well as the current phase
+    of the other legs with a given offset of delta_period.
+
+    Args:
+      delta_period: The fraction of the period to add to the current phase of
+        the integrator. If set to 1, the integrator will accomplish one full
+        period and return the same phase. The calculated phase will depend on
+        the current phase (if it is in first half vs second half) and swing vs
+        stance speed ratio.
+      swing_stance_speed_ratio: The ratio of the speed of the phase when it is
+        in swing (second half) vs stance (first half). Set to 1.0 by default,
+        making it symettric, same as a classical integrator.
+
+    Returns:
+      The new phase between 0 and 2 * pi.
+    """
+    stance_speed_coef = (
+        swing_stance_speed_ratio + 1) * 0.5 / swing_stance_speed_ratio
+    swing_speed_coef = (swing_stance_speed_ratio + 1) * 0.5
+    delta_left = delta_period
+    new_phase = self.phase
+    while delta_left > 0:
+      if 0 <= new_phase < math.pi:
+        delta_phase_multiplier = stance_speed_coef * _TWO_PI
+        new_phase += delta_left * delta_phase_multiplier
+        delta_left = 0
+        if new_phase < math.pi:
+          delta_left = 0
+        else:
+          delta_left = (new_phase - math.pi) / delta_phase_multiplier
+          new_phase = math.pi
+      else:
+        delta_phase_multiplier = swing_speed_coef * _TWO_PI
+        new_phase += delta_left * delta_phase_multiplier
+        if math.pi <= new_phase < _TWO_PI:
+          delta_left = 0
+        else:
+          delta_left = (new_phase - _TWO_PI) / delta_phase_multiplier
+          new_phase = 0
+    return math.fmod(new_phase, _TWO_PI)
+
+  def progress_phase(self, delta_period, swing_stance_ratio):
+    """Updates the phase based on the current phase, delta period and ratio.
+
+    Args:
+      delta_period: The fraction of the period to add to the current phase of
+        the integrator. If set to 1, the integrator will accomplish one full
+        period and return the same phase. The calculated phase will depend on
+        the current phase (if it is in first half vs second half) and swing vs
+        stance speed ratio.
+      swing_stance_ratio: The ratio of the speed of the phase when it is in
+        swing (second half) vs stance (first half). Set to 1.0 by default,
+        making it symettric, same as a classical integrator.
+    """
+    self.phase = self.calculate_progressed_phase(delta_period,
+                                                 swing_stance_ratio)
+
+  def get_state(self):
+    """Returns the sin and cos of the phase as state."""
+    return [(math.cos(self.phase) + 1) / 2.0, (math.sin(self.phase) + 1) / 2.0]
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/init.py
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/base_client.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/base_client.py
@@ -0,0 +1,63 @@
+"""Base class for simulation client."""
+
+import enum
+from typing import Text
+
+GRAY = (0.3, 0.3, 0.3, 1)
+
+
+class ClientType(enum.Enum):
+  """Type of client."""
+  BULLET = "pybullet"
+  
+
+
+class ClientMode(enum.Enum):
+  """Client running mode."""
+
+  # Client is being initialized, object is being loaded, teleported, etc.
+  CONFIGURATION = 1
+
+  # Client is in a mode that simulate motion according to physics and control.
+  SIMULATION = 2
+
+
+class WrongClientModeError(Exception):
+  """Client mode does not meet expectation (e.g. load object in sim mode)."""
+
+
+class BaseClient(object):
+  """Base class for simulation client."""
+
+  def __init__(self, client_type: Text = ""):
+    self._client_type = client_type
+
+    # Default to configuration mode.
+    self._client_mode = ClientMode.CONFIGURATION
+
+  @property
+  def client_type(self) -> ClientType:
+    return self._client_type
+
+  def switch_mode(self, mode: ClientMode) -> bool:
+    """Switches running mode of simulation client and return if mode changed."""
+    if mode not in (ClientMode.CONFIGURATION, ClientMode.SIMULATION):
+      raise ValueError(f"Invalid client mode {mode}.")
+    if mode == self._client_mode:
+      return False
+    self._client_mode = mode
+    return True
+
+  @property
+  def client_mode(self) -> ClientMode:
+    """Returns current client mode."""
+    return self._client_mode
+
+  def _assert_in_configuration_mode(self, operation: Text = "this operation"):
+    """Raises exception if client is not in configuration mode."""
+    if self._client_mode != ClientMode.CONFIGURATION:
+      raise WrongClientModeError(
+          f"Sim client is expected to be in configuration mode for "
+          f"{operation}.")
+
+
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/curriculum_reset_helpers.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/curriculum_reset_helpers.py
@@ -0,0 +1,62 @@
+"""Implements dynamic locomotion gym env that changes based on iteration."""
+import gin
+
+import pybullet
+
+
+# TODO(b/142642890): Make this reset to the initial world state first.
+@gin.configurable
+def gap_task_curriculum_update(env,
+                               num_iter,
+                               distance_to_gap_or_hurdle=1.5,
+                               initial_gap_length=0.1,
+                               max_iterations=500,
+                               gap_delta=0.0008):
+  """Linearly increase the gap width wrt the iteration number.
+
+  This is specific to BuildSingleGapWorld.
+
+  Args:
+    env: An instance of a LocomotionGymEnv.
+    num_iter: The training iteration we are on.
+    distance_to_gap_or_hurdle: The distance to the gap.
+    initial_gap_length: The starting gap length.
+    max_iterations: The number of iterations up to which we will modify the
+      environment.
+    gap_delta: The amount to increase the gap width by for each increase of 1 in
+      the iteration.
+  """
+
+  gap_length = initial_gap_length + gap_delta * min(max_iterations, num_iter)
+  env.task.reset(
+      env,
+      distance_to_gap_or_hurdle=distance_to_gap_or_hurdle,
+      gap_or_hurdle_width=gap_length)
+
+
+@gin.configurable
+def gap_world_curriculum_update(env,
+                                num_iter,
+                                initial_second_block_x=8.15,
+                                max_iterations=500,
+                                gap_delta=0.0008):
+  """Update the world, linearly increasing gap width wrt iteration number.
+
+  This is specific to SingleGapScene.
+
+  Args:
+    env: An instance of a LocomotionGymEnv.
+    num_iter: The training iteration we are on.
+    initial_second_block_x: The initial x position of the second block.
+    max_iterations: The number of iterations up to which we will modify the
+      environment.
+    gap_delta: The amount to increase the gap width by for each increase of 1 in
+      the iteration.
+  """
+
+  ground = env.scene.ground_ids
+  pos = pybullet.getBasePositionAndOrientation(ground[-1])[0]
+  # Linearly increase the gap width to 0.5m by the last iteration.
+  next_x = initial_second_block_x + gap_delta * min(max_iterations, num_iter)
+  pybullet.resetBasePositionAndOrientation(ground[-1], (next_x, pos[1], pos[2]),
+                                           [0, 0, 0, 1])
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_loader.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_loader.py
@@ -0,0 +1,75 @@
+"""Load the locomotion gym env using the gin config files."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gin
+from pybullet_envs.minitaur.envs_v2 import locomotion_gym_env
+from pybullet_envs.minitaur.envs_v2 import multiagent_mobility_gym_env
+
+ROBOT_FIELD_IN_CONFIG = 'robot_params'
+TASK_FIELD_IN_CONFIG = 'task_params'
+PMTG_FIELD_IN_CONFIG = 'pmtg_params'
+_PMTG_GIN_QUERY = 'pmtg_wrapper_env.PmtgWrapperEnv.'
+
+
+@gin.configurable
+def load(wrapper_classes=None, multiagent=False, **kwargs):
+  """load a pre-defined locomotion gym environment.
+
+  The env specific settings should have been set in the gin files.
+
+  Args:
+    wrapper_classes: A list of wrapper_classes.
+    multiagent: Whether to use multiagent environment.
+    **kwargs: Keyword arguments to be passed to the environment constructor.
+
+  Returns:
+    env: The instance of the minitaur gym environment.
+  """
+  # Gin config are not always specified this way (e.g. namescoped config).
+  # Only guery parameters when it is necessary.
+  if any(
+      k in (PMTG_FIELD_IN_CONFIG, TASK_FIELD_IN_CONFIG, ROBOT_FIELD_IN_CONFIG)
+      for k in kwargs):
+    with gin.unlock_config():
+      if multiagent:
+        #  Currently assume robots and tasks are identical
+        robot_class = gin.query_parameter(
+            'multiagent_mobility_gym_env.MultiagentMobilityGymEnv.robot_classes'
+        )[0].selector
+        task = gin.query_parameter(
+            'multiagent_mobility_gym_env.MultiagentMobilityGymEnv.tasks'
+        )[0].selector
+      else:
+        robot_class = gin.query_parameter(
+            'locomotion_gym_env.LocomotionGymEnv.robot_class').selector
+        task = gin.query_parameter(
+            'locomotion_gym_env.LocomotionGymEnv.task').selector
+      gin_prefix_dict = {
+          PMTG_FIELD_IN_CONFIG: _PMTG_GIN_QUERY,
+          TASK_FIELD_IN_CONFIG: task + '.',
+          ROBOT_FIELD_IN_CONFIG: robot_class + '.',
+      }
+      for field_name, field_values in kwargs.items():
+        if field_name in gin_prefix_dict:
+          for var_name, value in field_values.items():
+            gin.bind_parameter(gin_prefix_dict[field_name] + var_name, value)
+        else:
+          raise ValueError(
+              'Environment argument type is not found in gin_prefix_dict.')
+  if multiagent:
+    env = multiagent_mobility_gym_env.MultiagentMobilityGymEnv()
+  else:
+    env = locomotion_gym_env.LocomotionGymEnv()
+  if wrapper_classes is not None:
+    # A little macro for the automatic list expansion
+    if not isinstance(wrapper_classes, list):
+      wrapper_classes = [wrapper_classes]
+
+    # Wrap environments with user-provided wrappers
+    # (e.g. TrajectoryGeneratorWrapperEnv)
+    for wrapper_cls in wrapper_classes:
+      env = wrapper_cls(env)
+  return env
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/init.py
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/action_denormalize_wrapper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/action_denormalize_wrapper.py
@@ -0,0 +1,41 @@
+"""Denormalize the action from [-1, 1] to the env.action_space."""
+
+import gin
+import gym
+import numpy as np
+
+
+def _denomralize(env, action):
+  action = np.array(action)
+  low = np.array(env.action_space.low)
+  high = np.array(env.action_space.high)
+  return (high - low) / 2.0 * action + (high + low) / 2.0
+
+
+@gin.configurable
+class ActionDenormalizeWrapper(object):
+  """An env wrapper that denormalize the action from [-1, 1] to the bounds."""
+
+  def __init__(self, gym_env):
+    """Initializes the wrapper."""
+    self._gym_env = gym_env
+    self.action_space = gym.spaces.Box(
+        low=-1.0,
+        high=1.0,
+        shape=self._gym_env.action_space.low.shape,
+        dtype=np.float32)
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array between [-1.0, 1.0].
+
+    Returns:
+      The tuple containing the observation, the reward, and the epsiode
+        end indicator.
+    """
+    return self._gym_env.step(_denomralize(self._gym_env, action))
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/alternating_legs_openloop.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/alternating_legs_openloop.py
@@ -0,0 +1,125 @@
+"""A trajectory generator that return signals for alternating legs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import google_type_annotations
+from __future__ import print_function
+
+import math
+import attr
+import gin
+from gym import spaces
+import numpy as np
+from pybullet_envs.minitaur.envs.utilities import laikago_pose_utils
+
+TROT_GAIT = "trot"
+PACE_GAIT = "pace"
+NUM_MOTORS_LAIKAGO = 12
+STD_FOR_GAUSSIAN_TRAJECTORY = 0.15
+MOTION_FREQUENCY = 1.0
+MOTION_AMPLITUDE = 0.25
+ACTION_BOUND = 0.25
+
+
+# TODO(b/131193449): Add a test to this class.
+@gin.configurable
+class LaikagoAlternatingLegsTrajectoryGenerator(object):
+  """A trajectory generator that return signals for alternating legs."""
+
+  def __init__(
+      self,
+      init_abduction=laikago_pose_utils.LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+      init_hip=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+      init_knee=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE,
+      amplitude=MOTION_AMPLITUDE,
+      frequency=MOTION_FREQUENCY,
+      gait=PACE_GAIT,  # can be TROT_GAIT or PACE_GAIT
+  ):
+    """Initializes the controller."""
+    self._pose = np.array(
+        attr.astuple(
+            laikago_pose_utils.LaikagoPose(
+                abduction_angle_0=init_abduction,
+                hip_angle_0=init_hip,
+                knee_angle_0=init_knee,
+                abduction_angle_1=init_abduction,
+                hip_angle_1=init_hip,
+                knee_angle_1=init_knee,
+                abduction_angle_2=init_abduction,
+                hip_angle_2=init_hip,
+                knee_angle_2=init_knee,
+                abduction_angle_3=init_abduction,
+                hip_angle_3=init_hip,
+                knee_angle_3=init_knee)))
+    action_high = np.array([ACTION_BOUND] * NUM_MOTORS_LAIKAGO)
+    self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
+    self.amplitude = amplitude
+    self.period = 1.0 / frequency
+    self.gait = gait
+
+  def _alternating_legs_trajectory(self, t):
+    """The reference trajectory of each joint when alternating legs.
+
+    Args:
+      t: The time since the latest robot reset.
+
+    Returns:
+      An array of 12 desired motor angles.
+    """
+    phase_in_period = (t % self.period) / self.period
+    is_first_half_gait = phase_in_period < 0.5
+    if self.gait == TROT_GAIT and is_first_half_gait:
+      phases = [0, 1, 1, 0]  # 0 means stance and 1 means retraction.
+    elif self.gait == TROT_GAIT and not is_first_half_gait:
+      phases = [1, 0, 0, 1]
+    elif self.gait == PACE_GAIT and is_first_half_gait:
+      phases = [0, 1, 0, 1]
+    elif self.gait == PACE_GAIT and not is_first_half_gait:
+      phases = [1, 0, 1, 0]
+    else:
+      raise ValueError("{} gait is not supported in alternating legs.".format(
+          self.gait))
+
+    phase_step_center = 0.25 if is_first_half_gait else 0.75
+    std = STD_FOR_GAUSSIAN_TRAJECTORY
+    # Uses Gaussian instead of sine for gentle foot touch down.
+    # The following joint angles are added to self._pose.
+    retract_hip_angle = self.amplitude * math.exp(
+        -(phase_in_period - phase_step_center) *
+        (phase_in_period - phase_step_center) / (std * std))
+    retract_knee_angle = -2.0 * retract_hip_angle
+    retract_abduction_angle = 0.0
+    stance_hip_angle = 0.0
+    stance_knee_angle = 0.0
+    stance_abduction_angle = 0.0
+    angles = []
+    for is_retract in phases:
+      if is_retract:
+        angles.extend([retract_abduction_angle, retract_hip_angle,
+                       retract_knee_angle])
+      else:
+        angles.extend([stance_abduction_angle, stance_hip_angle,
+                       stance_knee_angle])
+    return np.array(angles)
+
+  def reset(self):
+    pass
+
+  def get_action(self, current_time, input_action):
+    """Computes the trajectory according to input time and action.
+
+    Args:
+      current_time: The time in gym env since reset.
+      input_action: A numpy array. The input leg pose from a NN controller.
+
+    Returns:
+      A numpy array. The desired motor angles.
+    """
+
+    return self._pose + self._alternating_legs_trajectory(
+        current_time) + input_action
+
+  def get_observation(self, input_observation):
+    """Get the trajectory generator's observation."""
+
+    return input_observation
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/curriculum_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/curriculum_wrapper_env.py
@@ -0,0 +1,50 @@
+"""A wrapped LocomotionGymEnv with functions that change the world and task."""
+
+import gin
+
+
+@gin.configurable
+class CurriculumWrapperEnv(object):
+  """A wrapped LocomotionGymEnv with an evolving environment."""
+
+  def __init__(self,
+               gym_env,
+               num_iter=0,
+               curriculum_world_update=None,
+               curriculum_task_update=None):
+    """Initializes the wrapped env.
+
+    Args:
+      gym_env: An instance of a (potentially previously wrapped)
+        LocomotionGymEnv.
+      num_iter: The training iteration we are on.
+      curriculum_world_update: A function that updates the environment based on
+        the iteration. Takes in the environment as an argument.
+      curriculum_task_update: A function that updates the task (eg.
+        observations) based on the iteration. Takes in the environment as an
+        argument.
+    """
+    self._gym_env = gym_env
+    self._num_iter = num_iter
+    self._curriculum_world_update = curriculum_world_update
+    self._curriculum_task_update = curriculum_task_update
+
+  def modify(self, step=0):
+    self._num_iter = step
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def reset(self, *args, **kwargs):
+    """Reset and adjust the environment."""
+    self._gym_env.reset(*args, **kwargs)
+    if self._curriculum_world_update is not None:
+      self._curriculum_world_update(self._gym_env, self._num_iter)
+    if self._curriculum_task_update is not None:
+      self._curriculum_task_update(self._gym_env, self._num_iter)
+    return self._get_observation()
+
+  # Used for testing.
+  @property
+  def num_iter(self):
+    return self._num_iter
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/depth_uv_to_footplacement_wrapper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/depth_uv_to_footplacement_wrapper.py
@@ -0,0 +1,357 @@
+"""Change the action from uv of the depth map to xyz in world frame."""
+
+import copy
+import math
+from typing import Sequence
+import gin
+import gym
+import numpy as np
+
+from pybullet_envs.minitaur.robots import laikago_kinematic_constants
+from pybullet_envs.minitaur.robots import quadruped_base
+from pybullet_envs.minitaur.robots import wheeled_robot_base_sim
+
+UNIT_QUATERNION = (0, 0, 0, 1)
+# A small gap between the target foot position and the floor.
+FLOOR_HEIGHT_EPSILON = 0.02
+FOOT_HEIGHT_CLEARNCE = 0.075
+BASE_MOVEMENT_SPEED = 0.3
+INIT_ABDUCTION_ANGLE = laikago_kinematic_constants.INIT_ABDUCTION_ANGLE
+INIT_HIP_ANGLE = laikago_kinematic_constants.INIT_HIP_ANGLE
+INIT_KNEE_ANGLE = laikago_kinematic_constants.INIT_KNEE_ANGLE
+NUM_LEGS = laikago_kinematic_constants.NUM_LEGS
+COM_VIZ_BOX_SIZE = [0.025, 0.025, 0.005]
+_DEFAULT_JOINT_POSE = (INIT_ABDUCTION_ANGLE, INIT_HIP_ANGLE,
+                       INIT_KNEE_ANGLE) * NUM_LEGS
+# Weights for computing the target COM from the supporting feet locations.
+# The target COM for the front feet are biased forward for better robot
+# stability and seeing farther in distance, which enables larger steps.
+_SUPPORT_WEIGHT_MAP = [[0.0, 0.4, 0.3, 0.3], [0.4, 0.0, 0.3, 0.3],
+                       [1.0 / 3, 1.0 / 3, 0.0, 1.0 / 3],
+                       [1.0 / 3, 1.0 / 3, 1.0 / 3, 0.0]]
+_TASK_SENSOR_NAME = "sensors"
+
+
+@gin.configurable
+class DepthUVToFootPlacementWrapper(object):
+  """Changes the action from a point in depth map to a point in the world frame.
+
+  Attributes:
+    observation: The current observation of the environment.
+    last_action: The last that was used to step the environment.
+    env_step_counter: The number of control steps that have been elapesed since
+      the environment is reset.
+    action_space: The action space of the environment.
+  """
+
+  def __init__(self,
+               gym_env,
+               visualization=True,
+               foot_movement=False,
+               foot_clearance_height=FOOT_HEIGHT_CLEARNCE,
+               base_movement_speed=BASE_MOVEMENT_SPEED,
+               foothold_update_frequency=4):
+    """Initializes the wrapper.
+
+    Args:
+      gym_env: the wrapped gym environment. The robot is controlled
+        kinematically when gym_env.robot inherits from WheeledRobotBase and is
+        controlled dynamically using a static gait controller when gym_env.robot
+        inherits from QudrupedBase.
+      visualization: whether to draw a sphere that represents the foothold
+        position.
+      foot_movement: whether move the toe to the desired foothold position using
+        IK for visualization/debugging purpose.
+      foot_clearance_height: the maximum height of the swing foot.
+      base_movement_speed: the speed of the robot base.
+      foothold_update_frequency: the frequency of updating the foothold, which
+        is the same as the frequency of the steps. The default value 4 means
+        four steps (1 complete cycle of static gait) per second.
+    """
+    self._gym_env = gym_env
+    self._num_control_steps_per_foothold_update = max(
+        1, int(1.0 / foothold_update_frequency / self._gym_env.env_time_step))
+    self.last_action = None
+    self._visualization = visualization
+    self._foot_movement = foot_movement
+    self._foot_clearance_height = foot_clearance_height
+    self._base_movement_speed = base_movement_speed
+    self._step_counter = 0
+    self.observation_space.spaces["toe_position"] = gym.spaces.Box(
+        np.array([-1.0] * 3), np.array([1.0] * 3))
+    self.action_space = gym.spaces.Box(
+        np.array([-1.0] * 2), np.array([1.0] * 2))
+    self.task.reset(self)
+    if hasattr(self.task, _TASK_SENSOR_NAME):
+      self.observation_space.spaces[
+          self.task.get_name()] = self.task.observation_space
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def reset(self, **kwargs):
+    """Reset the environment."""
+    obs = self._gym_env.reset(**kwargs)
+    self.task.reset(self)
+    self._step_counter = 0
+    current_end_effector_pos = np.array(
+        self._gym_env.robot.foot_positions()[self.task.swing_foot_id])
+    self.last_action = [0, 0]
+    if self._visualization:
+      self._create_foot_placement_visualization()
+      self._create_com_visualization()
+    self._initial_local_toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=False))
+
+    # Move COM to prepare for the first swing step.
+    if isinstance(self._gym_env.robot, quadruped_base.QuadrupedBase):
+      obs, _ = self._move_com_dynamic(
+          self.task.swing_foot_id,
+          self._num_control_steps_per_foothold_update // 3 * 2)
+
+    # TODO(b/157614175): Adds a toe_position_sensor.
+    obs["toe_position"] = current_end_effector_pos
+    obs["vision"] = self.task.get_depth_image_for_foot()
+    obs["LastAction"] = self.last_action
+
+    self._observation = obs
+    return self._observation
+
+  def _move_kinematic(self):
+    """Move robot kinematically.
+
+    Returns:
+      The tuple containing the observation and env info.
+    """
+    if self._foot_movement:
+      toe_positions = np.array(
+          self._gym_env.robot.foot_positions(position_in_world_frame=True))
+      toe_positions[:, 2] = FLOOR_HEIGHT_EPSILON
+      destination_foothold_xyz_global = self.task.get_foothold_location(
+          self.last_action, use_world_frame=True)
+      destination_foothold_xyz_global[2] = FLOOR_HEIGHT_EPSILON
+    joint_pose = _DEFAULT_JOINT_POSE
+    for i in range(self._num_control_steps_per_foothold_update):
+      if self._foot_movement:
+        alpha = i / (self._num_control_steps_per_foothold_update - 1)
+        toe_positions_over_time = copy.deepcopy(toe_positions)
+        toe_positions_over_time[self.task.swing_foot_id] = (
+            self._construct_foot_trajectories(
+                alpha, toe_positions[self.task.swing_foot_id],
+                destination_foothold_xyz_global, self._foot_clearance_height))
+        joint_pose = np.array(
+            self.robot.motor_angles_from_foot_positions(
+                toe_positions_over_time, position_in_world_frame=True)[1])
+      action = {
+          wheeled_robot_base_sim.BASE_ACTION_NAME:
+              (self._base_movement_speed, 0),
+          wheeled_robot_base_sim.BODY_ACTION_NAME:
+              joint_pose,
+      }
+      obs, _, _, info = self._gym_env.step(action)
+    return obs, info
+
+  def _move_com_dynamic(self, swing_foot_id, num_control_steps):
+    """Move robot COM to the weightd center of the support polygon through dynamics.
+
+    Args:
+      swing_foot_id: Index of the swing foot.
+      num_control_steps: Total number of control steps for moving the COM.
+
+    Returns:
+      The tuple containing the observation and env info.
+    """
+
+    toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=False))
+    support_polygon_center = np.array([0.0, 0.0, 0.0])
+    for i in range(NUM_LEGS):
+      if i != swing_foot_id:
+        support_polygon_center += toe_positions[i] * _SUPPORT_WEIGHT_MAP[
+            swing_foot_id][i]
+    support_polygon_center[2] = 0.0
+
+    for i in range(num_control_steps):
+      alpha = i / (num_control_steps - 1)
+      # Create a flat phase towards the end to stabilize the com movement.
+      alpha = np.clip(alpha * 1.1, 0, 1)
+      toe_positions_over_time = copy.deepcopy(
+          toe_positions) - alpha * support_polygon_center
+      # Use initial toe height to maintain the overal base height.
+      for j in range(len(toe_positions_over_time)):
+        toe_positions_over_time[j][2] = self._initial_local_toe_positions[j][2]
+      joint_pose = np.array(
+          self.robot.motor_angles_from_foot_positions(
+              toe_positions_over_time, position_in_world_frame=False)[1])
+      obs, _, _, info = self._gym_env.step(joint_pose)
+      self._update_com_visualization()
+    return obs, info
+
+  def _swing_leg_dynamic(self, swing_foot_id, destination_foothold_xyz_local,
+                         num_control_steps):
+    """Move swing leg to the target foothold through IK and dynamics.
+
+    Args:
+      swing_foot_id: Index of the swing foot.
+      destination_foothold_xyz_local: Target foothold position.
+      num_control_steps: Total number of control steps for swinging the leg.
+
+    Returns:
+      The tuple containing the observation and env info.
+    """
+
+    local_toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=False))
+
+    # Move swing leg
+    for i in range(num_control_steps):
+      alpha = i / (num_control_steps - 1)
+      toe_positions_over_time = copy.deepcopy(local_toe_positions)
+      toe_positions_over_time[swing_foot_id] = (
+          self._construct_foot_trajectories(alpha,
+                                            local_toe_positions[swing_foot_id],
+                                            destination_foothold_xyz_local,
+                                            self._foot_clearance_height))
+      joint_pose = np.array(
+          self.robot.motor_angles_from_foot_positions(
+              toe_positions_over_time, position_in_world_frame=False)[1])
+
+      obs, _, _, info = self._gym_env.step(joint_pose)
+    return obs, info
+
+  def _move_dynamic(self):
+    """Move robot dynamically.
+
+    Returns:
+      The tuple containing the observation and env info.
+    """
+    destination_foothold_xyz_local = self.task.get_foothold_location(
+        self.last_action, use_world_frame=False)
+    # Lift the target foothold slightly to account for thickness of the feet.
+    destination_foothold_xyz_local[2] += FLOOR_HEIGHT_EPSILON
+
+    # Swing leg in the first 1/3 of the duration.
+    self._swing_leg_dynamic(self.task.swing_foot_id,
+                            destination_foothold_xyz_local,
+                            self._num_control_steps_per_foothold_update // 3)
+
+    # Move COM in the rest 2/3 of the duration.
+    obs, info = self._move_com_dynamic(
+        self.task.next_swing_foot_id,
+        self._num_control_steps_per_foothold_update // 3 * 2)
+
+    return obs, info
+
+  def step(self, action: Sequence[float]):
+    """Steps the wrapped environment.
+
+    Args:
+      action: 2 dimensional numpy array between [-1.0, 1.0]. They represents the
+        depth image pixel index. We assume that only one foot is swinging in
+        this wrapper, and this is the foothold location for that swinging leg.
+        The order of the swinging leg and the index of the current swinging leg
+        is defined in stepstone_visiontask.py.
+
+    Returns:
+      The tuple containing the observation, the reward, and the episode
+        end indicator.
+    """
+    self.last_action = action
+    reward = self.task(self)
+    done = self.task.done(self)
+    if self._visualization:
+      self._update_foothold_visualization(action)
+
+    if isinstance(self._gym_env.robot, quadruped_base.QuadrupedBase):
+      obs, info = self._move_dynamic()
+    else:
+      obs, info = self._move_kinematic()
+
+    self._step_counter += 1
+    current_end_effector_pos = np.array(
+        self._gym_env.robot.foot_positions()[self.task.swing_foot_id])
+    obs["toe_position"] = current_end_effector_pos
+    obs["vision"] = self.task.get_depth_image_for_foot()
+    self._observation = obs
+    return obs, reward, done, info
+
+  def _construct_foot_trajectories(self, alpha, swing_foot_start_position,
+                                   swing_foot_destination,
+                                   foot_clearance_height):
+    """Construct the target foot position for the swing foot.
+
+    Args:
+      alpha: a float in [0.0, 1.0] indicating the phase of the swing foot.
+      swing_foot_start_position: foot position at the beginning of the swing
+        motion.
+      swing_foot_destination: target foot position at the end of the swing
+        motion.
+      foot_clearance_height: the maximum height of the swing foot.
+
+    Returns:
+      The interpolated swing foot position.
+    """
+    new_swing_foot_position = swing_foot_start_position + alpha * (
+        swing_foot_destination - swing_foot_start_position)
+    new_swing_foot_position[2] += (
+        foot_clearance_height * math.sin((alpha) * math.pi))
+    return new_swing_foot_position
+
+  def _create_foot_placement_visualization(self):
+    """Creates a visualization sphere that represents the foothold position."""
+    visual_id = self._gym_env.pybullet_client.createVisualShape(
+        self._gym_env.pybullet_client.GEOM_SPHERE,
+        radius=0.02,
+        rgbaColor=[0.7, 0.7, 0.7, 1])
+    self._foothold_visual_body = self._gym_env.pybullet_client.createMultiBody(
+        baseMass=0, baseVisualShapeIndex=visual_id, basePosition=[0, 0, 0])
+
+  def _update_foothold_visualization(self, action):
+    """Moves the visualization sphere that represents the next foothold."""
+    foothold_xyz = self.task.get_foothold_location(
+        action, use_world_frame=True)
+    self._gym_env.pybullet_client.resetBasePositionAndOrientation(
+        self._foothold_visual_body, foothold_xyz, UNIT_QUATERNION)
+
+  def _create_com_visualization(self):
+    """Creates visualization boxes for COM and support polygon center."""
+    visual_id = self._gym_env.pybullet_client.createVisualShape(
+        self._gym_env.pybullet_client.GEOM_BOX,
+        halfExtents=COM_VIZ_BOX_SIZE,
+        rgbaColor=[0.7, 0.5, 0.5, 1])
+    self._support_polygon_center_visual_body = self._gym_env.pybullet_client.createMultiBody(
+        baseMass=0, baseVisualShapeIndex=visual_id, basePosition=[0, 0, 0])
+
+    visual_id = self._gym_env.pybullet_client.createVisualShape(
+        self._gym_env.pybullet_client.GEOM_BOX,
+        halfExtents=COM_VIZ_BOX_SIZE,
+        rgbaColor=[0.5, 0.75, 0.5, 1])
+    self._projected_com_visual_body = self._gym_env.pybullet_client.createMultiBody(
+        baseMass=0, baseVisualShapeIndex=visual_id, basePosition=[0, 0, 0])
+
+  def _update_com_visualization(self):
+    """Moves the visualization for the COM and support polygon center."""
+    toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=True))
+    support_polygon_center_global = np.array([0.0, 0.0, 0.0])
+    for i in range(NUM_LEGS):
+      if i != self.task.next_swing_foot_id:
+        support_polygon_center_global += toe_positions[i] * _SUPPORT_WEIGHT_MAP[
+            self.task.next_swing_foot_id][i]
+    support_polygon_center_global[2] = 0.0
+    self._gym_env.pybullet_client.resetBasePositionAndOrientation(
+        self._support_polygon_center_visual_body, support_polygon_center_global,
+        UNIT_QUATERNION)
+
+    com = np.copy(self._gym_env.robot.base_position)
+    com[2] = 0.0
+    self._gym_env.pybullet_client.resetBasePositionAndOrientation(
+        self._projected_com_visual_body, com, UNIT_QUATERNION)
+
+  @property
+  def observation(self):
+    return self._observation
+
+  @property
+  def env_step_counter(self):
+    return self._step_counter
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/fixed_steptime_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/fixed_steptime_wrapper_env.py
@@ -0,0 +1,92 @@
+"""A wrapper that controls the timing between steps.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import google_type_annotations
+from __future__ import print_function
+
+import time
+import gin
+
+
+@gin.configurable
+class FixedSteptimeWrapperEnv(object):
+  """A wrapped LocomotionGymEnv with timing control between steps."""
+
+  def __init__(self,
+               gym_env,
+               desired_time_between_steps=None):
+    """Initializes the wrapper env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      desired_time_between_steps: The desired time between steps in seconds.
+        If this is None, it is set to the env_time_step of the gym_env.
+    """
+    self._gym_env = gym_env
+    if desired_time_between_steps is None:
+      self._desired_time_between_steps = gym_env.env_time_step
+    else:
+      self._desired_time_between_steps = desired_time_between_steps
+
+    self._last_reset_time = time.time()
+    self._last_step_time = time.time()
+    self._step_counter = 0
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    """Reset the environment.
+
+    This function records the timing of the reset.
+
+    Args:
+      initial_motor_angles: Not used.
+      reset_duration: Not used.
+
+    Returns:
+      The observation of the environment after reset.
+    """
+    obs = self._gym_env.reset(initial_motor_angles=initial_motor_angles,
+                              reset_duration=reset_duration)
+    self._last_reset_time = time.time()
+    self._last_step_time = time.time()
+    self._step_counter = 0
+    return obs
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+    """
+    time_between_steps = time.time() - self._last_step_time
+    if time_between_steps < self._desired_time_between_steps:
+      time.sleep(self._desired_time_between_steps - time_between_steps)
+    self._last_step_time = time.time()
+    self._step_counter += 1
+    return self._gym_env.step(action)
+
+  @property
+  def elapsed_time(self):
+    """Returns the elapsed time in seconds."""
+    return time.time() - self._last_reset_time
+
+  @property
+  def steps_per_second(self):
+    """Returns the average number of time steps for 1 second."""
+    return self._step_counter / self.elapsed_time
+
+  @property
+  def seconds_per_step(self):
+    """Returns the average time between steps."""
+    return self.elapsed_time / self._step_counter
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/ik_based_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/ik_based_wrapper_env.py
@@ -0,0 +1,109 @@
+"""A wrapped Quadruped with Inverse Kinematics based controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gin
+import gym
+import numpy as np
+from pybullet_envs.minitaur.robots import vision60
+from pybullet_envs.minitaur.robots.utilities import kinematics
+
+ACTION_DIM_PER_LEG = 3
+ACTION_DIM_BASE = 7
+ACTION_DIM_TOTAL = vision60.NUM_LEGS * ACTION_DIM_PER_LEG + ACTION_DIM_BASE
+
+
+@gin.configurable
+class IKBasedWrapperEnv(object):
+  """An env using IK to convert toe positions to joint angles."""
+
+  def __init__(self,
+               gym_env,
+               toe_indices=(3, 7, 11, 15),
+               abduction_motor_ids=(0, 3, 6, 9)):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      toe_indices: A list of four pybullet joint indices for the four toes. [3,
+        7, 11, 15] for the vision60.
+      abduction_motor_ids: A list of four pybullet joint indices for the four
+        abuduction motors. [0, 3, 6, 9] for the vision60.
+    """
+    lower_bound = np.array([-1.0] * ACTION_DIM_TOTAL)
+    upper_bound = np.array([1.0] * ACTION_DIM_TOTAL)
+    self._gym_env = gym_env
+    self.action_space = gym.spaces.Box(lower_bound, upper_bound)
+    self._toe_ids = toe_indices
+    self._abduction_motor_ids = abduction_motor_ids
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def get_toe_indices(self):
+    return self._toe_ids
+
+  def _joint_angles_from_toe_positions_and_base_pose(self, ik_actions):
+    """Uses Inverse Kinematics to calculate jont angles.
+
+    Args:
+      ik_actions: The action should be local (x, y, z) for each toe. action for
+        each leg [x, y, z] in a local frame. This local frame is transformed
+        relative to the COM frame using a given translation, and rotation. The
+        total action space would be 3 + 4 + 3 * ACTION_DIM_PER_LEG = 16.
+
+    Returns:
+      A list of joint angles.
+    """
+    assert len(ik_actions) == ACTION_DIM_TOTAL
+
+    base_translation_index = vision60.NUM_LEGS * ACTION_DIM_PER_LEG
+    base_rotation_index = vision60.NUM_LEGS * ACTION_DIM_PER_LEG + 3
+
+    base_translation = ik_actions[
+        base_translation_index:base_translation_index + 3]
+    base_rotation = ik_actions[base_rotation_index:base_rotation_index + 4]
+    desired_joint_angles = []
+    for i in range(vision60.NUM_LEGS):
+      local_toe_pos = ik_actions[i * ACTION_DIM_PER_LEG:i * ACTION_DIM_PER_LEG +
+                                 ACTION_DIM_PER_LEG]
+      leg_joint_ids = [
+          self._abduction_motor_ids[i], self._abduction_motor_ids[i] + 1,
+          self._abduction_motor_ids[i] + 2
+      ]
+
+      desired_joint_angles.extend(
+          kinematics.joint_angles_from_link_position(
+              robot=self._gym_env.robot,
+              link_position=local_toe_pos,
+              link_id=self._toe_ids[i],
+              joint_ids=leg_joint_ids,
+              base_translation=base_translation,
+              base_rotation=base_rotation))
+
+    return desired_joint_angles
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the modified observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+
+    """
+    if action is None:
+      raise ValueError('Action cannot be None')
+
+    desired_joint_angles = self._joint_angles_from_toe_positions_and_base_pose(
+        ik_actions=action)
+    observation, reward, done, _ = self._gym_env.step(desired_joint_angles)
+
+    return observation, reward, done, _
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/imitation_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/imitation_wrapper_env.py
@@ -0,0 +1,101 @@
+"""A wrapper for motion imitation environment."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gin
+import gym
+import numpy as np
+
+
+@gin.configurable
+class ImitationWrapperEnv(object):
+  """An env using for training policy with motion imitation."""
+
+  def __init__(self, gym_env):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+    """
+    self._gym_env = gym_env
+    self.observation_space = self._build_observation_space()
+
+    self.seed()
+    return
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the modified observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+
+    """
+    original_observation, reward, done, _ = self._gym_env.step(action)
+    observation = self._modify_observation(original_observation)
+
+    return observation, reward, done, _
+
+  @gin.configurable('imitation_wrapper_env.ImitationWrapperEnv.reset')
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    """Resets the robot's position in the world or rebuild the sim world.
+
+    The simulation world will be rebuilt if self._hard_reset is True.
+
+    Args:
+      initial_motor_angles: A list of Floats. The desired joint angles after
+        reset. If None, the robot will use its built-in value.
+      reset_duration: Float. The time (in seconds) needed to rotate all motors
+        to the desired initial values.
+
+    Returns:
+      A numpy array contains the initial observation after reset.
+    """
+    original_observation = self._gym_env.reset(initial_motor_angles, reset_duration)
+    observation = self._modify_observation(original_observation)
+    return observation
+
+  def _modify_observation(self, original_observation):
+    """Appends target observations from the reference motion to the observations.
+
+    Args:
+      original_observation: A numpy array containing the original observations.
+
+    Returns:
+      A numpy array contains the initial original concatenated with target
+      observations from the reference motion.
+    """
+    target_observation = self._task.build_target_obs()
+    observation = np.concatenate([original_observation, target_observation], axis=-1)
+    return observation
+
+  def _build_observation_space(self):
+    """Constructs the observation space, including target observations from
+    the reference motion.
+
+    Returns:
+      Observation space representing the concatenations of the original
+      observations and target observations.
+    """
+    obs_space0 = self._gym_env.observation_space
+    low0 = obs_space0.low
+    high0 = obs_space0.high
+
+    task_low, task_high = self._task.get_target_obs_bounds()
+    low = np.concatenate([low0, task_low], axis=-1)
+    high = np.concatenate([high0, task_high], axis=-1)
+
+    obs_space = gym.spaces.Box(low, high)
+
+    return obs_space
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/mpc_locomotion_wrapper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/mpc_locomotion_wrapper.py
@@ -0,0 +1,794 @@
+"""An env that uses MPC-based motion controller to realize higher level footstep planning."""
+
+import copy
+import enum
+from typing import Sequence
+
+import dataclasses
+import gin
+import gym
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import com_height_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import gait_generator as gait_generator_lib
+from pybullet_envs.minitaur.agents.baseline_controller import imu_based_com_velocity_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import multi_state_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import openloop_gait_generator
+from pybullet_envs.minitaur.agents.baseline_controller import torque_stance_leg_controller
+from pybullet_envs.minitaur.robots import laikago_kinematic_constants
+
+_UNIT_QUATERNION = (0, 0, 0, 1)
+_NUM_LEGS = laikago_kinematic_constants.NUM_LEGS
+_MOTORS_PER_LEG = 3
+_DEFAULT_BODY_HEIGHT = 0.45
+_DEFAULT_BASE_SPEED = (0.0, 0.0)
+_DEFAULT_BASE_TWIST_SPEED = 0.0
+_DEFAULT_ROLL_PITCH = (0.0, 0.0)
+_DEFAULT_SWING_TARGET = (0.0, 0.0, 0.0)
+_DEFAULT_SWING_CLERANCE = 0.05
+_MOTOR_KP = [220.0] * 12
+_MOTOR_KD = [0.3, 2.0, 2.0] * 4
+_BASE_VELOCITY_ACTION_RANGE = ((-1.0, -0.1), (1.0, 0.1))
+_BASE_TWIST_SPEED_ACTION_RANGE = (-0.5, 0.5)
+_BASE_HEIGHT_ACTION_RANGE = (0.3, 0.5)
+# Action bound for the swing target in local x, y, z direction.
+_SWING_TARGET_ACTION_RANGE = ((-0.3, -0.1, -0.25), (0.3, 0.1, 0.25))
+_PITCH_ROLL_ACTION_RANGE = (-0.35, 0.35)
+_SWING_CLEARANCE_ACTION_RANGE = (0.05, 0.3)
+_SWING_TARGET_DELTA_ACTION_RANGE = ((-0.02, -0.01, -0.02), (0.02, 0.01, 0.02))
+_SWING_CLEARANCE_DELTA_RANGE = (-0.02, 0.02)
+
+
+@gin.configurable
+@dataclasses.dataclass
+class BaseTargetHorizontalComVelocityHeuristic(object):
+  """A class for mapping swing foot targets to a heuristic target com velocity."""
+  horizontal_com_velocity_heuristic: np.ndarray = np.zeros(2)
+
+  def update_horizontal_com_velocity_heuristic(self, hip_relative_swing_targets,
+                                               com_velocity, swing_durations):
+    del hip_relative_swing_targets, com_velocity, swing_durations
+    pass
+
+  def reset(self):
+    self.horizontal_com_velocity_heuristic = np.zeros(2)
+
+
+# TODO(magicmelon): Add a one-pager to explain the inverse raibert heuristics.
+@gin.configurable
+class InverseRaibertTargetHorizontalComVelocityHeuristic(
+    BaseTargetHorizontalComVelocityHeuristic):
+  """A class for mapping swing foot targets to a target com velocity with Raibert Heuristics."""
+
+  def __init__(self, gains=(-0.25, -0.1)):
+    self._gains = np.array(gains)
+
+  def update_horizontal_com_velocity_heuristic(self, hip_relative_swing_targets,
+                                               com_velocity, swing_durations):
+    assert len(hip_relative_swing_targets) == len(swing_durations)
+    target_com_horizontal_velocities = []
+    for i in range(len(hip_relative_swing_targets)):
+      target_com_horizontal_velocity = (
+          com_velocity / 2.0 * swing_durations[i] -
+          hip_relative_swing_targets[i]) / self._gains + com_velocity
+      target_com_horizontal_velocities.append(target_com_horizontal_velocity)
+    if target_com_horizontal_velocities:
+      self.horizontal_com_velocity_heuristic = np.mean(
+          target_com_horizontal_velocities, axis=0)
+
+
+@gin.constants_from_enum
+class Gait(enum.Enum):
+  """The possible gaits."""
+  WALK = 0
+  TROT = 1
+
+
+def _select_gait(gait_type=Gait.WALK):
+  """Selects a gait pattern.
+
+  Args:
+    gait_type: which gait to use.
+
+  Returns:
+    A tuple of (stance_duration, duty_factor, initial_phase)
+  """
+  # Each gait is composed of stance_duration, duty_factor, and
+  # init_phase_full_cycle.
+  if gait_type == Gait.TROT:
+    return [0.3] * 4, [0.6] * 4, [0, 0.5, 0.5, 0]
+  elif gait_type == Gait.WALK:
+    return [0.75] * 4, [0.8] * 4, [0.25, 0.75, 0.5, 0]
+  else:
+    raise NotImplementedError
+
+
+@gin.configurable
+class MPCLocomotionWrapper(object):
+  """An env that uses MPC-based motion controller to realize footstep planning.
+
+    The env takes as input the target position of the swing feet and the target
+    base movements, and internally uses an MPC-based controller to achieve these
+    targets. It assumes that the robot follows a given gait pattern, specified
+    during initialization.
+    Before each foot starts to swing, the env will request from the policy a
+    target swing location and height defined in the local frame w.r.t the
+    default toe position. During the swing of the foot, the policy can adjust
+    the base velocity, height, roll, pitch, and twist. Optionally, the policy
+    can also output a delta to the last target swing location to adjust the
+    swing trajectory during the swing
+    Observations (introduced in this env):
+      gait_phases (4D): Normalized phase within the gait cycle for each foot.
+      feet_states (4D): State of each foot. -1: stance, 1: swing, -2: lose
+        contact, 2: early contact.
+      need_new_swing_target (4D): Whether the foot needs a new swing target at
+        the current step. Set to 1 when the foot switches to swing from a
+        different state. When equals to 0, the corresponding foot target will
+        not have effect.
+      estimated_base_speed (3D): Estimated base velocity.
+      estimated_body_height (1D): Estimated base height.
+      heuristics_com_velocity (3D): Target base velocity calculated from the
+        input step-length using inverse Raibert heuristics. Used when
+        compute_heuristic_com_speed is True.
+      current_toe_target: Immediate tracking targets for the four feet in the
+        local frame.
+    Action components:
+      swing_targets (12D): Used in HL_LL and HL_only mode. Specifies
+        the swing target for each foot w.r.t the default local toe position.
+      swing_clearance (4D): Used in HL_LL and HL_only mode. Specifies
+        the height of the highest point in the swing trajectory.
+      swing_targets_delta (12D): Used if policy_output_swing_action_delta.
+        Specifies the change in the swing target for each foot.
+      swing_clearance_delta (4D): Used if policy_output_swing_action_delta.
+        Specifies the change in the swing clearance for each foot.
+      target_base_velocity (2D): Target base velocity in the horizontal plane.
+        Used when compute_heuristic_com_speed is False.
+      base_twist (1D): Target base twist.
+      base_height (1D): Target base height.
+      base_roll_pitch (2D): Target base roll and pitch.
+
+  Attributes:
+    observation: The current observation of the environment.
+    last_action: The last that was used to step the environment.
+    env_step_counter: The number of control steps that have been elapesed since
+      the environment is reset.
+    action_space: The action space of the environment.
+  """
+
+  def __init__(
+      self,
+      gym_env,
+      swing_target_action_range=_SWING_TARGET_ACTION_RANGE,
+      swing_clearance_action_range=_SWING_CLEARANCE_ACTION_RANGE,
+      pitch_action_range=_PITCH_ROLL_ACTION_RANGE,
+      roll_action_range=_PITCH_ROLL_ACTION_RANGE,
+      base_velocity_action_range=_BASE_VELOCITY_ACTION_RANGE,
+      base_twist_action_range=_BASE_TWIST_SPEED_ACTION_RANGE,
+      base_height_action_range=_BASE_HEIGHT_ACTION_RANGE,
+      policy_output_swing_action_delta=False,
+      swing_target_delta_range=_SWING_TARGET_DELTA_ACTION_RANGE,
+      swing_clearance_delta_range=_SWING_CLEARANCE_DELTA_RANGE,
+      foot_friction_coeff=0.5,
+      contact_detection_force_threshold=0.0,
+      locomotion_gait=Gait.WALK,
+      target_horizontal_com_velocity_heuristic=BaseTargetHorizontalComVelocityHeuristic(
+      ),
+      robot_mass_in_mpc=235.0 / 9.8,
+      control_frequency=20,
+      com_velocity_estimator_class=imu_based_com_velocity_estimator
+      .IMUBasedCOMVelocityEstimator):
+    """Initializes the wrapper.
+
+    Args:
+      gym_env: the wrapped gym environment.
+      swing_target_action_range: range for the swing targets specified before
+        each swing.
+      swing_clearance_action_range: range for the swing clearance.
+      pitch_action_range: range for the target body pitch.
+      roll_action_range: range for the target body roll.
+      base_velocity_action_range: range for the base velocity.
+      base_twist_action_range: range for the base twist.
+      base_height_action_range: range for the base height.
+      policy_output_swing_action_delta: whether to allow the policy to output an
+        adjustment to the last swing target during the swing motion.
+      swing_target_delta_range: range for the adjustment of the swing target.
+      swing_clearance_delta_range: range for the swing clearance adjustments.
+      foot_friction_coeff: friction on the feet.
+      contact_detection_force_threshold: Threshold of the contact sensor for
+        determining whether a foot is in contact. Use 20 for real robot and 0
+        for simulation.
+      locomotion_gait: Gait to be used.
+      target_horizontal_com_velocity_heuristic: .
+      robot_mass_in_mpc: mass of the robot used in MPC.
+      control_frequency: frequency of querying the policy. The internal MPC
+        controller can have higher frequency. Note that the policy outputs a
+        swing target and clearance at each query, however, it is only used by
+        the environment at the beginning of each swing phase (when
+        need_new_swing_target is 1).
+      com_velocity_estimator_class: class of the com velocity estimator. Use
+        IMUBasedCOMVelocityEstimator for estimating velocity from IMU sensor and
+        contact states. Use COMVelocityEstimator for using the ground-truth com
+        velocity (e.g. when mocap is available).
+    """
+    self._gym_env = gym_env
+    self._time_per_control_step = 1.0 / control_frequency
+    self._foot_friction_coeff = foot_friction_coeff
+    self._contact_detection_force_threshold = contact_detection_force_threshold
+    self._locomotion_gait = locomotion_gait
+    self._policy_output_swing_action_delta = policy_output_swing_action_delta
+    self._target_horizontal_com_velocity_heuristic = target_horizontal_com_velocity_heuristic
+
+    self.last_action = None
+
+    self._configure_action_space(
+        swing_target_action_range, swing_clearance_action_range,
+        pitch_action_range, roll_action_range, base_velocity_action_range,
+        base_twist_action_range, base_height_action_range,
+        policy_output_swing_action_delta, swing_target_delta_range,
+        swing_clearance_delta_range)
+
+    self._configure_observation_space()
+
+    # Set up the MPC controller
+    stance_duration, duty_factor, initial_phase = _select_gait(locomotion_gait)
+    self._gait_generator = openloop_gait_generator.OpenloopGaitGenerator(
+        self._gym_env.robot, stance_duration, duty_factor, initial_phase,
+        contact_detection_force_threshold)
+    self._com_velocity_estimator = com_velocity_estimator_class(
+        self._gym_env.robot)
+    self._com_height_estimator = com_height_estimator.COMHeightEstimator(
+        self._gym_env.robot)
+    self._state_estimator = multi_state_estimator.MultiStateEstimator(
+        self._gym_env.robot,
+        state_estimators=[
+            self._com_velocity_estimator, self._com_height_estimator
+        ])
+    self._stance_controller = torque_stance_leg_controller.TorqueStanceLegController(
+        self._gym_env.robot,
+        self._gait_generator,
+        self._state_estimator,
+        desired_speed=np.array(_DEFAULT_BASE_SPEED),
+        desired_twisting_speed=_DEFAULT_BASE_TWIST_SPEED,
+        desired_body_height=_DEFAULT_BODY_HEIGHT,
+        desired_roll_pitch=np.array(_DEFAULT_ROLL_PITCH),
+        body_mass=robot_mass_in_mpc)
+
+  def _configure_observation_space(self):
+    """Configure the observation space."""
+    self.observation_space.spaces["gait_phases"] = gym.spaces.Box(
+        np.array([-1.0] * 4), np.array([1.0] * 4))
+    self.observation_space.spaces["feet_states"] = gym.spaces.Box(
+        np.array([-2.0] * 4), np.array([2.0] * 4))
+    self.observation_space.spaces["need_new_swing_target"] = gym.spaces.Box(
+        np.array([0.0] * 4), np.array([1.0] * 4))
+    self.observation_space.spaces["estimated_base_speed"] = gym.spaces.Box(
+        np.array([-1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0]))
+    self.observation_space.spaces["estimated_body_height"] = gym.spaces.Box(
+        np.array([0.35]), np.array([0.5]))
+    self.observation_space.spaces["heuristics_com_velocity"] = gym.spaces.Box(
+        np.array([-1.0] * 2), np.array([1.0] * 2))
+    self.observation_space.spaces["current_toe_target"] = gym.spaces.Box(
+        np.array([-1.0, -1.0, -1.0] * _NUM_LEGS),
+        np.array([1.0, 1.0, 1.0] * _NUM_LEGS))
+
+    # Needed so that LastActionSensor uses the correct action space.
+    for s in self.all_sensors():
+      s.on_reset(self)
+    for sensor in self.all_sensors():
+      if sensor.get_name() not in self._gym_config.ignored_sensor_list:
+        if hasattr(sensor, "observation_space"):
+          self.observation_space.spaces[
+              sensor.get_name()] = sensor.observation_space
+
+    self.task.reset(self)
+    if hasattr(self.task, "observation_space"):
+      self.observation_space.spaces[
+          self.task.get_name()] = self.task.observation_space
+
+  def _configure_action_space(self, swing_target_action_range,
+                              swing_clearance_action_range, pitch_action_range,
+                              roll_action_range, base_velocity_action_range,
+                              base_twist_action_range, base_height_action_range,
+                              policy_output_swing_action_delta,
+                              swing_target_delta_range,
+                              swing_clearance_delta_range):
+    """Configure the action space."""
+    ac_lb = np.array([])
+    ac_ub = np.array([])
+
+    # Index of different part of the actions within the action array.
+    self._action_start_indices = {}
+    self._action_dimensions = {}
+    self._action_names = []
+
+    # Swing targets and swing clearance.
+    for leg_id in range(_NUM_LEGS):
+      self._action_start_indices["swing_targets_" + str(leg_id)] = len(ac_lb)
+      self._action_dimensions["swing_targets_" + str(leg_id)] = len(
+          swing_target_action_range[0])
+      self._action_names.append("swing_targets_" + str(leg_id))
+      ac_lb = np.concatenate([ac_lb, swing_target_action_range[0]])
+      ac_ub = np.concatenate([ac_ub, swing_target_action_range[1]])
+    for leg_id in range(_NUM_LEGS):
+      self._action_start_indices["swing_clearance_" + str(leg_id)] = len(ac_lb)
+      self._action_dimensions["swing_clearance_" + str(leg_id)] = 1
+      self._action_names.append("swing_clearance_" + str(leg_id))
+      ac_lb = np.concatenate([ac_lb, [swing_clearance_action_range[0]]])
+      ac_ub = np.concatenate([ac_ub, [swing_clearance_action_range[1]]])
+
+    # Delta to the swing targets and clearance.
+    if policy_output_swing_action_delta:
+      for leg_id in range(_NUM_LEGS):
+        self._action_start_indices["swing_targets_delta_" +
+                                   str(leg_id)] = len(ac_lb)
+        self._action_dimensions["swing_targets_delta_" + str(leg_id)] = len(
+            swing_target_delta_range[0])
+        self._action_names.append("swing_targets_delta_" + str(leg_id))
+        ac_lb = np.concatenate([ac_lb, swing_target_delta_range[0]])
+        ac_ub = np.concatenate([ac_ub, swing_target_delta_range[1]])
+      for leg_id in range(_NUM_LEGS):
+        self._action_start_indices["swing_clearance_delta_" +
+                                   str(leg_id)] = len(ac_lb)
+        self._action_dimensions["swing_clearance_delta_" + str(leg_id)] = 1
+        self._action_names.append("swing_clearance_delta_" + str(leg_id))
+        ac_lb = np.concatenate([ac_lb, [swing_clearance_delta_range[0]]])
+        ac_ub = np.concatenate([ac_ub, [swing_clearance_delta_range[1]]])
+
+    # Desired CoM velocity actions
+    # Do not include the action if bounds are all equal to zero
+    if not np.all(np.array(base_velocity_action_range) == 0):
+      self._action_start_indices["target_base_velocity"] = len(ac_lb)
+      self._action_dimensions["target_base_velocity"] = 2
+      self._action_names.append("target_base_velocity")
+      ac_lb = np.concatenate([
+          ac_lb,
+          [base_velocity_action_range[0][0], base_velocity_action_range[0][1]]
+      ])
+      ac_ub = np.concatenate([
+          ac_ub,
+          [base_velocity_action_range[1][0], base_velocity_action_range[1][1]]
+      ])
+
+    # Base twist speed action
+    self._action_start_indices["base_twist"] = len(ac_lb)
+    self._action_dimensions["base_twist"] = 1
+    self._action_names.append("base_twist")
+    ac_lb = np.concatenate([ac_lb, [base_twist_action_range[0]]])
+    ac_ub = np.concatenate([ac_ub, [base_twist_action_range[1]]])
+
+    # Base height action
+    self._action_start_indices["base_height"] = len(ac_lb)
+    self._action_dimensions["base_height"] = 1
+    self._action_names.append("base_height")
+    ac_lb = np.concatenate([ac_lb, [base_height_action_range[0]]])
+    ac_ub = np.concatenate([ac_ub, [base_height_action_range[1]]])
+
+    # Roll-pitch action
+    self._action_start_indices["base_roll_pitch"] = len(ac_lb)
+    self._action_dimensions["base_roll_pitch"] = 2
+    self._action_names.append("base_roll_pitch")
+    ac_lb = np.concatenate(
+        [ac_lb, [roll_action_range[0], pitch_action_range[0]]])
+    ac_ub = np.concatenate(
+        [ac_ub, [roll_action_range[1], pitch_action_range[1]]])
+
+    self.action_space = gym.spaces.Box(ac_lb, ac_ub)
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def _fill_observations(self, obs):
+    """Fill the additional observations from this wrapper."""
+    phase_offset = np.array([
+        0 if leg_state == gait_generator_lib.LegState.STANCE else 1
+        for leg_state in self._gait_generator.desired_leg_state
+    ])
+    obs["gait_phases"] = self._gait_generator.normalized_phase - phase_offset
+    obs["feet_states"] = []
+    for leg_state in self._gait_generator.desired_leg_state:
+      if leg_state == gait_generator_lib.LegState.STANCE:
+        obs["feet_states"].append(-1)
+      if leg_state == gait_generator_lib.LegState.SWING:
+        obs["feet_states"].append(1)
+      if leg_state == gait_generator_lib.LegState.EARLY_CONTACT:
+        obs["feet_states"].append(2)
+      if leg_state == gait_generator_lib.LegState.LOSE_CONTACT:
+        obs["feet_states"].append(-2)
+    obs["need_new_swing_target"] = np.copy(self._need_new_swing_target)
+    obs["estimated_base_speed"] = self._state_estimator.com_velocity_body_yaw_aligned_frame
+    obs["estimated_body_height"] = [self._state_estimator.estimated_com_height]
+    obs["heuristics_com_velocity"] = np.copy(
+        self._target_horizontal_com_velocity_heuristic
+        .horizontal_com_velocity_heuristic)
+    obs["current_toe_target"] = np.copy(self._current_toe_target)
+
+  def _reset_mpc_controller(self):
+    """Reset the state of the MPC controller."""
+    self._gait_generator.reset(0.0)
+    self._state_estimator.reset(0.0)
+    self._stance_controller.reset(0.0)
+    self._stance_controller.desired_speed = np.array(_DEFAULT_BASE_SPEED)
+    self._stance_controller.desired_twisting_speed = _DEFAULT_BASE_TWIST_SPEED
+    self._stance_controller.desired_body_height = _DEFAULT_BODY_HEIGHT
+    self._stance_controller.desired_roll_pitch = np.array(_DEFAULT_ROLL_PITCH)
+    self._mpc_reset_time = self.robot.GetTimeSinceReset()
+
+  def reset(self, **kwargs):
+    """Reset the environment."""
+    self._gym_env.reset(**kwargs)
+
+    self._reset_mpc_controller()
+
+    self._last_leg_state = copy.copy(self._gait_generator.leg_state)
+
+    self._initial_local_toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=False))
+
+    self._current_toe_target = np.reshape(
+        copy.deepcopy(self._initial_local_toe_positions), 3 * _NUM_LEGS)
+
+    # Record last lift off position for each foot for computing swing
+    # trajectory.
+    self._lift_off_positions = copy.deepcopy(self._initial_local_toe_positions)
+
+    # Swing command by policy at the beginning of each swing phase.
+    self._nominal_swing_leg_commands = []
+    for _ in range(_NUM_LEGS):
+      self._nominal_swing_leg_commands.append({
+          "swing_target": np.array(_DEFAULT_SWING_TARGET),
+          "swing_clearance": _DEFAULT_SWING_CLERANCE
+      })
+    # Actual swing leg commands that incorporated potential delta adjustments
+    # from the policy.
+    self._actual_swing_leg_commands = []
+    for _ in range(_NUM_LEGS):
+      self._actual_swing_leg_commands.append({
+          "swing_target": np.array(_DEFAULT_SWING_TARGET),
+          "swing_clearance": _DEFAULT_SWING_CLERANCE
+      })
+
+    self._need_new_swing_target = np.array([0.0] * _NUM_LEGS)
+
+    self._target_horizontal_com_velocity_heuristic.reset()
+
+    self.last_action = []
+    for leg_id in range(_NUM_LEGS):
+      self.last_action.extend(
+          [0.0] * self._action_dimensions["swing_targets_" + str(leg_id)])
+    for leg_id in range(_NUM_LEGS):
+      self.last_action.extend(
+          [0.0] * self._action_dimensions["swing_clearance_" + str(leg_id)])
+    if self._policy_output_swing_action_delta:
+      for leg_id in range(_NUM_LEGS):
+        self.last_action.extend(
+            [0.0] *
+            self._action_dimensions["swing_targets_delta_" + str(leg_id)])
+      for leg_id in range(_NUM_LEGS):
+        self.last_action.extend(
+            [0.0] *
+            self._action_dimensions["swing_clearance_delta_" + str(leg_id)])
+    print("=========", len(self.last_action))
+    if "target_base_velocity" in self._action_names:
+      self.last_action.extend([0.0] *
+                              self._action_dimensions["target_base_velocity"])
+    self.last_action.extend([0.0] * self._action_dimensions["base_twist"])
+    self.last_action.extend([0.0] * self._action_dimensions["base_height"])
+    self.last_action.extend([0.0] * self._action_dimensions["base_roll_pitch"])
+
+    # Needed for LastActionSensor to use the correct last_action.
+    for s in self.all_sensors():
+      s.on_reset(self)
+    obs = self._get_observation()
+    self._fill_observations(obs)
+
+    self.task.reset(self)
+    self._step_counter = 0
+
+    # Change feet friction.
+    for link_id in list(
+        self.robot.urdf_loader.get_end_effector_id_dict().values()):
+      self.pybullet_client.changeDynamics(
+          self.robot.robot_id,
+          link_id,
+          lateralFriction=self._foot_friction_coeff)
+    self._observation = obs
+
+    return self._observation
+
+  def _clean_up_action(self, action):
+    """Return a cleaned up action to use previous value or zero for components not used.
+
+    Args:
+      action: Input action from the policy.
+
+    Returns:
+      A cleaned up action where components not used in this step is replaced
+        with zero or value from previous steps.
+    """
+    cleaned_action = np.copy(action)
+    for leg_id in range(_NUM_LEGS):
+      if not self._need_new_swing_target[leg_id]:
+        swing_target_name = "swing_targets_" + str(leg_id)
+        swing_targets_start_index = self._action_start_indices[
+            swing_target_name]
+        swing_targets_end_index = swing_targets_start_index + self._action_dimensions[
+            swing_target_name]
+        cleaned_action[swing_targets_start_index:
+                       swing_targets_end_index] = self.last_action[
+                           swing_targets_start_index:swing_targets_end_index]
+
+        swing_clearance_name = "swing_clearance_" + str(leg_id)
+        swing_clearance_start_index = self._action_start_indices[
+            swing_clearance_name]
+        cleaned_action[swing_clearance_start_index] = self.last_action[
+            swing_clearance_start_index]
+    if self._policy_output_swing_action_delta:
+      for leg_id in range(_NUM_LEGS):
+        if self._observation["feet_states"][leg_id] != 1:
+          swing_target_delta_name = "swing_targets_delta_" + str(leg_id)
+          swing_targets_delta_start_index = self._action_start_indices[
+              swing_target_delta_name]
+          swing_targets_delta_end_index = swing_targets_delta_start_index + self._action_dimensions[
+              swing_target_delta_name]
+          cleaned_action[
+              swing_targets_delta_start_index:
+              swing_targets_delta_end_index] = self.last_action[
+                  swing_targets_delta_start_index:swing_targets_delta_end_index]
+
+          swing_clearance_delta_name = "swing_clearance_delta_" + str(leg_id)
+          swing_clearance_delta_start_index = self._action_start_indices[
+              swing_clearance_delta_name]
+          cleaned_action[swing_clearance_delta_start_index] = self.last_action[
+              swing_clearance_delta_start_index]
+
+    return cleaned_action
+
+  def _get_toe_tracking_target(self, lift_off_position, phase, swing_clearance,
+                               landing_position):
+    """Get the tracking target for the toes during the swing phase.
+
+    The swing toe will move 70% of the distance in the first half of the swing.
+    Intuitely, we want to move the swing foot quickly to the target landing
+    location and stay above the ground, in this way the control is more robust
+    to perturbations to the body that may cause the swing foot to drop onto
+    the ground earlier than expected. This is a common practice similar
+    to the MIT cheetah and Marc Raibert's original controllers. After the
+    designated swing motion finishes, we also command the foot to go down
+    for a short ditance (0.03m). This is to mitigate issues when the swing
+    finishes before it touches the ground.
+
+    Args:
+      lift_off_position: Local position when the foot leaves ground.
+      phase: Normalized phase of the foot in the current swing cycle.
+      swing_clearance: Height of the highest point in the swing trajectory.
+      landing_position: Target landing position in the local space.
+
+    Returns:
+      The interpolated foot target for the current step.
+    """
+    # Up vector in the world coordinate (without considering yaw).
+    rotated_up_vec = np.array(
+        self.pybullet_client.multiplyTransforms(
+            (0, 0, 0),
+            self.pybullet_client.getQuaternionFromEuler(
+                (self.robot.base_roll_pitch_yaw[0],
+                 self.robot.base_roll_pitch_yaw[1], 0)), (0, 0, 1),
+            _UNIT_QUATERNION)[0])
+
+    # Linearly interpolate the trajectory to get the foot target.
+    keyframe_timings = [0.0, 0.45, 0.9, 0.9001, 1.0]
+    peak_toe_position = 0.3 * lift_off_position + 0.7 * landing_position + rotated_up_vec * swing_clearance
+    # TODO(magicmelon): Update the gait generator to warp the gait and keep
+    # the swing leg going down until it touches the ground.
+    landing_position_pressing_down = landing_position - rotated_up_vec * 0.03
+    keyframe_positions = np.array([
+        lift_off_position, peak_toe_position, landing_position,
+        landing_position_pressing_down, landing_position_pressing_down
+    ])
+
+    target_toe_positions = np.array([
+        np.interp(phase, keyframe_timings, keyframe_positions[:, 0]),
+        np.interp(phase, keyframe_timings, keyframe_positions[:, 1]),
+        np.interp(phase, keyframe_timings, keyframe_positions[:, 2])
+    ])
+    return target_toe_positions
+
+  def step(self, action: Sequence[float]):
+    """Steps the wrapped environment.
+
+    Args:
+      action:
+
+    Returns:
+      The tuple containing the observation, the reward, and the episode
+        end indicator.
+    """
+    self.last_action = self._clean_up_action(action)
+
+    obs, reward, done, info = self._step_motion_controller(self.last_action)
+    self._step_counter += 1
+    self._fill_observations(obs)
+    self._observation = obs
+
+    return obs, reward, done, info
+
+  def _extract_action(self, action, name):
+    return action[self.
+                  _action_start_indices[name]:self._action_start_indices[name] +
+                  self._action_dimensions[name]]
+
+  def _get_swing_foot_ids(self):
+    swing_foot_ids = []
+    for leg_id in range(_NUM_LEGS):
+      if self._gait_generator.leg_state[
+          leg_id] == gait_generator_lib.LegState.SWING:
+        swing_foot_ids.append(leg_id)
+    return swing_foot_ids
+
+  def _update_gait_states_and_flags(self):
+    """Update gait-related variables and flags."""
+    current_leg_state = self._gait_generator.leg_state
+    for leg_id in range(_NUM_LEGS):
+      if current_leg_state[
+          leg_id] == gait_generator_lib.LegState.SWING and self._last_leg_state[
+              leg_id] != gait_generator_lib.LegState.SWING:
+        self._lift_off_positions[leg_id] = self._gym_env.robot.foot_positions(
+        )[leg_id]
+        self._need_new_swing_target[leg_id] = 1
+    self._last_leg_state = copy.copy(current_leg_state)
+
+    com_estimate_leg_indices = []
+    for leg_id in range(_NUM_LEGS):
+      # Use the ones not swinging to estimate the com height
+      if leg_id not in self._get_swing_foot_ids():
+        com_estimate_leg_indices.append(leg_id)
+    self._com_height_estimator.com_estimate_leg_indices = com_estimate_leg_indices
+    self._state_estimator.update(self.robot.GetTimeSinceReset() -
+                                 self._mpc_reset_time)
+    self._gait_generator.update(self.robot.GetTimeSinceReset() -
+                                self._mpc_reset_time)
+
+  def _process_action(self, action):
+    """Process the action and set relevant variables."""
+    # Extract the swing targets from the input action.
+    for leg_id in range(_NUM_LEGS):
+      if self._need_new_swing_target[leg_id]:
+        self._nominal_swing_leg_commands[leg_id]["swing_target"] = (
+            self._extract_action(action, "swing_targets_" + str(leg_id)))
+        self._nominal_swing_leg_commands[leg_id]["swing_clearance"] = (
+            self._extract_action(action, "swing_clearance_" + str(leg_id)))
+
+        self._actual_swing_leg_commands[leg_id]["swing_target"] = np.copy(
+            self._nominal_swing_leg_commands[leg_id]["swing_target"])
+        self._actual_swing_leg_commands[leg_id][
+            "swing_clearance"] = self._nominal_swing_leg_commands[leg_id][
+                "swing_clearance"]
+
+    # Reset the flags so the next high level commands are not used until the
+    # next swing happens.
+    self._need_new_swing_target = np.zeros(_NUM_LEGS)
+
+    # Extract the delta swing targets from the input action.
+    if self._policy_output_swing_action_delta:
+      for leg_id in self._get_swing_foot_ids():
+        self._actual_swing_leg_commands[leg_id]["swing_target"] = (
+            self._nominal_swing_leg_commands[leg_id]["swing_target"] +
+            self._extract_action(action, "swing_targets_delta_" + str(leg_id)))
+        self._actual_swing_leg_commands[leg_id]["swing_clearance"] = (
+            self._nominal_swing_leg_commands[leg_id]["swing_clearance"] +
+            self._extract_action(action,
+                                 "swing_clearance_delta_" + str(leg_id)))
+    # Extract the target base movement commands from the input action.
+    if "target_base_velocity" in self._action_names:
+      self._target_base_velocity_from_policy = self._extract_action(
+          action, "target_base_velocity")
+    else:
+      self._target_base_velocity_from_policy = np.zeros(3)
+    desired_twist_speed = self._extract_action(action, "base_twist")
+    desired_body_height = self._extract_action(action, "base_height")
+    desired_roll_pitch = self._extract_action(action, "base_roll_pitch")
+
+    self._stance_controller.desired_twisting_speed = desired_twist_speed
+    self._stance_controller.desired_body_height = desired_body_height
+    self._stance_controller.desired_roll_pitch = desired_roll_pitch
+
+  def _compute_swing_action(self):
+    """Compute actions for the swing legs."""
+    local_toe_positions = np.array(
+        self._gym_env.robot.foot_positions(position_in_world_frame=False))
+    toe_positions_over_time = copy.deepcopy(local_toe_positions)
+
+    for leg_id in self._get_swing_foot_ids():
+      target_toe_positions_local = self._get_toe_tracking_target(
+          self._lift_off_positions[leg_id],
+          self._gait_generator.normalized_phase[leg_id],
+          self._actual_swing_leg_commands[leg_id]["swing_clearance"],
+          self._actual_swing_leg_commands[leg_id]["swing_target"] +
+          self._initial_local_toe_positions[leg_id])
+      toe_positions_over_time[leg_id] = target_toe_positions_local
+    self._current_toe_target = np.reshape(
+        copy.deepcopy(toe_positions_over_time), 3 * _NUM_LEGS)
+    return np.array(
+        self.robot.motor_angles_from_foot_positions(
+            toe_positions_over_time, position_in_world_frame=False)[1])
+
+  def _compute_stance_action(self):
+    """Compute actions for the stance legs."""
+
+    # update target com velocity by combining policy output and heuristics
+    hip_relative_swing_targets = []
+    swing_durations = []
+    com_horizontal_velocity = np.array(
+        self._state_estimator.com_velocity_body_yaw_aligned_frame)[0:2]
+    for swing_id in self._get_swing_foot_ids():
+      hip_relative_swing_targets.append(
+          np.array([
+              self._actual_swing_leg_commands[swing_id]["swing_target"][0],
+              self._actual_swing_leg_commands[swing_id]["swing_target"][1]
+          ]))
+      swing_durations.append(self._gait_generator.swing_duration[swing_id])
+
+    self._target_horizontal_com_velocity_heuristic.update_horizontal_com_velocity_heuristic(
+        hip_relative_swing_targets, com_horizontal_velocity, swing_durations)
+    self._stance_controller.desired_speed = np.array(
+        self._target_base_velocity_from_policy)
+    self._stance_controller.desired_speed[
+        0:
+        2] += self._target_horizontal_com_velocity_heuristic.horizontal_com_velocity_heuristic
+
+    # compute actions for the stance leg
+    return self._stance_controller.get_action()
+
+  def _combine_swing_stance_action(self, swing_action, stance_action):
+    """Combine stance and swing leg actions."""
+    feet_contact_states = copy.copy(self._gait_generator.leg_state)
+    actions = []
+    for leg_id in range(_NUM_LEGS):
+      if leg_id in self._get_swing_foot_ids(
+      ) and feet_contact_states[leg_id] == gait_generator_lib.LegState.SWING:
+        for motor_id_in_leg in range(_MOTORS_PER_LEG):
+          actions.extend(
+              (swing_action[leg_id * _MOTORS_PER_LEG + motor_id_in_leg],
+               _MOTOR_KP[leg_id * _MOTORS_PER_LEG + motor_id_in_leg], 0.0,
+               _MOTOR_KD[leg_id * _MOTORS_PER_LEG + motor_id_in_leg], 0.0))
+      else:
+        for motor_id_in_leg in range(_MOTORS_PER_LEG):
+          actions.extend(stance_action[leg_id * _MOTORS_PER_LEG +
+                                       motor_id_in_leg])
+    return actions
+
+  def _step_motion_controller(self, action):
+    """Run the MPC controller to advance the robot's state."""
+
+    self._process_action(action)
+
+    # run MPC-based control for a certain amount of time
+    total_reward = 0.0
+    start_time = self._gym_env.robot.GetTimeSinceReset()
+    while self._gym_env.robot.GetTimeSinceReset(
+    ) - start_time < self._time_per_control_step:
+      self._update_gait_states_and_flags()
+
+      swing_action = self._compute_swing_action()
+
+      stance_action = self._compute_stance_action()
+
+      actions = self._combine_swing_stance_action(swing_action, stance_action)
+
+      obs, rew, done, info = self._gym_env.step(actions)
+      if self._stance_controller.qp_solver_fail:
+        done = True
+      total_reward += rew
+
+      if done:
+        return obs, 0.0, done, info
+
+    return obs, total_reward, done, info
+
+  @property
+  def observation(self):
+    return self._observation
+
+  @property
+  def env_step_counter(self):
+    return self._step_counter
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/observation_dictionary_to_array_wrapper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/observation_dictionary_to_array_wrapper.py
@@ -0,0 +1,65 @@
+"""An env wrapper that flattens the observation dictionary to an array."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gym
+import gin
+from pybullet_envs.minitaur.envs_v2.utilities import env_utils
+
+
+@gin.configurable
+class ObservationDictionaryToArrayWrapper(gym.Env):
+  """An env wrapper that flattens the observation dictionary to an array."""
+
+  def __init__(self, gym_env, observation_excluded=()):
+    """Initializes the wrapper."""
+    self.observation_excluded = observation_excluded
+    self._gym_env = gym_env
+    self.observation_space = self._flatten_observation_spaces(
+        self._gym_env.observation_space)
+    self.action_space = self._gym_env.action_space
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def _flatten_observation_spaces(self, observation_spaces):
+    flat_observation_space = env_utils.flatten_observation_spaces(
+        observation_spaces=observation_spaces,
+        observation_excluded=self.observation_excluded)
+    return flat_observation_space
+
+  def _flatten_observation(self, input_observation):
+    """Flatten the dictionary to an array."""
+    return env_utils.flatten_observations(
+        observation_dict=input_observation,
+        observation_excluded=self.observation_excluded)
+
+  def seed(self, seed=None):
+    return self._gym_env.seed(seed)
+
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    observation = self._gym_env.reset(
+        initial_motor_angles=initial_motor_angles,
+        reset_duration=reset_duration)
+    return self._flatten_observation(observation)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the flattened observation, the reward, the epsiode
+        end indicator.
+    """
+    observation_dict, reward, done, _ = self._gym_env.step(action)
+    return self._flatten_observation(observation_dict), reward, done, _
+
+  def render(self, mode='human'):
+    return self._gym_env.render(mode)
+
+  def close(self):
+    return self._gym_env.close()
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/pmtg_inplace_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/pmtg_inplace_wrapper_env.py
@@ -0,0 +1,176 @@
+"""A wrapped MinitaurGymEnv with a built-in controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import attr
+from gym import spaces
+import numpy as np
+import gin
+from pybullet_envs.minitaur.agents.trajectory_generator import tg_inplace
+from pybullet_envs.minitaur.envs.utilities import laikago_pose_utils
+from pybullet_envs.minitaur.envs.utilities import minitaur_pose_utils
+
+_NUM_LEGS = 4
+_LAIKAGO_NUM_ACTIONS = 12
+_FREQ_LOWER_BOUND = 0.0
+_FREQ_UPPER_BOUND = 3
+_DEFAULT_AMPLITUDE_STANCE = -0.02
+_DEFAULT_AMPLITUDE_LIFT = 0.9
+_DEFAULT_CENTER_EXTENSION = 0
+_DEFAULT_STANCE_LIFT_CUTOFF = 2 * np.pi * 0.67
+_DEFAULT_RESIDUAL_RANGE = 0.4
+_LAIKAGO_KNEE_ACTION_INDEXES = [2, 5, 8, 11]
+MINITAUR_INIT_EXTENSION_POS = 2.0
+MINITAUR_INIT_SWING_POS = 0.0
+
+
+@gin.configurable
+class PmtgInplaceWrapperEnv(object):
+  """A wrapped LocomotionGymEnv with a built-in trajectory generator."""
+
+  def __init__(self,
+               gym_env,
+               freq_lower_bound=_FREQ_LOWER_BOUND,
+               freq_upper_bound=_FREQ_UPPER_BOUND,
+               residual_range=_DEFAULT_RESIDUAL_RANGE,
+               amplitude_stance=_DEFAULT_AMPLITUDE_STANCE,
+               amplitude_lift=_DEFAULT_AMPLITUDE_LIFT,
+               center_extension=_DEFAULT_CENTER_EXTENSION,
+               stance_lift_cutoff=_DEFAULT_STANCE_LIFT_CUTOFF):
+    """Initializes the TG inplace wrapper class.
+
+    Args:
+      gym_env: the gym environment to wrap on.
+      freq_lower_bound: minimum frequency that the TGs can be propagated at.
+      freq_upper_bound: maximum frequency that the TGs can be propagated at.
+      residual_range: range of residuals that can be added to tg outputs.
+      amplitude_stance: stance amplitude of TG (see tg_inplace.py for details).
+      amplitude_lift: swing amplitude of TG (see tg_inplace.py for details).
+      center_extension: center extension of TG (see tg_inplace.py for details).
+      stance_lift_cutoff: phase cutoff between stance and lift phase (see
+        tg_inplace.py for details).
+    """
+    self._gym_env = gym_env
+    self._num_actions = gym_env.robot.num_motors
+    self._tg_phases = tg_inplace.reset()
+    self._tg_params = dict(
+        amplitude_stance=amplitude_stance,
+        amplitude_lift=amplitude_lift,
+        center_extension=center_extension,
+        stance_lift_cutoff=stance_lift_cutoff)
+
+    # Add the action boundaries for delta time, one per integrator.
+    action_low = np.hstack(
+        ([-residual_range] * self._num_actions, [freq_lower_bound] * _NUM_LEGS))
+    action_high = np.hstack(
+        ([residual_range] * self._num_actions, [freq_upper_bound] * _NUM_LEGS))
+    self.action_space = spaces.Box(action_low, action_high)
+
+    # Set the observation space and boundaries.
+    if hasattr(self._gym_env.observation_space, "spaces"):
+      self.observation_space = self._gym_env.observation_space
+      self.observation_space.spaces["pmtg_inplace"] = spaces.Box(
+          -1 * np.ones(2 * _NUM_LEGS), np.ones(2 * _NUM_LEGS))
+    else:
+      lower_bound = self._gym_env.observation_space.low
+      upper_bound = self._gym_env.observation_space.high
+      lower_bound = np.hstack((lower_bound, [-1.] * 2 * _NUM_LEGS))
+      upper_bound = np.hstack((upper_bound, [1.] * 2 * _NUM_LEGS))
+      self.observation_space = spaces.Box(lower_bound, upper_bound)
+
+  def __getattr__(self, attrb):
+    return getattr(self._gym_env, attrb)
+
+  def _modify_observation(self, observation):
+    if isinstance(observation, dict):
+      observation["tg_inplace"] = np.hstack(
+          (np.cos(self._tg_phases), np.sin(self._tg_phases)))
+      return observation
+    else:
+      return np.hstack(
+          (observation, np.cos(self._tg_phases), np.sin(self._tg_phases)))
+
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    """Resets the environment as well as trajectory generators."""
+    self._last_real_time = 0
+    self._num_step = 0
+    self._tg_phases = tg_inplace.reset()
+    if self._num_actions == _LAIKAGO_NUM_ACTIONS:
+      # Use laikago's init pose as zero action.
+      init_pose = np.array(
+          attr.astuple(
+              laikago_pose_utils.LaikagoPose(
+                  abduction_angle_0=laikago_pose_utils
+                  .LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+                  hip_angle_0=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+                  knee_angle_0=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE,
+                  abduction_angle_1=laikago_pose_utils
+                  .LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+                  hip_angle_1=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+                  knee_angle_1=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE,
+                  abduction_angle_2=laikago_pose_utils
+                  .LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+                  hip_angle_2=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+                  knee_angle_2=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE,
+                  abduction_angle_3=laikago_pose_utils
+                  .LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+                  hip_angle_3=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+                  knee_angle_3=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE)))
+      self._init_pose = init_pose
+      observation = self._gym_env.reset(init_pose, reset_duration)
+    else:
+      # Use minitaur's init pose as zero action.
+      init_pose = np.array(
+          attr.astuple(
+              minitaur_pose_utils.MinitaurPose(
+                  swing_angle_0=MINITAUR_INIT_SWING_POS,
+                  swing_angle_1=MINITAUR_INIT_SWING_POS,
+                  swing_angle_2=MINITAUR_INIT_SWING_POS,
+                  swing_angle_3=MINITAUR_INIT_SWING_POS,
+                  extension_angle_0=MINITAUR_INIT_EXTENSION_POS,
+                  extension_angle_1=MINITAUR_INIT_EXTENSION_POS,
+                  extension_angle_2=MINITAUR_INIT_EXTENSION_POS,
+                  extension_angle_3=MINITAUR_INIT_EXTENSION_POS)))
+      initial_motor_angles = minitaur_pose_utils.leg_pose_to_motor_angles(
+          init_pose)
+      observation = self._gym_env.reset(initial_motor_angles, reset_duration)
+    return self._modify_observation(observation)
+
+  def step(self, action):
+    """Steps the wrapped PMTG inplace environment."""
+    time = self._gym_env.get_time_since_reset()
+
+    # Convert the policy's residual actions to motor space.
+    if self._num_actions == _LAIKAGO_NUM_ACTIONS:
+      action_residual = np.array(
+          attr.astuple(
+              laikago_pose_utils.LaikagoPose(*(action[0:self._num_actions]))))
+    else:
+      action_residual = minitaur_pose_utils.leg_pose_to_motor_angles(
+          action[0:self._num_actions])
+
+    self._last_real_time = time
+    self._tg_phases, tg_extensions = tg_inplace.step(
+        self._tg_phases, action[-_NUM_LEGS:], self._gym_env.env_time_step,
+        self._tg_params)
+    # Convert TG's actions to motor space depending on the robot type.
+    if self._num_actions == _LAIKAGO_NUM_ACTIONS:
+      # If the legs have 3 DOF, apply extension directly to knee joints
+      action_tg_motor_space = np.zeros(self._num_actions)
+      for tg_idx, knee_idx in zip(
+          range(_NUM_LEGS), _LAIKAGO_KNEE_ACTION_INDEXES):
+        action_tg_motor_space[knee_idx] = tg_extensions[tg_idx]
+    else:
+      # Conversion to motor space for minitaur robot.
+      action_tg_motor_space = []
+      for idx in range(_NUM_LEGS):
+        extend = tg_extensions[idx]
+        action_tg_motor_space.extend(
+            minitaur_pose_utils.swing_extend_to_motor_angles(idx, 0, extend))
+    new_action = action_tg_motor_space + action_residual
+    if self._num_actions == _LAIKAGO_NUM_ACTIONS:
+      new_action += self._init_pose
+    original_observation, reward, done, _ = self._gym_env.step(new_action)
+
+    return self._modify_observation(original_observation), reward, done, _
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/pmtg_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/pmtg_wrapper_env.py
@@ -0,0 +1,290 @@
+"""A wrapped MinitaurGymEnv with a built-in controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from gym import spaces
+import numpy as np
+import gin
+from pybullet_envs.minitaur.agents.trajectory_generator import tg_simple
+from pybullet_envs.minitaur.envs_v2.utilities import robot_pose_utils
+from pybullet_envs.minitaur.robots.utilities import action_filter
+
+_DELTA_TIME_LOWER_BOUND = 0.0
+_DELTA_TIME_UPPER_BOUND = 3
+
+_GAIT_PHASE_MAP = {
+    "walk": [0, 0.25, 0.5, 0.75],
+    "trot": [0, 0.5, 0.5, 0],
+    "bound": [0, 0.5, 0, 0.5],
+    "pace": [0, 0, 0.5, 0.5],
+    "pronk": [0, 0, 0, 0]
+}
+
+
+@gin.configurable
+class PmtgWrapperEnv(object):
+  """A wrapped LocomotionGymEnv with a built-in trajectory generator."""
+
+  def __init__(self,
+               gym_env,
+               intensity_upper_bound=1.5,
+               min_delta_time=_DELTA_TIME_LOWER_BOUND,
+               max_delta_time=_DELTA_TIME_UPPER_BOUND,
+               integrator_coupling_mode="all coupled",
+               walk_height_coupling_mode="all coupled",
+               variable_swing_stance_ratio=True,
+               swing_stance_ratio=1.0,
+               residual_range=0.15,
+               init_leg_phase_offsets=None,
+               init_gait=None,
+               default_walk_height=0,
+               action_filter_enable=True,
+               action_filter_order=1,
+               action_filter_low_cut=0,
+               action_filter_high_cut=3.0,
+               action_filter_initialize=False,
+               leg_pose_class=None):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      intensity_upper_bound: The upper bound for intensity of the trajectory
+        generator. It can be used to limit the leg movement.
+      min_delta_time: Lower limit for the time in seconds that the trajectory
+        generator can be moved forward at each simulation step. The effective
+        frequency of the gait is based on the delta time multiplied by the
+        internal frequency of the trajectory generator.
+      max_delta_time: Upper limit for the time in seconds that the trajectory
+        generator can be moved forward at each simulation step.
+      integrator_coupling_mode: How the legs should be coupled for integrators.
+      walk_height_coupling_mode: The same coupling mode used for walking walking
+        heights for the legs.
+      variable_swing_stance_ratio: A boolean to indicate if the swing stance
+        ratio can change per time step or not.
+      swing_stance_ratio: Time taken by swing phase vs stance phase. This is
+        only relevant if variable_swing_stance_ratio is False.
+      residual_range: The upper limit for the residual actions that adds to the
+        leg motion. It is 0.15 by default, can be increased for more flexibility
+        or decreased to only to use the trajectory generator's motion.
+      init_leg_phase_offsets: The initial phases of the legs. A list of 4
+        variables within [0,1). The order is front-left, rear-left, front-right
+        and rear-right.
+      init_gait: The initial gait that sets the starting phase difference
+        between the legs. Overrides the arg init_phase_offsets. Has to be
+        "walk", "trot", "bound" or "pronk". Used in vizier search.
+      default_walk_height: Offset for the extension of the legs for the robot.
+        Applied to the legs at every time step. Implicitly affects the walking
+        and standing height of the policy. Zero by default. Units is in
+        extension space (can be considered in radiant since it is a linear
+        transformation to motor angles based on the robot's geometry).
+      action_filter_enable: Use a butterworth filter for the output of the PMTG
+        actions (before conversion to leg swing-extend model). It forces
+        smoother behaviors depending on the parameters used.
+      action_filter_order: The order for the action_filter (1 by default).
+      action_filter_low_cut: The cut for the lower frequencies (0 by default).
+      action_filter_high_cut: The cut for the higher frequencies (3 by default).
+      action_filter_initialize: If the action filter should be initialized when
+        the first action is taken. If enabled, the filter does not affect action
+        value the first time it is called and fills the history with that value.
+      leg_pose_class: A class providing a convert_leg_pose_to_motor_angle
+        instance method or None. If None, robot_pose_utils is used.
+
+    Raises:
+      ValueError if the controller does not implement get_action and
+      get_observation.
+
+    """
+    self._gym_env = gym_env
+    self._num_actions = gym_env.robot.num_motors
+    self._residual_range = residual_range
+    self._min_delta_time = min_delta_time
+    self._max_delta_time = max_delta_time
+    self._leg_pose_util = leg_pose_class() if leg_pose_class else None
+    # If not specified, default leg phase offsets to walking.
+    if init_gait:
+      if init_gait in _GAIT_PHASE_MAP:
+        init_leg_phase_offsets = _GAIT_PHASE_MAP[init_gait]
+      else:
+        raise ValueError("init_gait is not one of the defined gaits.")
+    else:
+      init_leg_phase_offsets = init_leg_phase_offsets or [0, 0.25, 0.5, 0.75]
+    # Create the Trajectory Generator based on the parameters.
+    self._trajectory_generator = tg_simple.TgSimple(
+        intensity_upper_bound=intensity_upper_bound,
+        integrator_coupling_mode=integrator_coupling_mode,
+        walk_height_coupling_mode=walk_height_coupling_mode,
+        variable_swing_stance_ratio=variable_swing_stance_ratio,
+        swing_stance_ratio=swing_stance_ratio,
+        init_leg_phase_offsets=init_leg_phase_offsets)
+
+    action_dim = self._extend_action_space()
+    self._extend_obs_space()
+
+    self._default_walk_height = default_walk_height
+    self._action_filter_enable = action_filter_enable
+    if self._action_filter_enable:
+      self._action_filter_initialize = action_filter_initialize
+      self._action_filter_order = action_filter_order
+      self._action_filter_low_cut = action_filter_low_cut
+      self._action_filter_high_cut = action_filter_high_cut
+      self._action_filter = self._build_action_filter(action_dim)
+
+  def _extend_obs_space(self):
+    """Extend observation space to include pmtg phase variables."""
+    # Set the observation space and boundaries.
+    lower_bound, upper_bound = self._get_observation_bounds()
+    if hasattr(self._gym_env.observation_space, "spaces"):
+      new_obs_space = spaces.Box(np.array(lower_bound), np.array(upper_bound))
+      self.observation_space.spaces.update({"pmtg_phase": new_obs_space})
+    else:
+      lower_bound = np.append(self._gym_env.observation_space.low, lower_bound)
+      upper_bound = np.append(self._gym_env.observation_space.high, upper_bound)
+      self.observation_space = spaces.Box(
+          np.array(lower_bound), np.array(upper_bound), dtype=np.float32)
+
+  def _extend_action_space(self):
+    """Extend the action space to include pmtg parameters."""
+    # Add the action boundaries for delta time, one per integrator.
+    action_low = [-self._residual_range] * self._num_actions
+    action_high = [self._residual_range] * self._num_actions
+    action_low = np.append(action_low, [self._min_delta_time] *
+                           self._trajectory_generator.num_integrators)
+    action_high = np.append(action_high, [self._max_delta_time] *
+                            self._trajectory_generator.num_integrators)
+
+    # Add the action boundaries for parameters of the trajectory generator.
+    l_bound, u_bound = self._trajectory_generator.get_parameter_bounds()
+    action_low = np.append(action_low, l_bound)
+    action_high = np.append(action_high, u_bound)
+    self.action_space = spaces.Box(
+        np.array(action_low), np.array(action_high), dtype=np.float32)
+    return len(action_high)
+
+  def __getattr__(self, attrb):
+    return getattr(self._gym_env, attrb)
+
+  def _modify_observation(self, observation):
+    if isinstance(observation, dict):
+      observation["pmtg_phase"] = self._trajectory_generator.get_state()
+      return observation
+    else:
+      return np.append(observation, self._trajectory_generator.get_state())
+
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    """Resets the environment as well as the trajectory generator(s).
+
+    Args:
+      initial_motor_angles: Unused for PMTG. Instead, it sets the legs to a pose
+        with the neutral action for the trajectory generator.
+      reset_duration: Float. The time (in seconds) needed to rotate all motors
+        to the desired initial values.
+
+    Returns:
+      A numpy array contains the initial observation after reset.
+    """
+    del initial_motor_angles
+    if self._action_filter_enable:
+      self._reset_action_filter()
+    self._last_real_time = 0
+    self._num_step = 0
+    self._target_speed_coef = 0.0
+    if self._trajectory_generator:
+      self._trajectory_generator.reset()
+    if self._leg_pose_util:
+      initial_motor_angles = self._leg_pose_util.convert_leg_pose_to_motor_angles(
+          [0, 0, 0] * 4)
+    else:
+      initial_motor_angles = robot_pose_utils.convert_leg_pose_to_motor_angles(
+          self._gym_env.robot_class, [0, 0, 0] * 4)
+    observation = self._gym_env.reset(initial_motor_angles, reset_duration)
+    return self._modify_observation(observation)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the modified observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+
+    """
+
+    if action is None:
+      raise ValueError("Action cannot be None")
+
+    if self._action_filter_enable:
+      action = self._filter_action(action)
+
+    time = self._gym_env.get_time_since_reset()
+
+    action_residual = action[0:self._num_actions]
+    # Add the default walking height offset to extension.
+    dimensions = len(action_residual) // 4
+    action_residual[dimensions - 1::dimensions] += self._default_walk_height
+    # Calculate trajectory generator's output based on the rest of the actions.
+    delta_real_time = time - self._last_real_time
+    self._last_real_time = time
+    action_tg = self._trajectory_generator.get_actions(
+        delta_real_time, action[self._num_actions:])
+    # If the residuals have a larger dimension, extend trajectory generator's
+    # actions to include abduction motors.
+    if len(action_tg) == 8 and len(action_residual) == 12:
+      for i in [0, 3, 6, 9]:
+        action_tg.insert(i, 0)
+    # Add TG actions with residual actions (in swing - extend space).
+    action_total = [a + b for a, b in zip(action_tg, action_residual)]
+    # Convert them to motor space based on the robot-specific conversions.
+    if self._leg_pose_util:
+      action_motor_space = self._leg_pose_util.convert_leg_pose_to_motor_angles(
+          action_total)
+    else:
+      action_motor_space = robot_pose_utils.convert_leg_pose_to_motor_angles(
+          self._gym_env.robot_class, action_total)
+    original_observation, reward, done, _ = self._gym_env.step(
+        action_motor_space)
+
+    return self._modify_observation(original_observation), np.float32(
+        reward), done, _
+
+  def _get_observation_bounds(self):
+    """Get the bounds of the observation added from the trajectory generator.
+
+    Returns:
+      lower_bounds: Lower bounds for observations.
+      upper_bounds: Upper bounds for observations.
+    """
+    lower_bounds = self._trajectory_generator.get_state_lower_bounds()
+    upper_bounds = self._trajectory_generator.get_state_upper_bounds()
+    return lower_bounds, upper_bounds
+
+  def _build_action_filter(self, num_joints):
+    order = self._action_filter_order
+    low_cut = self._action_filter_low_cut
+    high_cut = self._action_filter_high_cut
+    sampling_rate = 1 / (0.01)
+    a_filter = action_filter.ActionFilterButter([low_cut], [high_cut],
+                                                sampling_rate, order,
+                                                num_joints)
+    return a_filter
+
+  def _reset_action_filter(self):
+    self._action_filter.reset()
+    self._action_filter_empty = True
+    return
+
+  def _filter_action(self, action):
+    if self._action_filter_empty and self._action_filter_initialize:
+      # If initialize is selected and it is the first time filter is called,
+      # fill the buffer with that action so that it starts from that value
+      # instead of zero(s).
+      init_action = np.array(action).reshape(len(action), 1)
+      self._action_filter.reset(init_action)
+      self._action_filter_empty = False
+    filtered_action = self._action_filter.filter(np.array(action))
+    return filtered_action
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/simple_openloop.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/simple_openloop.py
@@ -0,0 +1,127 @@
+"""Simple openloop trajectory generators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import attr
+from gym import spaces
+import numpy as np
+
+import gin
+from pybullet_envs.minitaur.envs_v2.utilities import laikago_pose_utils
+from pybullet_envs.minitaur.envs_v2.utilities import minitaur_pose_utils
+
+
+@gin.configurable
+class MinitaurPoseOffsetGenerator(object):
+  """A trajectory generator that return a constant leg pose."""
+
+  def __init__(self,
+               init_swing=0,
+               init_extension=2.0,
+               init_pose=None,
+               action_scale=1.0,
+               action_limit=0.5):
+    """Initializes the controller.
+
+    Args:
+      init_swing: the swing of the default pose offset
+      init_extension: the extension of the default pose offset
+      init_pose: the default pose offset, which is None by default. If not None,
+        it will define the default pose offset while ignoring init_swing and
+        init_extension.
+      action_scale: changes the magnitudes of actions
+      action_limit: clips actions
+    """
+    if init_pose is None:
+      self._pose = np.array(
+          attr.astuple(
+              minitaur_pose_utils.MinitaurPose(
+                  swing_angle_0=init_swing,
+                  swing_angle_1=init_swing,
+                  swing_angle_2=init_swing,
+                  swing_angle_3=init_swing,
+                  extension_angle_0=init_extension,
+                  extension_angle_1=init_extension,
+                  extension_angle_2=init_extension,
+                  extension_angle_3=init_extension)))
+    else:  # Ignore init_swing and init_extension
+      self._pose = np.array(init_pose)
+    action_high = np.array([action_limit] * minitaur_pose_utils.NUM_MOTORS)
+    self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
+    self._action_scale = action_scale
+
+  def reset(self):
+    pass
+
+  def get_action(self, current_time=None, input_action=None):
+    """Computes the trajectory according to input time and action.
+
+    Args:
+      current_time: The time in gym env since reset.
+      input_action: A numpy array. The input leg pose from a NN controller.
+
+    Returns:
+      A numpy array. The desired motor angles.
+    """
+    del current_time
+    return minitaur_pose_utils.leg_pose_to_motor_angles(
+        self._pose + self._action_scale * np.array(input_action))
+
+  def get_observation(self, input_observation):
+    """Get the trajectory generator's observation."""
+
+    return input_observation
+
+
+@gin.configurable
+class LaikagoPoseOffsetGenerator(object):
+  """A trajectory generator that return constant motor angles."""
+
+  def __init__(
+      self,
+      init_abduction=laikago_pose_utils.LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
+      init_hip=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE,
+      init_knee=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE,
+      action_limit=0.5,
+      ):
+    """Initializes the controller."""
+    self._pose = np.array(
+        attr.astuple(
+            laikago_pose_utils.LaikagoPose(
+                abduction_angle_0=init_abduction,
+                hip_angle_0=init_hip,
+                knee_angle_0=init_knee,
+                abduction_angle_1=init_abduction,
+                hip_angle_1=init_hip,
+                knee_angle_1=init_knee,
+                abduction_angle_2=init_abduction,
+                hip_angle_2=init_hip,
+                knee_angle_2=init_knee,
+                abduction_angle_3=init_abduction,
+                hip_angle_3=init_hip,
+                knee_angle_3=init_knee)))
+    action_high = np.array([action_limit] * 12)
+    self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
+
+  def reset(self):
+    pass
+
+  def get_action(self, current_time=None, input_action=None):
+    """Computes the trajectory according to input time and action.
+
+    Args:
+      current_time: The time in gym env since reset.
+      input_action: A numpy array. The input leg pose from a NN controller.
+
+    Returns:
+      A numpy array. The desired motor angles.
+    """
+    del current_time
+    return self._pose + input_action
+
+  def get_observation(self, input_observation):
+    """Get the trajectory generator's observation."""
+
+    return input_observation
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/state_machine_based_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/state_machine_based_wrapper_env.py
@@ -0,0 +1,321 @@
+"""A wrapped Quadruped with State-machine based controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import enum
+import gin
+import gym
+import numpy as np
+
+NUM_LEGS = 4
+ACTION_DIM_COM = 2
+ACTION_DIM_TOE = 1
+ACTION_DIM_TOTAL = ACTION_DIM_COM + ACTION_DIM_TOE
+OBSERVATION_DIM_LEG_ID = NUM_LEGS
+OBSERVATION_DIM_TOE_POS = 2 * NUM_LEGS
+OBSERVATION_DIM_TOTAL = OBSERVATION_DIM_TOE_POS + OBSERVATION_DIM_LEG_ID
+
+
+# States of the state machine.
+class GaitStateMachine(enum.IntEnum):
+  """The state machine for quadruped gait."""
+  STEP_LEFT_FRONT_TOE = 0
+  STEP_RIGHT_HIND_TOE = 1
+  STEP_RIGHT_FRONT_TOE = 2
+  STEP_LEFT_HIND_TOE = 3
+  TOTAL_GAIT_STATE_NUM = 4
+
+
+@gin.configurable
+class StateMachineBasedWrapperEnv(object):
+  """An env using IK to convert toe positions to joint angles.
+
+    The state machine consists of 4 states. During each state, the center of
+    mass of the base link is moved first, then one of the legs will take a
+    step by following a planned elliptical trajectory. The legs will move in
+    the order of front left -> hind right -> front right -> hind left.
+    The state transition is determined by elapsed time since last transition.
+    Observation (16 dimensions):
+    [one hot vector of the state id, local toe positions in x and y direction]
+    Action (3 dimensions):
+    [target moving distance of the current moving leg in x direction,
+     target moving distance of base COM in x direction,
+     target moving distance of base COM in y direction]
+  """
+
+  def __init__(self,
+               gym_env,
+               default_local_toe_positions,
+               toe_link_indices=(3, 7, 11, 15),
+               foot_lift_height=0.15,
+               state_duration=2.0,
+               action_lower_bound=(-0.0, -0.25, -0.25),
+               action_upper_bound=(0.3, 0.25, 0.25),
+               state_to_foot_id=(0, 3, 2, 1)):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      default_local_toe_positions: A list of vectors that contains the default
+        local position of each toe.
+      toe_link_indices: A list of indices to the toe link. Used for calculating
+        local toe positions.
+      foot_lift_height: Specifies how high the foot lifts during swing stage.
+      state_duration: Specifies the duration of each state.
+      action_lower_bound: Lower bound for the actions.
+      action_upper_bound: Upper bound for the actions.
+      state_to_foot_id: Mapping from state machine state to foot id.
+    """
+    self._gym_env = gym_env
+
+    assert len(action_lower_bound) == ACTION_DIM_TOTAL
+    assert len(action_upper_bound) == ACTION_DIM_TOTAL
+    self.action_space = gym.spaces.Box(
+        np.array(action_lower_bound), np.array(action_upper_bound))
+    observation_lower_bound = np.array([-1.0] * OBSERVATION_DIM_TOTAL)
+    observation_upper_bound = np.array([1.0] * OBSERVATION_DIM_TOTAL)
+    self.observation_space = gym.spaces.Box(observation_lower_bound,
+                                            observation_upper_bound)
+    self._default_local_toe_positions = default_local_toe_positions
+    self._toe_link_indices = toe_link_indices
+    self._foot_lift_height = foot_lift_height
+    self._state_to_foot_id = state_to_foot_id
+
+    # Use the largest value for bounding the toe movement
+    self._toe_move_bound = np.max(
+        np.abs(np.concatenate([action_lower_bound, action_upper_bound])))
+
+    # Duration of each state
+    self.state_machine_state_duration = [state_duration] * int(
+        GaitStateMachine.TOTAL_GAIT_STATE_NUM)
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def _state_machine_observation(self):
+    """Get the current observation: [state_id, local_toe_positions]."""
+    observation = []
+    # One-hot vector for the current state machine state.
+    one_hot_foot_id = np.zeros(GaitStateMachine.TOTAL_GAIT_STATE_NUM)
+    one_hot_foot_id[self.current_state_machine_state] = 1
+    observation.extend(one_hot_foot_id)
+
+    # Toe positions in X and Y direction in the local space.
+    for toe_index in range(NUM_LEGS):
+      toe_pos_local = self.current_local_toe_positions[toe_index]
+      toe_pos_local_xy = [toe_pos_local[0], toe_pos_local[1]]
+      observation.extend(toe_pos_local_xy)
+
+    return observation
+
+  def _get_constant_accel_interpolation(self, interp_ratio):
+    """Modify an interpolation between 0 and 1 to have constant acceleration."""
+    assert interp_ratio <= 1.0 and interp_ratio >= 0.0
+    if interp_ratio < 0.5:
+      return 0.5 * (2 * interp_ratio)**2
+    else:
+      return -0.5 * (2 * interp_ratio - 2)**2 + 1
+
+  def _move_com(self, target_com_movement, time_since_transition,
+                state_duration):
+    """Get the ik action for moving the COM.
+
+    Args:
+      target_com_movement: Target COM movement relative to the previous COM
+        position in the x-y plane.
+      time_since_transition: Time elapsed since last state machien transition.
+      state_duration: Duration of the state machine. The first half will be used
+        for moving COM and the second half for moving the swing leg.
+
+    Returns:
+      ik_action: ik targets for moving the COM.
+    """
+    com_movement_ratio = np.clip(
+        time_since_transition / ((state_duration - 0.0) / 2.0), 0, 1)
+    com_movement_ratio = self._get_constant_accel_interpolation(
+        com_movement_ratio)
+    current_com_movement = np.array(target_com_movement) * com_movement_ratio
+
+    ik_action = []
+    for toe_index in range(NUM_LEGS):
+      toe_pos_local = np.copy(self.current_local_toe_positions[toe_index])
+      toe_pos_local[2] = self._default_local_toe_positions[toe_index][2]
+      toe_pos_local[0] -= current_com_movement[0]
+      toe_pos_local[1] -= current_com_movement[1]
+      toe_pos_local[0] = np.clip(
+          toe_pos_local[0], self._default_local_toe_positions[toe_index][0] -
+          self._toe_move_bound,
+          self._default_local_toe_positions[toe_index][0] +
+          self._toe_move_bound)
+      toe_pos_local[1] = np.clip(
+          toe_pos_local[1], self._default_local_toe_positions[toe_index][1] -
+          self._toe_move_bound,
+          self._default_local_toe_positions[toe_index][1] +
+          self._toe_move_bound)
+      ik_action.extend(toe_pos_local)
+
+    zero_translation = [0, 0, 0]
+    identity_rotation = [0, 0, 0, 1]
+    ik_action.extend(zero_translation)
+    ik_action.extend(identity_rotation)
+
+    return ik_action
+
+  def _move_leg(self, target_toe_movement, time_since_transition,
+                state_duration):
+    """Get the ik action for moving the swing leg.
+
+    Args:
+      target_toe_movement: Target toe movement relative to the default toe
+        position in the positive x direction.
+      time_since_transition: Time elapsed since last state machien transition.
+      state_duration: Duration of the state machine. The first half will be used
+        for moving COM and the second half for moving the swing leg.
+
+    Returns:
+      ik_action: ik targets for moving the swing leg.
+    """
+
+    # The target toe position at the end of the movement.
+    target_toe_local_position = np.array(self._default_local_toe_positions[
+        self._state_to_foot_id[self.current_state_machine_state]])
+    target_toe_local_position[0] += target_toe_movement
+
+    # Toe position at the beginning of the movement.
+    initial_toe_local_position = np.array(self.current_local_toe_positions[
+        self._state_to_foot_id[self.current_state_machine_state]])
+
+    # Auxiliary variables for computing the interpolation between the current
+    # toe position and the target toe position.
+    toe_circle_radius = 0.5 * np.linalg.norm(
+        np.array(target_toe_local_position) - initial_toe_local_position)
+    toe_moving_direction = np.array(
+        target_toe_local_position) - initial_toe_local_position
+    toe_moving_direction /= np.max([np.linalg.norm(toe_moving_direction), 1e-5])
+    toe_traj_scale_ratio = self._foot_lift_height / np.max(
+        [toe_circle_radius, 1e-5])
+
+    # Current percentage of time into the state machine.
+    toe_movement_ratio = np.clip(
+        (time_since_transition - state_duration / 2.0) /
+        ((state_duration - 0.0) / 2.0), 0, 1)
+    toe_movement_ratio = self._get_constant_accel_interpolation(
+        toe_movement_ratio)
+    toe_circle_moved_angle = np.pi * toe_movement_ratio
+
+    # Target horizontal movement from the previous local toe position.
+    target_toe_horizontal_movement = toe_circle_radius - np.cos(
+        toe_circle_moved_angle) * toe_circle_radius
+    current_toe_target = np.array([
+        target_toe_horizontal_movement * toe_moving_direction[0],
+        target_toe_horizontal_movement * toe_moving_direction[1],
+        np.sin(toe_circle_moved_angle) * toe_circle_radius *
+        toe_traj_scale_ratio
+    ]) + initial_toe_local_position
+
+    ik_action = []
+    for toe_index in range(NUM_LEGS):
+      toe_pos_local = np.copy(self.current_local_toe_positions[toe_index])
+      toe_pos_local[2] = self._default_local_toe_positions[toe_index][2]
+      if toe_index == self._state_to_foot_id[self.current_state_machine_state]:
+        toe_pos_local[0] = current_toe_target[0]
+        toe_pos_local[1] = current_toe_target[1]
+        toe_pos_local[2] = current_toe_target[2]
+
+      ik_action.extend(toe_pos_local)
+
+    zero_translation = [0, 0, 0]
+    identity_rotation = [0, 0, 0, 1]
+    ik_action.extend(zero_translation)
+    ik_action.extend(identity_rotation)
+    return ik_action
+
+  def _update_state_machine_transition(self):
+    """Update the state machine state if the duration has been reached."""
+    self.current_state_machine_state = (self.current_state_machine_state + 1
+                                       ) % GaitStateMachine.TOTAL_GAIT_STATE_NUM
+    self.last_state_transition_time = self.robot.GetTimeSinceReset()
+
+  def _update_local_toe_positions(self):
+    """Update the local position of the toes."""
+    identity_orientation = [0, 0, 0, 1]
+    base_position = self.robot.GetBasePosition()
+    base_orientation = self.robot.GetBaseOrientation()
+    inv_base_position, inv_base_orientation = self.pybullet_client.invertTransform(
+        base_position, base_orientation)
+    for toe_index in range(NUM_LEGS):
+      toe_pose_world = self.pybullet_client.getLinkState(
+          self.robot.quadruped, self._toe_link_indices[toe_index])[0]
+      toe_pos_local, _ = self.pybullet_client.multiplyTransforms(
+          inv_base_position, inv_base_orientation, toe_pose_world,
+          identity_orientation)
+      self.current_local_toe_positions[toe_index] = np.array(toe_pos_local)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the modified observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+
+    """
+    if action is None:
+      raise ValueError("Action cannot be None")
+    action = np.clip(action, self.action_space.low, self.action_space.high)
+    sum_reward = 0
+    step_num = 0
+    state_duration = self.state_machine_state_duration[
+        self.current_state_machine_state]
+    time_since_transition = self.robot.GetTimeSinceReset(
+    ) - self.last_state_transition_time
+
+    # Move COM
+    while time_since_transition < state_duration / 2.0:
+      ik_actions = self._move_com(action[1:3], time_since_transition,
+                                  state_duration)
+      _, reward, done, _ = self._gym_env.step(ik_actions)
+      sum_reward += reward
+      step_num += 1
+      time_since_transition = self.robot.GetTimeSinceReset(
+      ) - self.last_state_transition_time
+      if done:
+        break
+    self._update_local_toe_positions()
+    # Move Leg
+    while time_since_transition < state_duration:
+      ik_actions = self._move_leg(action[0], time_since_transition,
+                                  state_duration)
+      _, reward, done, _ = self._gym_env.step(ik_actions)
+      sum_reward += reward
+      step_num += 1
+      time_since_transition = self.robot.GetTimeSinceReset(
+      ) - self.last_state_transition_time
+      if done:
+        break
+    self._update_local_toe_positions()
+    self._update_state_machine_transition()
+
+    state_machine_observation = self._state_machine_observation()
+
+    return state_machine_observation, sum_reward, done, _
+
+  def reset(self):
+    """Reset the simulation and state machine states."""
+    self.current_state_machine_state = GaitStateMachine.STEP_LEFT_FRONT_TOE
+    self.last_state_transition_time = 0
+    self.current_local_toe_positions = np.copy(
+        self._default_local_toe_positions)
+
+    self._gym_env.reset()
+
+    state_machine_observation = self._state_machine_observation()
+
+    return state_machine_observation
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/step_based_curriculum_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/step_based_curriculum_wrapper_env.py
@@ -0,0 +1,231 @@
+"""A wrapped LocomotionGymEnv with functions that change the world and task."""
+
+import numbers
+from typing import Optional, Sequence, Text, Tuple, Union
+
+import numpy as np
+
+import gin
+
+
+@gin.configurable
+class CurriculumParameter(object):
+  """Definition of a env parameter tuned by a curriculum."""
+
+  def __init__(self,
+               name: Text,
+               init_val: numbers.Real,
+               bounds: Tuple[numbers.Real, numbers.Real]):
+    """A parameter to tune throughout the curriculum.
+
+    Args:
+      name: The name of the curriculum parameter. This must be the name of
+        attribute of the scene class.
+      init_val: The value to use at the start of the curriculum.
+      bounds: A tuple of [lower_bound, upper_bound] defining the minimum and
+        maximum values of the parameter.
+    """
+    self.name = name
+
+    if not isinstance(bounds[0], type(bounds[1])):
+      raise ValueError("All elements in [bounds] must be of the same type.")
+
+    if (init_val < min(bounds)) or (init_val > max(bounds)):
+      raise ValueError("Initial parameter value must lie in range defined"
+                       " by [bounds].")
+
+    if not isinstance(init_val, type(bounds[0])):
+      raise ValueError("[init_val] type must match the type of the elements"
+                       " in [bounds].")
+
+    self.init_val = init_val
+    self.bounds = bounds
+    self.dtype = type(init_val)
+
+
+@gin.configurable
+class RandomSamplingCurriculumParameter(CurriculumParameter):
+  """Env parameter whose value is sampled randomly without curriculum."""
+
+  def __init__(self, name: Text, bounds: Tuple[numbers.Real, numbers.Real]):
+    """A parameter whose value to sample randomly.
+
+    Args:
+      name: The name of the parameter.
+      bounds: A tuple of [lower_bound, upper_bound] defining the minimum and
+        maximum values of the parameter.
+    """
+    super(RandomSamplingCurriculumParameter, self).__init__(
+        name=name, init_val=bounds[0], bounds=bounds)
+
+  def sample(
+      self, step: numbers.Real,
+      curriculum_steps: Optional[numbers.Real] = None) -> Union[int, float]:
+    del step, curriculum_steps
+    sampled_val = np.random.uniform(*self.bounds)
+
+    if not isinstance(self.bounds[0], float):
+      sampled_val = int(round(sampled_val))
+
+    return sampled_val
+
+  def __call__(
+      self, step: numbers.Real,
+      curriculum_steps: Optional[numbers.Real] = None) -> Union[int, float]:
+    return self.sample(step, curriculum_steps)
+
+
+@gin.configurable
+class LinearStepBasedCurriculumParameter(CurriculumParameter):
+  """Definition of a env parameter tuned by a linear time-based curriculum."""
+
+  def __init__(self,
+               name: Text,
+               init_val: numbers.Real,
+               bounds: Tuple[numbers.Real, numbers.Real],
+               curriculum_steps: Optional[numbers.Real] = None):
+    """A parameter to tune throughout the curriculum.
+
+    Args:
+      name: The name of the curriculum parameter. This must be the name of
+        attribute of the scene class.
+      init_val: The value to use at the start of the curriculum.
+      bounds: A tuple of [lower_bound, upper_bound] defining the minimum and
+        maximum values of the parameter.
+      curriculum_steps: Integer defining the number of steps to take when
+        varying the curriculum parameter value from the init_val to either
+        bound. If None is specified, then the curriculum must provide the
+        curriculum_steps when sampling.
+    """
+    super(LinearStepBasedCurriculumParameter, self).__init__(
+        name=name, init_val=init_val, bounds=bounds)
+    self.curriculum_steps = curriculum_steps
+
+  def get_bounds_at_step(
+      self, step: numbers.Real,
+      curriculum_steps: Optional[numbers.Real] = None
+    ) -> Tuple[Union[int, float], Union[int, float]]:
+    """Compute the bounds of the parameter at the current step.
+
+    Args:
+      step: An integer defining the current timestep.
+      curriculum_steps: Optional curriculum steps. Must be passed if not passed
+        during initialization.
+    Returns:
+      A tuple containing the lower bound and upper bound at the current step.
+    """
+
+    if not self.curriculum_steps and not curriculum_steps:
+      raise ValueError("curriculum_steps not defined. Must be passed upon"
+                       " initialization or must specified by curriculum"
+                       " wrapper env on method call.")
+
+    if self.curriculum_steps:
+      curriculum_steps = self.curriculum_steps
+
+    prog = min(float(step) / curriculum_steps, 1.0)
+    curr_lower_bound = self.init_val - prog * (self.init_val - self.bounds[0])
+    curr_upper_bound = self.init_val + prog * (self.bounds[1] - self.init_val)
+
+    if not isinstance(self.bounds[0], float):
+      curr_lower_bound = int(round(curr_lower_bound))
+      curr_upper_bound = int(round(curr_upper_bound))
+
+    return (curr_lower_bound, curr_upper_bound)
+
+  def sample(
+      self, step: numbers.Real,
+      curriculum_steps: Optional[numbers.Real] = None) -> Union[int, float]:
+    sampled_val = np.random.uniform(*self.get_bounds_at_step(
+        step, curriculum_steps))
+
+    if not isinstance(self.bounds[0], float):
+      sampled_val = int(round(sampled_val))
+
+    return sampled_val
+
+  def __call__(
+      self, step: numbers.Real,
+      curriculum_steps: Optional[numbers.Real] = None) -> Union[int, float]:
+    return self.sample(step, curriculum_steps)
+
+
+@gin.configurable
+class Task(object):
+  """Defines a single task in the environment and its corresponding params."""
+
+  def __init__(self, name: Text,
+               curriculum_parameters: Sequence[CurriculumParameter]):
+    """Initialize the task.
+
+    Args:
+      name: The name of the task.
+      curriculum_parameters: A list of CurriculumParameter instances which
+        define the parameters that this task makes use of.
+    """
+    self.name = name
+    self.curriculum_parameters = curriculum_parameters
+
+
+@gin.configurable
+class StepBasedCurriculumWrapperEnv(object):
+  """A wrapper to tune the scene parameters linearly with the steps taken."""
+
+  def __init__(self, env, tasks: Sequence[Task],
+               default_curriculum_steps: Optional[numbers.Real] = None,
+               reset_total_step_count_val: numbers.Real = -1,
+               steps_before_curriculum_start: numbers.Real = 0):
+    """Initializes the linear curriculum wrapper env.
+
+    Args:
+      env: An instance of a (potentially previously wrapped) LocomotionGymEnv.
+      tasks: Various tasks to shuffle through throughout the curriculum.
+      default_curriculum_steps: Optional default value for curriculum steps.
+      reset_total_step_count_val: Step at which to reset the total step count.
+        The internal total_step_count is reset to 0 once this value is reached.
+      steps_before_curriculum_start: Steps to take in environment before the
+        curriculum begins.
+    """
+    self._gym_env = env
+    self._tasks = tasks
+    self._default_curriculum_steps = default_curriculum_steps
+    self._reset_total_step_count_val = reset_total_step_count_val
+    self._steps_before_curriculum_start = steps_before_curriculum_start
+
+    # Total number of environment steps.
+    self._total_step_count = 0
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def set_scene_params(self):
+    # Choose a task at random.
+    curr_task = np.random.choice(self._tasks)
+
+    # Cycle through the task's parameters and set them in the scene.
+    self._gym_env.scene.reset_scene_params()
+    self._gym_env.scene.scene_type = curr_task.name
+    for curriculum_parameter in curr_task.curriculum_parameters:
+      setattr(
+          self._gym_env.scene, curriculum_parameter.name,
+          curriculum_parameter(
+              self._total_step_count - self._steps_before_curriculum_start,
+              self._default_curriculum_steps))
+
+  def reset(self, *args, **kwargs):
+    """Reset and adjust the environment."""
+
+    # Update the total step count.
+    self._total_step_count += self._gym_env.env_step_counter
+    if self._reset_total_step_count_val >= 0:
+      if self._total_step_count >= self._reset_total_step_count_val:
+        self._total_step_count = 0
+
+    if self._total_step_count < self._steps_before_curriculum_start:
+      return self._get_observation()
+
+    self.set_scene_params()
+    self._gym_env.reset(*args, **kwargs)
+
+    return self._get_observation()
+
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/trajectory_generator_wrapper_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/trajectory_generator_wrapper_env.py
@@ -0,0 +1,81 @@
+"""A wrapped MinitaurGymEnv with a built-in controller."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import gin
+
+
+@gin.configurable
+class TrajectoryGeneratorWrapperEnv(object):
+  """A wrapped LocomotionGymEnv with a built-in trajectory generator."""
+
+  def __init__(self, gym_env, trajectory_generator):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      trajectory_generator: A trajectory_generator that can potentially modify
+        the action and observation. Typticall generators includes the PMTG and
+        openloop signals. Expected to have get_action and get_observation
+        interfaces.
+
+    Raises:
+      ValueError if the controller does not implement get_action and
+      get_observation.
+
+    """
+    self._gym_env = gym_env
+    if not hasattr(trajectory_generator, 'get_action') or not hasattr(
+        trajectory_generator, 'get_observation'):
+      raise ValueError(
+          'The controller does not have the necessary interface(s) implemented.'
+      )
+
+    self._trajectory_generator = trajectory_generator
+
+    # The trajectory generator can subsume the action/observation space.
+    if hasattr(trajectory_generator, 'observation_space'):
+      self.observation_space = self._trajectory_generator.observation_space
+
+    if hasattr(trajectory_generator, 'action_space'):
+      self.action_space = self._trajectory_generator.action_space
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def _modify_observation(self, observation):
+    return self._trajectory_generator.get_observation(observation)
+
+  def reset(self, initial_motor_angles=None, reset_duration=1.0):
+    if getattr(self._trajectory_generator, 'reset'):
+      self._trajectory_generator.reset()
+    observation = self._gym_env.reset(initial_motor_angles, reset_duration)
+    return self._modify_observation(observation)
+
+  def step(self, action):
+    """Steps the wrapped environment.
+
+    Args:
+      action: Numpy array. The input action from an NN agent.
+
+    Returns:
+      The tuple containing the modified observation, the reward, the epsiode end
+      indicator.
+
+    Raises:
+      ValueError if input action is None.
+
+    """
+
+    if action is None:
+      raise ValueError('Action cannot be None')
+
+    new_action = self._trajectory_generator.get_action(
+        self._gym_env.robot.GetTimeSinceReset(), action)
+
+    original_observation, reward, done, _ = self._gym_env.step(new_action)
+
+    return self._modify_observation(original_observation), reward, done, _
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/walking_wrapper.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/env_wrappers/walking_wrapper.py
@@ -0,0 +1,134 @@
+"""Wraps a quadruped walking controller for navigation control."""
+
+from typing import Any
+import gin
+from gym import spaces
+import numpy as np
+
+from pybullet_envs.minitaur.agents.baseline_controller import com_velocity_estimator
+from pybullet_envs.minitaur.agents.baseline_controller import locomotion_controller
+from pybullet_envs.minitaur.agents.baseline_controller import openloop_gait_generator
+from pybullet_envs.minitaur.agents.baseline_controller import raibert_swing_leg_controller
+from pybullet_envs.minitaur.agents.baseline_controller import torque_stance_leg_controller
+
+_N_LEGS = 4
+_STANCE_DURATION_SECONDS = [
+    0.25
+] * _N_LEGS  # The stance phase duration for each leg.
+_DUTY_FACTOR = [
+    0.6
+] * _N_LEGS  # Percentage of the leg in the stance phase within the cycle.
+_BODY_HEIGHT = 0.45
+_INIT_PHASE_FULL_CYCLE = [0, 0.5, 0.5, 0]
+
+
+def _setup_controller(robot: Any) -> locomotion_controller.LocomotionController:
+  """Creates the controller."""
+  desired_speed = (0, 0)
+  desired_twisting_speed = 0
+
+  gait_generator = openloop_gait_generator.OpenloopGaitGenerator(
+      robot,
+      stance_duration=_STANCE_DURATION_SECONDS,
+      duty_factor=_DUTY_FACTOR,
+      initial_leg_phase=_INIT_PHASE_FULL_CYCLE)
+  state_estimator = com_velocity_estimator.COMVelocityEstimator(robot)
+  sw_controller = raibert_swing_leg_controller.RaibertSwingLegController(
+      robot,
+      gait_generator,
+      state_estimator,
+      desired_speed=desired_speed,
+      desired_twisting_speed=desired_twisting_speed,
+      desired_height=_BODY_HEIGHT,
+  )
+  st_controller = torque_stance_leg_controller.TorqueStanceLegController(
+      robot,
+      gait_generator,
+      state_estimator,
+      desired_speed=desired_speed,
+      desired_twisting_speed=desired_twisting_speed,
+      desired_body_height=_BODY_HEIGHT,
+      body_mass=215 / 9.8,
+      body_inertia=(0.07335, 0, 0, 0, 0.25068, 0, 0, 0, 0.25447),
+  )
+
+  controller = locomotion_controller.LocomotionController(
+      robot=robot,
+      gait_generator=gait_generator,
+      state_estimator=state_estimator,
+      swing_leg_controller=sw_controller,
+      stance_leg_controller=st_controller,
+      clock=robot.GetTimeSinceReset)
+  return controller
+
+
+def _update_controller_params(
+    controller: locomotion_controller.LocomotionController,
+    lin_speed: np.ndarray, ang_speed: float):
+  """Apply the desired speed and twisting speed."""
+  controller.swing_leg_controller.desired_speed = lin_speed
+  controller.swing_leg_controller.desired_twisting_speed = ang_speed
+  controller.stance_leg_controller.desired_speed = lin_speed
+  controller.stance_leg_controller.desired_twisting_speed = ang_speed
+
+
+@gin.configurable
+class WalkingWrapper(object):
+  """Wraps a baseline walking controller for Laikago/Vision60."""
+
+  def __init__(self,
+               gym_env: Any,
+               action_repeat=20,
+               speed_bound=(-0.3, 0.3),
+               angular_speed_bound=(-0.3, 0.3)):
+    """Initialzes the wrapped env.
+
+    Args:
+      gym_env: An instance of LocomotionGymEnv.
+      action_repeat: Number of control steps to run low level controller with
+        the high level inputs per step().
+      speed_bound: The min/max of the input speed.
+      angular_speed_bound: The min/max of the twisting speed.
+    """
+    self._gym_env = gym_env
+    self._controller = _setup_controller(self._gym_env.robot)
+    self._action_repeat = action_repeat
+
+    action_low = np.array(speed_bound)
+    action_high = np.array(angular_speed_bound)
+
+    # Overwrite the action space.
+    self.action_space = spaces.Box(action_low, action_high)
+
+  def reset(self, *args, **kwargs) -> Any:
+    obs = self._gym_env.reset(*args, **kwargs)
+    # The robot instance might have been replaced if hard_reset is called. We
+    # just recreate the controller.
+    self._controller = _setup_controller(self._gym_env.robot)
+    self._controller.reset()
+    return obs
+
+  def __getattr__(self, attr):
+    return getattr(self._gym_env, attr)
+
+  def step(self, action) -> Any:
+    """Steps the wrapped env with high level commands.
+
+    Args:
+      action: A high level command containing the desired linear and angular
+        speed of the robot base. The speed can be adjusted at any time.
+
+    Returns:
+      The gym env observation, reward, termination, and additional info.
+
+    """
+    lin_speed = np.array((action[0], 0, 0))
+    ang_speed = action[1]
+    _update_controller_params(self._controller, lin_speed, ang_speed)
+    for _ in range(self._action_repeat):
+      self._controller.update()
+      hybrid_action = self._controller.get_action()
+      obs, reward, done, info = self._gym_env.step(hybrid_action)
+      if done:
+        break
+    return obs, reward, done, info
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/init.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/init.py
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric.py
@@ -0,0 +1,125 @@
+"""The metric reporting system used in our env.
+
+In the gym environment, there are many variables or quantities that can help
+researchers to debug, evaluate policy performance. Such quantities may
+include: Motor torques for quadrupeds, even when they are controlled in
+POSITION mode; Distance to walls while a wheeled robot is navigating the
+indoor environment. Often, these metrics are private variables (or can only be
+computed from private variables). To expose the user interested metrics
+outside the environment's observations, we designed this Metric system that
+can be invoked in any modules (robot, tasks, sensors) inside the gym env.
+"""
+
+import enum
+import logging
+from typing import Any, Callable, Dict, Sequence, Text
+import gin
+
+
+@gin.constants_from_enum
+class MetricScope(enum.Enum):
+  """The supported scope of metrics."""
+  # The performance metrics.
+  PERFORMANCE = 1,
+
+  # The debug metrics for diagnostic purposes.
+  DEBUG = 2,
+
+  # The safety metrics.
+  SAFETY = 3,
+
+  # The statistics of episodes in metric format.
+  STATISTIC = 4,
+
+
+class MetricCore(object):
+  """Aggregates values of interest to compute statistics."""
+
+  def __init__(
+      self,
+      name: Text,
+      scope: MetricScope,
+      single_ep_aggregator: Callable[[Sequence[Any]], Any],
+      multi_ep_aggregator: Callable[[Sequence[Any]], Dict[Text, Any]],
+  ):
+    """Initializes the class.
+
+    Args:
+      name: The name of the metric, for example "motor_torques",
+        "distance_to_wall", etc. The full name of the metric will have scope
+        name in the prefix, i.e. "scope/name".
+      scope: The scope of this metric. Most metric should be for DEBUG purpose.
+        The scope name will be added to the final name of metric in this way:
+          "scope/name", which is standarded format for Tensorboard to group
+          named variables.
+      single_ep_aggregator: The function to process all aggregated metric
+        values. The derived MetricReporter (see below) will implements
+        reset_episode() which clears the episode data, and will be called during
+        env.reset().
+      multi_ep_aggregator: The functions to process multi-episode metric values.
+        We assume the inputs to the functions is a list of per episode metric
+        values, i.e. each element of the list is the output from the
+        single_ep_aggregator.
+    """
+    self._name = scope.name + "/" + name
+    self._single_ep_aggregator = single_ep_aggregator
+    self._multi_ep_aggregator = multi_ep_aggregator
+    self._episode_data = []
+
+  def report(self, metric_value: Any):
+    """Stores the reported metric in the internal buffer.
+
+    Args:
+      metric_value: The metric we are interested to report.
+    """
+    self._episode_data.append(metric_value)
+
+
+class MetricReporter(MetricCore):
+  """Reports the metric using the provided aggregator functions."""
+
+  def get_episode_metric(self) -> Dict[Text, Sequence[Any]]:
+    """Processes and returns episode metric values.
+
+    Returns:
+      Aggregated metrics for the current episode.
+    """
+    if self._episode_data:
+      return {self._name: self._single_ep_aggregator(self._episode_data)}
+    else:
+      return {}
+
+  def get_multi_ep_metric(
+      self, episodic_metric: Dict[Text, Sequence[Any]]) -> Dict[Text, Any]:
+    """Processes the aggregated metrics over many episodes.
+
+    Will not be affected by reset_episode, since we take multi-episode data as
+    inputs.
+
+    Args:
+      episodic_metric: The per episode metrics. We expect the inputs to contain
+        the same key as self._name, and that the value is a list of metric
+        values computed using self.get_episode_metrc().
+
+    Returns:
+      The processed multi-episode metrics.
+    """
+    if self._name not in episodic_metric:
+      logging.warning(
+          "The inputs does not contain the key for the current metric: %s",
+          self._name)
+      return {}
+    outputs = {}
+    for key, val in self._multi_ep_aggregator(
+        episodic_metric[self._name]).items():
+      outputs[self._name + "_" + key] = val
+    return outputs
+
+  def reset_episode(self):
+    """Clears the episode data stored.
+
+    Will be invoked during env.reset(). This will effect how get_episode_metric
+    gets computed.
+
+    """
+    self._episode_data = []
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric_logger.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric_logger.py
@@ -0,0 +1,155 @@
+# Lint as: python3
+"""The system to log and manage all metrics."""
+
+import logging
+from typing import Any, Callable, Dict, Sequence, Text, Union
+import numpy as np
+
+from pybullet_envs.minitaur.envs_v2.evaluation import metric as metric_lib
+
+
+def _merge_dict_throw_on_duplicates(
+    dict_1: Dict[Text, Any],
+    dict_2: Dict[Text, Any],
+):
+  """Merge the contents of dict_2 to dict_1.
+
+  Args:
+    dict_1: The dictionary to merge into.
+    dict_2: The dictionary to merge from.
+
+  Raises:
+    KeyError: if duplicated keys are found.
+  """
+  for key, val in dict_2.items():
+    if key in dict_1:
+      raise KeyError(f"Duplicate key: {key} found in both "
+                     f"dictionaries: {dict_1} {dict_2}.")
+    dict_1[key] = val
+
+
+def common_stats(x: Sequence[Union[float, Sequence[float]]],
+                 flatten_array=False):
+  out = x
+  if flatten_array:
+    # Deals with array of arrays.
+    out = np.concatenate(x).flatten()
+  return {
+      "mean": np.mean(out),
+      "max": np.max(out),
+      "min": np.min(out),
+      "std": np.std(out),
+  }
+
+
+class MetricLogger(object):
+  """The central system to manage all metrics."""
+
+  def __init__(self, ignore_duplicate_metrics: bool = True):
+    """Initializes the system.
+
+    Args:
+      ignore_duplicate_metrics: Don't throw error when users want to recreate
+        the same metric.
+    """
+    self._metric_reporters = {}
+    self._ignore_duplicate_metrics = ignore_duplicate_metrics
+
+  def reset_episode(self):
+    """Resets all metric reporters's internal buffer.
+
+    Will be called by the gym env during reset.
+    """
+    for metric in self._metric_reporters.values():
+      metric.reset_episode()
+
+  def create_metric(
+      self,
+      name: Text,
+      scope: metric_lib.MetricScope,
+      single_ep_aggregator: Callable[[Sequence[Any]], Any],
+      multi_ep_aggregator: Callable[[Sequence[Any]], Dict[Text, Any]],
+  ) -> metric_lib.MetricCore:
+    """Creates a new metric.
+
+    Args:
+      name: The name of the metric, for example "motor_torques",
+        "distance_to_wall", etc. The full name of the metric will have scope
+        name in the prefix, i.e. "scope/name".
+      scope: The scope of this metric. Most metric should be for DEBUG purpose.
+        The scope name will be added to the final name of metric in this way:
+          "scope/name", which is standarded format for Tensorboard to group
+          named variables.
+      single_ep_aggregator: The function to process all aggregated metric
+        values. The derived MetricReporter (see below) will implements
+        reset_episode() which clears the episode data, and will be called during
+        env.reset().
+      multi_ep_aggregator: The function to process multi-episode metric values.
+        We assume the inputs to the function is a list of per episode metric
+        values, i.e. each element of the list is the output from the
+        single_ep_aggregator.
+
+    Returns:
+      A MetricCore which can be used to report values of interest.
+    """
+    if name in self._metric_reporters:
+      if self._ignore_duplicate_metrics:
+        logging.warning("Trying to create an existing metric: %s", name)
+        name = self._get_valid_duplicate_name(name)
+        logging.warning("Changed duplicate metric to new name: %s", name)
+      else:
+        raise ValueError(f"Duplicated metrics found: {name}")
+
+    self._metric_reporters[name] = metric_lib.MetricReporter(
+        name, scope, single_ep_aggregator, multi_ep_aggregator)
+    return self._metric_reporters[name]
+
+  def create_scalar_metric(
+      self,
+      name: Text,
+      scope: metric_lib.MetricScope,
+      single_ep_aggregator: Callable[[Sequence[Any]], Any],
+  ) -> metric_lib.MetricCore:
+    """Shortcut to create a metric for scalar variables."""
+    return self.create_metric(
+        name,
+        scope,
+        single_ep_aggregator,
+        multi_ep_aggregator=common_stats,
+    )
+
+  def _get_valid_duplicate_name(self, original_name: Text) -> Text:
+    counter = 1
+    test_name = "{}_duplicate_{}".format(original_name, str(counter))
+    while test_name in self._metric_reporters:
+      counter += 1
+      test_name = "{}_duplicate_{}".format(original_name, str(counter))
+    return test_name
+
+  def get_episode_metrics(self) -> Dict[Text, Any]:
+    """Return all metrics registered in the logger for the current episode."""
+    ep_stats = {}
+    for metric in self._metric_reporters.values():
+      _merge_dict_throw_on_duplicates(ep_stats, metric.get_episode_metric())
+    return ep_stats
+
+  def get_multi_episode_metrics(
+      self, episodic_metrics: Dict[Text, Sequence[Any]]) -> Dict[Text, Any]:
+    """Processes the aggregated metrics over many episodes.
+
+    Will not be affected by reset_episode, since we take multi-episode data as
+    inputs.
+
+    Args:
+      episodic_metrics: The per episode metrics. For each key in the inputs, we
+        expect at least one metric reporter can process the corresponding value,
+        which contains a list of episodic metrics.
+
+    Returns:
+      The processed multi-episode metrics.
+    """
+    stats = {}
+    for metric in self._metric_reporters.values():
+      _merge_dict_throw_on_duplicates(
+          stats, metric.get_multi_ep_metric(episodic_metrics))
+    return stats
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric_utils.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/evaluation/metric_utils.py
@@ -0,0 +1,29 @@
+"""Helper class and functions to make computing meteric statistics easier."""
+from typing import Any, Sequence
+
+import numpy as np
+
+
+class MetricStats(object):
+  """Helper class to make computing meteric statistics easier to manage."""
+
+  def __init__(self, data: Sequence[Any]):
+    if None or not list(data):
+      raise ValueError("Input data for ComputeMetricStats cannot be empty.")
+    self._data = np.asarray(data).flatten()
+
+  @property
+  def avg(self):
+    return np.mean(self._data)
+
+  @property
+  def min(self):
+    return np.min(self._data)
+
+  @property
+  def max(self):
+    return np.max(self._data)
+
+  @property
+  def sum(self):
+    return np.sum(self._data)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_mpc_wrapper_example.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_mpc_wrapper_example.py
@@ -0,0 +1,96 @@
+# Lint as: python3
+r"""An example where Laikago walks forward using mpc controller.
+
+This script illustrates an example of MPCLocomotionWrapper class for
+controlling a simulated Laikago robot to walk on a flat ground. The wrapped
+environment takes as input the target local foothold locations and the desired
+base pose. An MPC-based controller is executed internally to compute the
+required forces to achieve the desired foothold position and base movement.
+
+
+blaze run -c opt \
+//robotics/reinforcement_learning/minitaur/envs_v2/examples\
+:laikago_mpc_wrapper_example
+"""
+import os
+import tempfile
+
+from absl import app
+from absl import flags
+import gin
+import numpy as np
+import pybullet_data as pd
+
+from pybullet_envs.minitaur.envs_v2 import env_loader
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string("video_file", None, "The filename for saving the videos.")
+
+CONFIG_FILE_SIM = (pd.getDataPath()+"/configs/laikago_mpc_example_flat.gin")
+
+NUM_STEPS = 100
+ENABLE_RENDERING = True  # Will be disabled for tests
+ENV_RANDOM_SEED = 100
+DEFAULT_TARGET_FOOTHOLD = (0.05, 0.0, -0.01)
+DEFAULT_BASE_VELOCITY = (0.0, 0.0)
+DEFAULT_TWIST_SPEED = 0.0
+DEFAULT_BODY_HEIGHT = 0.45
+DEFAULT_ROLL_PITCH = (0.0, 0.0)
+DEFAULT_SWING_HEIGHT = 0.07
+
+
+def _build_env():
+  """Builds the environment for the Laikago robot.
+
+  Returns:
+    The OpenAI gym environment.
+  """
+  gin.parse_config_file(CONFIG_FILE_SIM)
+  gin.bind_parameter("SimulationParameters.enable_rendering", ENABLE_RENDERING)
+  env = env_loader.load()
+  env.seed(ENV_RANDOM_SEED)
+
+  return env
+
+
+def _run_example():
+  """An example that Laikago moves with a constant speed and predicts foothold.
+
+  Returns:
+    env: the environment after the simulation
+  """
+
+  env = _build_env()
+
+  env.reset()
+  if FLAGS.video_file is not None:
+    pybullet = env.pybullet_client
+    pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
+    log_id = pybullet.startStateLogging(pybullet.STATE_LOGGING_VIDEO_MP4,
+                                        FLAGS.video_file)
+
+  try:
+    max_step = NUM_STEPS
+    for _ in range(max_step):
+      target_foothold = np.array(DEFAULT_TARGET_FOOTHOLD)
+      action = np.concatenate([
+          target_foothold, target_foothold, target_foothold, target_foothold,
+          [
+              DEFAULT_SWING_HEIGHT, DEFAULT_SWING_HEIGHT, DEFAULT_SWING_HEIGHT,
+              DEFAULT_SWING_HEIGHT
+          ], DEFAULT_BASE_VELOCITY, [DEFAULT_TWIST_SPEED],
+          [DEFAULT_BODY_HEIGHT], DEFAULT_ROLL_PITCH
+      ])
+      _ = env.step(action)
+  finally:
+    if FLAGS.video_dir is not None:
+      pybullet.stopStateLogging(log_id)
+
+
+def main(argv):
+  del argv
+  _run_example()
+
+
+if __name__ == "__main__":
+  app.run(main)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_pmtg_example.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_pmtg_example.py
@@ -0,0 +1,84 @@
+r"""An example to run an OpenAI gym environment with laikago and PMTG wrapper.
+
+This is an open-loop controller where we use 0 residuals and only execute the
+trajectory generator. The parameters that are (normally modulated by the policy)
+are fixed (except the intensity) and not optimized. They are hand picked as
+follows:
+ - The gait cycle frequency is 3 Hz.
+ - Walking height is neutral (0).
+ - Swing vs stance ratio is 2 (swing takes half the time vs stance).
+ - Intensity starts from zero and is gradually increased over time.
+
+blaze run -c opt \
+//robotics/reinforcement_learning/minitaur/envs_v2/examples\
+:laikago_pmtg_example
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import gin
+import tensorflow.compat.v1 as tf
+from pybullet_envs.minitaur.envs_v2 import env_loader
+import pybullet_data as pd
+
+CONFIG_DIR = pd.getDataPath()+"/configs_v2/"
+CONFIG_FILES = [
+    os.path.join(CONFIG_DIR, "base/laikago_with_imu.gin"),
+    os.path.join(CONFIG_DIR, "tasks/fwd_task_no_termination.gin"),
+    os.path.join(CONFIG_DIR, "wrappers/pmtg_wrapper.gin"),
+    os.path.join(CONFIG_DIR, "scenes/simple_scene.gin")
+]
+# Constants used to show example PMTG behavior with zero residual.
+# Since we use the default PMTG configuration, there is only one trajectory
+# generator. So the parameters that can be changed are:
+# - Multiplier for delta time (similar to frequency but per time step).
+# - Intensity of the trajectory generator.
+# - Walking heights used for the legs.
+# - The ratio of the speed of the leg during swing vs stance phase.
+# For more details, check out TgSimple._process_tg_params method.
+_PMTG_DELTA_TIME_MULTIPLIER = 2.0
+_PMTG_INTENSITY_RANGE = (0.0, 1.5)
+_PMTG_INTENSITY_STEP_SIZE = 0.0001
+_PMTG_WALKING_HEIGHT = -0.3
+_PMTG_SWING_VS_STANCE = 2
+_NUM_MOTORS = 12
+
+
+def main(argv):
+  del argv  # Unused.
+
+  # Parse the gym config and create the environment.
+  for gin_file in CONFIG_FILES:
+    gin.parse_config_file(gin_file)
+  gin.bind_parameter("SimulationParameters.enable_rendering", True)
+  gin.bind_parameter("terminal_conditions.maxstep_terminal_condition.max_step",
+                     10000)
+  env = env_loader.load()
+  tg_intensity = _PMTG_INTENSITY_RANGE[0]
+  sum_reward = 0
+  env.reset()
+  done = False
+  # Use zero residual, only use the output of the trajectory generator.
+  residual = [0] * _NUM_MOTORS
+  # Since we fix residuals and all the parameters of the TG, this example
+  # is practically an open loop controller. A learned policy would provide
+  # different values for these parameters at every timestep.
+  while not done:
+    # Increase the intensity of the trajectory generator gradually
+    # to illustrate increasingly larger steps.
+    if tg_intensity < _PMTG_INTENSITY_RANGE[1]:
+      tg_intensity += _PMTG_INTENSITY_STEP_SIZE
+    tg_params = [
+        _PMTG_DELTA_TIME_MULTIPLIER, tg_intensity, _PMTG_WALKING_HEIGHT,
+        _PMTG_SWING_VS_STANCE
+    ]
+    action = residual + tg_params
+    _, reward, done, _ = env.step(action)
+    sum_reward += reward
+
+
+if __name__ == "__main__":
+  tf.app.run(main)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_static_gait_example.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/examples/laikago_static_gait_example.py
@@ -0,0 +1,52 @@
+# Lint as: python3
+r"""An example that the Laikago walks forward using a static gait.
+
+blaze run -c opt //robotics/reinforcement_learning/minitaur/envs_v2/examples:\
+laikago_static_gait_example
+"""
+from absl import app
+from absl import flags
+import gin
+from pybullet_envs.minitaur.agents.baseline_controller import static_gait_controller
+from pybullet_envs.minitaur.envs_v2 import env_loader
+import pybullet_data as pd
+
+flags.DEFINE_bool("render", True, "Whether to render the example.")
+
+FLAGS = flags.FLAGS
+_CONFIG_FILE = pd.getDataPath()+"/configs/laikago_walk_static_gait.gin"
+_NUM_STEPS = 10000
+_ENV_RANDOM_SEED = 13
+
+
+def _load_config(render=False):
+  gin.parse_config_file(_CONFIG_FILE)
+  gin.bind_parameter("SimulationParameters.enable_rendering", render)
+
+
+def run_example(num_max_steps=_NUM_STEPS):
+  """Runs the example.
+
+  Args:
+    num_max_steps: Maximum number of steps this example should run for.
+  """
+  env = env_loader.load()
+
+  env.seed(_ENV_RANDOM_SEED)
+  observation = env.reset()
+  policy = static_gait_controller.StaticGaitController(env.robot)
+
+  for _ in range(num_max_steps):
+    action = policy.act(observation)
+    _, _, done, _ = env.step(action)
+    if done:
+      break
+
+
+def main(_):
+  _load_config(FLAGS.render)
+  run_example()
+
+
+if __name__ == "__main__":
+  app.run(main)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_config.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_config.py
@@ -0,0 +1,58 @@
+"""A gin-config class for locomotion_gym_env.
+
+This should be identical to locomotion_gym_config.proto.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from typing import Sequence, Text
+
+import attr
+import gin
+
+
+@gin.configurable
+@attr.s
+class SimulationParameters(object):
+  """Parameters specific for the pyBullet simulation."""
+  sim_time_step_s = attr.ib(type=float, default=0.002)
+  num_action_repeat = attr.ib(type=int, default=5)
+  enable_hard_reset = attr.ib(type=bool, default=False)
+  enable_rendering = attr.ib(type=bool, default=False)
+  enable_rendering_gui = attr.ib(type=bool, default=True)
+  robot_on_rack = attr.ib(type=bool, default=False)
+  camera_target = attr.ib(type=Sequence[float], default=None)
+  camera_distance = attr.ib(type=float, default=1.0)
+  camera_yaw = attr.ib(type=float, default=0)
+  camera_pitch = attr.ib(type=float, default=-30)
+  render_width = attr.ib(type=int, default=480)
+  render_height = attr.ib(type=int, default=360)
+  egl_rendering = attr.ib(type=bool, default=False)
+
+
+@gin.configurable
+@attr.s
+class ScalarField(object):
+  """A named scalar space with bounds."""
+  # TODO(sehoonha) extension to vector fields.
+  name = attr.ib(type=str)
+  upper_bound = attr.ib(type=float)
+  lower_bound = attr.ib(type=float)
+
+
+@gin.configurable
+@attr.s
+class LocomotionGymConfig(object):
+  """Grouped Config Parameters for LocomotionGym."""
+  simulation_parameters = attr.ib(type=SimulationParameters)
+  # TODO(sehoonha) implement attr validators for the list
+  actions = attr.ib(type=list, default=None)  # pylint: disable=g-bare-generic
+  log_path = attr.ib(type=Text, default=None)
+  data_dir = attr.ib(
+      type=Text,
+      default='robotics/reinforcement_learning/minitaur/data/')
+  profiling_path = attr.ib(type=Text, default=None)
+  seed = attr.ib(type=int, default=None)
+  ignored_sensor_list = attr.ib(type=Sequence[Text], default=())
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_config_test.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_config_test.py
@@ -0,0 +1,75 @@
+"""Tests for pybullet_envs.minitaur.envs.locomotion_gym_config."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gin
+from pybullet_envs.minitaur.envs_v2 import locomotion_gym_config
+import tensorflow.compat.v1 as tf
+from absl.testing import parameterized
+
+
+
+class LocomotionGymConfigTest(tf.test.TestCase, parameterized.TestCase):
+
+  def testSimulationParametersFromGinString(self):
+    config_text = (
+        'import pybullet_envs.minitaur'
+        '.envs_v2.locomotion_gym_config\n'
+        'locomotion_gym_config.SimulationParameters.sim_time_step_s = 0.005\n'
+        'locomotion_gym_config.SimulationParameters.camera_distance = 5.0\n'
+        'locomotion_gym_config.SimulationParameters.camera_yaw = 10\n'
+        'locomotion_gym_config.SimulationParameters.camera_pitch = -50\n'
+    )
+    gin.parse_config(config_text)
+
+    cfg = locomotion_gym_config.SimulationParameters()
+    self.assertEqual(cfg.sim_time_step_s, 0.005)
+    self.assertFalse(cfg.enable_hard_reset)
+    self.assertEqual(cfg.camera_distance, 5.0)
+    self.assertEqual(cfg.camera_yaw, 10)
+    self.assertEqual(cfg.camera_pitch, -50)
+
+  def testScalarFieldFromGinString(self):
+    config_text = (
+        'import pybullet_envs.minitaur'
+        '.envs_v2.locomotion_gym_config\n'
+        'locomotion_gym_config.ScalarField.name = "MotorUpperLimit"\n'
+        'locomotion_gym_config.ScalarField.upper_bound = 1.0\n'
+        'locomotion_gym_config.ScalarField.lower_bound = -1.0\n'
+    )
+    gin.parse_config(config_text)
+
+    cfg = locomotion_gym_config.ScalarField()
+    self.assertEqual(cfg.name, 'MotorUpperLimit')
+    self.assertEqual(cfg.upper_bound, 1.0)
+    self.assertEqual(cfg.lower_bound, -1.0)
+
+  def testLocomotionGymConfigFromGinString(self):
+    config_text = (
+        'import pybullet_envs.minitaur'
+        '.envs_v2.locomotion_gym_config\n'
+        # SimulationParameters
+        'locomotion_gym_config.SimulationParameters.sim_time_step_s = 0.005\n'
+        # Actions
+        'Motor/locomotion_gym_config.ScalarField.name = "MotorUpperLimit"\n'
+        'Motor/locomotion_gym_config.ScalarField.upper_bound = 1.0\n'
+        'Motor/locomotion_gym_config.ScalarField.lower_bound = -1.0\n'
+        # LocomotionGymConfigs
+        'locomotion_gym_config.LocomotionGymConfig.simulation_parameters = '
+        '@locomotion_gym_config.SimulationParameters()\n'
+        'locomotion_gym_config.LocomotionGymConfig.actions = ['
+        '@Motor/locomotion_gym_config.ScalarField()]\n'
+        'locomotion_gym_config.LocomotionGymConfig.ignored_sensor_list = ['
+        '"Collisions"]\n')
+    gin.parse_config(config_text)
+
+    cfg = locomotion_gym_config.LocomotionGymConfig()
+    self.assertEqual(cfg.simulation_parameters.sim_time_step_s, 0.005)
+    self.assertEqual(cfg.actions[0].upper_bound, 1.0)
+    self.assertEqual(cfg.ignored_sensor_list, ['Collisions'])
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_env.py
@@ -0,0 +1,731 @@
+# Lint as: python3
+"""This file implements the locomotion gym env."""
+
+import atexit
+import collections
+import time
+from typing import Any, Callable, Sequence, Text, Union
+import gin
+import gym
+import numpy as np
+
+from pybullet_envs.minitaur.envs_v2 import base_client
+from pybullet_utils import bullet_client
+import pybullet_data
+import pybullet
+from pybullet_envs.minitaur.envs import minitaur_logging
+from pybullet_envs.minitaur.envs import minitaur_logging_pb2
+#from pybullet_envs.minitaur.envs import minitaur_logging
+#from pybullet_envs.minitaur.envs import minitaur_logging_pb2
+from pybullet_envs.minitaur.envs_v2.evaluation import metric_logger
+from pybullet_envs.minitaur.envs_v2.scenes import scene_base
+from pybullet_envs.minitaur.envs_v2.sensors import sensor
+from pybullet_envs.minitaur.envs_v2.sensors import space_utils
+from pybullet_envs.minitaur.envs_v2.utilities import rendering_utils
+from pybullet_envs.minitaur.robots import autonomous_object
+from pybullet_envs.minitaur.robots import robot_base
+
+_ACTION_EPS = 0.01
+_NUM_SIMULATION_ITERATION_STEPS = 300
+_LOG_BUFFER_LENGTH = 5000
+
+SIM_CLOCK = 'SIM_CLOCK'
+
+# Exports this symbol so we can use it in the config file.
+gin.constant('locomotion_gym_env.SIM_CLOCK', SIM_CLOCK)
+
+# This allows us to bind @time.time in the gin configuration.
+gin.external_configurable(time.time, module='time')
+
+
+# TODO(b/122048194): Enable position/torque/hybrid control mode.
+@gin.configurable
+class LocomotionGymEnv(gym.Env):
+  """The gym environment for the locomotion tasks."""
+  metadata = {
+      'render.modes': ['human', 'rgb_array', 'topdown'],
+      'video.frames_per_second': 100
+  }
+
+  def __init__(self,
+               gym_config,
+               clock: Union[Callable[..., float], Text] = 'SIM_CLOCK',
+               robot_class: Any = None,
+               scene: scene_base.SceneBase = None,
+               sensors: Sequence[sensor.Sensor] = None,
+               task: Any = None,
+               env_randomizers: Any = None):
+    """Initializes the locomotion gym environment.
+
+    Args:
+      gym_config: An instance of LocomotionGymConfig.
+      clock: The clock source to be used for the gym env. The clock should
+        return a timestamp in seconds. Setting clock == "SIM_CLOCK" will enable
+        the built-in simulation clock. For real robot experiments, we can use
+        time.time or other clock wall clock sources.
+      robot_class: A class of a robot. We provide a class rather than an
+        instance due to hard_reset functionality. Parameters are expected to be
+        configured with gin.
+      scene: An object for managing the robot's surroundings.
+      sensors: A list of environmental sensors for observation.
+      task: A callable function/class to calculate the reward and termination
+        condition. Takes the gym env as the argument when calling.
+      env_randomizers: A list of EnvRandomizer(s). An EnvRandomizer may
+        randomize the physical property of minitaur, change the terrrain during
+        reset(), or add perturbation forces during step().
+      client_factory: A function to create a simulation client, it can be a
+        pybullet client.
+
+    Raises:
+      ValueError: If the num_action_repeat is less than 1.
+
+    """
+    self._pybullet_client = None
+    self._metric_logger = metric_logger.MetricLogger()
+    # TODO(sehoonha) split observation and full-state sensors (b/129858214)
+
+    # Makes sure that close() is always called to flush out the logs to the
+    # disk.
+    atexit.register(self.close)
+    self.seed()
+    self._gym_config = gym_config
+    if robot_class is None:
+      raise ValueError('robot_class cannot be None.')
+    self._robot_class = robot_class
+    if issubclass(self._robot_class, robot_base.RobotBase):
+      self._use_new_robot_class = True
+    else:
+      self._use_new_robot_class = False
+    self._robot = None
+
+    self._scene = scene or scene_base.SceneBase()
+
+    # TODO(sehoonha) change the data structure to dictionary
+    self._env_sensors = list(sensors) if sensors is not None else list()
+
+    # TODO(b/152161457): Make logging a standalone module.
+    self._log_path = gym_config.log_path
+    self._logging = minitaur_logging.MinitaurLogging(self._log_path)
+    self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
+    self._data_dir = gym_config.data_dir
+
+    self._task = task
+
+    self._env_randomizers = env_randomizers if env_randomizers else []
+
+    # Simulation related parameters.
+    self._num_action_repeat = gym_config.simulation_parameters.num_action_repeat
+    self._on_rack = gym_config.simulation_parameters.robot_on_rack
+    if self._num_action_repeat < 1:
+      raise ValueError('number of action repeats should be at least 1.')
+    # TODO(b/73829334): Fix the value of self._num_bullet_solver_iterations.
+    self._num_bullet_solver_iterations = int(_NUM_SIMULATION_ITERATION_STEPS /
+                                             self._num_action_repeat)
+
+    self._sim_time_step = gym_config.simulation_parameters.sim_time_step_s
+    # The sim step counter is an internal varialbe to count the number of
+    # pybullet stepSimulation() has been called since last reset.
+    self._sim_step_counter = 0
+
+    self._env_time_step = self._num_action_repeat * self._sim_time_step
+    # The env step counter accounts for how many times env.step has been
+    # called since reset.
+    self._env_step_counter = 0
+
+    if clock == SIM_CLOCK:
+      self._clock = self._get_sim_time
+    else:
+      self._clock = clock
+
+    # Creates the bullet client.
+    self._is_render = gym_config.simulation_parameters.enable_rendering
+    # The wall-clock time at which the last frame is rendered.
+    self._last_frame_time = 0.0
+    
+    if gym_config.simulation_parameters.enable_rendering:
+      self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._pybullet_client.configureDebugVisualizer(
+        pybullet.COV_ENABLE_GUI,
+        gym_config.simulation_parameters.enable_rendering_gui)
+    else:
+      self._pybullet_client = bullet_client.BulletClient()
+    if gym_config.simulation_parameters.egl_rendering:
+      self._pybullet_client.loadPlugin('eglRendererPlugin')
+
+    self._pybullet_client.setAdditionalSearchPath(
+      pybullet_data.getDataPath())
+
+    # If enabled, save the performance profile to profiling_path
+    # Use Google Chrome about://tracing to open the file
+    if gym_config.profiling_path is not None:
+      self._profiling_slot = self._pybullet_client.startStateLogging(
+          self._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS,
+          gym_config.profiling_path)
+      self._profiling_counter = 10
+    else:
+      self._profiling_slot = -1
+
+    # Set the default render options. TODO(b/152161124): Make rendering a
+    # standalone module.
+    self._camera_target = gym_config.simulation_parameters.camera_target
+    self._camera_dist = gym_config.simulation_parameters.camera_distance
+    self._camera_yaw = gym_config.simulation_parameters.camera_yaw
+    self._camera_pitch = gym_config.simulation_parameters.camera_pitch
+    self._render_width = gym_config.simulation_parameters.render_width
+    self._render_height = gym_config.simulation_parameters.render_height
+
+    # Loads the environment and robot. Actions space will be created as well.
+    self._hard_reset = True
+    self._observation_dict = {}
+    self.reset()
+    self._hard_reset = gym_config.simulation_parameters.enable_hard_reset
+
+    # Construct the observation space from the list of sensors.
+    self.observation_space = (
+        space_utils.convert_sensors_to_gym_space_dictionary([
+            sensor for sensor in self.all_sensors()
+            if sensor.get_name() not in self._gym_config.ignored_sensor_list
+        ]))
+
+  def __del__(self):
+    self.close()
+
+  def _load_old_robot_class(self):
+    self._robot = self._robot_class(
+        pybullet_client=self._pybullet_client, on_rack=self._on_rack)
+    self._action_list = []
+    action_upper_bound = []
+    action_lower_bound = []
+    for action in self._gym_config.actions:
+      self._action_list.append(action.name)
+      action_upper_bound.append(action.upper_bound)
+      action_lower_bound.append(action.lower_bound)
+    self.action_space = gym.spaces.Box(
+        np.array(action_lower_bound),
+        np.array(action_upper_bound),
+        dtype=np.float32)
+
+  def _load_new_robot_class(self):
+    self._robot = self._robot_class(
+        pybullet_client=self._pybullet_client, clock=self._clock)
+    self.action_space = self._robot.action_space
+
+  def _load(self):
+    self._pybullet_client.resetSimulation()
+    self._pybullet_client.setPhysicsEngineParameter(
+        numSolverIterations=self._num_bullet_solver_iterations)
+    self._pybullet_client.setTimeStep(self._sim_time_step)
+    self._pybullet_client.setGravity(0, 0, -10)
+    self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
+
+    # Disable rendering during scene loading will speed up simulation.
+    if self._is_render:
+      self._pybullet_client.configureDebugVisualizer(
+          self._pybullet_client.COV_ENABLE_RENDERING, 0)
+
+    # Rebuild the scene.
+    self._scene.build_scene(self._pybullet_client)
+
+    # TODO(b/151975607): Deprecate old robot support.
+    if self._use_new_robot_class:
+      self._load_new_robot_class()
+    else:
+      self._load_old_robot_class()
+
+    # Check action space.
+    if (isinstance(self.action_space, gym.spaces.Box) and
+        not np.all(self.action_space.low < self.action_space.high)):
+      raise ValueError(f'Action space contains invalid dimensions, '
+                       f'action space low = {self.action_space.low}, '
+                       f'action space high = {self.action_space.high}')
+
+    for an_object in self._dynamic_objects():
+      an_object.set_clock(self._clock)
+
+    # Enable rendering after loading finishes.
+    if self._is_render:
+      self._pybullet_client.configureDebugVisualizer(
+          self._pybullet_client.COV_ENABLE_RENDERING, 1)
+
+  def close(self):
+    atexit.unregister(self.close)
+    #if self._pybullet_client:
+    
+    if self._log_path is not None:
+      self._logging.save_episode(self._episode_proto)
+    for sensor_ in self.all_sensors():
+      sensor_.on_terminate(self)
+    if self._robot:
+      if self._use_new_robot_class:
+        self._robot.terminate()
+      else:
+        self._robot.Terminate()
+    if self._pybullet_client:
+      self._pybullet_client.disconnect()
+      self._pybullet_client = None
+
+  def seed(self, seed=None):
+    self.np_random, self.np_random_seed = gym.utils.seeding.np_random(seed)
+    return [self.np_random_seed]
+
+  def _dynamic_objects(self):
+    """Returns the python objects controlling moving obstacles."""
+    if self._scene:
+      return self._scene.dynamic_objects
+    else:
+      return []
+
+  def all_sensors(self):
+    """Returns all robot, environmental and dynamic objects sensors."""
+    if self._use_new_robot_class:
+      all_sensors = list(self._env_sensors)
+      if self._robot:
+        all_sensors.extend(list(self._robot.sensors))
+      for obj in self._dynamic_objects():
+        all_sensors.extend(obj.sensors)
+
+      # The new way of adding task specific sensors to the sensor lists.
+      if hasattr(self._task, 'sensors'):
+        all_sensors.extend(self._task.sensors)
+      return all_sensors
+    else:
+      # This is a workaround due to the issue in b/130128505#comment5
+      task_sensor = ([self._task]
+                     if isinstance(self._task, sensor.Sensor) else [])
+      robot_sensors = []
+      if self._robot:
+        robot_sensors = self._robot.GetAllSensors()
+      return robot_sensors + self._env_sensors + task_sensor
+
+  def sensor_by_name(self, name):
+    """Returns the sensor with the given name, or None if not exist."""
+    # TODO(b/154162104): Store sensors as dictionary.
+    for sensor_ in self.all_sensors():
+      if sensor_.get_name() == name:
+        return sensor_
+    return None
+
+  @gin.configurable('locomotion_gym_env.LocomotionGymEnv.reset')
+  def reset(
+      self,
+      initial_motor_angles=None,
+      reset_duration=1.0,
+      reset_visualization_camera=True,
+  ):
+    """Resets the robot's position in the world or rebuild the sim world.
+
+    The simulation world will be rebuilt if self._hard_reset is True.
+
+    Args:
+      initial_motor_angles: A list of Floats. The desired joint angles after
+        reset. If None, the robot will use its built-in value.
+      reset_duration: Float. The time (in seconds) needed to rotate all motors
+        to the desired initial values.
+      reset_visualization_camera: Whether to reset debug visualization camera on
+        reset.
+
+    Returns:
+      A numpy array contains the initial observation after reset.
+    """
+    
+
+    self._env_step_counter = 0
+    self._sim_step_counter = 0
+    self._last_reset_time = self._clock()
+    self._metric_logger.reset_episode()
+
+    # Clear the simulation world and rebuild the robot interface.
+    if self._hard_reset:
+      self._load()
+
+    # Resets the scene
+    self._scene.reset()
+
+    # Resets the robot with the provided init parameters.
+    if self._use_new_robot_class:
+      self._robot.reset()
+    else:
+      self._robot.Reset(
+          reload_urdf=False,
+          default_motor_angles=initial_motor_angles,
+          reset_time=reset_duration)
+
+    # Flush the logs to disc and reinitialize the logging system.
+    if self._log_path is not None:
+      self._logging.save_episode(self._episode_proto)
+      self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
+      minitaur_logging.preallocate_episode_proto(self._episode_proto,
+                                                 _LOG_BUFFER_LENGTH,
+                                                 self._robot)
+
+    # TODO(b/152161124): Move this part to the renderer module.
+    if reset_visualization_camera:
+      self._pybullet_client.resetDebugVisualizerCamera(self._camera_dist,
+                                                       self._camera_yaw,
+                                                       self._camera_pitch,
+                                                       [0, 0, 0])
+
+    # Create an example last action based on the type of action space.
+    self._last_action = space_utils.create_constant_action(self.action_space)
+
+    for s in self.all_sensors():
+      s.on_reset(self)
+
+    if self._task and hasattr(self._task, 'reset'):
+      self._task.reset(self)
+
+    # Loop over all env randomizers.
+    for env_randomizer in self._env_randomizers:
+      env_randomizer.randomize_env(self)
+
+    for obj in self._dynamic_objects():
+      obj.reset()
+
+    # Initialize the robot base position.
+    if self._use_new_robot_class:
+      self._last_base_position = self._robot.base_position
+    else:
+      self._last_base_position = self._robot.GetBasePosition()
+
+    # Resets the observations again, since randomizers might change the env.
+    for s in self.all_sensors():
+      s.on_reset(self)
+
+    
+    self._last_reset_time = self._clock()
+    return self._get_observation()
+
+  def _wait_for_rendering(self):
+    # Sleep, otherwise the computation takes less time than real time,
+    # which will make the visualization like a fast-forward video.
+    time_spent = time.time() - self._last_frame_time
+    self._last_frame_time = time.time()
+    time_to_sleep = self._env_time_step - time_spent
+    if time_to_sleep > 0:
+      time.sleep(time_to_sleep)
+
+    # Also keep the previous orientation of the camera set by the user.
+    [yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
+    self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch,
+                                                     self._last_base_position)
+
+  def _step_old_robot_class(self, action):
+    self._last_base_position = self._robot.GetBasePosition()
+    self._last_action = action
+
+    if self._is_render:
+      self._wait_for_rendering()
+
+    for env_randomizer in self._env_randomizers:
+      env_randomizer.randomize_step(self)
+
+    self._robot.Step(action)
+
+    if self._profiling_slot >= 0:
+      self._profiling_counter -= 1
+      if self._profiling_counter == 0:
+        self._pybullet_client.stopStateLogging(self._profiling_slot)
+
+    if self._log_path is not None:
+      minitaur_logging.update_episode_proto(self._episode_proto, self._robot,
+                                            action, self._env_step_counter)
+    reward = self._reward()
+
+    for s in self.all_sensors():
+      s.on_step(self)
+
+    if self._task and hasattr(self._task, 'update'):
+      self._task.update(self)  # TODO(b/154635313): resolve API mismatch
+
+    done = self._termination()
+    self._env_step_counter += 1
+    # TODO(b/161941829): terminate removed for now, change terminate to other
+    # names.
+    return self._get_observation(), reward, done, {}
+
+  def _step_new_robot_class(self, action):
+    self._last_base_position = self._robot.base_position
+    self._last_action = action
+
+    if self._is_render:
+      self._wait_for_rendering()
+
+    for env_randomizer in self._env_randomizers:
+      env_randomizer.randomize_step(self)
+
+    action = self._robot.pre_control_step(action, self._env_time_step)
+    for obj in self._dynamic_objects():
+      obj.pre_control_step(autonomous_object.AUTONOMOUS_ACTION)
+    for _ in range(self._num_action_repeat):
+      self._robot.apply_action(action)
+      for obj in self._dynamic_objects():
+        obj.update(self.get_time_since_reset(), self._observation_dict)
+        obj.apply_action(autonomous_object.AUTONOMOUS_ACTION)
+
+      self._pybullet_client.stepSimulation()
+      self._sim_step_counter += 1
+
+      self._robot.receive_observation()
+      for obj in self._dynamic_objects():
+        obj.receive_observation()
+
+      for s in self.all_sensors():
+        s.on_new_observation()
+
+    self._robot.post_control_step()
+    for obj in self._dynamic_objects():
+      obj.post_control_step()
+
+    if self._profiling_slot >= 0:
+      self._profiling_counter -= 1
+      if self._profiling_counter == 0:
+        self._pybullet_client.stopStateLogging(self._profiling_slot)
+
+    if self._log_path is not None:
+      minitaur_logging.update_episode_proto(self._episode_proto, self._robot,
+                                            action, self._env_step_counter)
+    reward = self._reward()
+
+    for s in self.all_sensors():
+      s.on_step(self)
+
+    if self._task and hasattr(self._task, 'update'):
+      self._task.update(self)  # TODO(b/154635313): resolve API mismatch
+
+    done = self._termination()
+    self._env_step_counter += 1
+    # TODO(b/161941829): terminate removed for now, change terminate to other
+    # names.
+    return self._get_observation(), reward, done, {}
+
+  def step(self, action):
+    """Step forward the simulation, given the action.
+
+    Args:
+      action: Can be a list of desired motor angles for all motors when the
+        robot is in position control mode; A list of desired motor torques. Or a
+        list of tuples (q, qdot, kp, kd, tau) for hybrid control mode. The
+        action must be compatible with the robot's motor control mode. Also, we
+        are not going to use the leg space (swing/extension) definition at the
+        gym level, since they are specific to Minitaur.
+
+    Returns:
+      observations: The observation dictionary. The keys are the sensor names
+        and the values are the sensor readings.
+      reward: The reward for the current state-action pair.
+      done: Whether the episode has ended.
+      info: A dictionary that stores diagnostic information.
+
+    Raises:
+      ValueError: The action dimension is not the same as the number of motors.
+      ValueError: The magnitude of actions is out of bounds.
+    """
+    # TODO(b/151975607): Finish the migration and remove old robot class
+    # support.
+    if self._use_new_robot_class:
+      return self._step_new_robot_class(action)
+    else:
+      return self._step_old_robot_class(action)
+
+  @gin.configurable('locomotion_gym_env.LocomotionGymEnv.render')
+  def render(self, mode='rgb_array'):
+    
+    if mode == 'topdown':
+      # Provide ground height if we know it. Otherwise leave it as gin
+      # configurable.
+      if hasattr(self.scene, 'ground_height'):
+        return rendering_utils.render_topdown(
+            self._pybullet_client, ground_height=self.scene.ground_height)
+      else:
+        return rendering_utils.render_topdown(self._pybullet_client)
+
+    if mode != 'rgb_array':
+      raise ValueError('Unsupported render mode:{}'.format(mode))
+
+    if self._camera_target is not None:
+      target_position = self._camera_target
+    else:
+      target_position = self._last_base_position
+    view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=target_position,
+        distance=self._camera_dist,
+        yaw=self._camera_yaw,
+        pitch=self._camera_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
+        fov=60,
+        aspect=float(self._render_width) / self._render_height,
+        nearVal=0.1,
+        farVal=100.0)
+    return rendering_utils.render_image(self._pybullet_client,
+                                        self._render_width, self._render_height,
+                                        view_matrix, proj_matrix)
+
+  @property
+  def scene(self):
+    return self._scene
+
+  @property
+  def rendering_enabled(self):
+    return self._is_render
+
+  @property
+  def env_randomizers(self):
+    return self._env_randomizers
+
+  @property
+  def last_base_position(self):
+    return self._last_base_position
+
+  @property
+  def gym_config(self):
+    return self._gym_config
+
+  def _termination(self):
+    if not self._robot.is_safe:
+      return True
+
+    if self._task and hasattr(self._task, 'done'):
+      return self._task.done(self)  # TODO(b/154635313): resolve API mismatch
+
+    return False
+
+  def _reward(self):
+    if self._task:
+      return self._task.reward(self)  # TODO(b/154635313): resolve API mismatch
+    return 0
+
+  def _get_observation(self):
+    """Get observation of this environment from a list of sensors.
+
+    Returns:
+      observations: dictionary of sensory observation with sensor name as key
+        and corresponding observation in numpy array as value.
+    """
+    sensors_dict = {}
+    for s in self.all_sensors():
+      if s.get_name() in self._gym_config.ignored_sensor_list:
+        continue
+
+      obs = s.get_observation()
+      if isinstance(obs, dict):
+        sensors_dict.update(obs)
+      else:
+        sensors_dict[s.get_name()] = obs
+
+    self._observation_dict = collections.OrderedDict(
+        sorted(sensors_dict.items()))
+    return self._observation_dict
+
+  def set_time_step(self, num_action_repeat, sim_step=0.001):
+    """Sets the time step of the environment.
+
+    Args:
+      num_action_repeat: The number of simulation steps/action repeats to be
+        executed when calling env.step().
+      sim_step: The simulation time step in PyBullet. By default, the simulation
+        step is 0.001s, which is a good trade-off between simulation speed and
+        accuracy.
+
+    Raises:
+      ValueError: If the num_action_repeat is less than 1.
+    """
+    if num_action_repeat < 1:
+      raise ValueError('number of action repeats should be at least 1.')
+    self._sim_time_step = sim_step
+    self._num_action_repeat = num_action_repeat
+    self._env_time_step = sim_step * num_action_repeat
+    self._num_bullet_solver_iterations = int(
+        _NUM_SIMULATION_ITERATION_STEPS / self._num_action_repeat)
+    self._pybullet_client.setPhysicsEngineParameter(
+        numSolverIterations=self._num_bullet_solver_iterations)
+    self._pybullet_client.setTimeStep(self._sim_time_step)
+    if not self._use_new_robot_class:
+      self._robot.SetTimeSteps(self._num_action_repeat, self._sim_time_step)
+
+  def _get_sim_time(self):
+    """Returns the simulation time since the sim resets."""
+    return self._sim_step_counter * self._sim_time_step
+
+  def get_time_since_reset(self):
+    """Get the time passed (in seconds) since the last reset.
+
+    Returns:
+      Time in seconds since the last reset.
+    """
+    if self._use_new_robot_class:
+      return self._clock() - self._last_reset_time
+    else:
+      return self._robot.GetTimeSinceReset()
+
+  def get_time(self):
+    """Gets the time reading from the clock source."""
+    return self._clock()
+
+  @property
+  def observation(self):
+    return self._observation_dict
+
+  @property
+  def pybullet_client(self):
+    return self._pybullet_client
+
+  @property
+  def robot(self):
+    return self._robot
+
+  @property
+  def num_action_repeat(self):
+    return self._num_action_repeat
+
+  @property
+  def sim_time_step(self):
+    return self._sim_time_step
+
+  @property
+  def env_step_counter(self):
+    return self._env_step_counter
+
+  @property
+  def hard_reset(self):
+    return self._hard_reset
+
+  @property
+  def last_action(self):
+    return self._last_action
+
+  @property
+  def env_time_step(self):
+    return self._env_time_step
+
+  @property
+  def data_dir(self):
+    return self._data_dir
+
+  @property
+  def task(self):
+    return self._task
+
+  @property
+  def robot_class(self):
+    return self._robot_class
+
+  @property
+  def action_names(self):
+    """Name of each action in the action space.
+
+    By default this returns the actions the robot executes (e.g.
+    "VELOCITY_elbow_joint"), but env wrappers may override this if they change
+    the action space (e.g. if they convert twist to motor commands).
+
+    Returns:
+      Tuple of strings, the action names.
+    """
+    if self._use_new_robot_class:
+      return self._robot.action_names
+    return self._action_list
+
+  @property
+  def metric_logger(self):
+    return self._metric_logger
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_env_test.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/locomotion_gym_env_test.py
@@ -0,0 +1,241 @@
+# Lint as: python3
+r"""Tests for locomotion_gym_env.
+
+
+"""
+
+import math
+import random
+
+import gin
+import mock
+import numpy as np
+import tensorflow.compat.v1 as tf
+from absl.testing import parameterized
+
+from pybullet_envs.minitaur.envs_v2 import locomotion_gym_env
+from pybullet_envs.minitaur.envs_v2.evaluation import metric as metric_lib
+from pybullet_envs.minitaur.envs_v2.scenes import scene_base
+from pybullet_envs.minitaur.envs_v2.scenes import simple_scene
+from pybullet_envs.minitaur.envs_v2.tasks import task_interface
+from pybullet_envs.minitaur.envs_v2.utilities import env_utils
+from pybullet_envs.minitaur.robots import autonomous_object
+from pybullet_envs.minitaur.robots import minitaur_v2
+import pybullet_data as pd
+
+import unittest
+
+
+_POSITION_GAIN = 1.0
+_VELOCITY_GAIN = 0.015
+_CONTROL_LATENCY = 0.015
+_CONFIG_FILE = (pd.getDataPath()+'/configs/minitaur_gym_env.gin')
+_CONFIG_FILE_NEW_ROBOT = (pd.getDataPath()+'/configs_v2/base/laikago_reactive.gin')
+
+
+class TestTask(task_interface.Task):
+  """A step counter task for test purpose."""
+
+  def __init__(self):
+    self._counter = 0
+
+  def reset(self, env):
+    del env
+    self._counter = 0
+
+  def reward(self, env):
+    del env  # TODO(b/154635313): resolve the API mismatch
+    self._counter += 1
+    return self._counter
+
+  def update(self, env):
+    del env  # TODO(b/154635313): resolve the API mismatch
+
+  def done(self, env):
+    del env  # TODO(b/154635313): resolve the API mismatch
+    return False
+
+
+class LocomotionGymEnvTest(tf.test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    super().setUp()
+    gin.clear_config()
+
+  def test_env_from_gin(self):
+    # TODO(sehoonha) rename locomotion_gym_*test.gin to locomotion_gym_*.gin
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv()
+    self.assertIsInstance(env.robot, minitaur_v2.Minitaur)
+    # The robot will stand on the ground.
+    self.assertNear(env.robot.base_position[2], 0.25, 5e-2)
+
+  def test_reset_gym(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv(task=None)
+
+    desired_init_motor_angle = math.pi / 2
+    action_dim = len(env.action_space.high)
+    observations = env.reset(initial_motor_angles=[desired_init_motor_angle] *
+                             action_dim)
+    observations = env_utils.flatten_observations(observations)
+    self.assertEqual(observations.size, 12)
+    self.assertNear(observations[0], 0, 1e-2)
+    self.assertNear(observations[4], desired_init_motor_angle, 2e-1)
+
+  def test_step_gym(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv(task=TestTask())
+
+    desired_motor_angle = math.pi / 3
+    steps = 1000
+    action_dim = len(env.action_space.high)
+    for _ in range(steps):
+      observations, reward, done, _ = env.step([desired_motor_angle] *
+                                               action_dim)
+      observations = env_utils.flatten_observations(observations)
+
+    self.assertFalse(done)
+    self.assertEqual(reward, steps)
+    self.assertEqual(observations.size, 12)
+    self.assertNear(observations[0], 0, 1e-2)
+    self.assertNear(observations[4], desired_motor_angle, 2e-1)
+    np.testing.assert_allclose(env._last_action,
+                               [desired_motor_angle] * action_dim, 2e-1)
+
+  def test_scene(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    data_root = 'urdf/'
+    env = locomotion_gym_env.LocomotionGymEnv(
+        task=TestTask(), scene=simple_scene.SimpleScene(data_root=data_root))
+    desired_motor_angle = math.pi / 3
+    steps = 2
+    action_dim = len(env.action_space.high)
+    for _ in range(steps):
+      _, reward, _, _ = env.step([desired_motor_angle] * action_dim)
+    self.assertEqual(reward, steps)
+
+  def test_except_on_invalid_config(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    gin.bind_parameter('SimulationParameters.num_action_repeat', 0)
+    with self.assertRaises(ValueError):
+      locomotion_gym_env.LocomotionGymEnv(task=None)
+
+  def test_no_scene(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv(task=None, scene=None)
+
+    # The robot will free fall.
+    self.assertNear(env.robot.base_position[2], 0.15, 5e-2)
+
+  def test_seed_draw_with_np(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv(task=None)
+    # first draw
+    env.seed(42)
+    nums1 = []
+    for _ in range(3):
+      nums1.append(env.np_random.randint(2**31 - 1))
+    # second draw
+    env.seed(42)
+    nums2 = []
+    for _ in range(3):
+      nums2.append(env.np_random.randint(2**31 - 1))
+    self.assertListEqual(nums1, nums2)
+
+  def test_get_observations(self):
+    gin.parse_config_file(_CONFIG_FILE)
+    env = locomotion_gym_env.LocomotionGymEnv(task=None)
+    desired_init_motor_angle = math.pi / 2
+    action_dim = len(env.action_space.high)
+    observations = env.reset(initial_motor_angles=[desired_init_motor_angle] *
+                             action_dim)
+    self.assertLen(observations, 2)
+    self.assertLen(observations['IMU'], 4)
+    self.assertNear(observations['IMU'][0], 0, 1e-2)
+    self.assertLen(observations['MotorAngle'], 8)
+    self.assertNear(observations['MotorAngle'][0], desired_init_motor_angle,
+                    2e-1)
+
+  
+
+  
+
+  def test_step_with_dynamic_objects(self):
+    gin.parse_config_file(_CONFIG_FILE_NEW_ROBOT)
+
+    gin.parse_config([
+        'AutonomousObject.urdf_file = "urdf/mug.urdf"',
+        'SceneBase.dynamic_objects = [@AutonomousObject(), @AutonomousObject()]',
+        'LocomotionGymEnv.scene = @SceneBase()',
+    ])
+    env = locomotion_gym_env.LocomotionGymEnv()
+
+    self.assertLen(env.scene.dynamic_objects, 2)
+    for obj in env.scene.dynamic_objects:
+      self.assertIsInstance(obj, autonomous_object.AutonomousObject)
+
+    # Replace dynamic objects with mocks for step tests.
+    mock_objects = [
+        mock.create_autospec(autonomous_object.AutonomousObject),
+        mock.create_autospec(autonomous_object.AutonomousObject)
+    ]
+    env.scene._type_to_objects_dict[
+        scene_base.ObjectType.DYNAMIC_OBJECT] = mock_objects
+    env.step(env.robot.action_space.sample())
+
+    expected_update_calls = [
+        mock.call(i * env.sim_time_step, mock.ANY)
+        for i in range(env.num_action_repeat)
+    ]
+    expected_apply_action_calls = [
+        mock.call(autonomous_object.AUTONOMOUS_ACTION)
+        for i in range(env.num_action_repeat)
+    ]
+    expected_receive_observation_calls = [
+        mock.call() for i in range(env.num_action_repeat)
+    ]
+
+    for mock_obj in mock_objects:
+      mock_obj.pre_control_step.assert_called_once_with(
+          autonomous_object.AUTONOMOUS_ACTION)
+      self.assertEqual(mock_obj.update.call_args_list, expected_update_calls)
+      self.assertEqual(mock_obj.apply_action.call_args_list,
+                       expected_apply_action_calls)
+      self.assertEqual(mock_obj.receive_observation.call_args_list,
+                       expected_receive_observation_calls)
+      mock_obj.post_control_step.assert_called_once_with()
+
+ 
+  def test_env_metrics(self):
+    gin.parse_config_file(_CONFIG_FILE_NEW_ROBOT)
+    env = locomotion_gym_env.LocomotionGymEnv()
+    metric_logger = env.metric_logger
+    test_metric_1 = metric_logger.create_scalar_metric(
+        'test_metric_1',
+        scope=metric_lib.MetricScope.DEBUG,
+        single_ep_aggregator=np.mean)
+    test_metric_1.report(22)
+
+    test_metric_2 = metric_logger.create_scalar_metric(
+        'test_metric_2',
+        scope=metric_lib.MetricScope.PERFORMANCE,
+        single_ep_aggregator=np.max)
+    test_metric_2.report(15)
+    test_metric_2.report(16)
+
+    all_metrics = metric_logger.get_episode_metrics()
+
+    self.assertDictEqual(all_metrics, {
+        'DEBUG/test_metric_1': 22,
+        'PERFORMANCE/test_metric_2': 16
+    })
+
+    env.reset()
+
+    all_metrics = metric_logger.get_episode_metrics()
+    self.assertDictEqual(all_metrics, {})
+
+
+if __name__ == '__main__':
+   tf.test.main()
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/multiagent_mobility_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/multiagent_mobility_gym_env.py
@@ -0,0 +1,485 @@
+"""This file implements the locomotion gym env."""
+# pylint: disable=dangerous-default-value
+
+import atexit
+import collections
+import time
+
+import gin
+from gym import spaces
+import numpy as np
+
+from pybullet_utils import bullet_client
+from pybullet_envs.minitaur.envs import minitaur_logging
+from pybullet_envs.minitaur.envs import minitaur_logging_pb2
+from pybullet_envs.minitaur.envs_v2 import locomotion_gym_env
+from pybullet_envs.minitaur.envs_v2.scenes import scene_base
+from pybullet_envs.minitaur.envs_v2.sensors import sensor
+from pybullet_envs.minitaur.envs_v2.sensors import space_utils
+import pybullet
+
+_ACTION_EPS = 0.01
+_NUM_SIMULATION_ITERATION_STEPS = 300
+_LOG_BUFFER_LENGTH = 5000
+
+
+@gin.configurable
+class MultiagentMobilityGymEnv(locomotion_gym_env.LocomotionGymEnv):
+  """The gym environment for the locomotion tasks."""
+  metadata = {
+      'render.modes': ['human', 'rgb_array'],
+      'video.frames_per_second': 100
+  }
+
+  def __init__(self,
+               gym_config,
+               robot_classes,
+               scene: scene_base.SceneBase = scene_base.SceneBase(),
+               sensors=None,
+               tasks=[],
+               global_task=None,
+               single_reward=False,
+               env_randomizers=None):
+    """Initializes the locomotion gym environment.
+
+    Args:
+      gym_config: An instance of LocomotionGymConfig.
+      robot_classes: A list of robot classes. We provide a class rather than an
+        instance due to hard_reset functionality. Parameters are expected to be
+        configured with gin.
+      scene: An object for managing the robot's surroundings.
+      sensors: A list of environmental sensors for observation. This does not
+        include on-robot sensors.
+      tasks: A list of callable function/class to calculate the reward and
+        termination condition. Takes the gym env as the argument when calling.
+      global_task: A callable function/class to calculate the reward and
+        termination condition for all robots. Takes the gym env as the argument
+        when calling.
+      single_reward: Whether the environment returns a single reward for all
+        agents or a dictionary.
+      env_randomizers: A list of EnvRandomizer(s). An EnvRandomizer may
+        randomize the physical property of minitaur, change the terrrain during
+        reset(), or add perturbation forces during step().
+
+    Raises:
+      ValueError: If the num_action_repeat is less than 1, or if number of
+        unique robot names do not match the number of robot classes.
+
+    """
+    # TODO(sehoonha) split observation and full-state sensors (b/129858214)
+
+    # Makes sure that close() is always called to flush out the logs to the
+    # disk.
+    atexit.register(self.close)
+    self.seed()
+    self._gym_config = gym_config
+    self._robot_classes = robot_classes
+    # Checking uniqueness of names and number of names
+    self._scene = scene
+    # TODO(sehoonha) change the data structure to dictionary
+    # TODO(b/144521291) make sure sensors have their own robot names
+    self._sensors = sensors if sensors is not None else list()
+    self._log_path = gym_config.log_path
+    self._logging = minitaur_logging.MinitaurLogging(self._log_path)
+    self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
+    self._data_dir = gym_config.data_dir
+
+    # A dictionary containing the objects in the world other than the robot.
+    self._tasks = tasks
+    self._global_task = global_task
+    self._single_reward = single_reward
+
+    self._env_randomizers = env_randomizers if env_randomizers else []
+
+    # This is a workaround due to the issue in b/130128505#comment5
+    for task in self._tasks:
+      if isinstance(task, sensor.Sensor):
+        self._sensors.append(task)
+    if global_task and isinstance(global_task, sensor.Sensor):
+      self._sensors.append(global_task)
+
+    # Simulation related parameters.
+    self._num_action_repeat = gym_config.simulation_parameters.num_action_repeat
+    self._on_rack = gym_config.simulation_parameters.robot_on_rack
+    if self._num_action_repeat < 1:
+      raise ValueError('number of action repeats should be at least 1.')
+    self._sim_time_step = gym_config.simulation_parameters.sim_time_step_s
+    self._env_time_step = self._num_action_repeat * self._sim_time_step
+    self._env_step_counter = 0
+
+    # TODO(b/73829334): Fix the value of self._num_bullet_solver_iterations.
+    self._num_bullet_solver_iterations = int(_NUM_SIMULATION_ITERATION_STEPS /
+                                             self._num_action_repeat)
+    self._is_render = gym_config.simulation_parameters.enable_rendering
+
+    # The wall-clock time at which the last frame is rendered.
+    self._last_frame_time = 0.0
+    if self._is_render:
+      self._pybullet_client = bullet_client.BulletClient(
+          connection_mode=pybullet.GUI)
+    else:
+      self._pybullet_client = bullet_client.BulletClient()
+
+    if gym_config.simulation_parameters.egl_rendering:
+      self._pybullet_client.loadPlugin('eglRendererPlugin')
+    self._pybullet_client.enable_cns()
+
+    # If enabled, save the performance profile to profiling_path
+    # Use Google Chrome about://tracing to open the file
+    if gym_config.profiling_path is not None:
+      self._profiling_slot = self._pybullet_client.startStateLogging(
+          self._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS,
+          gym_config.profiling_path)
+      self._profiling_counter = 10
+    else:
+      self._profiling_slot = -1
+    # Build the action space. The action space must be compatible with the
+    # robot configuration.
+
+    # The action list contains the name of all actions.
+    # TODO(b/144479707): Allow robots to set the action space automatically.
+
+    action_space = collections.OrderedDict()
+    for robot_name, action in gym_config.actions.items():
+      action_lower_bound = []
+      action_upper_bound = []
+      for action_scalar in action:
+        action_upper_bound.append(action_scalar.upper_bound)
+        action_lower_bound.append(action_scalar.lower_bound)
+      action_space[robot_name] = spaces.Box(
+          np.asarray(action_lower_bound),
+          np.asarray(action_upper_bound),
+          dtype=np.float32)
+    self.action_space = spaces.Dict(action_space)
+
+    # Set the default render options.
+    self._camera_dist = gym_config.simulation_parameters.camera_distance
+    self._camera_yaw = gym_config.simulation_parameters.camera_yaw
+    self._camera_pitch = gym_config.simulation_parameters.camera_pitch
+    self._render_width = gym_config.simulation_parameters.render_width
+    self._render_height = gym_config.simulation_parameters.render_height
+
+    self._hard_reset = True
+    self.reset()
+
+    self._hard_reset = gym_config.simulation_parameters.enable_hard_reset
+
+    # Construct the observation space from the list of sensors. Note that we
+    # will reconstruct the observation_space after the robot is created.
+    self.observation_space = (
+        space_utils.convert_sensors_to_gym_space_dictionary(self.all_sensors()))
+
+  def close(self):
+    if self._log_path is not None:
+      self._logging.save_episode(self._episode_proto)
+
+    for robot in self._robots:
+      robot.Terminate()
+
+  def all_sensors(self):
+    """Returns all robot and environmental sensors."""
+    robot_sensors = []
+    for robot in self._robots:
+      robot_sensors += robot.GetAllSensors()
+    return robot_sensors + self._sensors
+
+  @gin.configurable('multiagent_mobility_gym_env.MultiagentMobilityGymEnv.reset'
+                   )
+  def reset(self,
+            initial_motor_angles=None,
+            reset_duration=1.0,
+            reset_visualization_camera=True):
+    """Resets the robot's position in the world or rebuild the sim world.
+
+    The simulation world will be rebuilt if self._hard_reset is True.
+
+    Args:
+      initial_motor_angles: A list of Floats. The desired joint angles after
+        reset. If None, the robot will use its built-in value.
+      reset_duration: Float. The time (in seconds) needed to rotate all motors
+        to the desired initial values.
+      reset_visualization_camera: Whether to reset debug visualization camera on
+        reset.
+
+    Returns:
+      A numpy array contains the initial observation after reset.
+    """
+    if self._is_render:
+      self._pybullet_client.configureDebugVisualizer(
+          self._pybullet_client.COV_ENABLE_RENDERING, 0)
+
+    # Clear the simulation world and rebuild the robot interface.
+    if self._hard_reset:
+      self._pybullet_client.resetSimulation()
+      self._pybullet_client.setPhysicsEngineParameter(
+          numSolverIterations=self._num_bullet_solver_iterations)
+      self._pybullet_client.setTimeStep(self._sim_time_step)
+      self._pybullet_client.setGravity(0, 0, -10)
+
+      # Rebuild the world.
+      self._scene.build_scene(self._pybullet_client)
+
+      # Rebuild the robots
+      # TODO(b/144545080): Make this scale to more than two agents
+      # Have multiple robot classes as a list.
+      self._robots = []
+      for robot_class in self._robot_classes:
+
+        self._robots.append(
+            robot_class(
+                pybullet_client=self._pybullet_client,
+                # TODO(rosewang): Remove on rack in multiagent acase
+                on_rack=self._on_rack))
+
+    # Reset the pose of the robot.
+    for robot in self._robots:
+      robot.Reset(
+          reload_urdf=False,
+          default_motor_angles=initial_motor_angles,
+          reset_time=reset_duration)
+
+    self._env_step_counter = 0
+    self._pybullet_client.resetDebugVisualizerCamera(self._camera_dist,
+                                                     self._camera_yaw,
+                                                     self._camera_pitch,
+                                                     [0, 0, 0])
+
+    # Flush the logs to disc and reinitialize the logging system.
+    if self._log_path is not None:
+      self._logging.save_episode(self._episode_proto)
+      self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
+      minitaur_logging.preallocate_episode_proto(self._episode_proto,
+                                                 _LOG_BUFFER_LENGTH,
+                                                 self._robots[0])
+    self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
+    self._env_step_counter = 0
+    if reset_visualization_camera:
+      self._pybullet_client.resetDebugVisualizerCamera(self._camera_dist,
+                                                       self._camera_yaw,
+                                                       self._camera_pitch,
+                                                       [0, 0, 0])
+
+    self._last_action = {
+        robot_name: np.zeros(space.shape)
+        for robot_name, space in self.action_space.spaces.items()
+    }
+
+    if self._is_render:
+      self._pybullet_client.configureDebugVisualizer(
+          self._pybullet_client.COV_ENABLE_RENDERING, 1)
+
+    for s in self.all_sensors():
+      # set name
+      if any([r.name in s.get_name() for r in self.robots]):
+        robot = [r for r in self.robots if r.name in s.get_name()][0]
+        s.set_robot(robot)
+
+    for task in self._tasks:
+      if hasattr(task, 'reset'):
+        task.reset(self)
+    if self._global_task and hasattr(self._global_task, 'reset'):
+      self._global_task.reset(self)
+
+    # Loop over all env randomizers.
+    for env_randomizer in self._env_randomizers:
+      env_randomizer.randomize_env(self)
+
+    for s in self.all_sensors():
+      s.on_reset(self)
+
+    return self._get_observation()
+
+  def get_robot(self, name):
+    for robot in self.robots:
+      if robot.name == name:
+        return robot
+
+  def _reward(self):
+    """Returns a list of rewards.
+
+    Returns:
+      A list of rewards corresponding to each robot and their task.
+    """
+    global_reward = 0
+    if self._global_task:
+      global_reward = self._global_task(self)
+    if self._single_reward:  # Needed for tfagents compatibility.
+      if self._tasks:
+        return min([task(self) + global_reward for task in self._tasks])
+      return 0
+    else:
+      if self._tasks:
+        return [task(self) + global_reward for task in self._tasks]
+      return [0 for _ in self.robots]
+
+  def step(self, actions):
+    """Step forward the simulation, given the actions.
+
+    Args:
+      actions: A dictionary of actions for all robots. The action for each robot
+        can be joint angles for legged platforms like Laikago, and base
+        velocity/steering for kinematic robots such like Fetch.
+
+    Returns:
+      observations: The observation dictionary. The keys are the sensor names
+        and the values are the sensor readings.
+      reward: The reward for the current state-action pair.
+      done: Whether the episode has ended.
+      info: A dictionary that stores diagnostic information.
+
+    Raises:
+      ValueError: The action dimension is not the same as the number of motors.
+      ValueError: The magnitude of actions is out of bounds.
+    """
+    self._last_base_position = [
+        robot.GetBasePosition() for robot in self._robots
+    ]
+    self._last_action = actions
+
+    if self._is_render:
+      # Sleep, otherwise the computation takes less time than real time,
+      # which will make the visualization like a fast-forward video.
+      time_spent = time.time() - self._last_frame_time
+      self._last_frame_time = time.time()
+      time_to_sleep = self._env_time_step - time_spent
+      if time_to_sleep > 0:
+        time.sleep(time_to_sleep)
+      camera_target = np.mean(
+          [robot.GetBasePosition() for robot in self._robots], axis=0)
+
+      # Also keep the previous orientation of the camera set by the user.
+      [yaw, pitch,
+       dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
+      self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch,
+                                                       camera_target)
+
+    for env_randomizer in self._env_randomizers:
+      env_randomizer.randomize_step(self)
+
+    # Stepping broken down into their parts
+    for robot in self._robots:
+      robot.PreStepPerStep(actions)
+
+    for _ in range(self._num_action_repeat):
+      for robot in self._robots:
+        robot.PreStepPerActionRepeat(actions)
+
+      self._pybullet_client.stepSimulation()
+
+      for robot in self._robots:
+        robot.PostStepPerActionRepeat()
+
+    for robot in self._robots:
+      robot.PostStepPerStep()
+
+    if self._profiling_slot >= 0:
+      self._profiling_counter -= 1
+      if self._profiling_counter == 0:
+        self._pybullet_client.stopStateLogging(self._profiling_slot)
+    if self._log_path is not None:
+      minitaur_logging.update_episode_proto(self._episode_proto,
+                                            self._robots[0], actions,
+                                            self._env_step_counter)
+    reward = self._reward()
+
+    for s in self.all_sensors():
+      s.on_step(self)
+
+    for task in self._tasks:
+      if hasattr(task, 'update'):
+        task.update(self)
+    if self._global_task and hasattr(self._global_task, 'update'):
+      self._global_task.update(self)
+
+    done = self._termination()
+    self._env_step_counter += 1
+    if done:
+      for robot in self._robots:
+        robot.Terminate()
+    return self._get_observation(), reward, done, {}
+
+  def render(self, mode='rgb_array'):
+    if mode != 'rgb_array':
+      raise ValueError('Unsupported render mode:{}'.format(mode))
+    base_pos = np.mean([robot.GetBasePosition() for robot in self._robots],
+                       axis=0)
+    view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._camera_dist,
+        yaw=self._camera_yaw,
+        pitch=self._camera_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
+        fov=60,
+        aspect=float(self._render_width) / self._render_height,
+        nearVal=0.1,
+        farVal=100.0)
+    (_, _, px, _, _) = self._pybullet_client.getCameraImage(
+        width=self._render_width,
+        height=self._render_height,
+        renderer=self._pybullet_client.ER_BULLET_HARDWARE_OPENGL,
+        viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
+  def _termination(self):
+    if not all([robot.is_safe for robot in self._robots]):
+      return True
+
+    if self._tasks:
+      return (self._global_task and self._global_task.done(self)) or any(
+          [task.done(self) for task in self._tasks])
+
+    for s in self.all_sensors():
+      s.on_terminate(self)
+
+    return False
+
+  def set_time_step(self, num_action_repeat, sim_step=0.001):
+    """Sets the time step of the environment.
+
+    Args:
+      num_action_repeat: The number of simulation steps/action repeats to be
+        executed when calling env.step().
+      sim_step: The simulation time step in PyBullet. By default, the simulation
+        step is 0.001s, which is a good trade-off between simulation speed and
+        accuracy.
+
+    Raises:
+      ValueError: If the num_action_repeat is less than 1.
+    """
+    if num_action_repeat < 1:
+      raise ValueError('number of action repeats should be at least 1.')
+    self._sim_time_step = sim_step
+    self._num_action_repeat = num_action_repeat
+    self._env_time_step = sim_step * num_action_repeat
+    self._num_bullet_solver_iterations = (
+        _NUM_SIMULATION_ITERATION_STEPS / self._num_action_repeat)
+    self._pybullet_client.setPhysicsEngineParameter(
+        numSolverIterations=self._num_bullet_solver_iterations)
+    self._pybullet_client.setTimeStep(self._sim_time_step)
+    for robot in self._robots:
+      robot.SetTimeSteps(self._num_action_repeat, self._sim_time_step)
+
+  def get_time_since_reset(self):
+    """Get the time passed (in seconds) since the last reset.
+
+    Returns:
+      List of time in seconds since the last reset for each robot.
+    """
+    return self._robots[0].GetTimeSinceReset()
+
+  @property
+  def tasks(self):
+    return self._tasks
+
+  @property
+  def robots(self):
+    return self._robots
+
+  @property
+  def num_robots(self):
+    return len(self._robots)
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/scene_base.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/scene_base.py
@@ -0,0 +1,256 @@
+# Lint as: python3
+"""Class for loading and managing a scene in pybullet."""
+
+import enum
+from typing import Any, Dict, List, Optional, Sequence, Text
+import gin
+import numpy as np
+
+from pybullet_envs.minitaur.envs_v2.scenes import world_asset_pb2
+from pybullet_envs.minitaur.envs_v2 import base_client
+from pybullet_utils import bullet_client
+from pybullet_envs.minitaur.robots import autonomous_object
+
+
+
+# The 2D coordinates of the corners of a polygon. The corners are specified in
+# counterclock-wise direction.
+Polygon = Sequence[Sequence[float]]
+
+
+class ObjectType(enum.Enum):
+  """Categories of objects that may be found in a scene."""
+  OTHER = 0
+  GROUND = 1
+  OBSTACLE = 2
+  GOAL = 3
+  DYNAMIC_OBJECT = 4
+
+
+@gin.configurable
+class SceneBase(object):
+  """Class for loading and managing a scene."""
+
+  def __init__(
+      self,
+      data_root: Text = None,
+      dynamic_objects: Sequence[autonomous_object.AutonomousObject] = ()):
+    """Initializes SceneBase.
+
+    Args:
+      data_root: Root directory for finding object models.
+      dynamic_objects: Dynamic objects to be added into the scene.
+      crowd_builders: Builders of crowds formed by autonomous objects.
+    """
+    self._pybullet_client = None
+    self._data_root = data_root
+    temp_dynamic_objects = list(dynamic_objects)
+    
+    self._dynamic_objects = tuple(temp_dynamic_objects)
+    # Dictionaries and world_asset are declared outside init to make sure they
+    # are all reset in _reset_scene_tracking().
+    self._reset_scene_tracking()
+
+  def _reset_scene_tracking(self):
+    """Clears all scene dicts. Used when the simulation is reset."""
+    self._type_to_ids_dict = {object_type: [] for object_type in ObjectType}
+    self._type_to_objects_dict = {object_type: [] for object_type in ObjectType}
+    self._id_to_type_dict = {}
+    self._id_to_object_dict = {}
+    self._world_asset = None
+
+  def destroy_scene(self):
+    """Destroys contents of scene to get ready for another build_scene call."""
+    id_to_remove = list(self._id_to_object_dict.keys())
+    for object_id in id_to_remove:
+      self.remove_object(object_id)
+    self._reset_scene_tracking()
+
+  def build_scene(self, pybullet_client: bullet_client.BulletClient):
+    """Loads and positions all scene objects in pybullet.
+
+    Override this function in subclass to implement customized scene. The
+    overriding function must call base function first.
+
+    Args:
+      pybullet_client: A pybullet client in which the scene will be built.
+    """
+    self._reset_scene_tracking()
+    self._pybullet_client = pybullet_client
+    self._init_dynamic_objects()
+
+  def reset(self):
+    """The soft reset of scene.
+
+    Unlike "build_scene", this is called at each env.reset() before robot
+    resetting. Typically we use this API to do some soft resetting like putting
+    objects back to its place. Howevever, for special cases such as P2P multimap
+    training, we can reload a different mesh scene once a while.
+    """
+    pass
+    
+  def _init_dynamic_objects(self):
+    """Adds dynamic objects to scene."""
+    for an_object in self._dynamic_objects:
+      an_object.set_sim_client(self._pybullet_client)
+      self.add_object(an_object.sim_object_id, ObjectType.DYNAMIC_OBJECT,
+                      an_object)
+
+  @property
+  def pybullet_client(self) -> bullet_client.BulletClient:
+    if self._pybullet_client is None:
+      raise ValueError("pybullet_client is None; did you call build_scene()?")
+    return self._pybullet_client
+
+  @property
+  def ground_height(self) -> float:
+    """Returns ground height of the scene."""
+    return 0.0
+
+  @property
+  def ground_ids(self) -> List[int]:
+    """Returns the pybullet ids of the ground."""
+    return self._type_to_ids_dict[ObjectType.GROUND]
+
+  @property
+  def obstacle_ids(self) -> List[int]:
+    """Returns the pybullet ids of all obstacles in the scene."""
+    return self._type_to_ids_dict[ObjectType.OBSTACLE]
+
+  @property
+  def goal_ids(self) -> List[int]:
+    """Returns the pybullet ids of any goals in the scene."""
+    return self._type_to_ids_dict[ObjectType.GOAL]
+
+  @property
+  def dynamic_object_ids(self) -> List[int]:
+    """Returns the pybullet ids of dynamic objects."""
+    return self._type_to_ids_dict[ObjectType.DYNAMIC_OBJECT]
+
+  @property
+  def dynamic_objects(self) -> List[autonomous_object.AutonomousObject]:
+    """Returns the dynamic objects python object (AutonomousObject)."""
+    return self._type_to_objects_dict[ObjectType.DYNAMIC_OBJECT]
+
+  @property
+  def world_asset(self) -> world_asset_pb2.WorldAsset:
+    """Returns a proto describing the semantics of the scene.
+
+    If the scene keeps a WorldAsset, then mutating this proto will mutate it for
+    everyone. If the scene generates a WorldAsset from _type_to_ids_dict, then
+    this is not an issue.
+    """
+    if self._world_asset:
+      return self._world_asset
+    return self._dict_to_world_asset(self._type_to_ids_dict)
+
+  def add_object(self,
+                 object_id: int,
+                 class_label: ObjectType,
+                 python_object: Optional[Any] = None):
+    """Adds an object to be tracked.
+
+    Does not load anything into pybullet.
+
+    Args:
+      object_id: objectUniqueId from pybullet.
+      class_label: What type to consider the new object.
+      python_object: Associated python object for the pybullet object of
+        objectUniqueId == object_id. Environment uses the python object to
+        control object in pybullet in these cases. One example is python objects
+        of class label DYNAMIC_OBJECT: they are associated with python objects
+          of type AutonomousObject.
+    """
+    if python_object is not None:
+      if (isinstance(python_object, autonomous_object.AutonomousObject) and
+          python_object.sim_object_id != object_id):
+        raise ValueError(
+            f"Mismatch object ids, object_id = {object_id}, sim_object_id = "
+            f"{python_object.sim_object_id}")
+      self._type_to_objects_dict[class_label].append(python_object)
+    self._type_to_ids_dict[class_label].append(object_id)
+    self._id_to_type_dict[object_id] = class_label
+    self._id_to_object_dict[object_id] = python_object
+
+  def remove_object(self, object_id: int):
+    """Removes an object from tracking and from pybullet.
+
+    Args:
+      object_id: objectUniqueID from pybullet.
+
+    Raises:
+      KeyError: if object_id does not exist in the record.
+    """
+    if object_id not in self._id_to_type_dict.keys():
+      raise KeyError(
+          f"Object with object_id = {object_id} does not exist in the record.")
+
+    self.pybullet_client.removeBody(object_id)
+    object_type = self._id_to_type_dict[object_id]
+    self._type_to_ids_dict[object_type].remove(object_id)
+
+    object_to_remove = self.id_to_object(object_id)
+    if object_to_remove is not None:
+      # Removes item by identity comparison and avoid slow down due to objects
+      # with complex equality comparison function. list.remove() compares
+      # equality instead of identity.
+      for i, an_object in enumerate(self._type_to_objects_dict[object_type]):
+        if an_object is object_to_remove:
+          del self._type_to_objects_dict[object_type][i]
+          break
+    del self._id_to_type_dict[object_id]
+    del self._id_to_object_dict[object_id]
+
+  def id_to_object(self, object_id: int) -> Any:
+    """Returns underlying python object from sim object id.
+
+    Args:
+      object_id: objectUniqueID from pybullet.
+
+    Returns:
+      None is returned if the sim object does not have a corresponding python
+      object.
+    """
+    return self._id_to_object_dict[object_id]
+
+  def _dict_to_world_asset(
+      self, type_to_ids_dict: Dict[ObjectType,
+                                   List[int]]) -> world_asset_pb2.WorldAsset:
+    """Converts a dictionary to a WorldAsset.
+
+    Args:
+      type_to_ids_dict: Dictionary that describes the scene. Keys are
+        ObjectTypes and values are lists of integers, where each integer is a
+        pybullet id for an object of a given type.
+
+    Returns:
+      A WorldAsset proto with the types, locations and bounding boxes of all
+      objects in the scene.
+    """
+    world_asset = world_asset_pb2.WorldAsset()
+    for object_type in type_to_ids_dict.keys():
+      for obj_id in type_to_ids_dict[object_type]:
+        bbox = np.array(self.pybullet_client.getAABB(obj_id))
+        bbox_center = np.mean(bbox, axis=0)
+        bbox_dimensions = bbox[1] - bbox[0]
+
+        obj = world_asset_pb2.Object()
+        obj.id = str(obj_id)
+        obj.label = str(object_type)
+        obj.bounding_box.center.x = bbox_center[0]
+        obj.bounding_box.center.y = bbox_center[1]
+        obj.bounding_box.center.z = bbox_center[2]
+        obj.bounding_box.dimensions.x = bbox_dimensions[0]
+        obj.bounding_box.dimensions.y = bbox_dimensions[1]
+        obj.bounding_box.dimensions.z = bbox_dimensions[2]
+        world_asset.objects.append(obj)
+    return world_asset
+
+  def close(self):
+    """Closes the scene at the end of life cycle of the environment."""
+    pass
+
+  @property
+  def vectorized_map(self) -> Sequence[Polygon]:
+    """Returns vectorized map containing a list of polygon obstacles."""
+    raise NotImplementedError("vectorized_map is not implemented by default.")
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/simple_scene.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/simple_scene.py
@@ -0,0 +1,35 @@
+# Lint as: python3
+"""A scene containing only a planar floor."""
+
+from typing import Sequence
+
+import gin
+from pybullet_envs.minitaur.envs_v2 import base_client
+from pybullet_envs.minitaur.envs_v2.scenes import scene_base
+
+_PLANE_URDF = (
+    "plane.urdf")
+
+
+@gin.configurable
+class SimpleScene(scene_base.SceneBase):
+  """A scene containing only a planar floor."""
+
+  def build_scene(self, pybullet_client):
+    super().build_scene(pybullet_client)
+
+    visual_shape_id = self._pybullet_client.createVisualShape(
+        shapeType=self._pybullet_client.GEOM_PLANE)
+    collision_shape_id = self._pybullet_client.createCollisionShape(
+        shapeType=self._pybullet_client.GEOM_PLANE)
+    ground_id = self._pybullet_client.createMultiBody(
+        baseMass=0,
+        baseCollisionShapeIndex=collision_shape_id,
+        baseVisualShapeIndex=visual_shape_id)
+    self._pybullet_client.changeDynamics(ground_id, -1, lateralFriction=1.0)
+    self.add_object(ground_id, scene_base.ObjectType.GROUND)
+
+  @property
+  def vectorized_map(self) -> Sequence[scene_base.Polygon]:
+    """Returns vectorized map containing a list of polygon obstacles."""
+    return []
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/world_asset.proto
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/world_asset.proto
@@ -0,0 +1,86 @@
+// A set of protocol buffer definitions for representing 'Worlds' for
+// used by simulation engines.
+syntax = "proto3";
+
+package robotics.messages;
+option cc_enable_arenas = true;
+
+
+// A single precision quaternion.
+message QQuaternionf {
+  // The x-component.
+  float x = 1;
+  // The y-component.
+  float y = 2;
+  // The z-component.
+  float z = 3;
+  // The w-component.
+  float w = 4;
+}
+
+
+
+
+// A three-dimensional single precision vector.
+message VVector3f {
+  float x = 1;
+  float y = 2;
+  float z = 3;
+}
+
+
+
+message Labels {
+  map<string, string> label = 1;
+}
+
+// A general bounding box is specified by its center, dimensions and
+// orientation. If the orientation field is not specified, then the bounding box
+// is aligned with the axes of the world coordinate system.
+message GeneralBox3f {
+  VVector3f center = 1;
+  VVector3f dimensions = 2;
+  QQuaternionf orientation = 3;
+}
+
+// An object is specified by its label, bounding box, and a path to an obj
+// file containing its mesh. Not all to be specified.
+message Object {
+  string id = 1;
+  string label = 2;
+  GeneralBox3f bounding_box = 3;
+  string mesh_path = 4;
+}
+
+message Polygon3f {
+  repeated VVector3f vertex = 1;
+}
+
+// A region is specified by its label, spatial extent, and a path to an obj
+// file containing its mesh. Not all to be specified.
+message Region {
+  string id = 1;
+  string label = 2;
+  oneof spatial_extent {
+    GeneralBox3f bounding_box = 3;
+    Polygon3f polygon = 5;
+  }
+  string mesh_path = 6;
+}
+
+// A world consist of a mesh, and potentially a set of object and regions.
+message WorldAsset {
+  // A set of labels for objects. This the space of all object labels.
+  Labels object_labels = 1;
+  repeated Object objects = 2;
+
+  // A set of labels for regions. This the space of all region labels.
+  Labels region_labels = 3;
+  repeated Region regions = 4;
+
+  // A path to a mesh (as obj file) containing the geometry of the world.
+  string mesh_path = 5;
+
+  // A path to a mesh (as obj file) containing the semantic labels of the world.
+  string segmentation_mesh_path = 6;
+}
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/world_asset_pb2.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs_v2/scenes/world_asset_pb2.py
@@ -0,0 +1,555 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: world_asset.proto
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='world_asset.proto',
+  package='robotics.messages',
+  syntax='proto3',
+  serialized_options=b'\370\001\001',
+  create_key=_descriptor._internal_create_key,
+  serialized_pb=b'\n\x11world_asset.proto\x12\x11robotics.messages\":\n\x0cQQuaternionf\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x12\t\n\x01w\x18\x04 \x01(\x02\",\n\tVVector3f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\"k\n\x06Labels\x12\x33\n\x05label\x18\x01 \x03(\x0b\x32$.robotics.messages.Labels.LabelEntry\x1a,\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa4\x01\n\x0cGeneralBox3f\x12,\n\x06\x63\x65nter\x18\x01 \x01(\x0b\x32\x1c.robotics.messages.VVector3f\x12\x30\n\ndimensions\x18\x02 \x01(\x0b\x32\x1c.robotics.messages.VVector3f\x12\x34\n\x0borientation\x18\x03 \x01(\x0b\x32\x1f.robotics.messages.QQuaternionf\"m\n\x06Object\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05label\x18\x02 \x01(\t\x12\x35\n\x0c\x62ounding_box\x18\x03 \x01(\x0b\x32\x1f.robotics.messages.GeneralBox3f\x12\x11\n\tmesh_path\x18\x04 \x01(\t\"9\n\tPolygon3f\x12,\n\x06vertex\x18\x01 \x03(\x0b\x32\x1c.robotics.messages.VVector3f\"\xb2\x01\n\x06Region\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05label\x18\x02 \x01(\t\x12\x37\n\x0c\x62ounding_box\x18\x03 \x01(\x0b\x32\x1f.robotics.messages.GeneralBox3fH\x00\x12/\n\x07polygon\x18\x05 \x01(\x0b\x32\x1c.robotics.messages.Polygon3fH\x00\x12\x11\n\tmesh_path\x18\x06 \x01(\tB\x10\n\x0espatial_extent\"\xfb\x01\n\nWorldAsset\x12\x30\n\robject_labels\x18\x01 \x01(\x0b\x32\x19.robotics.messages.Labels\x12*\n\x07objects\x18\x02 \x03(\x0b\x32\x19.robotics.messages.Object\x12\x30\n\rregion_labels\x18\x03 \x01(\x0b\x32\x19.robotics.messages.Labels\x12*\n\x07regions\x18\x04 \x03(\x0b\x32\x19.robotics.messages.Region\x12\x11\n\tmesh_path\x18\x05 \x01(\t\x12\x1e\n\x16segmentation_mesh_path\x18\x06 \x01(\tB\x03\xf8\x01\x01\x62\x06proto3'
+)
+
+
+
+
+_QQUATERNIONF = _descriptor.Descriptor(
+  name='QQuaternionf',
+  full_name='robotics.messages.QQuaternionf',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='x', full_name='robotics.messages.QQuaternionf.x', index=0,
+      number=1, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='y', full_name='robotics.messages.QQuaternionf.y', index=1,
+      number=2, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='z', full_name='robotics.messages.QQuaternionf.z', index=2,
+      number=3, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='w', full_name='robotics.messages.QQuaternionf.w', index=3,
+      number=4, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=40,
+  serialized_end=98,
+)
+
+
+_VVECTOR3F = _descriptor.Descriptor(
+  name='VVector3f',
+  full_name='robotics.messages.VVector3f',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='x', full_name='robotics.messages.VVector3f.x', index=0,
+      number=1, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='y', full_name='robotics.messages.VVector3f.y', index=1,
+      number=2, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='z', full_name='robotics.messages.VVector3f.z', index=2,
+      number=3, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=100,
+  serialized_end=144,
+)
+
+
+_LABELS_LABELENTRY = _descriptor.Descriptor(
+  name='LabelEntry',
+  full_name='robotics.messages.Labels.LabelEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='robotics.messages.Labels.LabelEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='robotics.messages.Labels.LabelEntry.value', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=b'8\001',
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=209,
+  serialized_end=253,
+)
+
+_LABELS = _descriptor.Descriptor(
+  name='Labels',
+  full_name='robotics.messages.Labels',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='label', full_name='robotics.messages.Labels.label', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[_LABELS_LABELENTRY, ],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=146,
+  serialized_end=253,
+)
+
+
+_GENERALBOX3F = _descriptor.Descriptor(
+  name='GeneralBox3f',
+  full_name='robotics.messages.GeneralBox3f',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='center', full_name='robotics.messages.GeneralBox3f.center', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='dimensions', full_name='robotics.messages.GeneralBox3f.dimensions', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='orientation', full_name='robotics.messages.GeneralBox3f.orientation', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=256,
+  serialized_end=420,
+)
+
+
+_OBJECT = _descriptor.Descriptor(
+  name='Object',
+  full_name='robotics.messages.Object',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='id', full_name='robotics.messages.Object.id', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='label', full_name='robotics.messages.Object.label', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='bounding_box', full_name='robotics.messages.Object.bounding_box', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='mesh_path', full_name='robotics.messages.Object.mesh_path', index=3,
+      number=4, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=422,
+  serialized_end=531,
+)
+
+
+_POLYGON3F = _descriptor.Descriptor(
+  name='Polygon3f',
+  full_name='robotics.messages.Polygon3f',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='vertex', full_name='robotics.messages.Polygon3f.vertex', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=533,
+  serialized_end=590,
+)
+
+
+_REGION = _descriptor.Descriptor(
+  name='Region',
+  full_name='robotics.messages.Region',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='id', full_name='robotics.messages.Region.id', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='label', full_name='robotics.messages.Region.label', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='bounding_box', full_name='robotics.messages.Region.bounding_box', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='polygon', full_name='robotics.messages.Region.polygon', index=3,
+      number=5, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='mesh_path', full_name='robotics.messages.Region.mesh_path', index=4,
+      number=6, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+    _descriptor.OneofDescriptor(
+      name='spatial_extent', full_name='robotics.messages.Region.spatial_extent',
+      index=0, containing_type=None,
+      create_key=_descriptor._internal_create_key,
+    fields=[]),
+  ],
+  serialized_start=593,
+  serialized_end=771,
+)
+
+
+_WORLDASSET = _descriptor.Descriptor(
+  name='WorldAsset',
+  full_name='robotics.messages.WorldAsset',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  create_key=_descriptor._internal_create_key,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='object_labels', full_name='robotics.messages.WorldAsset.object_labels', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='objects', full_name='robotics.messages.WorldAsset.objects', index=1,
+      number=2, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='region_labels', full_name='robotics.messages.WorldAsset.region_labels', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='regions', full_name='robotics.messages.WorldAsset.regions', index=3,
+      number=4, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='mesh_path', full_name='robotics.messages.WorldAsset.mesh_path', index=4,
+      number=5, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='segmentation_mesh_path', full_name='robotics.messages.WorldAsset.segmentation_mesh_path', index=5,
+      number=6, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=b"".decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=774,
+  serialized_end=1025,
+)
+
+_LABELS_LABELENTRY.containing_type = _LABELS
+_LABELS.fields_by_name['label'].message_type = _LABELS_LABELENTRY
+_GENERALBOX3F.fields_by_name['center'].message_type = _VVECTOR3F
+_GENERALBOX3F.fields_by_name['dimensions'].message_type = _VVECTOR3F
+_GENERALBOX3F.fields_by_name['orientation'].message_type = _QQUATERNIONF
+_OBJECT.fields_by_name['bounding_box'].message_type = _GENERALBOX3F
+_POLYGON3F.fields_by_name['vertex'].message_type = _VVECTOR3F
+_REGION.fields_by_name['bounding_box'].message_type = _GENERALBOX3F
+_REGION.fields_by_name['polygon'].message_type = _POLYGON3F
+_REGION.oneofs_by_name['spatial_extent'].fields.append(
+  _REGION.fields_by_name['bounding_box'])
+_REGION.fields_by_name['bounding_box'].containing_oneof = _REGION.oneofs_by_name['spatial_extent']
+_REGION.oneofs_by_name['spatial_extent'].fields.append(
+  _REGION.fields_by_name['polygon'])
+_REGION.fields_by_name['polygon'].containing_oneof = _REGION.oneofs_by_name['spatial_extent']
+_WORLDASSET.fields_by_name['object_labels'].message_type = _LABELS
+_WORLDASSET.fields_by_name['objects'].message_type = _OBJECT
+_WORLDASSET.fields_by_name['region_labels'].message_type = _LABELS
+_WORLDASSET.fields_by_name['regions'].message_type = _REGION
+DESCRIPTOR.message_types_by_name['QQuaternionf'] = _QQUATERNIONF
+DESCRIPTOR.message_types_by_name['VVector3f'] = _VVECTOR3F
+DESCRIPTOR.message_types_by_name['Labels'] = _LABELS
+DESCRIPTOR.message_types_by_name['GeneralBox3f'] = _GENERALBOX3F
+DESCRIPTOR.message_types_by_name['Object'] = _OBJECT
+DESCRIPTOR.message_types_by_name['Polygon3f'] = _POLYGON3F
+DESCRIPTOR.message_types_by_name['Region'] = _REGION
+DESCRIPTOR.message_types_by_name['WorldAsset'] = _WORLDASSET
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+QQuaternionf = _reflection.GeneratedProtocolMessageType('QQuaternionf', (_message.Message,), {
+  'DESCRIPTOR' : _QQUATERNIONF,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.QQuaternionf)
+  })
+_sym_db.RegisterMessage(QQuaternionf)
+
+VVector3f = _reflection.GeneratedProtocolMessageType('VVector3f', (_message.Message,), {
+  'DESCRIPTOR' : _VVECTOR3F,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.VVector3f)
+  })
+_sym_db.RegisterMessage(VVector3f)
+
+Labels = _reflection.GeneratedProtocolMessageType('Labels', (_message.Message,), {
+
+  'LabelEntry' : _reflection.GeneratedProtocolMessageType('LabelEntry', (_message.Message,), {
+    'DESCRIPTOR' : _LABELS_LABELENTRY,
+    '__module__' : 'world_asset_pb2'
+    # @@protoc_insertion_point(class_scope:robotics.messages.Labels.LabelEntry)
+    })
+  ,
+  'DESCRIPTOR' : _LABELS,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.Labels)
+  })
+_sym_db.RegisterMessage(Labels)
+_sym_db.RegisterMessage(Labels.LabelEntry)
+
+GeneralBox3f = _reflection.GeneratedProtocolMessageType('GeneralBox3f', (_message.Message,), {
+  'DESCRIPTOR' : _GENERALBOX3F,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.GeneralBox3f)
+  })
+_sym_db.RegisterMessage(GeneralBox3f)
+
+Object = _reflection.GeneratedProtocolMessageType('Object', (_message.Message,), {
+  'DESCRIPTOR' : _OBJECT,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.Object)
+  })
+_sym_db.RegisterMessage(Object)
+
+Polygon3f = _reflection.GeneratedProtocolMessageType('Polygon3f', (_message.Message,), {
+  'DESCRIPTOR' : _POLYGON3F,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.Polygon3f)
+  })
+_sym_db.RegisterMessage(Polygon3f)
+
+Region = _reflection.GeneratedProtocolMessageType('Region', (_message.Message,), {
+  'DESCRIPTOR' : _REGION,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.Region)
+  })
+_sym_db.RegisterMessage(Region)
+
+WorldAsset = _reflection.GeneratedProtocolMessageType('WorldAsset', (_message.Message,), {
+  'DESCRIPTOR' : _WORLDASSET,
+  '__module__' : 'world_asset_pb2'
+  # @@protoc_insertion_point(class_scope:robotics.messages.WorldAsset)
+  })
+_sym_db.RegisterMessage(WorldAsset)
+
+
+DESCRIPTOR._options = None
+_LABELS_LABELENTRY._options = None
+# @@protoc_insertion_point(module_scope)
--- a/Show More
+++ b/Show More