1 change: 1 addition & 0 deletions CONTRIBUTORS.md
@@ -47,6 +47,7 @@ Guidelines for modifications:
* Yutong Liang
* Yuyang Li
* Zhigen Zhao
* Yi Zhang

## Acknowledgements

@@ -106,6 +106,39 @@ def joint_pos_limits(env: EnvTypes, env_states: TensorState) -> torch.Tensor:
return torch.sum(out_of_limits, dim=1)


def joint_effort_limits(env: EnvTypes, env_states: TensorState, soft_limit_factor: float = 1.0) -> torch.Tensor:
    """Penalize joint efforts that exceed the soft torque limits."""
    robot_state = env_states.robots[env.name]
    effort_target = robot_state.joint_effort_target

    # The simulator may not populate joint_effort_target (None or all zeros), e.g. when
    # manual PD control computes torques outside the state buffer. In that case, recompute
    # the applied effort from the processed actions.
    target_missing = effort_target is None or torch.all(effort_target == 0)
    if target_missing and getattr(env, "manual_pd_on", False) and hasattr(env, "actions"):
        processed_actions = (env.actions * env.action_scale + env.actions_offset).clip(
            -env.action_clip, env.action_clip
        )
        joint_effort = env._compute_effort(processed_actions, env_states)
    elif effort_target is None:
        # Fallback: no effort information available, so this term contributes zero penalty.
        joint_effort = torch.zeros_like(robot_state.joint_pos)
    else:
        joint_effort = effort_target

    out_of_limits = (torch.abs(joint_effort) - env.torque_limits * soft_limit_factor).clip(min=0.0)
    return torch.sum(out_of_limits, dim=1)
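
# A quick numeric sketch of the soft-limit penalty above (illustrative values only, not
# taken from any robot config): with a 100 Nm torque limit and soft_limit_factor = 0.95,
# the penalty starts at 95 Nm and grows linearly beyond it.
#
#   joint_effort  = torch.tensor([[90.0, -120.0, 40.0]])       # (num_envs=1, num_joints=3)
#   torque_limits = torch.tensor([100.0, 100.0, 100.0])
#   out_of_limits = (joint_effort.abs() - torque_limits * 0.95).clip(min=0.0)   # [[0.0, 25.0, 0.0]]
#   penalty       = out_of_limits.sum(dim=1)                                    # [25.0]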


def energy(env: EnvTypes, env_states: TensorState) -> torch.Tensor:
"""Sum |qdot|*|tau| across joints (\"energy\" usage)."""
base = env_states.robots[env.name]
@@ -226,7 +259,7 @@ def feet_gait(
reward += ~(is_stance ^ is_contact[:, i])

if command_name == "base_velocity":
cmd_norm = torch.norm(env.commands_manager.value[:, :2], dim=1)
cmd_norm = torch.norm(env.commands_manager.value[:, :3], dim=1)  # includes lin_vel_x, lin_vel_y, ang_vel_yaw
reward *= (cmd_norm > 0.1).float()
return reward
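
# Note on the gating above: with the assumed base_velocity command layout
# (lin_vel_x, lin_vel_y, ang_vel_yaw, ...), taking the norm over the first three
# components keeps the gait reward active for pure turning commands and only zeroes
# it when the robot is commanded to stand still, e.g.
#
#   commands = torch.tensor([[0.0, 0.0, 0.8]])     # turn in place
#   torch.norm(commands[:, :3], dim=1)             # [0.8] > 0.1, so the reward is kept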

11 changes: 11 additions & 0 deletions roboverse_learn/rl/unitree_rl/configs/cfg_base.py
@@ -95,6 +95,17 @@ class InitialStates:
"right_wrist_roll_joint": -0.15,
},
},
"agibot_a2_dof12": {
"pos": [0.0, 0.0, 0.98],
"default_joint_pos": {
".*_hip_roll": 0.0,
".*_hip_yaw": 0.0,
".*_hip_pitch": -0.115,
".*_tarsus": 0.267,
".*_toe_pitch": -0.152,
".*_toe_roll": 0.0,
},
},
}

initial_states = InitialStates()
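
# The default_joint_pos keys above are regular-expression patterns rather than exact joint
# names. A minimal sketch of how such patterns could be expanded against a robot's actual
# joint list (the joint names below are hypothetical, shown only for illustration):
#
#   import re
#   joint_names = ["left_hip_roll", "right_hip_roll", "left_tarsus", "right_tarsus"]
#   defaults = {".*_hip_roll": 0.0, ".*_tarsus": 0.267}
#   resolved = {
#       name: value
#       for pattern, value in defaults.items()
#       for name in joint_names
#       if re.fullmatch(pattern, name)
#   }
#   # {"left_hip_roll": 0.0, "right_hip_roll": 0.0, "left_tarsus": 0.267, "right_tarsus": 0.267}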
@@ -0,0 +1,191 @@
import math
from metasim.utils import configclass
from roboverse_learn.rl.unitree_rl.configs.cfg_base import BaseEnvCfg
from roboverse_learn.rl.unitree_rl.configs.algorithm import (
RslRlOnPolicyRunnerCfg,
RslRlPpoAlgorithmCfg,
RslRlPpoActorCriticRecurrentCfg,
)
from roboverse_learn.rl.unitree_rl.helper.curriculum_utils import lin_vel_cmd_levels
from metasim.queries import ContactForces
from roboverse_learn.rl.unitree_rl.configs.cfg_randomizers import (
MaterialRandomizer,
MassRandomizer,
)
from roboverse_learn.rl.unitree_rl.configs.callback_funcs import (
termination_funcs,
reset_funcs,
step_funcs,
reward_funcs,
)

@configclass
class WalkAgibotA2Dof12EnvCfg(BaseEnvCfg):
episode_length_s = 20.0
obs_len_history = 1
priv_obs_len_history = 1

control = BaseEnvCfg.Control(
action_scale=0.5,
action_clip=100,
soft_joint_pos_limit_factor=0.95,
decimation=5,
)

@configclass
class RewardsScales:
track_lin_vel_xy = (2.0, {"std": math.sqrt(0.25)})
track_ang_vel_z = (1.0, {"std": math.sqrt(0.25)})
lin_vel_z = -2.0
ang_vel_xy = -0.05
flat_orientation = -1.0
base_height = (-100.0, {"target_height": 0.98})
joint_acc = -1e-7
joint_vel = -0.001
action_rate = -0.2
joint_pos_limits = -5.0
joint_effort_limits = (-0.5, {"soft_limit_factor": 0.95})
is_alive = 0.3
joint_deviation_legs = (
-1.0,
{"joint_names": (".*_hip_roll.*", ".*_hip_yaw.*", ".*_toe_roll.*")},
reward_funcs.joint_deviation_l1,
)
feet_slide = (-0.2, {"body_names": (".*toe_roll.*",)})
feet_clearance = (
1.0,
{
"std": math.sqrt(0.05),
"tanh_mult": 2.0,
"target_height": 0.18,
"body_names": (".*toe_roll.*"),
},
)
feet_gait = (
0.18,
{
"period": 1.0,
"offset": [0.0, 0.5],
"threshold": 0.55,
"body_names": (".*toe_roll.*"),
},
)
energy = -1e-5

rewards = BaseEnvCfg.Rewards(scales=RewardsScales(), only_positive_rewards=True)
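
# The RewardsScales entries above appear to follow three shapes: a bare float scale
# (e.g. lin_vel_z), a (scale, kwargs) tuple whose kwargs are forwarded to the reward term
# (e.g. base_height), and a (scale, kwargs, func) triple that also overrides the default
# reward function (e.g. joint_deviation_legs). A hedged sketch of how a reward manager
# might unpack one entry; the unpacking code is illustrative, not this repo's implementation:
#
#   entry = (-0.5, {"soft_limit_factor": 0.95})                               # e.g. joint_effort_limits
#   entry = entry if isinstance(entry, tuple) else (entry,)
#   scale = entry[0]
#   kwargs = entry[1] if len(entry) > 1 else {}
#   func = entry[2] if len(entry) > 2 else reward_funcs.joint_effort_limits   # default term function
#   # reward += scale * func(env, env_states, **kwargs)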

commands = BaseEnvCfg.Commands(
value=None,
resample=step_funcs.resample_commands,
heading_command=True,
resampling_time=10.0,
rel_standing_envs=0.02,
ranges=BaseEnvCfg.Commands.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-0.5, 0.5),
ang_vel_yaw=(-1.0, 1.0),
heading=(-3.14, 3.14),
),
limit_ranges=BaseEnvCfg.Commands.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-0.5, 0.5),
ang_vel_yaw=(-1.0, 1.0),
heading=(-3.14, 3.14),
),
)
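
# Assumed behaviour of heading_command=True (following the common legged-gym convention;
# not verified against this repo's resample_commands): instead of sampling ang_vel_yaw
# directly, the yaw command is recomputed each step from the heading error, roughly
#
#   heading_error = wrap_to_pi(target_heading - base_yaw)
#   ang_vel_yaw_cmd = torch.clip(0.5 * heading_error, -1.0, 1.0)   # clipped to ranges.ang_vel_yaw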

curriculum = BaseEnvCfg.Curriculum(
enabled=False,
funcs={
"lin_vel_cmd_levels": lin_vel_cmd_levels,
},
)

callbacks_query = {"contact_forces": ContactForces(history_length=3)}
callbacks_setup = {
"material_randomizer": MaterialRandomizer(
obj_name="agibot_a2_dof12",
static_friction_range=(0.1, 1.25),
dynamic_friction_range=(0.1, 1.25),
restitution_range=(0.0, 0.0),
num_buckets=64,
),
"mass_randomizer": MassRandomizer(
obj_name="agibot_a2_dof12",
body_names="base_link",
mass_distribution_params=(-1.0, 3.0),
operation="add",
),
}
callbacks_reset = {
"random_root_state": (
reset_funcs.random_root_state,
{
"pose_range": [
[0., 0., 0, 0, 0, 0],
[0., 0., 0, 0, 0, 0],
],
"velocity_range": [[-0.5] * 6, [0.5] * 6],
},
),
"reset_joints_by_scale": (
reset_funcs.reset_joints_by_scale,
{"position_range": (0.5, 1.5), "velocity_range": (1.0, 1.0)},
),
}
callbacks_post_step = {
"push_robot": (
step_funcs.push_by_setting_velocity,
{
"interval_range_s": (5.0, 5.0),
"velocity_range": [[-1.5, -1.5, 0.0], [1.5, 1.5, 0.0]],
},
)
}
callbacks_terminate = {
"time_out": termination_funcs.time_out,
"undesired_contact": (
termination_funcs.undesired_contact,
{
# TODO(zhangyi): add more undesired contact names here
"contact_names": [
".*base_link.*",
],
"limit_range": 1.0,
},
),
"bad_orientation": (termination_funcs.bad_orientation, {"limit_angle": 0.8}),
"root_height_below_minimum": (termination_funcs.root_height_below_minimum, {"minimum_height": 0.7}),
}

@configclass
class WalkAgibotA2Dof12RslRlTrainCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 50000
save_interval = 100
experiment_name = "" # same as task name
empirical_normalization = False
policy = RslRlPpoActorCriticRecurrentCfg(
init_noise_std=0.8,
actor_hidden_dims=[32],
critic_hidden_dims=[32],
activation="elu",
rnn_type="lstm",
rnn_hidden_dim=64,
rnn_num_layers=1,
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4, # mini batch size = num_envs*nsteps / nminibatches
learning_rate=1.0e-3, # 5.e-4
schedule="adaptive", # could be adaptive, fixed
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
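
# Two hedged notes on the PPO settings above (they describe typical rsl_rl behaviour, not
# anything specific to this config):
#
# * Mini-batch size: with num_steps_per_env = 24 and, say, 4096 parallel envs (an assumed
#   value, not set in this file), each update collects 4096 * 24 = 98304 transitions and
#   splits them into num_mini_batches = 4 batches of 24576 samples.
#
# * schedule="adaptive": rsl_rl's KL-adaptive rule nudges the learning rate toward
#   desired_kl, roughly:
#
#     if kl > 2.0 * desired_kl:
#         learning_rate = max(1e-5, learning_rate / 1.5)
#     elif kl < 0.5 * desired_kl:
#         learning_rate = min(1e-2, learning_rate * 1.5)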