1 change: 1 addition & 0 deletions CONTRIBUTORS.md
@@ -47,6 +47,7 @@ Guidelines for modifications:
* Yutong Liang
* Yuyang Li
* Zhigen Zhao
* Yi Zhang

## Acknowledgements

@@ -106,6 +106,39 @@ def joint_pos_limits(env: EnvTypes, env_states: TensorState) -> torch.Tensor:
return torch.sum(out_of_limits, dim=1)


def joint_effort_limits(env: EnvTypes, env_states: TensorState, soft_limit_factor: float = 1.0) -> torch.Tensor:
    """Penalize joint efforts that exceed the soft torque limits."""
    robot_state = env_states.robots[env.name]
    effort_target = robot_state.joint_effort_target

    # The simulator may not populate joint_effort_target (None or all zeros), e.g. when
    # manual PD control computes torques outside the state buffer. In that case, recompute
    # the applied effort from the processed actions.
    target_missing = effort_target is None or torch.all(effort_target == 0)
    if target_missing and getattr(env, "manual_pd_on", False) and hasattr(env, "actions"):
        processed_actions = (env.actions * env.action_scale + env.actions_offset).clip(
            -env.action_clip, env.action_clip
        )
        joint_effort = env._compute_effort(processed_actions, env_states)
    elif effort_target is None:
        # Fallback: no effort information available, so this term contributes zero penalty.
        joint_effort = torch.zeros_like(robot_state.joint_pos)
    else:
        joint_effort = effort_target

    out_of_limits = (torch.abs(joint_effort) - env.torque_limits * soft_limit_factor).clip(min=0.0)
    return torch.sum(out_of_limits, dim=1)
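
# A quick numeric sketch of the soft-limit penalty above (illustrative values only, not
# taken from any robot config): with a 100 Nm torque limit and soft_limit_factor = 0.95,
# the penalty starts at 95 Nm and grows linearly beyond it.
#
#   joint_effort  = torch.tensor([[90.0, -120.0, 40.0]])       # (num_envs=1, num_joints=3)
#   torque_limits = torch.tensor([100.0, 100.0, 100.0])
#   out_of_limits = (joint_effort.abs() - torque_limits * 0.95).clip(min=0.0)   # [[0.0, 25.0, 0.0]]
#   penalty       = out_of_limits.sum(dim=1)                                    # [25.0]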


def energy(env: EnvTypes, env_states: TensorState) -> torch.Tensor:
"""Sum |qdot|*|tau| across joints (\"energy\" usage)."""
base = env_states.robots[env.name]
@@ -226,7 +259,7 @@ def feet_gait(
reward += ~(is_stance ^ is_contact[:, i])

if command_name == "base_velocity":
cmd_norm = torch.norm(env.commands_manager.value[:, :2], dim=1)
cmd_norm = torch.norm(env.commands_manager.value[:, :3], dim=1)  # includes lin_vel_x, lin_vel_y, ang_vel_yaw
reward *= (cmd_norm > 0.1).float()
return reward
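
# Note on the gating above: with the assumed base_velocity command layout
# (lin_vel_x, lin_vel_y, ang_vel_yaw, ...), taking the norm over the first three
# components keeps the gait reward active for pure turning commands and only zeroes
# it when the robot is commanded to stand still, e.g.
#
#   commands = torch.tensor([[0.0, 0.0, 0.8]])     # turn in place
#   torch.norm(commands[:, :3], dim=1)             # [0.8] > 0.1, so the reward is kept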

11 changes: 11 additions & 0 deletions roboverse_learn/rl/unitree_rl/configs/cfg_base.py
@@ -95,6 +95,17 @@ class InitialStates:
"right_wrist_roll_joint": -0.15,
},
},
"agibot_a2_dof12": {
"pos": [0.0, 0.0, 0.98],
"default_joint_pos": {
".*_hip_roll": 0.0,
".*_hip_yaw": 0.0,
".*_hip_pitch": -0.115,
".*_tarsus": 0.267,
".*_toe_pitch": -0.152,
".*_toe_roll": 0.0,
},
},
}

initial_states = InitialStates()
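
# The default_joint_pos keys above are regular-expression patterns rather than exact joint
# names. A minimal sketch of how such patterns could be expanded against a robot's actual
# joint list (the joint names below are hypothetical, shown only for illustration):
#
#   import re
#   joint_names = ["left_hip_roll", "right_hip_roll", "left_tarsus", "right_tarsus"]
#   defaults = {".*_hip_roll": 0.0, ".*_tarsus": 0.267}
#   resolved = {
#       name: value
#       for pattern, value in defaults.items()
#       for name in joint_names
#       if re.fullmatch(pattern, name)
#   }
#   # {"left_hip_roll": 0.0, "right_hip_roll": 0.0, "left_tarsus": 0.267, "right_tarsus": 0.267}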
@@ -0,0 +1,191 @@
import math
from metasim.utils import configclass
from roboverse_learn.rl.unitree_rl.configs.cfg_base import BaseEnvCfg
from roboverse_learn.rl.unitree_rl.configs.algorithm import (
RslRlOnPolicyRunnerCfg,
RslRlPpoAlgorithmCfg,
RslRlPpoActorCriticRecurrentCfg,
)
from roboverse_learn.rl.unitree_rl.helper.curriculum_utils import lin_vel_cmd_levels
from metasim.queries import ContactForces
from roboverse_learn.rl.unitree_rl.configs.cfg_randomizers import (
MaterialRandomizer,
MassRandomizer,
)
from roboverse_learn.rl.unitree_rl.configs.callback_funcs import (
termination_funcs,
reset_funcs,
step_funcs,
reward_funcs,
)

@configclass
class WalkAgibotA2Dof12EnvCfg(BaseEnvCfg):
episode_length_s = 20.0
obs_len_history = 1
priv_obs_len_history = 1

control = BaseEnvCfg.Control(
action_scale=0.5,
action_clip=100,
soft_joint_pos_limit_factor=0.95,
decimation=5,
)

@configclass
class RewardsScales:
track_lin_vel_xy = (2.0, {"std": math.sqrt(0.25)})
track_ang_vel_z = (1.0, {"std": math.sqrt(0.25)})
lin_vel_z = -2.0
ang_vel_xy = -0.05
flat_orientation = -1.0
base_height = (-100.0, {"target_height": 0.98})
joint_acc = -1e-7
joint_vel = -0.001
action_rate = -0.2
joint_pos_limits = -5.0
joint_effort_limits = (-0.5, {"soft_limit_factor": 0.95})
is_alive = 0.3
joint_deviation_legs = (
-1.0,
{"joint_names": (".*_hip_roll.*", ".*_hip_yaw.*", ".*_toe_roll.*")},
reward_funcs.joint_deviation_l1,
)
feet_slide = (-0.2, {"body_names": (".*toe_roll.*",)})
feet_clearance = (
1.0,
{
"std": math.sqrt(0.05),
"tanh_mult": 2.0,
"target_height": 0.18,
"body_names": (".*toe_roll.*"),
},
)
feet_gait = (
0.18,
{
"period": 1.0,
"offset": [0.0, 0.5],
"threshold": 0.55,
"body_names": (".*toe_roll.*"),
},
)
energy = -1e-5

rewards = BaseEnvCfg.Rewards(scales=RewardsScales(), only_positive_rewards=True)
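
# The RewardsScales entries above appear to follow three shapes: a bare float scale
# (e.g. lin_vel_z), a (scale, kwargs) tuple whose kwargs are forwarded to the reward term
# (e.g. base_height), and a (scale, kwargs, func) triple that also overrides the default
# reward function (e.g. joint_deviation_legs). A hedged sketch of how a reward manager
# might unpack one entry; the unpacking code is illustrative, not this repo's implementation:
#
#   entry = (-0.5, {"soft_limit_factor": 0.95})                               # e.g. joint_effort_limits
#   entry = entry if isinstance(entry, tuple) else (entry,)
#   scale = entry[0]
#   kwargs = entry[1] if len(entry) > 1 else {}
#   func = entry[2] if len(entry) > 2 else reward_funcs.joint_effort_limits   # default term function
#   # reward += scale * func(env, env_states, **kwargs)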

commands = BaseEnvCfg.Commands(
value=None,
resample=step_funcs.resample_commands,
heading_command=True,
resampling_time=10.0,
rel_standing_envs=0.02,
ranges=BaseEnvCfg.Commands.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-0.5, 0.5),
ang_vel_yaw=(-1.0, 1.0),
heading=(-3.14, 3.14),
),
limit_ranges=BaseEnvCfg.Commands.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-0.5, 0.5),
ang_vel_yaw=(-1.0, 1.0),
heading=(-3.14, 3.14),
),
)
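
# Assumed behaviour of heading_command=True (following the common legged-gym convention;
# not verified against this repo's resample_commands): instead of sampling ang_vel_yaw
# directly, the yaw command is recomputed each step from the heading error, roughly
#
#   heading_error = wrap_to_pi(target_heading - base_yaw)
#   ang_vel_yaw_cmd = torch.clip(0.5 * heading_error, -1.0, 1.0)   # clipped to ranges.ang_vel_yaw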

curriculum = BaseEnvCfg.Curriculum(
enabled=False,
funcs={
"lin_vel_cmd_levels": lin_vel_cmd_levels,
},
)

callbacks_query = {"contact_forces": ContactForces(history_length=3)}
callbacks_setup = {
"material_randomizer": MaterialRandomizer(
obj_name="agibot_a2_dof12",
static_friction_range=(0.1, 1.25),
dynamic_friction_range=(0.1, 1.25),
restitution_range=(0.0, 0.0),
num_buckets=64,
),
"mass_randomizer": MassRandomizer(
obj_name="agibot_a2_dof12",
body_names="base_link",
mass_distribution_params=(-1.0, 3.0),
operation="add",
),
}
callbacks_reset = {
"random_root_state": (
reset_funcs.random_root_state,
{
"pose_range": [
[0., 0., 0, 0, 0, 0],
[0., 0., 0, 0, 0, 0],
],
"velocity_range": [[-0.5] * 6, [0.5] * 6],
},
),
"reset_joints_by_scale": (
reset_funcs.reset_joints_by_scale,
{"position_range": (0.5, 1.5), "velocity_range": (1.0, 1.0)},
),
}
callbacks_post_step = {
"push_robot": (
step_funcs.push_by_setting_velocity,
{
"interval_range_s": (5.0, 5.0),
"velocity_range": [[-1.5, -1.5, 0.0], [1.5, 1.5, 0.0]],
},
)
}
callbacks_terminate = {
"time_out": termination_funcs.time_out,
"undesired_contact": (
termination_funcs.undesired_contact,
{
# TODO(zhangyi): add more undesired contact names here
"contact_names": [
".*base_link.*",
],
"limit_range": 1.0,
},
),
"bad_orientation": (termination_funcs.bad_orientation, {"limit_angle": 0.8}),
"root_height_below_minimum": (termination_funcs.root_height_below_minimum, {"minimum_height": 0.7}),
}

@configclass
class WalkAgibotA2Dof12RslRlTrainCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 50000
save_interval = 100
experiment_name = "" # same as task name
empirical_normalization = False
policy = RslRlPpoActorCriticRecurrentCfg(
init_noise_std=0.8,
actor_hidden_dims=[32],
critic_hidden_dims=[32],
activation="elu",
rnn_type="lstm",
rnn_hidden_dim=64,
rnn_num_layers=1,
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4, # mini batch size = num_envs*nsteps / nminibatches
learning_rate=1.0e-3, # 5.e-4
schedule="adaptive", # could be adaptive, fixed
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
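
# Two hedged notes on the PPO settings above (they describe typical rsl_rl behaviour, not
# anything specific to this config):
#
# * Mini-batch size: with num_steps_per_env = 24 and, say, 4096 parallel envs (an assumed
#   value, not set in this file), each update collects 4096 * 24 = 98304 transitions and
#   splits them into num_mini_batches = 4 batches of 24576 samples.
#
# * schedule="adaptive": rsl_rl's KL-adaptive rule nudges the learning rate toward
#   desired_kl, roughly:
#
#     if kl > 2.0 * desired_kl:
#         learning_rate = max(1e-5, learning_rate / 1.5)
#     elif kl < 0.5 * desired_kl:
#         learning_rate = min(1e-2, learning_rate * 1.5)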