@COMMENT This file was generated by bib2html.pl <http://www.cs.cmu.edu/~pfr/misc_software/index.html#bib2html> version 0.90
@COMMENT written by Patrick Riley <http://www.cs.cmu.edu/~pfr>
@COMMENT This file came from Peter Stone's publication pages at
@COMMENT http://www.cs.utexas.edu/~pstone/papers
@COMMENT Journal article (Autonomous Robots, volume 50, 2026) introducing ORION: learning vision-based manipulation from a single human video.
@Article{zhu_vision_based_manipulation_auro_2026,
  author   = {Yifeng Zhu and Arisrei Lim and Peter Stone and Yuke Zhu},
  title    = {Vision-based Manipulation from Single Human Video with Open-World Object Graphs},
  journal  = {Autonomous Robots},
  volume   = {50},
  number   = {27},
  year     = {2026},
  abstract = {This work presents an object-centric approach to learning vision-based
manipulation skills from human videos. We investigate the problem of robot
manipulation via imitation in the open-world setting, where a robot learns to
manipulate novel objects from a single video demonstration. We introduce ORION,
an algorithm that tackles the problem by extracting an object-centric
manipulation plan as an Open-World Object Graph from a single RGB or RGB-D video,
and then deriving a policy that conditions on the resulting plan. Our method
enables the robot to learn from videos captured by daily mobile devices and
generalize to deployment environments with varying visual backgrounds, camera
angles, spatial layouts, and novel object instances. We systematically evaluate
our method on both short-horizon and long-horizon tasks, using RGB-D and RGB-only
demonstration videos. Across our real-world evaluations on varied tasks and
demonstration modalities (RGB-D / RGB), we observe an average success rate of
74.4 percent, demonstrating the efficacy of ORION in learning from a single human video
in the open world. Additional materials can be found on the project website.},
}
