Real-World Reinforcement Learning via Multi-Fidelity Simulators

Reinforcement learning (RL) can be a tool for designing policies and controllers for robotic systems. However, the cost of real-world samples remains prohibitive as many RL algorithms require a large number of samples before learning useful policies. Simulators are one way to decrease the number of required real-world samples, but imperfect models make deciding when and how to trust samples from a simulator difficult. This project presents a framework, called Multi-Fidelity Reinforcement Learning (MFRL), for efficient RL in a scenario where multiple simulators of a target task are available, each with varying levels of fidelity. The framework is designed to limit the number of samples used in each successively higher-fidelity/cost simulator by allowing a learning agent to choose to run trajectories at the lowest level simulator that will still provide it with useful information. Theoretical proofs of the framework's sample complexity are given and empirical results are demonstrated on a remote controlled car with multiple simulators. The approach enables RL algorithms to find near-optimal policies in a physical robot domain with fewer expensive real-world samples than previous transfer approaches or learning without simulators.


This simple toy domain illustrates the progression of the algorithm. On the right, the 'real' world consists of an agent starting in the lower-left corner of the grid world and trying to find a policy to lead it to the upper-right goal region. Negative reward is accumulated in the puddle. The worlds on the left consist of low- and medium-fidelity models of the real world. The learning agent transitions between levels several times, leveraging the lower-fidelity worlds to learn an optimal policy in the real world while minimizing the steps taken there.


The MFRL framework is demonstrated in this real-world remote-controlled (RC) car domain. The RC car learns a policy for quickly racing around a track by efficiently utilizing two available simulators.


Recently, we have extended the MFRL framework to include domains with continuous representations of the states and actions. Here, an inverted pendulum is balanced by using simulated data as a prior for both the policy parameters and the dynamics model. Using the simulated data leads to learning with 3 times less data than without using a simulator.

Get in Touch

@inproceedings{Michini11_ICRA,
  author = {Michini, Bernard and Redding, Josh and Ure, N. Kemal and Cutler, Mark and How, Jonathan P.},
  title = {Design and Flight Testing of an Autonomous Variable-Pitch Quadrotor},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2011},
  pages = {2978--2979},
  month = may,
  url = {http://markjcutler.com/papers/Michini11_ICRA.pdf}
}
@article{Barrett09_EJLA,
  author = {Barrett, Wayne and Bowcutt, Ryan and Cutler, Mark and Gibelyou, Seth and Owens, Kayla},
  title = {Minimum rank of edge subdivisions of graphs},
  journal = {Electronic Journal of Linear Algebra},
  year = {2009},
  volume = {18},
  pages = {530--563},
  url = {http://markjcutler.com/papers/Barret09_EJLA.pdf}
}
@inproceedings{Chen15_ICRA,
  author = {Chen, Yufan and Cutler, Mark and How, Jonathan P.},
  title = {Decoupled Multiagent Path Planning via Incremental Sequential Convex Programming},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2015},
  address = {Seattle, WA},
  month = may,
  url = {http://markjcutler.com/papers/Chen15_ICRA.pdf}
}
@inproceedings{Chowdhary12_GNC,
  author = {Chowdhary, Girish and Wu, Tongbin and Cutler, Mark and Ure, N. Kemal and How, Jonathan P.},
  title = {Experimental Results of Concurrent Learning Adaptive Controller},
  booktitle = {AIAA Guidance, Navigation, and Control Conference (GNC)},
  year = {2012},
  address = {Minneapolis, MN},
  month = aug,
  pages = {1--14},
  note = {Invited},
  url = {http://markjcutler.com/papers/Chowdhary12_GNC.pdf}
}
@inproceedings{Chowdhary13_ICRA,
  author = {Chowdhary, Girish and Wu, Tongbin and Cutler, Mark and How, Jonathan P.},
  title = {Rapid Transfer of Controllers Between {UAVs} using Learning Based Adaptive Control},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2013},
  pages = {5409--5416},
  month = may,
  address = {Karlsruhe, Germany},
  url = {http://markjcutler.com/papers/Chowdhary13_ICRA.pdf}
}
@inproceedings{Cutler10_GNC,
  author = {Cutler, Mark and McLain, Timothy W. and Beard, Randal W. and Capozzi, Brian},
  title = {Energy Harvesting and Mission Effectiveness for Small Unmanned Aircraft},
  booktitle = {AIAA Guidance, Navigation, and Control Conference (GNC)},
  year = {2010},
  address = {Toronto, Canada},
  month = aug,
  pages = {1--13},
  url = {http://markjcutler.com/papers/Cutler10_GNC.pdf}
}
@inproceedings{Cutler11_GNC,
  author = {Cutler, Mark and Ure, N. Kemal and Michini, Bernard and How, Jonathan P.},
  title = {Comparison of Fixed and Variable Pitch Actuators for Agile Quadrotors},
  booktitle = {AIAA Guidance, Navigation, and Control Conference (GNC)},
  year = {2011},
  address = {Portland, OR},
  month = aug,
  pages = {1--17},
  url = {http://markjcutler.com/papers/Cutler11_GNC.pdf}
}
@inproceedings{Cutler12_GNC,
  author = {Cutler, Mark and How, Jonathan P.},
  title = {Actuator Constrained Trajectory Generation and Control for Variable-Pitch Quadrotors},
  booktitle = {AIAA Guidance, Navigation, and Control Conference (GNC)},
  year = {2012},
  address = {Minneapolis, MN},
  month = aug,
  pages = {1--15},
  url = {http://markjcutler.com/papers/Cutler12_GNC.pdf}
}
@mastersthesis{Cutler12_Masters,
  author = {Cutler, Mark},
  title = {Design and Control of an Autonomous Variable-Pitch Quadrotor Helicopter},
  school = {Massachusetts Institute of Technology, Department of Aeronautics and Astronautics},
  year = {2012},
  month = aug,
  url = {http://markjcutler.com/papers/Cutler12_Masters.pdf}
}
@inproceedings{Cutler13_ICUAS,
  author = {Cutler, Mark and Michini, Bernard and How, Jonathan P.},
  title = {Lightweight Infrared Sensing for Relative Navigation of Quadrotors},
  booktitle = {International Conference on Unmanned Aircraft Systems (ICUAS)},
  year = {2013},
  pages = {1156--1164},
  address = {Atlanta, GA},
  month = may,
  url = {http://markjcutler.com/papers/Cutler13_ICUAS.pdf}
}
@inproceedings{Cutler13_NIPS,
  author = {Cutler, Mark and Walsh, Thomas J. and How, Jonathan P.},
  title = {Reinforcement Learning with Multi-Fidelity Simulators (Poster)},
  booktitle = {NIPS Transfer and Multi-Task Learning Workshop},
  month = dec,
  year = {2013}
}
@inproceedings{Cutler14_ICRA,
  author = {Cutler, Mark and Walsh, Thomas J. and How, Jonathan P.},
  title = {Reinforcement Learning with Multi-Fidelity Simulators},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2014},
  address = {Hong Kong},
  pages = {3888--3895},
  month = jun,
  url = {http://markjcutler.com/papers/Cutler14_ICRA.pdf}
}
@article{Cutler15_TRO,
  author = {Cutler, Mark and Walsh, Thomas J. and How, Jonathan P.},
  journal = {IEEE Transactions on Robotics},
  title = {Real-World Reinforcement Learning via Multifidelity Simulators},
  year = {2015},
  month = jun,
  volume = {31},
  number = {3},
  pages = {655--671},
  url = {http://markjcutler.com/papers/Cutler15_TRO.pdf}
}
@article{Cutler15_DSMC,
  author = {Cutler, Mark and How, Jonathan P.},
  journal = {ASME Journal of Dynamic Systems, Measurement and Control},
  title = {Analysis and Control of a Variable-Pitch Quadrotor for Agile Flight},
  year = {2015},
  month = oct,
  volume = {137},
  number = {10},
  pages = {101002--101002-14},
  url = {http://markjcutler.com/papers/Cutler15_DSMC.pdf}
}
@inproceedings{Cutler15_ICRA,
  author = {Cutler, Mark and How, Jonathan P.},
  title = {Efficient Reinforcement Learning for Robots using Informative Simulated Priors},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2015},
  address = {Seattle, WA},
  month = may,
  url = {http://markjcutler.com/papers/Cutler15_ICRA.pdf}
}
@inproceedings{Michini13_ICRA,
  author = {Michini, Bernard and Cutler, Mark and How, Jonathan P.},
  title = {Scalable Reward Learning from Demonstration},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2013},
  month = may,
  address = {Karlsruhe, Germany},
  pages = {303--308},
  url = {http://markjcutler.com/papers/Michini13_ICRA.pdf}
}
@inproceedings{Thomson09_ASM,
  author = {Thomson, Scott L. and Mattson, Christopher A. and Colton, Mark B. and Harston, Stephen P. and Carlson, Daniel C. and Cutler, Mark},
  title = {Experiment-Based Optimization of Flapping Wing Kinematics},
  booktitle = {AIAA Proceedings of the 47th Aerospace Sciences Meeting},
  year = {2009},
  month = jan,
  pages = {1--8},
  url = {http://markjcutler.com/papers/Thomson09_ASM.pdf}
}
@inproceedings{Ure13_ICUAS,
  author = {Ure, N. Kemal and Chowdhary, Girish and Chen, Yu Fan and Cutler, Mark and How, Jonathan P. and Vian, John},
  title = {Decentralized Learning based Planning Multiagent Missions in Presence of Actuator Failures},
  booktitle = {International Conference on Unmanned Aircraft Systems (ICUAS)},
  year = {2013},
  address = {Atlanta, GA},
  month = may,
  pages = {1125--1134},
  url = {http://markjcutler.com/papers/Ure13_ICUAS.pdf}
}