% Corrado & Hanna (2023), arXiv preprint 2311.08290.
% The arXiv identifier is kept in `journal` for classic BibTeX styles and
% duplicated in `eprint`/`archiveprefix` for eprint-aware styles.
@article{corrado2023policy,
  author        = {Corrado, Nicholas E. and Hanna, Josiah P.},
  title         = {On-Policy Policy Gradient Reinforcement Learning Without On-Policy Sampling},
  journal       = {arXiv preprint arXiv:2311.08290},
  year          = {2023},
  eprint        = {2311.08290},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2311.08290},
}