<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed listing content tagged "Reinforcement Learning" on Developers Digest.
     The atom prefix is declared on the root so the atom:link element inside the
     channel can be used alongside the plain RSS elements. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata. The link points at the tag's page URL; the description
         states an item count ("1 item") that matches the single item below. -->
    <title>Developers Digest - Reinforcement Learning</title>
    <link>https://www.developersdigest.tech/tags/reinforcement-learning</link>
    <description>1 item tagged Reinforcement Learning on Developers Digest - blog posts, tools, guides, and tutorials.</description>
    <language>en</language>
    <!-- Timestamp in the "Day, DD Mon YYYY HH:MM:SS GMT" form, same as the item's pubDate. -->
    <lastBuildDate>Sun, 03 May 2026 19:03:18 GMT</lastBuildDate>
    <!-- rel="self" marks this href as the feed's own URL (feed.xml under the tag path). -->
    <atom:link href="https://www.developersdigest.tech/tags/reinforcement-learning/feed.xml" rel="self" type="application/rss+xml" />
    <!-- Channel image. Its title and link repeat the channel's title and link exactly. -->
    <image>
      <url>https://avatars.githubusercontent.com/u/124798203?v=4</url>
      <title>Developers Digest - Reinforcement Learning</title>
      <link>https://www.developersdigest.tech/tags/reinforcement-learning</link>
    </image>
    <!-- The only entry in this feed. -->
    <item>
      <!-- Title and description text are wrapped in CDATA sections. -->
      <title><![CDATA[DeepSeek R1, PPO, and GRPO Explained for Devs]]></title>
      <link>https://www.developersdigest.tech/blog/hf-grpo-deepseek-r1</link>
      <!-- The guid is flagged as a permalink and carries the same URL as the link above. -->
      <guid isPermaLink="true">https://www.developersdigest.tech/blog/hf-grpo-deepseek-r1</guid>
      <description><![CDATA[GRPO is suddenly the standard RL recipe for reasoning models. A no-prior-knowledge mental model of PPO, GRPO, and how DeepSeek R1's training works under the hood.]]></description>
      <pubDate>Wed, 29 Apr 2026 00:00:00 GMT</pubDate>
      <!-- One category element per label (presumably the post's tags; confirm against
           the site). The last one is the tag this feed is built around. -->
      <category>DeepSeek</category>
      <category>GRPO</category>
      <category>PPO</category>
      <category>RLHF</category>
      <category>Reinforcement Learning</category>
    </item>
  </channel>
</rss>