<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed listing Developers Digest blog articles tagged RLHF. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>RLHF - Developers Digest</title>
    <link>https://www.developersdigest.tech/blog/tags/rlhf</link>
    <description>Articles about RLHF on Developers Digest</description>
    <language>en</language>
    <lastBuildDate>Sun, 03 May 2026 17:43:18 GMT</lastBuildDate>
    <!-- Self-reference: the URL of this feed document. -->
    <atom:link href="https://www.developersdigest.tech/blog/tags/rlhf/feed.xml" rel="self" type="application/rss+xml" />
    <item>
      <title><![CDATA[DeepSeek R1, PPO, and GRPO Explained for Devs]]></title>
      <link>https://www.developersdigest.tech/blog/hf-grpo-deepseek-r1</link>
      <guid isPermaLink="true">https://www.developersdigest.tech/blog/hf-grpo-deepseek-r1</guid>
      <description><![CDATA[GRPO is suddenly the standard RL recipe for reasoning models. A no-prior-knowledge mental model of PPO, GRPO, and how DeepSeek R1's training works under the hood.]]></description>
      <pubDate>Wed, 29 Apr 2026 00:00:00 GMT</pubDate>
      <category>DeepSeek</category>
      <category>GRPO</category>
      <category>PPO</category>
      <category>RLHF</category>
      <category>Reinforcement Learning</category>
      <!-- RSS 2.0 requires url, length and type on an enclosure. The image's byte size
           is not recorded in this feed, so length is 0, the conventional placeholder
           for an unknown size. TODO: have the generator emit the real byte size. -->
      <enclosure url="https://www.developersdigest.tech/images/infographics/ai-coding-models-comparison.webp" length="0" type="image/webp" />
    </item>
  </channel>
</rss>