<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Inference Internals - Developers Digest</title>
    <link>https://www.developersdigest.tech/series/inference-internals</link>
    <description>A Developers Digest series on how LLM inference works under the hood.</description>
    <language>en</language>
    <lastBuildDate>Fri, 01 May 2026 15:21:46 GMT</lastBuildDate>
    <atom:link href="https://www.developersdigest.tech/series/inference-internals/feed.xml" rel="self" type="application/rss+xml" />
    <image>
      <url>https://avatars.githubusercontent.com/u/124798203?v=4</url>
      <title>Inference Internals - Developers Digest</title>
      <link>https://www.developersdigest.tech/series/inference-internals</link>
    </image>
    <item>
      <title><![CDATA[KV Caching: A Practical Guide to Optimizing Transformer Inference]]></title>
      <link>https://www.developersdigest.tech/blog/kv-caching-transformer-inference-guide</link>
      <guid isPermaLink="true">https://www.developersdigest.tech/blog/kv-caching-transformer-inference-guide</guid>
      <description><![CDATA[How KV caching speeds up LLM inference - the math, the code, the memory tradeoffs, and when it stops helping. Every dev running local models hits this wall.]]></description>
      <pubDate>Wed, 29 Apr 2026 00:00:00 GMT</pubDate>
      <category>LLM</category>
      <category>Inference</category>
      <category>Optimization</category>
      <category>Hugging Face</category>
      <category>Local Models</category>
      <enclosure url="https://www.developersdigest.tech/images/blog/kv-caching-transformer-inference-guide/hero.webp" length="0" type="image/webp" />
    </item>
  </channel>
</rss>