[{"data":1,"prerenderedAt":380},["ShallowReactive",2],{"tidbit-llm-sampling-params":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"body":11,"_type":374,"_id":375,"_source":376,"_file":377,"_stem":378,"_extension":379},"/tidbits/llm-sampling-params","tidbits",false,"","LLM Sampling Parameters","Temperature, top_p, top_k—the knobs that shape every LLM response. Understand what each one actually does before you reach for them.","2026-04-23",{"type":12,"children":13,"toc":371},"root",[14,31,40,45,50,54,63,68,106,116,136,155,163,183,214,222,227,237,262,267,275,296,304,309,317,366],{"type":15,"tag":16,"props":17,"children":18},"element","p",{},[19,22,29],{"type":20,"value":21},"text","Every LLM call has a handful of sampling knobs. Most people only touch ",{"type":15,"tag":23,"props":24,"children":26},"code",{"className":25},[],[27],{"type":20,"value":28},"temperature",{"type":20,"value":30},". Here's what the rest actually do.",{"type":15,"tag":16,"props":32,"children":33},{},[34],{"type":15,"tag":35,"props":36,"children":37},"strong",{},[38],{"type":20,"value":39},"The core idea",{"type":15,"tag":16,"props":41,"children":42},{},[43],{"type":20,"value":44},"At each step, the model outputs a probability distribution over the next token. Sampling parameters decide how that distribution gets turned into an actual choice.",{"type":15,"tag":16,"props":46,"children":47},{},[48],{"type":20,"value":49},"Play with it yourself — this is a fixed distribution over plausible next tokens for \"The cat sat on the ___\". \nThe knobs reshape and filter the distribution before the model rolls the dice.",{"type":15,"tag":51,"props":52,"children":53},"token-sampler",{},[],{"type":15,"tag":16,"props":55,"children":56},{},[57,61],{"type":15,"tag":35,"props":58,"children":59},{},[60],{"type":20,"value":28},{"type":20,"value":62}," (usually 0.0–2.0)",{"type":15,"tag":16,"props":64,"children":65},{},[66],{"type":20,"value":67},"Rescales the distribution before sampling. Low values sharpen it (more deterministic, picks likely tokens); high values flatten it (more creative, picks rarer tokens).",{"type":15,"tag":69,"props":70,"children":71},"ul",{},[72,84,95],{"type":15,"tag":73,"props":74,"children":75},"li",{},[76,82],{"type":15,"tag":23,"props":77,"children":79},{"className":78},[],[80],{"type":20,"value":81},"0.0",{"type":20,"value":83}," — greedy, always picks the top token. Good for extraction, classification, structured output.",{"type":15,"tag":73,"props":85,"children":86},{},[87,93],{"type":15,"tag":23,"props":88,"children":90},{"className":89},[],[91],{"type":20,"value":92},"0.7",{"type":20,"value":94}," — a common default. Balanced.",{"type":15,"tag":73,"props":96,"children":97},{},[98,104],{"type":15,"tag":23,"props":99,"children":101},{"className":100},[],[102],{"type":20,"value":103},"1.0+",{"type":20,"value":105}," — creative writing, brainstorming. Above ~1.3 things get weird fast.",{"type":15,"tag":16,"props":107,"children":108},{},[109,114],{"type":15,"tag":35,"props":110,"children":111},{},[112],{"type":20,"value":113},"top_p",{"type":20,"value":115}," (nucleus sampling, 0.0–1.0)",{"type":15,"tag":16,"props":117,"children":118},{},[119,121,126,128,134],{"type":20,"value":120},"Keeps only the smallest set of tokens whose cumulative probability meets ",{"type":15,"tag":23,"props":122,"children":124},{"className":123},[],[125],{"type":20,"value":16},{"type":20,"value":127},", then samples from that set. \n",{"type":15,"tag":23,"props":129,"children":131},{"className":130},[],[132],{"type":20,"value":133},"top_p=0.9",{"type":20,"value":135}," means \"consider the top 90% of probability mass.\"",{"type":15,"tag":16,"props":137,"children":138},{},[139,141,146,148,153],{"type":20,"value":140},"Prefer ",{"type":15,"tag":23,"props":142,"children":144},{"className":143},[],[145],{"type":20,"value":113},{"type":20,"value":147}," over ",{"type":15,"tag":23,"props":149,"children":151},{"className":150},[],[152],{"type":20,"value":28},{"type":20,"value":154}," when you want creativity without the long tail of garbage tokens.",{"type":15,"tag":16,"props":156,"children":157},{},[158],{"type":15,"tag":35,"props":159,"children":160},{},[161],{"type":20,"value":162},"top_k",{"type":15,"tag":16,"props":164,"children":165},{},[166,168,174,176,181],{"type":20,"value":167},"Keeps only the top ",{"type":15,"tag":23,"props":169,"children":171},{"className":170},[],[172],{"type":20,"value":173},"k",{"type":20,"value":175}," tokens by probability. Blunter than ",{"type":15,"tag":23,"props":177,"children":179},{"className":178},[],[180],{"type":20,"value":113},{"type":20,"value":182}," but predictable. Not exposed by all providers (Anthropic has it; OpenAI doesn't).",{"type":15,"tag":16,"props":184,"children":185},{},[186,191,193,198,200,206,207,212],{"type":15,"tag":35,"props":187,"children":188},{},[189],{"type":20,"value":190},"Rule of thumb: tune one, not all three.",{"type":20,"value":192}," They interact in confusing ways. \nPick ",{"type":15,"tag":23,"props":194,"children":196},{"className":195},[],[197],{"type":20,"value":28},{"type":20,"value":199}," ",{"type":15,"tag":201,"props":202,"children":203},"em",{},[204],{"type":20,"value":205},"or",{"type":20,"value":199},{"type":15,"tag":23,"props":208,"children":210},{"className":209},[],[211],{"type":20,"value":113},{"type":20,"value":213}," and leave the other at its default.",{"type":15,"tag":16,"props":215,"children":216},{},[217],{"type":15,"tag":35,"props":218,"children":219},{},[220],{"type":20,"value":221},"max_tokens",{"type":15,"tag":16,"props":223,"children":224},{},[225],{"type":20,"value":226},"Hard cap on output length. Set this deliberately — it's also your cost ceiling. Too low truncates mid-sentence; too high risks runaway generation.",{"type":15,"tag":16,"props":228,"children":229},{},[230,235],{"type":15,"tag":35,"props":231,"children":232},{},[233],{"type":20,"value":234},"frequency_penalty / presence_penalty",{"type":20,"value":236}," (OpenAI-style, -2.0–2.0)",{"type":15,"tag":69,"props":238,"children":239},{},[240,251],{"type":15,"tag":73,"props":241,"children":242},{},[243,249],{"type":15,"tag":23,"props":244,"children":246},{"className":245},[],[247],{"type":20,"value":248},"frequency_penalty",{"type":20,"value":250}," — penalises tokens proportional to how often they've already appeared. Reduces repetition.",{"type":15,"tag":73,"props":252,"children":253},{},[254,260],{"type":15,"tag":23,"props":255,"children":257},{"className":256},[],[258],{"type":20,"value":259},"presence_penalty",{"type":20,"value":261}," — flat penalty once a token has appeared at all. Encourages new topics.",{"type":15,"tag":16,"props":263,"children":264},{},[265],{"type":20,"value":266},"Useful for long-form generation that loops. \nLeave at 0 otherwise.",{"type":15,"tag":16,"props":268,"children":269},{},[270],{"type":15,"tag":35,"props":271,"children":272},{},[273],{"type":20,"value":274},"stop sequences",{"type":15,"tag":16,"props":276,"children":277},{},[278,280,286,288,294],{"type":20,"value":279},"Strings that terminate generation when produced. Great for structured output (\"stop at ",{"type":15,"tag":23,"props":281,"children":283},{"className":282},[],[284],{"type":20,"value":285},"\u003C/answer>",{"type":20,"value":287},"\") or role-play transcripts (\"stop at ",{"type":15,"tag":23,"props":289,"children":291},{"className":290},[],[292],{"type":20,"value":293},"\\nUser:",{"type":20,"value":295},"\").",{"type":15,"tag":16,"props":297,"children":298},{},[299],{"type":15,"tag":35,"props":300,"children":301},{},[302],{"type":20,"value":303},"seed",{"type":15,"tag":16,"props":305,"children":306},{},[307],{"type":20,"value":308},"Some providers accept a seed for (best-effort) reproducibility. Not guaranteed — model updates and batching can still cause drift — but helpful for eval runs.",{"type":15,"tag":16,"props":310,"children":311},{},[312],{"type":15,"tag":35,"props":313,"children":314},{},[315],{"type":20,"value":316},"What I actually reach for",{"type":15,"tag":69,"props":318,"children":319},{},[320,340,345],{"type":15,"tag":73,"props":321,"children":322},{},[323,325,331,333,338],{"type":20,"value":324},"Extraction / JSON output: ",{"type":15,"tag":23,"props":326,"children":328},{"className":327},[],[329],{"type":20,"value":330},"temperature=0",{"type":20,"value":332},", tight ",{"type":15,"tag":23,"props":334,"children":336},{"className":335},[],[337],{"type":20,"value":221},{"type":20,"value":339},", stop sequences.",{"type":15,"tag":73,"props":341,"children":342},{},[343],{"type":20,"value":344},"Chat / general use: defaults.",{"type":15,"tag":73,"props":346,"children":347},{},[348,350,356,358,364],{"type":20,"value":349},"Creative writing: \n",{"type":15,"tag":23,"props":351,"children":353},{"className":352},[],[354],{"type":20,"value":355},"temperature=1.0",{"type":20,"value":357}," or ",{"type":15,"tag":23,"props":359,"children":361},{"className":360},[],[362],{"type":20,"value":363},"top_p=0.95",{"type":20,"value":365},", nudge a penalty if it loops.",{"type":15,"tag":16,"props":367,"children":368},{},[369],{"type":20,"value":370},"Defaults are defaults for a reason. Tune only when you have a specific behaviour to fix.",{"title":7,"searchDepth":372,"depth":372,"links":373},2,[],"markdown","content:tidbits:llm-sampling-params.md","content","tidbits/llm-sampling-params.md","tidbits/llm-sampling-params","md",1776975651983]