|
298 | 298 | detokenize()
|
299 | 299 | </a>
|
300 | 300 |
|
| 301 | +</li> |
| 302 | + |
| 303 | + <li class="md-nav__item"> |
| 304 | + <a href="#llama_cpp.llama.Llama.generate" class="md-nav__link"> |
| 305 | + generate() |
| 306 | + </a> |
| 307 | + |
301 | 308 | </li>
|
302 | 309 |
|
303 | 310 | <li class="md-nav__item">
|
|
611 | 618 | detokenize()
|
612 | 619 | </a>
|
613 | 620 |
|
| 621 | +</li> |
| 622 | + |
| 623 | + <li class="md-nav__item"> |
| 624 | + <a href="#llama_cpp.llama.Llama.generate" class="md-nav__link"> |
| 625 | + generate() |
| 626 | + </a> |
| 627 | + |
614 | 628 | </li>
|
615 | 629 |
|
616 | 630 | <li class="md-nav__item">
|
@@ -2349,6 +2363,129 @@ <h3 id="llama_cpp.llama.Llama.detokenize" class="doc doc-heading">
|
2349 | 2363 |
|
2350 | 2364 |
|
2351 | 2365 |
|
| 2366 | +<h3 id="llama_cpp.llama.Llama.generate" class="doc doc-heading"> |
| 2367 | +<code class="highlight language-python"><span class="n">generate</span><span class="p">(</span><span class="n">tokens</span><span class="p">,</span> <span class="n">top_k</span><span class="p">,</span> <span class="n">top_p</span><span class="p">,</span> <span class="n">temp</span><span class="p">,</span> <span class="n">repeat_penalty</span><span class="p">)</span></code> |
| 2368 | + |
| 2369 | +</h3> |
| 2370 | + |
| 2371 | + |
| 2372 | + <div class="doc doc-contents "> |
| 2373 | + |
| 2374 | + <details class="quote"> |
| 2375 | + <summary>Source code in <code>llama_cpp/llama.py</code></summary> |
| 2376 | + <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-118">118</a></span> |
| 2377 | +<span class="normal"><a href="#__codelineno-0-119">119</a></span> |
| 2378 | +<span class="normal"><a href="#__codelineno-0-120">120</a></span> |
| 2379 | +<span class="normal"><a href="#__codelineno-0-121">121</a></span> |
| 2380 | +<span class="normal"><a href="#__codelineno-0-122">122</a></span> |
| 2381 | +<span class="normal"><a href="#__codelineno-0-123">123</a></span> |
| 2382 | +<span class="normal"><a href="#__codelineno-0-124">124</a></span> |
| 2383 | +<span class="normal"><a href="#__codelineno-0-125">125</a></span> |
| 2384 | +<span class="normal"><a href="#__codelineno-0-126">126</a></span> |
| 2385 | +<span class="normal"><a href="#__codelineno-0-127">127</a></span> |
| 2386 | +<span class="normal"><a href="#__codelineno-0-128">128</a></span> |
| 2387 | +<span class="normal"><a href="#__codelineno-0-129">129</a></span> |
| 2388 | +<span class="normal"><a href="#__codelineno-0-130">130</a></span> |
| 2389 | +<span class="normal"><a href="#__codelineno-0-131">131</a></span> |
| 2390 | +<span class="normal"><a href="#__codelineno-0-132">132</a></span> |
| 2391 | +<span class="normal"><a href="#__codelineno-0-133">133</a></span> |
| 2392 | +<span class="normal"><a href="#__codelineno-0-134">134</a></span> |
| 2393 | +<span class="normal"><a href="#__codelineno-0-135">135</a></span> |
| 2394 | +<span class="normal"><a href="#__codelineno-0-136">136</a></span> |
| 2395 | +<span class="normal"><a href="#__codelineno-0-137">137</a></span> |
| 2396 | +<span class="normal"><a href="#__codelineno-0-138">138</a></span> |
| 2397 | +<span class="normal"><a href="#__codelineno-0-139">139</a></span> |
| 2398 | +<span class="normal"><a href="#__codelineno-0-140">140</a></span> |
| 2399 | +<span class="normal"><a href="#__codelineno-0-141">141</a></span> |
| 2400 | +<span class="normal"><a href="#__codelineno-0-142">142</a></span> |
| 2401 | +<span class="normal"><a href="#__codelineno-0-143">143</a></span> |
| 2402 | +<span class="normal"><a href="#__codelineno-0-144">144</a></span> |
| 2403 | +<span class="normal"><a href="#__codelineno-0-145">145</a></span> |
| 2404 | +<span class="normal"><a href="#__codelineno-0-146">146</a></span> |
| 2405 | +<span class="normal"><a href="#__codelineno-0-147">147</a></span> |
| 2406 | +<span class="normal"><a href="#__codelineno-0-148">148</a></span> |
| 2407 | +<span class="normal"><a href="#__codelineno-0-149">149</a></span> |
| 2408 | +<span class="normal"><a href="#__codelineno-0-150">150</a></span> |
| 2409 | +<span class="normal"><a href="#__codelineno-0-151">151</a></span> |
| 2410 | +<span class="normal"><a href="#__codelineno-0-152">152</a></span> |
| 2411 | +<span class="normal"><a href="#__codelineno-0-153">153</a></span> |
| 2412 | +<span class="normal"><a href="#__codelineno-0-154">154</a></span> |
| 2413 | +<span class="normal"><a href="#__codelineno-0-155">155</a></span> |
| 2414 | +<span class="normal"><a href="#__codelineno-0-156">156</a></span> |
| 2415 | +<span class="normal"><a href="#__codelineno-0-157">157</a></span> |
| 2416 | +<span class="normal"><a href="#__codelineno-0-158">158</a></span> |
| 2417 | +<span class="normal"><a href="#__codelineno-0-159">159</a></span> |
| 2418 | +<span class="normal"><a href="#__codelineno-0-160">160</a></span> |
| 2419 | +<span class="normal"><a href="#__codelineno-0-161">161</a></span> |
| 2420 | +<span class="normal"><a href="#__codelineno-0-162">162</a></span> |
| 2421 | +<span class="normal"><a href="#__codelineno-0-163">163</a></span> |
| 2422 | +<span class="normal"><a href="#__codelineno-0-164">164</a></span> |
| 2423 | +<span class="normal"><a href="#__codelineno-0-165">165</a></span> |
| 2424 | +<span class="normal"><a href="#__codelineno-0-166">166</a></span> |
| 2425 | +<span class="normal"><a href="#__codelineno-0-167">167</a></span> |
| 2426 | +<span class="normal"><a href="#__codelineno-0-168">168</a></span> |
| 2427 | +<span class="normal"><a href="#__codelineno-0-169">169</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a><span class="k">def</span> <span class="nf">generate</span><span class="p">(</span> |
| 2428 | +</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a> <span class="bp">self</span><span class="p">,</span> |
| 2429 | +</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a> <span class="n">tokens</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span><span class="p">],</span> |
| 2430 | +</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a> <span class="n">top_k</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> |
| 2431 | +</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="n">top_p</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> |
| 2432 | +</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">temp</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> |
| 2433 | +</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="n">repeat_penalty</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> |
| 2434 | +</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Generator</span><span class="p">[</span> |
| 2435 | +</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a> <span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span><span class="p">]],</span> <span class="kc">None</span> |
| 2436 | +</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a><span class="p">]:</span> |
| 2437 | +</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a> <span class="c1"># Temporary workaround for https://github.com/ggerganov/llama.cpp/issues/684</span> |
| 2438 | +</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="k">if</span> <span class="n">temp</span> <span class="o">==</span> <span class="mf">0.0</span><span class="p">:</span> |
| 2439 | +</span><span id="__span-0-130"><a id="__codelineno-0-130" name="__codelineno-0-130"></a> <span class="n">temp</span> <span class="o">=</span> <span class="mf">1.0</span> |
| 2440 | +</span><span id="__span-0-131"><a id="__codelineno-0-131" name="__codelineno-0-131"></a> <span class="n">top_p</span> <span class="o">=</span> <span class="mf">0.0</span> |
| 2441 | +</span><span id="__span-0-132"><a id="__codelineno-0-132" name="__codelineno-0-132"></a> <span class="n">top_k</span> <span class="o">=</span> <span class="mi">1</span> |
| 2442 | +</span><span id="__span-0-133"><a id="__codelineno-0-133" name="__codelineno-0-133"></a> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">ctx</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| 2443 | +</span><span id="__span-0-134"><a id="__codelineno-0-134" name="__codelineno-0-134"></a> <span class="n">n_ctx</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_n_ctx</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ctx</span><span class="p">))</span> |
| 2444 | +</span><span id="__span-0-135"><a id="__codelineno-0-135" name="__codelineno-0-135"></a> <span class="n">n_tokens</span> <span class="o">=</span> <span class="mi">0</span> |
| 2445 | +</span><span id="__span-0-136"><a id="__codelineno-0-136" name="__codelineno-0-136"></a> <span class="n">last_n_tokens</span> <span class="o">=</span> <span class="n">deque</span><span class="p">(</span> |
| 2446 | +</span><span id="__span-0-137"><a id="__codelineno-0-137" name="__codelineno-0-137"></a> <span class="p">[</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span><span class="p">(</span><span class="mi">0</span><span class="p">)]</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_n_tokens_size</span><span class="p">,</span> |
| 2447 | +</span><span id="__span-0-138"><a id="__codelineno-0-138" name="__codelineno-0-138"></a> <span class="n">maxlen</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">last_n_tokens_size</span><span class="p">,</span> |
| 2448 | +</span><span id="__span-0-139"><a id="__codelineno-0-139" name="__codelineno-0-139"></a> <span class="p">)</span> |
| 2449 | +</span><span id="__span-0-140"><a id="__codelineno-0-140" name="__codelineno-0-140"></a> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span> |
| 2450 | +</span><span id="__span-0-141"><a id="__codelineno-0-141" name="__codelineno-0-141"></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">tokens</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_batch</span><span class="p">):</span> |
| 2451 | +</span><span id="__span-0-142"><a id="__codelineno-0-142" name="__codelineno-0-142"></a> <span class="n">batch</span> <span class="o">=</span> <span class="n">tokens</span><span class="p">[</span><span class="n">i</span> <span class="p">:</span> <span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tokens</span><span class="p">),</span> <span class="n">i</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_batch</span><span class="p">)]</span> |
| 2452 | +</span><span id="__span-0-143"><a id="__codelineno-0-143" name="__codelineno-0-143"></a> <span class="n">n_past</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">n_ctx</span> <span class="o">-</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">),</span> <span class="n">n_tokens</span><span class="p">)</span> |
| 2453 | +</span><span id="__span-0-144"><a id="__codelineno-0-144" name="__codelineno-0-144"></a> <span class="n">return_code</span> <span class="o">=</span> <span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_eval</span><span class="p">(</span> |
| 2454 | +</span><span id="__span-0-145"><a id="__codelineno-0-145" name="__codelineno-0-145"></a> <span class="n">ctx</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ctx</span><span class="p">,</span> |
| 2455 | +</span><span id="__span-0-146"><a id="__codelineno-0-146" name="__codelineno-0-146"></a> <span class="n">tokens</span><span class="o">=</span><span class="p">(</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">))(</span><span class="o">*</span><span class="n">batch</span><span class="p">),</span> |
| 2456 | +</span><span id="__span-0-147"><a id="__codelineno-0-147" name="__codelineno-0-147"></a> <span class="n">n_tokens</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_int</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)),</span> |
| 2457 | +</span><span id="__span-0-148"><a id="__codelineno-0-148" name="__codelineno-0-148"></a> <span class="n">n_past</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_int</span><span class="p">(</span><span class="n">n_past</span><span class="p">),</span> |
| 2458 | +</span><span id="__span-0-149"><a id="__codelineno-0-149" name="__codelineno-0-149"></a> <span class="n">n_threads</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_threads</span><span class="p">),</span> |
| 2459 | +</span><span id="__span-0-150"><a id="__codelineno-0-150" name="__codelineno-0-150"></a> <span class="p">)</span> |
| 2460 | +</span><span id="__span-0-151"><a id="__codelineno-0-151" name="__codelineno-0-151"></a> <span class="k">if</span> <span class="nb">int</span><span class="p">(</span><span class="n">return_code</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> |
| 2461 | +</span><span id="__span-0-152"><a id="__codelineno-0-152" name="__codelineno-0-152"></a> <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"llama_eval returned </span><span class="si">{</span><span class="n">return_code</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| 2462 | +</span><span id="__span-0-153"><a id="__codelineno-0-153" name="__codelineno-0-153"></a> <span class="n">last_n_tokens</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| 2463 | +</span><span id="__span-0-154"><a id="__codelineno-0-154" name="__codelineno-0-154"></a> <span class="n">n_tokens</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| 2464 | +</span><span id="__span-0-155"><a id="__codelineno-0-155" name="__codelineno-0-155"></a> <span class="n">token</span> <span class="o">=</span> <span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_sample_top_p_top_k</span><span class="p">(</span> |
| 2465 | +</span><span id="__span-0-156"><a id="__codelineno-0-156" name="__codelineno-0-156"></a> <span class="n">ctx</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ctx</span><span class="p">,</span> |
| 2466 | +</span><span id="__span-0-157"><a id="__codelineno-0-157" name="__codelineno-0-157"></a> <span class="n">last_n_tokens_data</span><span class="o">=</span><span class="p">(</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">llama_token</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_n_tokens_size</span><span class="p">)(</span> |
| 2467 | +</span><span id="__span-0-158"><a id="__codelineno-0-158" name="__codelineno-0-158"></a> <span class="o">*</span><span class="n">last_n_tokens</span> |
| 2468 | +</span><span id="__span-0-159"><a id="__codelineno-0-159" name="__codelineno-0-159"></a> <span class="p">),</span> |
| 2469 | +</span><span id="__span-0-160"><a id="__codelineno-0-160" name="__codelineno-0-160"></a> <span class="n">last_n_tokens_size</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">last_n_tokens_size</span><span class="p">),</span> |
| 2470 | +</span><span id="__span-0-161"><a id="__codelineno-0-161" name="__codelineno-0-161"></a> <span class="n">top_k</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_int</span><span class="p">(</span><span class="n">top_k</span><span class="p">),</span> |
| 2471 | +</span><span id="__span-0-162"><a id="__codelineno-0-162" name="__codelineno-0-162"></a> <span class="n">top_p</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_float</span><span class="p">(</span><span class="n">top_p</span><span class="p">),</span> |
| 2472 | +</span><span id="__span-0-163"><a id="__codelineno-0-163" name="__codelineno-0-163"></a> <span class="n">temp</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_float</span><span class="p">(</span><span class="n">temp</span><span class="p">),</span> |
| 2473 | +</span><span id="__span-0-164"><a id="__codelineno-0-164" name="__codelineno-0-164"></a> <span class="n">repeat_penalty</span><span class="o">=</span><span class="n">llama_cpp</span><span class="o">.</span><span class="n">c_float</span><span class="p">(</span><span class="n">repeat_penalty</span><span class="p">),</span> |
| 2474 | +</span><span id="__span-0-165"><a id="__codelineno-0-165" name="__codelineno-0-165"></a> <span class="p">)</span> |
| 2475 | +</span><span id="__span-0-166"><a id="__codelineno-0-166" name="__codelineno-0-166"></a> <span class="n">tokens_or_none</span> <span class="o">=</span> <span class="k">yield</span> <span class="n">token</span> |
| 2476 | +</span><span id="__span-0-167"><a id="__codelineno-0-167" name="__codelineno-0-167"></a> <span class="n">tokens</span> <span class="o">=</span> <span class="p">[</span><span class="n">token</span><span class="p">]</span> |
| 2477 | +</span><span id="__span-0-168"><a id="__codelineno-0-168" name="__codelineno-0-168"></a> <span class="k">if</span> <span class="n">tokens_or_none</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| 2478 | +</span><span id="__span-0-169"><a id="__codelineno-0-169" name="__codelineno-0-169"></a> <span class="n">tokens</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">tokens_or_none</span><span class="p">)</span> |
| 2479 | +</span></code></pre></div></td></tr></table></div> |
| 2480 | + </details> |
| 2481 | + </div> |
| 2482 | + |
| 2483 | +</div> |
| 2484 | + |
| 2485 | +<div class="doc doc-object doc-function"> |
| 2486 | + |
| 2487 | + |
| 2488 | + |
2352 | 2489 | <h3 id="llama_cpp.llama.Llama.create_embedding" class="doc doc-heading">
|
2353 | 2490 | <code class="highlight language-python"><span class="n">create_embedding</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span></code>
|
2354 | 2491 |
|
|
0 commit comments