informatique:ai_lm:gpu_bench
Différences
Ci-dessous, les différences entre deux révisions de la page.
| Les deux révisions précédentes | Révision précédente | Prochaine révision | Révision précédente | ||
| informatique:ai_lm:gpu_bench [15/01/2026 13:21] – supprimée - modification externe (Date inconnue) 127.0.0.1 | informatique:ai_lm:gpu_bench [30/01/2026 14:40] (Version actuelle) – cyrille | ||
|---|---|---|---|
| Ligne 1: | Ligne 1: | ||
| + | ====== GPU Bench ====== | ||
| + | |||
| + | * [[https:// | ||
| + | |||
| + | |||
| + | |||
| + | Benchmark d'IA pour [[https:// | ||
| + | * avec service Mistral, modèle Codestral = '' | ||
| + | * RTX3060 + Intel-i7, modèle granite-4.0-h-small-Q8_0 | ||
| + | |||
| + | Selon LeChat: | ||
| + | |||
| + | ^ Carte graphique ^ TOPS (INT8) ^ TOPS (FP16) ^ Architecture ^ | ||
| + | | RTX 3060 (12 Go) | ~120 TOPS | ~60 TOPS | Ampere | | ||
| + | | RTX 5060 Ti (16 Go) | ~759 TOPS | ~380 TOPS | Blackwell | | ||
| + | |||
| + | Bench llama.cpp : | ||
| + | |||
| + | * Text generation: tg128, tg256, tg512 : '' | ||
| + | * Prompt processing: b128, b256, b512 : '' | ||
| + | |||
| + | ^ models | ||
| + | ^ ^ ^ i7-1360P ^ RTX 3060 ^ RTX 5060 Ti ^ | ||
| + | | Qwen2.5-coder-7b-instruct-q5_k_m | tg128 | 5.47 | 57.65 | 73.54 | | ||
| + | | //size: 5.07 GiB// | tg256 | ... | 57.61 | 73.32 | | ||
| + | | | tg512 | ... | 56.20 | 71.80 | | ||
| + | | | b128 | ... | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | Qwen2.5-coder-7b-instruct-q8_0 | ||
| + | | //size: 7.54 GiB// | tg256 | ... | 41.38 | 50.33 | | ||
| + | | | tg512 | ... | 40.70 | 49.62 | | ||
| + | | | b128 | 13.98 | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | EuroLLM-9B-Instruct-Q4_0 | ||
| + | | //size: 4.94 GiB// | tg256 | ... | 55.96 | 71.15 | | ||
| + | | | tg512 | ... | 53.87 | 69.45 | | ||
| + | | | b128 | ... | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | Qwen3-14B-UD-Q5_K_XL | ||
| + | | //size: 9.82 GiB// | tg256 | ... | 29.97 | 38.17 | | ||
| + | | | tg512 | ... | 29.25 | 37.30 | | ||
| + | | | b128 | ... | 903.97 | CUDA error | | ||
| + | | | b256 | ... | 951.71 | ... | | ||
| + | | | b512 | ... | 963.76 | ... | | ||
| + | | Qwen3-4B-UD-Q8_K_XL | ||
| + | | //size: 4.70 GiB// | tg256 | 6.63 | 56.35 | ... | | ||
| + | | | tg512 | 6.24 | 54.56 | ... | | ||
| + | | | b128 | 20.66 | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | GemmaCoder3-12B-IQ4_NL.gguf | ||
| + | | //size: 6.41 GiB// | tg256 | ... | 40.67 | ... | | ||
| + | | | tg512 | ... | 39.54 | ... | | ||
| + | | | b128 | ... | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | Gemma3-Code-Reasoning-4B.Q8_0 | ||
| + | | //size: 3.84 GiB// | tg256 | ... | 66.95 | ... | | ||
| + | | | tg512 | ... | 65.75 | ... | | ||
| + | | | b128 | ... | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | | GemmaCoder3-12B-Q5_K_M | ||
| + | | //size: 7.86 GiB// | tg256 | ... | 34.06 | ... | | ||
| + | | | tg512 | ... | 33.28 | ... | | ||
| + | | | b128 | ... | | ||
| + | | | b256 | ... | | ||
| + | | | b512 | ... | | ||
| + | |||
| + | |||
| + | * Les "CUDA error" apparaissent avec la RTX 5060 Ti et le bridge PCIe/THB4 " | ||
| + | * Avec le CPU, laisser le nombre de cœurs en automatique, | ||
+ | * le multi-threading physique est utile. Ex: en auto 7.37 t/s, avec 1 thread 3.39 t/s | ||
| + | |||
| + | La '' | ||
| + | * GemmaCoder3-12B-Q5_K_M, | ||
| + | * GemmaCoder3-12B-IQ4_NL, | ||
| + | * GemmaCoder3-12B-IQ4_XS, | ||
| + | Avec un modèle plus petit, comme '' | ||
| + | |||
| + | ===== Intel® Core™ i7-1360P 13th Gen ===== | ||
| + | |||
| + | Pour comparaison ... | ||
| + | |||
| + | **Qwen2.5-coder-7b-instruct-q5_k_m**: | ||
| + | < | ||
| + | ./ | ||
| + | load_backend: | ||
| + | load_backend: | ||
| + | | model | size | | ||
| + | | ------------------------------ | ---------: | ---------: | ---------- | ------: | --------------: | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | </ | ||
| + | |||
| + | ===== Gigabyte Windforce OC 12GB Geforce RTX 3060 ===== | ||
| + | |||
| + | {{ : | ||
| + | |||
| + | Avec '' | ||
| + | ^ NVIDIA GeForce RTX 3060 ^^ | ||
| + | | Chip Name | GA104 | | ||
| + | | SM Count | 28 | | ||
| + | | L2 Cache Size | 2,25 MiB | | ||
| + | | Memory Bandwidth | 335,32 GiB/s | | ||
| + | | Memory Size | 11,63 GiB | | ||
| + | | Core Clock | 1,79 GHz | | ||
| + | | Bus Location | 0000: | ||
| + | | GSP firmware version | 580.105.08 | | ||
| + | | Video accelerator tracing | Supported | | ||
| + | |||
| + | |||
| + | Avec llama.cpp et CUDA 12.9. | ||
| + | |||
| + | ==== Qwen2.5-coder-7b-instruct-q5_k_m ==== | ||
| + | |||
| + | < | ||
| + | ./ | ||
| + | ggml_cuda_init: | ||
| + | ggml_cuda_init: | ||
| + | ggml_cuda_init: | ||
| + | Device 0: NVIDIA GeForce RTX 3060, compute capability 8.6, VMM: yes | ||
| + | | model | size | | ||
| + | | ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | </ | ||
| + | |||
| + | ==== GemmaCoder3-12B-Q5_K_M ==== | ||
| + | |||
| + | Pour lancer '' | ||
| + | |||
| + | ^ n-gpu-layers ^ test ^ tokens/s ^ time ^ % perte perf | | ||
| + | | (all) 49 | tg128| | ||
| + | | | b128 | 1041.60 | 0m13,117s | | ||
| + | | 44 | tg128| | ||
| + | | | b128 | | ||
| + | | 39 | tg128| | ||
| + | | | b128 | | ||
| + | | 30 | tg128| | ||
| + | | | b128 | 82.91 | 1m19,729s | 92.04% | ||
| + | | full cpu | tg128| | ||
| + | | | b128 | 4.50 | 22m37,674s | 99.57% | ||
| + | |||
| + | Les valeurs qui permettent de charger ce modèle : | ||
| + | |||
| + | * '' | ||
| + | * avec son context max 131072 c'est 30 layers sur GPU : ''< | ||
| + | * ''< | ||
| + | * et pour tous les layers sur le GPU : ''< | ||
| + | * '' | ||
| + | * ''< | ||
| + | * ''< | ||
| + | * ''< | ||
| + | |||
| + | Avec ''< | ||
| + | |||
| + | < | ||
| + | ... | ||
| + | NVIDIA GeForce RTX 3060, compute capability 8.6, VMM: yes | ||
| + | ... | ||
| + | print_info: n_ctx_train | ||
| + | print_info: n_embd | ||
| + | print_info: n_embd_inp | ||
| + | print_info: n_layer | ||
| + | print_info: n_head | ||
| + | print_info: n_head_kv | ||
| + | print_info: n_rot = 256 | ||
| + | print_info: n_swa = 1024 | ||
| + | print_info: is_swa_any | ||
| + | print_info: n_embd_head_k | ||
| + | print_info: n_embd_head_v | ||
| + | print_info: n_gqa = 2 | ||
| + | print_info: n_embd_k_gqa | ||
| + | print_info: n_embd_v_gqa | ||
| + | print_info: f_norm_eps | ||
| + | print_info: f_norm_rms_eps | ||
| + | print_info: f_clamp_kqv | ||
| + | print_info: f_max_alibi_bias = 0.0e+00 | ||
| + | print_info: f_logit_scale | ||
| + | print_info: f_attn_scale | ||
| + | print_info: n_ff = 15360 | ||
| + | print_info: n_expert | ||
| + | print_info: n_expert_used | ||
| + | print_info: n_expert_groups | ||
| + | print_info: n_group_used | ||
| + | print_info: causal attn = 1 | ||
| + | print_info: pooling type = 0 | ||
| + | print_info: rope type = 2 | ||
| + | print_info: rope scaling | ||
| + | print_info: freq_base_train | ||
| + | print_info: freq_scale_train = 0.125 | ||
| + | print_info: n_ctx_orig_yarn | ||
| + | print_info: rope_finetuned | ||
| + | print_info: model type = 12B | ||
| + | print_info: model params | ||
| + | print_info: general.name | ||
| + | print_info: vocab type = SPM | ||
| + | print_info: n_vocab | ||
| + | print_info: n_merges | ||
| + | ... | ||
| + | print_info: max token length = 48 | ||
| + | ... | ||
| + | load_tensors: | ||
| + | load_tensors: | ||
| + | load_tensors: | ||
| + | load_tensors: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_kv_cache_iswa: | ||
| + | llama_kv_cache: | ||
| + | llama_kv_cache: | ||
| + | llama_kv_cache: | ||
| + | llama_kv_cache_iswa: | ||
| + | llama_kv_cache: | ||
| + | llama_kv_cache: | ||
| + | llama_kv_cache: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | llama_context: | ||
| + | |||
| + | </ | ||
| + | |||
| + | |||
| + | ===== PNY OC 16 Go Geforce RTX 5060 Ti ===== | ||
| + | |||
| + | === Qwen2.5-coder-7b-instruct-q5_k_m === | ||
| + | |||
| + | < | ||
| + | $ ./ | ||
| + | ggml_cuda_init: | ||
| + | ggml_cuda_init: | ||
| + | ggml_cuda_init: | ||
| + | Device 0: NVIDIA GeForce RTX 5060 Ti, compute capability 12.0, VMM: yes | ||
| + | | model | size | | ||
| + | | ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | | qwen2 7B Q5_K - Medium | ||
| + | |||
| + | build: 3f3a4fb9c (7130) | ||
| + | </ | ||
| + | |||
| + | === Stabilité === | ||
| + | |||
| + | Reset nvidia et CUDA: | ||
| + | < | ||
| + | $ sudo rmmod nvidia_uvm nvidia | ||
| + | </ | ||
| + | |||
| + | * Lucie-7B_OpenLLM-France.Instruct-human-data.Q8_0.gguf | ||
| + | * Meta-Llama-3.1-8B-Instruct-Q8_0.gguf | ||
| + | * CUDA0 model buffer size = 7605,33 MiB | ||
| + | * CUDA0 compute buffer size = | ||
| + | * | ||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | ===== Traduction ===== | ||
| + | |||
| + | ==== 1 phrase ==== | ||
| + | |||
| + | Très rapide avec RTX 3060. | ||
| + | |||
| + | '' | ||
| + | |||
| + | Prompt: | ||
| + | |||
| + | > french to english " | ||
| + | |||
| + | Replies: | ||
| + | |||
| + | EuroLLM-9B-Instruct: | ||
| + | > "This is a rare opportunity for students to be able to discuss the career paths of our former students and, in doing so, to answer many questions and even remove some of the barriers they may be facing." | ||
| + | |||
| + | EuroLLM-9B-q8_0: | ||
| + | > The provided text is in French and needs to be translated into English. | ||
| + | > I'll translate it step-by-step: | ||
| + | > | ||
| + | > 1. **Identify the source language:** The text is in French. | ||
| + | > 2. **Analyze the text:** The text appears to be a sentence describing an event or opportunity. It mentions " | ||
| + | > 3. **Translate each part:** | ||
| + | > * " | ||
| + | > * "et ainsi, de répondre à de nombreuses questions voire à lever des freins" | ||
| + | > 4. **Combine the translated parts:** The complete translation is: "This is a rare opportunity for students to be able to discuss the journeys of our former students and thus, to answer many questions and even to lift barriers." | ||
| + | |||
| + | |||
| + | Qwen2.5-coder-7b-instruct-q5_k_m : | ||
| + | > Here's the English translation: | ||
| + | > " | ||
| + | > This translation conveys the meaning that the situation being described is unusual and provides students with a valuable learning experience by allowing them to share and discuss the career paths of alumni, which can help them answer questions and potentially resolve challenges they may face in their own professional development. | ||
| + | |||
| + | Qwen2.5-coder-7b-instruct-q8_0: | ||
| + | > Here's the English translation of the French sentence: | ||
| + | > "This is a rare opportunity for students to exchange about the paths of our former students, and thus, to answer many questions or even to overcome obstacles." | ||
| + | > This translation preserves the meaning and tone of the original French statement, highlighting the value of this unique educational experience. | ||
| + | |||
| + | Qwen3-14B-UD-Q5_K_XL: | ||
| + | > //< | ||
| + | > Okay, let's tackle this French to English translation. The user provided the sentence: “C' | ||
| + | > First, I need to understand the structure and meaning. The sentence starts with " | ||
| + | > Now, I should check for any nuances. " | ||
| + | > I need to make sure the flow is natural in English. Maybe "a rare opportunity for students to exchange about the career paths of our alumni, and thus answer many questions or even overcome barriers." | ||
| + | > Wait, " | ||
| + | > Putting it all together: " | ||
| + | > I should check if " | ||
| + | > Another point: " | ||
| + | > So revised: " | ||
| + | > That seems accurate. Let me make sure there are no other nuances. The original sentence emphasizes that this opportunity is rare, which is captured. The purpose is to answer questions and overcome barriers, which is clear. I think that's a solid translation. | ||
| + | > //</ | ||
| + | > | ||
| + | > It's a rare opportunity for students to discuss the career paths of our alumni and thereby answer many questions or even overcome barriers. | ||
| + | |||
| + | ==== texte long ==== | ||
| + | |||
| + | === 2.7 ko === | ||
| + | |||
| + | Pour quitter après traduction c'est l' | ||
| + | |||
+ | Certains modèles comme '' | ||
| + | |||
| + | Les 4 modèles ont été jusqu' | ||
| + | |||
| + | === 8.8 ko === | ||
| + | |||
| + | Comme pour le texte précédent. | ||
| + | |||
+ | En passant, avec l'Intel® Core™ 13th Gen i7-1360P et EuroLLM-9B-Instruct-Q4_0 Statistics: 3.96 tokens/s, 1330 tokens, 335.56s soit presque 6 minutes. | ||
| + | === 19 ko === | ||
| + | |||
| + | * EuroLLM-9B-Instruct-Q4_0 : | ||
| + | * context full and context shift is disabled => stopping | ||
| + | * ''< | ||
| + | * Qwen3-14B-UD-Q5_K_XL : | ||
| + | * prompt is too long (4267 tokens, max 4092) | ||
| + | * ''< | ||
| + | * Qwen2.5-coder-7b-instruct-q8_0 | ||
| + | * prompt is too long (4267 tokens, max 4092) | ||
| + | * ''< | ||
| + | |||
| + | ===== Taille du « context » ===== | ||
| + | |||
| + | Avec Llama.cpp CUDA, RTX 3060 12 GB et opencode avec le modèle '' | ||
| + | |||
| + | < | ||
| + | ./ | ||
| + | |||
| + | # puis | ||
| + | |||
| + | time opencode run -m llamacpp/ | ||
| + | </ | ||
| + | |||
| + | --ctx-size 4096 | ||
| + | |||
| + | < | ||
| + | Error: the request exceeds the available context size, try increasing it | ||
| + | </ | ||
| + | |||
| + | --ctx-size 12288 | ||
| + | |||
| + | < | ||
| + | Async/await in JavaScript is used to simplify asynchronous code by making it look more synchronous. It allows you to write asynchronous code in a cleaner, more readable way. | ||
| + | |||
| + | real 0m6, | ||
| + | user 0m1, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | --ctx-size 40000 | ||
| + | |||
| + | < | ||
| + | Async/await in JavaScript is a feature that simplifies the syntax for handling asynchronous operations, making the code more readable and manageable. It allows you to write asynchronous code in a synchronous manner, using the `async` and `await` keywords. | ||
| + | |||
| + | Here's a brief explanation: | ||
| + | |||
| + | - **Async Function**: A function declared with the `async` keyword is an async function. Inside an async function, you can use the `await` keyword to pause the function' | ||
| + | |||
| + | - **Await Keyword**: The `await` keyword is used to pause the execution of the async function until the Promise it's waiting on is settled (either resolved or rejected). This makes it easier to read and understand asynchronous code, as it behaves more like synchronous code. | ||
| + | |||
| + | - **Promise**: | ||
| + | |||
| + | Here's an example to illustrate: | ||
| + | |||
| + | ```javascript | ||
| + | async function fetchData() { | ||
| + | try { | ||
| + | let response = await fetch(' | ||
| + | let data = await response.json(); | ||
| + | console.log(data); | ||
| + | } catch (error) { | ||
| + | console.error(' | ||
| + | } | ||
| + | } | ||
| + | ``` | ||
| + | |||
| + | In this example: | ||
| + | - The `fetchData` function is declared as `async`, allowing the use of `await`. | ||
| + | - `await fetch(' | ||
| + | - `await response.json()` waits for the JSON parsing to complete. | ||
| + | - `try...catch` is used to handle any errors that occur during the fetch or parsing operations. | ||
| + | |||
| + | Async/await makes it easier to handle multiple asynchronous operations in sequence, reducing the need for nested callbacks and improving the overall code structure. | ||
| + | |||
| + | real 0m16, | ||
| + | user 0m2, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | mais ça peut aussi répondre : | ||
| + | |||
| + | < | ||
| + | Async/await in JavaScript is a syntax that simplifies the behavior of using promises synchronously. It makes the code cleaner and easier to read by allowing you to write asynchronous code in a more linear, synchronous style. | ||
| + | |||
| + | real 0m2, | ||
| + | user 0m1, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | --ctx-size 60000 | ||
| + | |||
| + | < | ||
| + | Async/await is a syntax introduced in JavaScript to simplify working with promises, making asynchronous code more readable and easier to write. It allows you to write asynchronous code in a sequential, synchronous style, which can improve the overall flow and readability of your code. Here's a basic explanation: | ||
| + | |||
| + | - **Async Function**: A function declared with the `async` keyword returns a promise. This means that even if the function does not explicitly return a promise, it will implicitly return one. | ||
| + | - **Await Keyword**: The `await` keyword is used inside an async function to pause the execution of the function until a promise is resolved or rejected. This allows you to handle the resolved value directly, making the code look synchronous. | ||
| + | |||
| + | Here's a simple example to illustrate: | ||
| + | |||
| + | ```javascript | ||
| + | // Define an async function | ||
| + | async function fetchData() { | ||
| + | // Await a promise | ||
| + | let response = await fetch(' | ||
| + | let data = await response.json(); | ||
| + | return data; | ||
| + | } | ||
| + | |||
| + | // Call the async function | ||
| + | fetchData().then(data => { | ||
| + | console.log(data); | ||
| + | }).catch(error => { | ||
| + | console.error(' | ||
| + | }); | ||
| + | ``` | ||
| + | |||
| + | In this example: | ||
| + | - `fetchData` is an async function that fetches data from an API. | ||
| + | - The `await` keyword is used to wait for the `fetch` and `response.json()` operations to complete. | ||
| + | - The resolved data is then logged to the console. | ||
| + | |||
| + | Using async/ | ||
| + | |||
| + | real 0m14, | ||
| + | user 0m2, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | ou bien | ||
| + | |||
| + | < | ||
| + | Async/await is a feature in JavaScript that makes it easier to write and read asynchronous code, which is code that performs operations over time, like fetching data from a server or reading a file. It allows you to write asynchronous code in a more sequential and readable way, similar to synchronous code. | ||
| + | |||
| + | Here's a simple example to illustrate: | ||
| + | |||
| + | ```javascript | ||
| + | // Without async/await | ||
| + | function fetchData(callback) { | ||
| + | setTimeout(() => { | ||
| + | callback(' | ||
| + | }, 1000); | ||
| + | } | ||
| + | |||
| + | fetchData((data) => { | ||
| + | console.log(data); | ||
| + | }); | ||
| + | ``` | ||
| + | |||
| + | With async/ | ||
| + | |||
| + | ```javascript | ||
| + | // With async/await | ||
| + | async function fetchData() { | ||
| + | return new Promise((resolve) => { | ||
| + | setTimeout(() => { | ||
| + | resolve(' | ||
| + | }, 1000); | ||
| + | }); | ||
| + | } | ||
| + | |||
| + | async function getData() { | ||
| + | try { | ||
| + | const data = await fetchData(); | ||
| + | console.log(data); | ||
| + | } catch (error) { | ||
| + | console.error(error); | ||
| + | } | ||
| + | } | ||
| + | |||
| + | getData(); | ||
| + | ``` | ||
| + | |||
| + | In the async/await example: | ||
| + | - The `fetchData` function returns a promise that resolves after 1 second. | ||
| + | - The `getData` function is marked with `async`, allowing the use of `await` inside it. | ||
| + | - `await fetchData()` pauses the execution of `getData` until the promise resolves, and then the resolved value is logged to the console. | ||
| + | |||
| + | This makes the asynchronous code easier to understand and maintain, as it looks more like synchronous code. | ||
| + | |||
| + | real 0m15, | ||
| + | user 0m2, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | ou encore | ||
| + | |||
| + | < | ||
| + | `async/ | ||
| + | |||
| + | Here's a brief explanation: | ||
| + | |||
| + | 1. **Async Function**: You declare a function as `async` using the `async` keyword. This makes the function return a Promise. | ||
| + | ```javascript | ||
| + | async function fetchData() { | ||
| + | // function body | ||
| + | } | ||
| + | ``` | ||
| + | |||
| + | 2. **Await Keyword**: You use the `await` keyword inside an `async` function to pause the execution of the function until the Promise is resolved or rejected. | ||
| + | ```javascript | ||
| + | async function fetchData() { | ||
| + | const response = await fetch(' | ||
| + | const data = await response.json(); | ||
| + | return data; | ||
| + | } | ||
| + | ``` | ||
| + | |||
| + | 3. **Error Handling**: You can handle errors using `try...catch` blocks. | ||
| + | ```javascript | ||
| + | async function fetchData() { | ||
| + | try { | ||
| + | const response = await fetch(' | ||
| + | const data = await response.json(); | ||
| + | return data; | ||
| + | } catch (error) { | ||
| + | console.error(' | ||
| + | } | ||
| + | } | ||
| + | ``` | ||
| + | |||
| + | In summary, `async/ | ||
| + | |||
| + | real 0m9, | ||
| + | user 0m2, | ||
| + | sys 0m0, | ||
| + | </ | ||
| + | |||
| + | --ctx-size 74000 | ||
| + | |||
+ | Ça passe bien avec ce prompt ; peut-être limite avec un prompt plus complexe ... | ||
| + | |||
| + | --ctx-size 76000 | ||
| + | |||
| + | < | ||
| + | # Llama-server démarre bien, mais plante au calcul | ||
| + | |||
| + | slot update_slots: | ||
| + | / | ||
| + | CUDA error: out of memory | ||
| + | current device: 0, in function alloc at / | ||
| + | cuMemCreate(& | ||
| + | ... | ||
| + | </ | ||
| + | |||
| + | --ctx-size 80000 | ||
| + | |||
| + | < | ||
| + | # Llama-server plante dès le démarrage : | ||
| + | |||
| + | ggml_backend_cuda_buffer_type_alloc_buffer: | ||
| + | ggml_gallocr_reserve_n: | ||
| + | graph_reserve: | ||
| + | llama_init_from_model: | ||
| + | </ | ||
| + | |||
| + | Avec la **version Q5 medium** '' | ||
| + | * un '' | ||
| + | * mais '' | ||
| + | |||
