# Download the llamafile: a single self-contained executable that bundles the
# model weights together with an inference server.
wget https://huggingface.co/Mozilla/Llama-3.2-1B-Instruct-llamafile/resolve/main/Llama-3.2-1B-Instruct.Q6_K.llamafile

# Mark the downloaded file as executable.
chmod +x Llama-3.2-1B-Instruct.Q6_K.llamafile

# Launch it in server mode, exposing an OpenAI-compatible HTTP API
# (presumably on http://localhost:8080, matching the client code below — confirm).
./Llama-3.2-1B-Instruct.Q6_K.llamafile --server
import asyncio

import openai


async def main() -> str | None:
    """Query the local llamafile server through its OpenAI-compatible API.

    Connects to the server started in the shell step above and sends a single
    chat-completion request, returning the assistant's reply text.

    Returns:
        The content of the first completion choice (may be ``None`` if the
        server returns an empty message).
    """
    # The llamafile server ignores the API key, but the client requires a
    # non-empty value, hence the placeholder.
    ai = openai.AsyncOpenAI(
        base_url="http://localhost:8080/v1",
        api_key="sk-no-key-required",
    )
    response = await ai.chat.completions.create(
        messages=[
            {"role": "system", "content": "..."},
            {"role": "user", "content": "..."},
        ],
        max_tokens=100,
        # Model name as the server reports it for the bundled weights.
        model="Llama-3.2-1B-Instruct.Q6_K.gguf",
    )
    content = response.choices[0].message.content
    return content


if __name__ == "__main__":
    print(asyncio.run(main()))