<!-- Captured from a Hugging Face Spaces listing (status: Sleeping) -->
<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Inference Proxy</title>
  <!-- Include Tailwind CSS (Play CDN; generates utility styles at runtime) -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Include Prism.js theme for syntax highlighting -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/themes/prism-tomorrow.min.css" rel="stylesheet">
  <style>
    /* Shrink all code samples slightly for denser display */
    code {
      font-size: 0.75rem;
    }
  </style>
</head>
<body class="bg-gray-50 text-gray-800 font-sans antialiased">
  <div class="max-w-4xl mx-auto px-4 py-8 md:py-12">
    <header class="mb-10">
      <h1 class="text-3xl md:text-4xl font-bold text-gray-800 mb-2">Inference Proxy</h1>
      <div class="h-1 w-20 bg-gray-300 rounded"></div>
    </header>
    <main>
      <section class="mb-8">
        <h2 class="text-xl md:text-2xl font-semibold text-gray-800 mb-4">Setup</h2>
        <p class="mb-6 text-gray-700">This proxy captures and stores traces from LLM API requests to your personal Hugging Face dataset.</p>
        <div class="space-y-6">
          <div class="bg-white rounded-lg shadow-md p-6">
            <h3 class="text-lg font-medium text-gray-800 mb-3">1. Duplicate Space</h3>
            <p class="text-gray-600 mb-4">First, duplicate this space to your account to set up your own instance.</p>
          </div>
          <div class="bg-white rounded-lg shadow-md p-6">
            <h3 class="text-lg font-medium text-gray-800 mb-3">2. Set Environment Variables</h3>
            <p class="text-gray-600 mb-4">Configure these required environment variables in your space settings:</p>
            <div class="bg-gray-100 p-3 rounded-md mb-4">
              <code class="text-sm text-gray-700">HF_ACCESS_TOKEN=your_huggingface_token</code>
            </div>
            <div class="bg-gray-100 p-3 rounded-md">
              <code class="text-sm text-gray-700">USER_NAME=your_huggingface_username</code>
            </div>
          </div>
        </div>
      </section>
      <section class="mb-8">
        <h2 class="text-xl md:text-2xl font-semibold text-gray-800 mb-4">Example Usage</h2>
        <!-- Mock terminal window framing the code sample -->
        <div class="bg-gray-800 rounded-lg shadow-lg overflow-hidden">
          <div class="flex items-center px-4 py-2 bg-gray-900">
            <div class="flex space-x-2 mr-2">
              <div class="w-3 h-3 rounded-full bg-red-500"></div>
              <div class="w-3 h-3 rounded-full bg-yellow-500"></div>
              <div class="w-3 h-3 rounded-full bg-green-500"></div>
            </div>
            <p class="text-xs text-gray-400">JavaScript</p>
          </div>
          <!-- {{HOST_URL}} is a server-side template placeholder filled in on render -->
          <pre class="p-4 overflow-x-auto text-xs font-mono"><code class="language-javascript">import { OpenAI } from "openai";
const client = new OpenAI({
  <span class="bg-yellow-700 px-1 rounded">baseURL: "{{HOST_URL}}/fireworks-ai/inference/v1",</span>
  apiKey: process.env.HF_API_KEY,
});
let out = "";
const stream = await client.chat.completions.create({
  model: "accounts/fireworks/models/deepseek-v3",
  messages: [
    {
      role: "user",
      content: "What is the capital of France?",
    },
  ],
  stream: true,
  max_tokens: 500,
});
for await (const chunk of stream) {
  if (chunk.choices &amp;&amp; chunk.choices.length &gt; 0) {
    const newContent = chunk.choices[0].delta.content;
    out += newContent;
    console.log(newContent);
  }
}</code></pre>
        </div>
      </section>
    </main>
  </div>
  <!-- Prism.js core + autoloader (fetches language grammars on demand) -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/components/prism-core.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/plugins/autoloader/prism-autoloader.min.js"></script>
</body>
</html>