Improve inference functions (#28)
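Refactor handleStreamingResponse and handleNonStreamingResponse to take the whole Conversation (plus the abortController and an optional systemMessage) instead of separate model / messages / temperature / maxTokens arguments, and build the request message list inside the utils.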
src/lib/components/InferencePlayground/InferencePlayground.svelte
CHANGED
@@ -129,10 +129,7 @@
 
 			await handleStreamingResponse(
 				hf,
-				conversation.model,
-				requestMessages,
-				conversation.config.temperature,
-				conversation.config.maxTokens,
+				conversation,
 				(content) => {
 					if (streamingMessage) {
 						streamingMessage.content = content;
@@ -140,17 +137,12 @@
 						conversations = conversations;
 					}
 				},
-				abortController
+				abortController,
+				systemMessage
 			);
 		} else {
 			waitForNonStreaming = true;
-			const newMessage = await handleNonStreamingResponse(
-				hf,
-				conversation.model,
-				requestMessages,
-				conversation.config.temperature,
-				conversation.config.maxTokens
-			);
+			const newMessage = await handleNonStreamingResponse(hf, conversation, systemMessage);
 			// check if the user did not abort the request
 			if (waitForNonStreaming) {
 				conversation.messages = [...conversation.messages, newMessage];
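With this change the component only forwards hf, the conversation and the shared systemMessage. For orientation, a rough sketch of the Conversation shape these helpers rely on, inferred from the fields accessed in this diff (the real type lives in $lib/types and is not shown here):

// Sketch only, inferred from conversation.model / conversation.config.* / conversation.messages above.
// The actual definition in $lib/types may differ.
import { type ChatCompletionInputMessage } from '@huggingface/tasks';
import type { ModelEntryWithTokenizer } from '$lib/types';

interface Conversation {
	model: ModelEntryWithTokenizer;         // selected model; the streaming path sends model.id
	config: {
		temperature: number;                  // forwarded as temperature
		maxTokens: number;                    // forwarded as max_tokens
	};
	messages: ChatCompletionInputMessage[]; // chat history; the system message is prepended separately
}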
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts
CHANGED
@@ -1,6 +1,6 @@
 import { type ChatCompletionInputMessage } from '@huggingface/tasks';
 import { HfInference } from '@huggingface/inference';
-import type { ModelEntryWithTokenizer } from '$lib/types';
+import type { Conversation, ModelEntryWithTokenizer } from '$lib/types';
 
 export function createHfInference(token: string): HfInference {
 	return new HfInference(token);
@@ -8,21 +8,25 @@ export function createHfInference(token: string): HfInference {
 
 export async function handleStreamingResponse(
 	hf: HfInference,
-	model: ModelEntryWithTokenizer,
-	messages: ChatCompletionInputMessage[],
-	temperature: number,
-	maxTokens: number,
+	conversation: Conversation,
 	onChunk: (content: string) => void,
-	abortController: AbortController
+	abortController: AbortController,
+	systemMessage?: ChatCompletionInputMessage
 ): Promise<void> {
+	const messages = [
+		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length
+			? [systemMessage]
+			: []),
+		...conversation.messages
+	];
 	let out = '';
 	try {
 		for await (const chunk of hf.chatCompletionStream(
 			{
-				model: model,
-				messages,
-				temperature: temperature,
-				max_tokens: maxTokens
+				model: conversation.model.id,
+				messages,
+				temperature: conversation.config.temperature,
+				max_tokens: conversation.config.maxTokens
 			},
 			{ signal: abortController.signal }
 		)) {
@@ -42,16 +46,21 @@ export async function handleStreamingResponse(
 
 export async function handleNonStreamingResponse(
 	hf: HfInference,
-	model: ModelEntryWithTokenizer,
-	messages: ChatCompletionInputMessage[],
-	temperature: number,
-	maxTokens: number
+	conversation: Conversation,
+	systemMessage?: ChatCompletionInputMessage
 ): Promise<ChatCompletionInputMessage> {
+	const messages = [
+		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length
+			? [systemMessage]
+			: []),
+		...conversation.messages
+	];
+
 	const response = await hf.chatCompletion({
-		model: model,
-		messages,
-		temperature: temperature,
-		max_tokens: maxTokens
+		model: conversation.model,
+		messages,
+		temperature: conversation.config.temperature,
+		max_tokens: conversation.config.maxTokens
 	});
 
 	if (response.choices && response.choices.length > 0) {
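Both helpers now gate the system message on isSystemPromptSupported(conversation.model), which is defined elsewhere in the playground and is not part of this diff. A minimal sketch of such a check, assuming ModelEntryWithTokenizer exposes the tokenizer config with its chat_template (an assumption for illustration, not the committed implementation):

// Assumption: a model accepts a system prompt if its chat template references the "system" role.
// Illustrative only; the real helper ships with the playground, not with this sketch.
import type { ModelEntryWithTokenizer } from '$lib/types';

export function isSystemPromptSupported(model: ModelEntryWithTokenizer): boolean {
	return Boolean(model.tokenizerConfig?.chat_template?.includes('system'));
}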