Spaces:
Paused
Paused
| # Notes - on how to do sagemaker streaming using boto3 | |
| import json | |
| import boto3 | |
| import sys | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import io | |
| sys.path.insert( | |
| 0, os.path.abspath("../..") | |
| ) # Adds the parent directory to the system path | |
| class TokenIterator: | |
| def __init__(self, stream): | |
| self.byte_iterator = iter(stream) | |
| self.buffer = io.BytesIO() | |
| self.read_pos = 0 | |
| def __iter__(self): | |
| return self | |
| def __next__(self): | |
| while True: | |
| self.buffer.seek(self.read_pos) | |
| line = self.buffer.readline() | |
| if line and line[-1] == ord("\n"): | |
| self.read_pos += len(line) + 1 | |
| full_line = line[:-1].decode("utf-8") | |
| line_data = json.loads(full_line.lstrip("data:").rstrip("/n")) | |
| return line_data["token"]["text"] | |
| chunk = next(self.byte_iterator) | |
| self.buffer.seek(0, io.SEEK_END) | |
| self.buffer.write(chunk["PayloadPart"]["Bytes"]) | |
| payload = { | |
| "inputs": "How do I build a website?", | |
| "parameters": {"max_new_tokens": 256}, | |
| "stream": True, | |
| } | |
| client = boto3.client("sagemaker-runtime", region_name="us-west-2") | |
| response = client.invoke_endpoint_with_response_stream( | |
| EndpointName="berri-benchmarking-Llama-2-70b-chat-hf-4", | |
| Body=json.dumps(payload), | |
| ContentType="application/json", | |
| ) | |
| # for token in TokenIterator(response["Body"]): | |
| # print(token) | |