1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- from __future__ import annotations
- from aiohttp import ClientSession
- from ..typing import AsyncResult, Messages
- from .base_provider import AsyncGeneratorProvider
- models = {
- "meta-llama/Llama-2-7b-chat-hf": "meta/llama-2-7b-chat",
- "meta-llama/Llama-2-13b-chat-hf": "meta/llama-2-13b-chat",
- "meta-llama/Llama-2-70b-chat-hf": "meta/llama-2-70b-chat",
- }
- class Llama2(AsyncGeneratorProvider):
- url = "https://www.llama2.ai"
- working = True
- supports_message_history = True
- @classmethod
- async def create_async_generator(
- cls,
- model: str,
- messages: Messages,
- proxy: str = None,
- **kwargs
- ) -> AsyncResult:
- if not model:
- model = "meta/llama-2-70b-chat"
- elif model in models:
- model = models[model]
- headers = {
- "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
- "Accept": "*/*",
- "Accept-Language": "de,en-US;q=0.7,en;q=0.3",
- "Accept-Encoding": "gzip, deflate, br",
- "Referer": f"{cls.url}/",
- "Content-Type": "text/plain;charset=UTF-8",
- "Origin": cls.url,
- "Connection": "keep-alive",
- "Sec-Fetch-Dest": "empty",
- "Sec-Fetch-Mode": "cors",
- "Sec-Fetch-Site": "same-origin",
- "Pragma": "no-cache",
- "Cache-Control": "no-cache",
- "TE": "trailers"
- }
- async with ClientSession(headers=headers) as session:
- prompt = format_prompt(messages)
- data = {
- "prompt": prompt,
- "model": model,
- "systemPrompt": kwargs.get("system_message", "You are a helpful assistant."),
- "temperature": kwargs.get("temperature", 0.75),
- "topP": kwargs.get("top_p", 0.9),
- "maxTokens": kwargs.get("max_tokens", 8000),
- "image": None
- }
- started = False
- async with session.post(f"{cls.url}/api", json=data, proxy=proxy) as response:
- response.raise_for_status()
- async for chunk in response.content.iter_any():
- if not started:
- chunk = chunk.lstrip()
- started = True
- yield chunk.decode()
-
- def format_prompt(messages: Messages):
- messages = [
- f"[INST] {message['content']} [/INST]"
- if message["role"] == "user"
- else message["content"]
- for message in messages
- ]
- return "\n".join(messages) + "\n"
|