## TO-DO
import os
from mistralai import Mistral


def get_mistral_response(prompt, local=True):
"""
Get a response from the Mistral model based on the provided prompt.
Parameters:
- prompt (str): The input prompt for the model.
- local (bool): If True, use a local model. If False, use the Mistral API.
Returns:
- str: The response generated by the model.
"""
if not local:
# Retrieve the API key from environment variables
api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-large-latest"
# Initialize the Mistral client with the API key
client = Mistral(api_key=api_key)
# Send the prompt to the Mistral API and get the response
chat_response = client.chat.complete(
model=model,
messages=[
{
"role": "user",
"content": prompt,
},
]
)
# Return the content of the first message in the response
return chat_response.choices[0].message.content
    if local:
        ## Load the Mistral-7B-Instruct model onto the GPU
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

        # Define the local model path
        model_name = "/home/Mistral-7B-Instruct-v0.1"
        # Load the tokenizer from the pretrained model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Configure 8-bit quantization settings
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True
            # ,
            # llm_int8_threshold=4.0
        )
        # Load the model with the quantization configuration
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config
        )
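
        # Alternative sketch (not used above): 4-bit quantization would cut GPU memory
        # further, assuming a bitsandbytes build that supports it, e.g.
        #     BitsAndBytesConfig(load_in_4bit=True)
        # passed as quantization_config instead of the 8-bit settings.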
        def generate_response(prompt, max_length=256, temperature=0.5):
            """
            Generate a response from the local model based on the provided prompt.

            Parameters:
            - prompt (str): The input prompt for the model.
            - max_length (int): The maximum length of the generated response.
            - temperature (float): The temperature for sampling.

            Returns:
            - str: The generated response.
            """
            # Tokenize the input prompt
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            # Generate the response using the model
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                top_k=40,
                num_return_sequences=1
            )
            # Decode the generated response
            return tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Return the generated response
        return generate_response(prompt)
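

# Example usage (a minimal sketch): query the hosted API instead of the default
# local model. Assumes the MISTRAL_API_KEY environment variable is set; the prompt
# text is purely illustrative.
#
#     answer = get_mistral_response("Expand the abbreviation 'EEG'.", local=False)
#     print(answer)

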
def get_base_prompt(kind='short to long'):
"""
Get the base prompt for the model based on the kind of task.
Parameters:
- kind (str): The type of task ('short to long').
Returns:
- str: The base prompt for the model.
"""
    if kind == 'short to long':
        prompt = ('You are a linguist and as such you should provide the long form of abbreviations, '
                  'expanding the short form from German to English. All terms come from neuroscience or medicine. '
                  'The input will be a comma-separated list of strings. The output shall be a single JSON object '
                  'in the form "key: value", where the key is the input string and the value is the long form of '
                  'the abbreviation you created. Only output a single JSON object without any explanations, '
                  'descriptions or headers. input = ')
    else:
        raise ValueError(f"Unknown prompt kind: {kind!r}")
    return prompt
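
# Illustrative sketch of how the base prompt is meant to be used (hypothetical input,
# output values elided): the prompt is prepended to a comma-separated list of
# abbreviations and the model should return one JSON object mapping each input string
# to its long form.
#
#     prompt = get_base_prompt() + '"EEG","7T MRI"'
#     # expected output shape: {"EEG": "...", "7T MRI": "..."}
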
### Test data ###
"""
possible_tags = '"7T MRI","3T MRI","9.4T MRI","EEG","exp","log","mit alten CWL","experiment","SCANPHYSLOG","DesignTimeResolveAssemblyReferences","Studies"'
print(get_mistral_response(get_base_prompt() + possible_tags))
"""