Source code for src.llm_response

## TO-DO

import os
from mistralai import Mistral

def get_mistral_response(prompt, local=True):
    """
    Get a response from the Mistral model based on the provided prompt.

    Parameters:
    - prompt (str): The input prompt for the model.
    - local (bool): If True, use a local model. If False, use the Mistral API.

    Returns:
    - str: The response generated by the model.
    """
    if not local:
        # Retrieve the API key from environment variables
        api_key = os.environ["MISTRAL_API_KEY"]
        model = "mistral-large-latest"

        # Initialize the Mistral client with the API key
        client = Mistral(api_key=api_key)

        # Send the prompt to the Mistral API and get the response
        chat_response = client.chat.complete(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                },
            ]
        )

        # Return the content of the first message in the response
        return chat_response.choices[0].message.content

    if local:
        ## Load the mistral-7B-instruct model into GPU
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

        # Define the model name and path
        model_name = "/home/Mistral-7B-Instruct-v0.1"

        # Load the tokenizer from the pretrained model
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Configure quantization settings
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True
            # , llm_int8_threshold=4.0
        )

        # Load the model with quantization configuration
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config
        )

        def generate_response(prompt, max_length=256, temperature=0.5):
            """
            Generate a response from the local model based on the provided prompt.

            Parameters:
            - prompt (str): The input prompt for the model.
            - max_length (int): The maximum length of the generated response.
            - temperature (float): The temperature for sampling.

            Returns:
            - str: The generated response.
            """
            # Tokenize the input prompt
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

            # Generate the response using the model
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                top_k=40,
                num_return_sequences=1
            )

            # Decode the generated response
            return tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Return the generated response
        return generate_response(prompt)
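## Usage sketch (illustrative only, not part of the original module): the API branch
## expects MISTRAL_API_KEY in the environment, while the local branch expects the
## Mistral-7B-Instruct weights at the hard-coded path above plus a GPU with
## bitsandbytes available for 8-bit loading.
# api_answer = get_mistral_response("What does the abbreviation EEG stand for?", local=False)
# local_answer = get_mistral_response("What does the abbreviation EEG stand for?", local=True)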
def get_base_prompt(kind='short to long'):
    """
    Get the base prompt for the model based on the kind of task.

    Parameters:
    - kind (str): The type of task ('short to long').

    Returns:
    - str: The base prompt for the model.
    """
    if kind == 'short to long':
        prompt = ('You\'re a linguist and as such you should provide me the long form for each abbreviation, '
                  'expanding the short form from German to English. All terms come from neuroscience or medicine. '
                  'The input will be a comma-separated list of strings. The output shall be a single JSON in the form of "key: value", '
                  'where the key is the input string and the value is your created long form of the abbreviation. '
                  'Only output a single JSON without any kind of explanations, descriptions or headers. input = ')
        return prompt
## Test data ###
"""
possible_tags = '"7T MRI","3T MRI","9.4T MRI","EEG","exp","log","mit alten CWL","experiment","SCANPHYSLOG","DesignTimeResolveAssemblyReferences","Studies"'
print(get_mistral_response(get_base_prompt() + possible_tags))
"""
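## Usage sketch (illustrative only, not part of the original module): combine the
## base prompt with a tag list and parse the single JSON object the model is asked
## to return. json.loads can fail if the model adds extra text, hence the guard.
"""
import json

tags = '"7T MRI","EEG","SCANPHYSLOG"'
raw = get_mistral_response(get_base_prompt('short to long') + tags, local=False)
try:
    short_to_long = json.loads(raw)   # e.g. {"EEG": "electroencephalography", ...}
except json.JSONDecodeError:
    short_to_long = {}                # model did not return clean JSON
print(short_to_long)
"""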