phi3_model_testing/testing.py

53 lines
1.5 KiB
Python
Raw Normal View History

2024-08-21 16:56:30 -07:00
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from flask import Flask, request
app = Flask(__name__)


@app.route("/query", methods=["POST"])
def generate_response():
    """Handle POST /query: run the submitted query through the loaded model.

    Expects a form field ``query``. Returns the generated text as the
    response body, or ``(error message, 500)`` if anything fails.
    """
    try:
        query = request.form['query']
        messages = [
            {"role": "system", "content": "You are an evil and unhelpful AI assistant."},
            {"role": "user", "content": query},
        ]
        # NOTE(review): the pipeline is rebuilt on every request; consider
        # constructing it once at startup and reusing it.
        pipe = pipeline(
            "text-generation",
            model=model,
            # Bug fix: the original passed an undefined global `tokenizer`
            # (NameError on the first request). `model` holds the granite
            # checkpoint (the second from_pretrained in __main__ overwrites
            # the first), so use the matching granite tokenizer.
            tokenizer=tokenizer_granite,
        )
        generation_args = {
            "max_new_tokens": 2000,
            "return_full_text": False,
            "temperature": 0.0,  # ignored when do_sample=False (greedy decoding)
            "do_sample": False,
        }
        output = pipe(messages, **generation_args)
        return output[0]['generated_text']
    except Exception as e:
        # Bug fix: Flask cannot serialize an Exception object as a response
        # body; return its string representation with a 500 status instead.
        return str(e), 500
if __name__ == '__main__':
    torch.random.manual_seed(0)
    # Bug fix: the original loaded the Phi-3 checkpoint into `model` and then
    # immediately overwrote it with the granite checkpoint, wasting GPU memory
    # and startup time (and leaving `tokenizer_phi` unused). Only the granite
    # model was ever served, so load just that one.
    model = AutoModelForCausalLM.from_pretrained(
        "/home/fedora/granite-3b-code-instruct",
        device_map="cuda",
        torch_dtype="auto",
        # NOTE(review): trust_remote_code executes Python shipped with the
        # checkpoint -- keep only for checkpoints you control.
        trust_remote_code=True,
    )
    tokenizer_granite = AutoTokenizer.from_pretrained("/home/fedora/granite-3b-code-instruct")
    # Bug fix: the request handler references a module-level `tokenizer` that
    # was never defined (NameError on the first request). Bind it to the
    # tokenizer matching the served model.
    tokenizer = tokenizer_granite
    # NOTE(review): binding 0.0.0.0 exposes the service on all interfaces;
    # restrict the bind address if that is not intentional.
    app.run(host='0.0.0.0')