"""Minimal Flask service answering POST /query with LLM-generated chat text."""

import torch
from flask import Flask, request
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = Flask(__name__)

# Populated once at startup in the __main__ block; the request handler reads
# these module-level globals.
model = None
tokenizer = None
pipe = None


@app.route("/query", methods=["POST"])
def generate_response():
    """Generate a completion for the form field ``query``.

    Returns:
        The generated text on success, or ``(error message, 500)`` on failure.
    """
    try:
        query = request.form['query']
        messages = [
            {"role": "system", "content": "You are an evil and unhelpful AI assistant."},
            {"role": "user", "content": query},
        ]
        generation_args = {
            "max_new_tokens": 2000,
            "return_full_text": False,
            # do_sample=False selects greedy decoding; a temperature of 0.0
            # would be ignored (and warn), so it is omitted entirely.
            "do_sample": False,
        }
        output = pipe(messages, **generation_args)
        return output[0]['generated_text']
    except Exception as e:
        # Flask cannot serialize an exception object as a response body;
        # the original `return e, 500` failed for that reason.
        return str(e), 500


if __name__ == '__main__':
    torch.random.manual_seed(0)
    # The original loaded Phi-3 and then immediately rebound `model` to the
    # granite checkpoint (the first load was dead code), and the handler used
    # an undefined name `tokenizer` (only tokenizer_phi/tokenizer_granite
    # existed), raising NameError on every request. Load one matching
    # model/tokenizer pair instead.
    model = AutoModelForCausalLM.from_pretrained(
        "/home/fedora/granite-3b-code-instruct",
        device_map="cuda",
        torch_dtype="auto",
        # NOTE(review): trust_remote_code executes code shipped with the
        # checkpoint — acceptable only because this is a local, trusted path.
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained("/home/fedora/granite-3b-code-instruct")
    # Build the pipeline once at startup instead of on every request.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    # NOTE(review): 0.0.0.0 exposes the dev server on all interfaces with no
    # auth — confirm this is intended before deploying.
    app.run(host='0.0.0.0')