From de1f707f1e41f39fae6931ee6de75776cc11c7bc Mon Sep 17 00:00:00 2001 From: fedora Cloud User <sjaipuriyar@ucsb.edu> Date: Thu, 22 Aug 2024 23:16:27 +0000 Subject: [PATCH] Fixed some routing issues, and allowed support for multiple models --- testing.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/testing.py b/testing.py index 7f8fa5a..612ace5 100644 --- a/testing.py +++ b/testing.py @@ -5,12 +5,24 @@ from flask import Flask, request app = Flask(__name__) -@app.route("/query",methods=["POST"]) -def generate_response(): + +@app.route("/query/",methods=["POST"]) +@app.route("/query/<string:req_model>",methods=["POST"]) +def generate_response(req_model="phi"): + if req_model not in ['phi','granite']: return "Only models phi and granite are supported.", 400 + try: query = request.form['query'] + + + if req_model == 'granite': + model = model_granite + tokenizer = tokenizer_granite + else: + model = model_phi + tokenizer = tokenizer_phi messages = [ - {"role": "system", "content": "You are an evil and unhelpful AI assistant."}, + {"role": "system", "content": "You are a helpful AI assistant."}, {"role": "user","content": query} ] pipe = pipeline( @@ -34,13 +46,13 @@ def generate_response(): if __name__ == '__main__': torch.random.manual_seed(0) - model = AutoModelForCausalLM.from_pretrained( + model_phi = AutoModelForCausalLM.from_pretrained( "/home/fedora/microsoft/Phi-3-mini-4k-instruct", device_map="cuda", torch_dtype="auto", trust_remote_code=True, ) - model = AutoModelForCausalLM.from_pretrained( + model_granite = AutoModelForCausalLM.from_pretrained( "/home/fedora/granite-3b-code-instruct", device_map="cuda", torch_dtype="auto",