from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-boolq")
model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-boolq").cuda()



qs = "Does \"sinewy neck\" refer to a vehicle?"
prompt = "I took a short breath, drinking in the fresh wind as I began to flap my leathery airfoils,\
        cutting through harsh airs like the sharpest knife through butter.  With a whip of my sinewy neck,\
        I surveyed the area beneath you, when something caught my eye."
#print(qs)
qs = tokenizer("question: in the passage, is leathery airfoils a drink context: " + prompt, return_tensors="pt").to("cuda")

import time
start = time.perf_counter()
with torch.no_grad():
    out = tokenizer.decode(model.generate(**qs).squeeze())
    print(out)
    
print(time.perf_counter() - start)