diff --git a/samples/dnn/gpt2_inference.py b/samples/dnn/gpt2_inference.py
index d6bad3ee9f..8f0ddcf7d7 100644
--- a/samples/dnn/gpt2_inference.py
+++ b/samples/dnn/gpt2_inference.py
@@ -10,7 +10,7 @@ To export GPT-2 model to ONNX, you can use the following procedure:
 
     1. Clone fork of Andrej Karpathy's GPT-2 repository:
 
-        git clone https://github.com/Abdurrahheem/build-nanogpt/tree/ash/export-gpt2-onnx
+        git clone -b ash/export-gpt2-onnx-dynamic https://github.com/Abdurrahheem/build-nanogpt.git
 
     2. Install the required dependencies:
 
@@ -18,34 +18,33 @@ To export GPT-2 model to ONNX, you can use the following procedure:
 
     3 Export the model to ONNX:
 
-        python export2onnx.py --promt= --batch_size=
+        python export2onnx.py --promt=
 
 Run the script:
 
     1. Install the required dependencies:
 
-        pip install tiktoken==0.7.0
+        pip install tiktoken==0.7.0 numpy tqdm
 
     2. Run the script:
 
-
-        python gpt2_inference.py --model= --max_seq_len= --batch_size= --prompt=
+        python gpt2_inference.py --model= --prompt=
 
 '''
-from copy import deepcopy
 import numpy as np
 import tiktoken
 import argparse
 import cv2 as cv
+from tqdm import tqdm
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Use this script to run GPT-2 inference in OpenCV',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--model', type=str, required=True, help='Path to GPT-2 model ONNX model file.')
-    parser.add_argument("--max_seq_len", type=int, default=30, help="Number of tokens to continue.")
-    parser.add_argument("--batch_size", type=int, default=5, help="Number of batches.")
     parser.add_argument("--prompt", type=str, default="Hello, I'm a language model,", help="Prompt to start with.")
+    parser.add_argument("--max_seq_len", type=int, default=40, help="Number of tokens to continue.")
+    parser.add_argument("--batch_size", type=int, default=1, help="Number of batches.")
     parser.add_argument("--seed", type=int, default=0, help="Random seed")
     return parser.parse_args()
 
@@ -54,17 +53,21 @@ def stable_softmax(logits):
     return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
 
 
-def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
+def gpt2_inference(net, tokens, max_length, num_return_sequences=1):
     print("Inferencing GPT-2 model...")
 
     x = np.array(tokens)
-    x = np.tile(x, (num_return_sequences, 1))
+    x = np.tile(x, (num_return_sequences, 1)).astype(np.int32)
+    pos = np.arange(0, len(x), dtype=np.int32)
 
-    output_buffer = deepcopy(x)
     counter = x.shape[1]
 
+    pbar = tqdm(total=max_length - counter, desc="Generating tokens")
     while counter < max_length:
-        net.setInput(x)
+        net.setInputsNames(['input_ids', 'position_ids'])
+        net.setInput(x, 'input_ids')
+        net.setInput(pos, 'position_ids')
+
         logits = net.forward()
 
         # logits is assumed to be (B, seq_length, vocab_size) and needs to be the last token's logits
@@ -86,12 +89,14 @@ def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
 
         # Append to the sequence
         x = np.concatenate((x, sampled_indices), axis=1)
-        x = x[:, 1:] ## issue due to fixes size window in opencv
+        pos = np.arange(0, x.shape[1], dtype=np.int32) # shape (T)
 
-        output_buffer = np.concatenate((output_buffer, sampled_indices), axis=1)
         counter += 1
+        pbar.update(1)
+
+    pbar.close()
     print("Inference done!")
-    return output_buffer
+    return x
 
 
 if __name__ == '__main__':
@@ -102,24 +107,13 @@ if __name__ == '__main__':
     prompt = args.prompt
 
     net = cv.dnn.readNet(args.model)
-    input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]
 
     enc = tiktoken.get_encoding('gpt2')
     tokens = enc.encode(prompt)
 
-    # Check if the prompt is of the same length as the input tokens
-    # if not, pad the tokens else truncate the tokens
-    if len(tokens) > input_token_size:
-        tokens = tokens[:input_token_size]
-    elif len(tokens) < input_token_size:
-        tokens2pad = input_token_size - len(tokens)
-        # append token to the prompt
-        tokens += [220] * tokens2pad
-
-
-    output_buffer = gpt2_inference(net, tokens, max_length, num_return_sequences)
+    output = gpt2_inference(net, tokens, max_length, num_return_sequences)
 
     for i in range(num_return_sequences):
-        tokens = output_buffer[i, :max_length].tolist()
+        tokens = output[i].tolist()
         decoded = enc.decode(tokens)
-        print(">>>>", decoded)
\ No newline at end of file
+        print(">>>>", decoded)
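Note: the rewritten generation loop above feeds two named inputs ('input_ids' and 'position_ids') to the exported ONNX graph on every step and grows the sequence instead of sliding a fixed window. A minimal standalone sketch of that pattern with OpenCV's dnn API is given below; the model filename gpt2.onnx and the greedy argmax step are illustrative placeholders (the patched sample samples from the softmax distribution), not part of this patch.

    import numpy as np
    import cv2 as cv
    import tiktoken

    net = cv.dnn.readNet('gpt2.onnx')      # placeholder path to the exported ONNX model
    enc = tiktoken.get_encoding('gpt2')

    # Encode the prompt into a (1, T) int32 batch of token ids
    ids = np.array([enc.encode("Hello, I'm a language model,")], dtype=np.int32)
    max_seq_len = 40

    while ids.shape[1] < max_seq_len:
        pos = np.arange(ids.shape[1], dtype=np.int32)       # position ids grow with the sequence
        net.setInputsNames(['input_ids', 'position_ids'])
        net.setInput(ids, 'input_ids')
        net.setInput(pos, 'position_ids')
        logits = net.forward()                               # (B, T, vocab_size)
        next_id = np.argmax(logits[:, -1, :], axis=-1)       # greedy pick from the last token's logits
        ids = np.concatenate((ids, next_id[:, None].astype(np.int32)), axis=1)

    print(enc.decode(ids[0].tolist()))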