Merge pull request #26573 from Abdurrahheem:ash/fix-gpt2-sample

Fix gpt2 sample #26573

This PR adds dynamic input support to the `gpt2_inference.py` sample, so the prompt no longer has to match the fixed window length the model was exported with.
Fixes #26518
Fixes #26517
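
At the OpenCV DNN level, the change means the exported ONNX graph is fed through two named inputs, `input_ids` and `position_ids`, whose sequence length can vary between calls. A minimal sketch of one forward pass under that assumption (the model path and the token ids are placeholders, not taken from the PR):

    import numpy as np
    import cv2 as cv

    # Hypothetical path to an ONNX file exported with dynamic axes
    net = cv.dnn.readNet("gpt2_dynamic.onnx")

    # A prompt of any length works: input_ids is (batch, T), position_ids is (T,)
    input_ids = np.array([[15496, 11, 314, 1101]], dtype=np.int32)  # illustrative token ids
    position_ids = np.arange(input_ids.shape[1], dtype=np.int32)

    net.setInputsNames(['input_ids', 'position_ids'])
    net.setInput(input_ids, 'input_ids')
    net.setInput(position_ids, 'position_ids')
    logits = net.forward()  # expected shape: (batch, T, vocab_size)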

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Abduragim Shtanchaev 2024-12-05 12:34:49 +04:00 committed by GitHub
parent ddfb9d1dc8
commit 0c774c94f9

gpt2_inference.py

@@ -10,7 +10,7 @@ To export GPT-2 model to ONNX, you can use the following procedure:
 1. Clone fork of Andrej Karpathy's GPT-2 repository:
-    git clone https://github.com/Abdurrahheem/build-nanogpt/tree/ash/export-gpt2-onnx
+    git clone -b ash/export-gpt2-onnx-dynamic https://github.com/Abdurrahheem/build-nanogpt.git
 2. Install the required dependencies:
@@ -18,34 +18,33 @@ To export GPT-2 model to ONNX, you can use the following procedure:
 3 Export the model to ONNX:
-    python export2onnx.py --promt=<Any-promt-you-want> --batch_size=<batch-size>
+    python export2onnx.py --promt=<Any-promt-you-want>
 Run the script:
 1. Install the required dependencies:
-    pip install tiktoken==0.7.0
+    pip install tiktoken==0.7.0 numpy tqdm
 2. Run the script:
-    python gpt2_inference.py --model=<path-to-onnx-model> --max_seq_len=<max-output-lenght> --batch_size=<use-one-used-while-exportinh> --prompt=<use-promt-of-the-same-length-used-while-exporting>
+    python gpt2_inference.py --model=<path-to-onnx-model> --prompt=<use-promt-of-the-same-length-used-while-exporting>
 '''
-from copy import deepcopy
 import numpy as np
 import tiktoken
 import argparse
 import cv2 as cv
+from tqdm import tqdm
 def parse_args():
     parser = argparse.ArgumentParser(description='Use this script to run GPT-2 inference in OpenCV',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--model', type=str, required=True, help='Path to GPT-2 model ONNX model file.')
-    parser.add_argument("--max_seq_len", type=int, default=30, help="Number of tokens to continue.")
-    parser.add_argument("--batch_size", type=int, default=5, help="Number of batches.")
     parser.add_argument("--prompt", type=str, default="Hello, I'm a language model,", help="Prompt to start with.")
+    parser.add_argument("--max_seq_len", type=int, default=40, help="Number of tokens to continue.")
+    parser.add_argument("--batch_size", type=int, default=1, help="Number of batches.")
     parser.add_argument("--seed", type=int, default=0, help="Random seed")
     return parser.parse_args()
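
With the new defaults, the sample generates up to 40 tokens for a single sequence unless overridden on the command line. The prompt itself can be any length; a rough sketch of how it becomes the initial `(batch_size, T)` token matrix, mirroring the tiling in the next hunk (the prompt string is just an example):

    import numpy as np
    import tiktoken

    enc = tiktoken.get_encoding('gpt2')
    tokens = enc.encode("OpenCV DNN can now run GPT-2 on any prompt")  # arbitrary length
    batch_size = 1

    # int32 matrix of token ids, shaped (batch_size, T), as the updated sample builds it
    x = np.tile(np.array(tokens), (batch_size, 1)).astype(np.int32)
    print(x.shape)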
@@ -54,17 +53,21 @@ def stable_softmax(logits):
     return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
-def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
+def gpt2_inference(net, tokens, max_length, num_return_sequences=1):
     print("Inferencing GPT-2 model...")
     x = np.array(tokens)
-    x = np.tile(x, (num_return_sequences, 1))
-    output_buffer = deepcopy(x)
+    x = np.tile(x, (num_return_sequences, 1)).astype(np.int32)
+    pos = np.arange(0, len(x), dtype=np.int32)
     counter = x.shape[1]
+    pbar = tqdm(total=max_length - counter, desc="Generating tokens")
     while counter < max_length:
-        net.setInput(x)
+        net.setInputsNames(['input_ids', 'position_ids'])
+        net.setInput(x, 'input_ids')
+        net.setInput(pos, 'position_ids')
         logits = net.forward()
         # logits is assumed to be (B, seq_length, vocab_size) and needs to be the last token's logits
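
The comment above describes the usual next-token step: only the logits of the last position go through the softmax. A self-contained sketch of that slice, with a numerically stable softmax reimplemented to match the `stable_softmax` helper already in the file (the max subtraction is the standard stabilisation trick, and the logits here are dummy values):

    import numpy as np

    def stable_softmax(logits):
        # subtract the row-wise max so exp() cannot overflow
        exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
        return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)

    logits = np.random.randn(1, 7, 50257)   # (B, seq_length, vocab_size)
    last_logits = logits[:, -1, :]           # keep only the last token's logits
    probs = stable_softmax(last_logits)      # (B, vocab_size), each row sums to 1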
@@ -86,12 +89,14 @@ def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
         # Append to the sequence
         x = np.concatenate((x, sampled_indices), axis=1)
-        x = x[:, 1:] ## issue due to fixes size window in opencv
-        output_buffer = np.concatenate((output_buffer, sampled_indices), axis=1)
+        pos = np.arange(0, x.shape[1], dtype=np.int32) # shape (T)
         counter += 1
+        pbar.update(1)
+    pbar.close()
     print("Inference done!")
-    return output_buffer
+    return x
 if __name__ == '__main__':
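
The code that produces `sampled_indices` sits between these two hunks and is untouched by the PR, so it is not shown. For reference, a typical top-k sampling step over the probabilities might look like the following; this is an illustration, not necessarily the sample's exact logic:

    import numpy as np

    rng = np.random.default_rng(0)
    k = 50
    probs = np.full((1, 50257), 1.0 / 50257)  # dummy uniform distribution over the GPT-2 vocab

    topk_idx = np.argsort(probs, axis=-1)[:, -k:]             # indices of the k largest probabilities
    topk_probs = np.take_along_axis(probs, topk_idx, axis=-1)
    topk_probs /= topk_probs.sum(axis=-1, keepdims=True)      # renormalise within the top-k

    # draw one token per sequence and map back to vocabulary ids, shape (B, 1)
    choice = np.array([rng.choice(k, p=p) for p in topk_probs])
    sampled_indices = np.take_along_axis(topk_idx, choice[:, None], axis=-1).astype(np.int32)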
@@ -102,24 +107,13 @@ if __name__ == '__main__':
     prompt = args.prompt
     net = cv.dnn.readNet(args.model)
-    input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]
     enc = tiktoken.get_encoding('gpt2')
     tokens = enc.encode(prompt)
-    # Check if the prompt is of the same length as the input tokens
-    # if not, pad the tokens else truncate the tokens
-    if len(tokens) > input_token_size:
-        tokens = tokens[:input_token_size]
-    elif len(tokens) < input_token_size:
-        tokens2pad = input_token_size - len(tokens)
-        # append <space> token to the prompt
-        tokens += [220] * tokens2pad
-    output_buffer = gpt2_inference(net, tokens, max_length, num_return_sequences)
+    output = gpt2_inference(net, tokens, max_length, num_return_sequences)
     for i in range(num_return_sequences):
-        tokens = output_buffer[i, :max_length].tolist()
+        tokens = output[i].tolist()
         decoded = enc.decode(tokens)
         print(">>>>", decoded)