Merge pull request #26573 from Abdurrahheem:ash/fix-gpt2-sample

Fix gpt2 sample #26573

This PR adds dynamic input support for `gpt2_inference.py` sample.

Fixes #26518
Fixes #26517

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
2025-08-06 14:36:36 +08:00 · 2024-12-05 12:34:49 +04:00 · 2024-12-05 12:34:49 +04:00 · 0c774c94f9
commit 0c774c94f9
parent ddfb9d1dc8
1 changed files with 23 additions and 29 deletions
--- a/samples/dnn/gpt2_inference.py
+++ b/samples/dnn/gpt2_inference.py
@@ -10,7 +10,7 @@ To export GPT-2 model to ONNX, you can use the following procedure:

 1. Clone fork of Andrej Karpathy's GPT-2 repository:

-    git clone https://github.com/Abdurrahheem/build-nanogpt/tree/ash/export-gpt2-onnx
+    git clone -b ash/export-gpt2-onnx-dynamic https://github.com/Abdurrahheem/build-nanogpt.git

 2. Install the required dependencies:

@@ -18,34 +18,33 @@ To export GPT-2 model to ONNX, you can use the following procedure:

 3  Export the model to ONNX:

-    python export2onnx.py --promt=<Any-promt-you-want> --batch_size=<batch-size>
+    python export2onnx.py --promt=<Any-promt-you-want>


 Run the script:
 1. Install the required dependencies:

-    pip install tiktoken==0.7.0
+    pip install tiktoken==0.7.0 numpy tqdm

 2. Run the script:
-
-    python gpt2_inference.py --model=<path-to-onnx-model> --max_seq_len=<max-output-lenght> --batch_size=<use-one-used-while-exportinh> --prompt=<use-promt-of-the-same-length-used-while-exporting>
+    python gpt2_inference.py --model=<path-to-onnx-model> --prompt=<any-prompt-you-want>
 '''



-from copy import deepcopy
 import numpy as np
 import tiktoken
 import argparse
 import cv2 as cv
+from tqdm import tqdm

 def parse_args():
    parser = argparse.ArgumentParser(description='Use this script to run GPT-2 inference in OpenCV',
                                    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, required=True, help='Path to GPT-2 model ONNX model file.')
-    parser.add_argument("--max_seq_len", type=int, default=30, help="Number of tokens to continue.")
-    parser.add_argument("--batch_size", type=int, default=5, help="Number of batches.")
    parser.add_argument("--prompt", type=str, default="Hello, I'm a language model,", help="Prompt to start with.")
+    parser.add_argument("--max_seq_len", type=int, default=40, help="Number of tokens to continue.")
+    parser.add_argument("--batch_size", type=int, default=1, help="Number of batches.")
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    return parser.parse_args()

@@ -54,17 +53,21 @@ def stable_softmax(logits):
    return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)


-def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
+def gpt2_inference(net, tokens, max_length, num_return_sequences=1):

    print("Inferencing GPT-2 model...")
    x = np.array(tokens)
-    x = np.tile(x, (num_return_sequences, 1))
+    x = np.tile(x, (num_return_sequences, 1)).astype(np.int32)
+    pos = np.arange(0, len(x), dtype=np.int32)

-    output_buffer = deepcopy(x)
    counter = x.shape[1]
+    pbar = tqdm(total=max_length - counter, desc="Generating tokens")
    while counter < max_length:

-        net.setInput(x)
+        net.setInputsNames(['input_ids', 'position_ids'])
+        net.setInput(x, 'input_ids')
+        net.setInput(pos, 'position_ids')
+
        logits = net.forward()

        # logits is assumed to be (B, seq_length, vocab_size) and needs to be the last token's logits
@@ -86,12 +89,14 @@ def gpt2_inference(net, tokens, max_length, num_return_sequences=5):

        # Append to the sequence
        x = np.concatenate((x, sampled_indices), axis=1)
-        x = x[:, 1:] ## issue due to fixes size window in opencv
+        pos = np.arange(0, x.shape[1], dtype=np.int32) # shape (T)

-        output_buffer = np.concatenate((output_buffer, sampled_indices), axis=1)
        counter += 1
+        pbar.update(1)
+
+    pbar.close()
    print("Inference done!")
-    return output_buffer
+    return x

 if __name__ == '__main__':

@@ -102,24 +107,13 @@ if __name__ == '__main__':
    prompt = args.prompt

    net = cv.dnn.readNet(args.model)
-    input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]

    enc = tiktoken.get_encoding('gpt2')
    tokens = enc.encode(prompt)

-    # Check if the prompt is of the same length as the input tokens
-    # if not, pad the tokens else truncate the tokens
-    if len(tokens) > input_token_size:
-        tokens = tokens[:input_token_size]
-    elif len(tokens) < input_token_size:
-        tokens2pad = input_token_size - len(tokens)
-        # append <space> token to the prompt
-        tokens += [220] * tokens2pad
-
-
-    output_buffer = gpt2_inference(net, tokens, max_length, num_return_sequences)
+    output = gpt2_inference(net, tokens, max_length, num_return_sequences)

    for i in range(num_return_sequences):
-        tokens = output_buffer[i, :max_length].tolist()
+        tokens = output[i].tolist()
        decoded = enc.decode(tokens)
-        print(">>>>", decoded)
+        print(">>>>", decoded)