From 88f05e49be315dc17b0cd0a461a01c56cedf5512 Mon Sep 17 00:00:00 2001
From: Abduragim Shtanchaev <44877829+Abdurrahheem@users.noreply.github.com>
Date: Thu, 18 Jul 2024 16:47:12 +0300
Subject: [PATCH] Merge pull request #25868 from Abdurrahheem:ash/add-gpt2-sample

Add sample for GPT2 inference #25868

### Pull Request Readiness Checklist

This PR adds a sample for running inference with a GPT-2 model, specifically the GPT-2 implementation from [this repository](https://github.com/karpathy/build-nanogpt). Currently inference in OpenCV is only possible with a fixed window size, because dynamic shapes are not yet supported.

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/include/opencv2/dnn/dnn.hpp        |   2 +-
 .../java/generator/src/cpp/listconverters.cpp  |  18 +++
 .../java/generator/src/cpp/listconverters.hpp  |   9 +-
 samples/dnn/gpt2_inference.py                  | 125 ++++++++++++++++++
 4 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 samples/dnn/gpt2_inference.py

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 311bf380b1..0234e32e50 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -728,7 +728,7 @@ CV__DNN_INLINE_NS_BEGIN
      * @param outLayerShapes output parameter for output layers shapes;
      * order is the same as in layersIds
      */
-    void getLayerShapes(const MatShape& netInputShape,
+    CV_WRAP void getLayerShapes(const MatShape& netInputShape,
                         const int& netInputType,
                         const int layerId,
                         CV_OUT std::vector<MatShape>& inLayerShapes,
diff --git a/modules/java/generator/src/cpp/listconverters.cpp b/modules/java/generator/src/cpp/listconverters.cpp
index 94702acc56..19e8b5b9ca 100644
--- a/modules/java/generator/src/cpp/listconverters.cpp
+++ b/modules/java/generator/src/cpp/listconverters.cpp
@@ -108,3 +108,21 @@ void Copy_vector_string_to_List(JNIEnv* env, std::vector<std::string>& vs, jobje
         env->DeleteLocalRef(element);
     }
 }
+
+#ifdef HAVE_OPENCV_DNN
+void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector<cv::dnn::MatShape>& vs, jobject list)
+{
+    static jclass juArrayList = ARRAYLIST(env);
+    jmethodID m_clear = LIST_CLEAR(env, juArrayList);
+    jmethodID m_add = LIST_ADD(env, juArrayList);
+
+    env->CallVoidMethod(list, m_clear);
+    for (size_t i = 0; i < vs.size(); i++)
+    {
+        jintArray element = env->NewIntArray((jint)vs[i].size());
+        env->SetIntArrayRegion(element, 0, (jint)vs[i].size(), (const jint*)&vs[i][0]);
+        env->CallBooleanMethod(list, m_add, element);
+        env->DeleteLocalRef(element);
+    }
+}
+#endif // HAVE_OPENCV_DNN
diff --git a/modules/java/generator/src/cpp/listconverters.hpp b/modules/java/generator/src/cpp/listconverters.hpp
index 0ffd93489a..83635a5cb9 100644
--- a/modules/java/generator/src/cpp/listconverters.hpp
+++ b/modules/java/generator/src/cpp/listconverters.hpp
@@ -23,4 +23,11 @@ std::vector<std::string> List_to_vector_string(JNIEnv* env, jobject list);
 
 void Copy_vector_string_to_List(JNIEnv* env, std::vector<std::string>& vs, jobject list);
 
-#endif /* LISTCONVERTERS_HPP */
+#ifdef HAVE_OPENCV_DNN
+#include "opencv2/dnn.hpp"
+
+void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector<cv::dnn::MatShape>& vs, jobject list);
+
+#endif // HAVE_OPENCV_DNN
+
+#endif /* LISTCONVERTERS_HPP */
diff --git a/samples/dnn/gpt2_inference.py b/samples/dnn/gpt2_inference.py
new file mode 100644
index 0000000000..d6bad3ee9f
--- /dev/null
+++ b/samples/dnn/gpt2_inference.py
@@ -0,0 +1,125 @@
+'''
+This is a sample script to run GPT-2 inference in OpenCV using an ONNX model.
+The script loads the GPT-2 model and runs inference on a given prompt.
+Currently the script only works with a fixed-size window, which means
+you will have to specify a prompt of the same length as when the model was exported to ONNX.
+
+
+Exporting the GPT-2 model to ONNX.
+To export the GPT-2 model to ONNX, you can use the following procedure:
+
+1. Clone the fork of Andrej Karpathy's GPT-2 repository:
+
+    git clone https://github.com/Abdurrahheem/build-nanogpt/tree/ash/export-gpt2-onnx
+
+2. Install the required dependencies:
+
+    pip install -r requirements.txt
+
+3. Export the model to ONNX:
+
+    python export2onnx.py --promt= --batch_size=
+
+
+Run the script:
+1. Install the required dependencies:
+
+    pip install tiktoken==0.7.0
+
+2. Run the script:
+
+    python gpt2_inference.py --model= --max_seq_len= --batch_size= --prompt=
+'''
+
+
+from copy import deepcopy
+import numpy as np
+import tiktoken
+import argparse
+import cv2 as cv
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Use this script to run GPT-2 inference in OpenCV',
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--model', type=str, required=True, help='Path to the GPT-2 ONNX model file.')
+    parser.add_argument("--max_seq_len", type=int, default=30, help="Number of tokens to continue.")
+    parser.add_argument("--batch_size", type=int, default=5, help="Number of batches.")
+    parser.add_argument("--prompt", type=str, default="Hello, I'm a language model,", help="Prompt to start with.")
+    parser.add_argument("--seed", type=int, default=0, help="Random seed")
+    return parser.parse_args()
+
+def stable_softmax(logits):
+    # Subtract the per-row maximum before exponentiating for numerical stability
+    exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
+    return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
+
+
+def gpt2_inference(net, tokens, max_length, num_return_sequences=5):
+
+    print("Running GPT-2 inference...")
+    x = np.array(tokens)
+    x = np.tile(x, (num_return_sequences, 1))
+
+    output_buffer = deepcopy(x)
+    counter = x.shape[1]
+    while counter < max_length:
+
+        net.setInput(x)
+        logits = net.forward()
+
+        # logits has shape (B, seq_length, vocab_size); keep only the last token's logits
+        logits = logits[:, -1, :] # (B, vocab_size)
+
+        # Get the probabilities using softmax
+        probs = stable_softmax(logits)
+
+        # Do top-k sampling with k = 50
+        topk_indices = np.argpartition(probs, -50, axis=-1)[:, -50:]
+        topk_probs = np.take_along_axis(probs, topk_indices, axis=-1)
+
+        # Normalize top-k probabilities
+        topk_probs /= np.sum(topk_probs, axis=-1, keepdims=True)
+
+        # Select a token from the top-k probabilities
+        sampled_indices = [np.random.choice(topk_indices[i], p=topk_probs[i]) for i in range(len(topk_probs))]
+        sampled_indices = np.array(sampled_indices).reshape(-1, 1)
+
+        # Append to the sequence
+        x = np.concatenate((x, sampled_indices), axis=1)
+        x = x[:, 1:] # drop the oldest token: the input window must stay fixed because OpenCV does not support dynamic shapes
+
+        output_buffer = np.concatenate((output_buffer, sampled_indices), axis=1)
+        counter += 1
+
+    print("Inference done!")
+    return output_buffer
+
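+# NOTE: a minimal, illustrative sketch of greedy decoding as an alternative to the
+# top-k sampling used above. It is not called anywhere in this sample, and the name
+# `greedy_next_token` is not part of the original script.
+def greedy_next_token(logits):
+    # logits: (B, vocab_size) array of last-token logits;
+    # returns the most likely token id per batch row, shape (B, 1)
+    return np.argmax(logits, axis=-1).reshape(-1, 1)
+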
+if __name__ == '__main__':
+
+    args = parse_args()
+    np.random.seed(args.seed)
+    max_length = args.max_seq_len
+    num_return_sequences = args.batch_size
+    prompt = args.prompt
+
+    net = cv.dnn.readNet(args.model)
+    # The exported model has a fixed input window; query its token size from the input layer shape
+    input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]
+
+    enc = tiktoken.get_encoding('gpt2')
+    tokens = enc.encode(prompt)
+
+    # Make the prompt match the model's fixed input size:
+    # truncate it if it is too long, pad it if it is too short
+    if len(tokens) > input_token_size:
+        tokens = tokens[:input_token_size]
+    elif len(tokens) < input_token_size:
+        tokens2pad = input_token_size - len(tokens)
+        # pad the prompt with token 220 (the GPT-2 encoding of a space)
+        tokens += [220] * tokens2pad
+
+    output_buffer = gpt2_inference(net, tokens, max_length, num_return_sequences)
+
+    for i in range(num_return_sequences):
+        tokens = output_buffer[i, :max_length].tolist()
+        decoded = enc.decode(tokens)
+        print(">>>>", decoded)
\ No newline at end of file
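
A minimal usage sketch of the fixed-window constraint described above: it queries the exported model's input token size with the same `getLayerShapes` call the sample uses. The file name `gpt2.onnx` is only an assumed placeholder for whatever `export2onnx.py` produces.

```python
import cv2 as cv

net = cv.dnn.readNet("gpt2.onnx")  # assumed export file name, substitute your own

# Same call as in gpt2_inference.py: shapes of layer 0 for an empty input shape and type 0.
# The second dimension of the first input shape is the fixed token-window size.
input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]
print("The prompt must encode to", input_token_size, "tokens")
```

With a prompt of matching length, the sample can then be run as, for example:

```
python gpt2_inference.py --model=gpt2.onnx --max_seq_len=30 --batch_size=5 --prompt="Hello, I'm a language model,"
```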