Merge pull request #25868 from Abdurrahheem:ash/add-gpt2-sample
Add sample for GPT2 inference #25868

### Pull Request Readiness Checklist

This PR adds a sample for running inference with the GPT-2 model, specifically the GPT-2 implementation from [this repository](https://github.com/karpathy/build-nanogpt). Currently, inference in OpenCV is only possible with a fixed window size, because dynamic shapes are not yet supported.

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
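As a rough illustration of that fixed-window limitation, here is a minimal sketch, assuming the model was exported with a fixed 13-token window and saved as `gpt2.onnx` (both chosen purely for illustration), of how a prompt has to be shaped before it can be fed to the network, as the sample added below does:

```python
import numpy as np
import tiktoken
import cv2 as cv

WINDOW = 13  # illustrative: the fixed sequence length the ONNX model was exported with

net = cv.dnn.readNet("gpt2.onnx")  # illustrative path
enc = tiktoken.get_encoding("gpt2")
tokens = enc.encode("Hello, I'm a language model,")

# The input shape is baked into the exported model, so the prompt must be
# truncated or padded; the sample pads with GPT-2's <space> token (id 220).
tokens = tokens[:WINDOW] + [220] * max(0, WINDOW - len(tokens))

x = np.tile(np.array(tokens), (1, 1))  # shape (batch, WINDOW)
net.setInput(x)
logits = net.forward()                 # shape (batch, WINDOW, vocab_size)
print(logits.shape)
```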
This commit is contained in:
parent
060c24bec9
commit
88f05e49be
@@ -728,7 +728,7 @@ CV__DNN_INLINE_NS_BEGIN
      * @param outLayerShapes output parameter for output layers shapes;
      *                       order is the same as in layersIds
      */
-    void getLayerShapes(const MatShape& netInputShape,
+    CV_WRAP void getLayerShapes(const MatShape& netInputShape,
                         const int& netInputType,
                         const int layerId,
                         CV_OUT std::vector<MatShape>& inLayerShapes,
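Adding CV_WRAP exposes this getLayerShapes overload through the generated bindings, which is what lets the new Python sample query the model's fixed input shape. A minimal sketch of the resulting Python call, mirroring the sample added below (the model path is illustrative):

```python
import cv2 as cv

net = cv.dnn.readNet("gpt2.onnx")  # illustrative path

# With CV_WRAP, the CV_OUT parameters become return values in Python:
# a pair (inLayerShapes, outLayerShapes) for the given input shape, type and layer id.
in_shapes, out_shapes = net.getLayerShapes([], 0, 0)
seq_len = in_shapes[0][1]  # fixed sequence length baked into the exported model
print("expected input shape:", in_shapes[0], "-> window of", seq_len, "tokens")
```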
@@ -108,3 +108,21 @@ void Copy_vector_string_to_List(JNIEnv* env, std::vector<std::string>& vs, jobject list)
         env->DeleteLocalRef(element);
     }
 }
+
+#ifdef HAVE_OPENCV_DNN
+void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector<cv::dnn::MatShape>& vs, jobject list)
+{
+    static jclass juArrayList = ARRAYLIST(env);
+    jmethodID m_clear = LIST_CLEAR(env, juArrayList);
+    jmethodID m_add = LIST_ADD(env, juArrayList);
+
+    env->CallVoidMethod(list, m_clear);
+    for (size_t i = 0; i < vs.size(); i++)
+    {
+        jintArray element = env->NewIntArray((jint)vs[i].size());
+        env->SetIntArrayRegion(element, 0, (jint)vs[i].size(), (const jint*)&vs[i][0]);
+        env->CallBooleanMethod(list, m_add, element);
+        env->DeleteLocalRef(element);
+    }
+}
+#endif // HAVE_OPENCV_DNN
@@ -23,4 +23,11 @@ std::vector<std::string> List_to_vector_string(JNIEnv* env, jobject list);
 
 void Copy_vector_string_to_List(JNIEnv* env, std::vector<std::string>& vs, jobject list);
 
-#endif /* LISTCONVERTERS_HPP */
+#ifdef HAVE_OPENCV_DNN
+#include "opencv2/dnn.hpp"
+
+void Copy_vector_MatShape_to_List(JNIEnv* env, std::vector<cv::dnn::MatShape>& vs, jobject list);
+
+#endif // HAVE_OPENCV_DNN
+
+#endif /* LISTCONVERTERS_HPP */
125  samples/dnn/gpt2_inference.py  Normal file
@@ -0,0 +1,125 @@
'''
This is a sample script to run GPT-2 inference in OpenCV using an ONNX model.
The script loads the GPT-2 model and runs inference on a given prompt.
Currently the script only works with a fixed-size window, which means
you have to specify a prompt of the same length as the one used when the model was exported to ONNX.


Exporting the GPT-2 model to ONNX.
To export the GPT-2 model to ONNX, use the following procedure:

1. Clone the fork of Andrej Karpathy's GPT-2 repository:

    git clone https://github.com/Abdurrahheem/build-nanogpt/tree/ash/export-gpt2-onnx

2. Install the required dependencies:

    pip install -r requirements.txt

3. Export the model to ONNX:

    python export2onnx.py --promt=<Any-promt-you-want> --batch_size=<batch-size>


Run the script:
1. Install the required dependencies:

    pip install tiktoken==0.7.0

2. Run the script:

    python gpt2_inference.py --model=<path-to-onnx-model> --max_seq_len=<max-output-length> --batch_size=<batch-size-used-while-exporting> --prompt=<prompt-of-the-same-length-used-while-exporting>
'''


from copy import deepcopy
import numpy as np
import tiktoken
import argparse
import cv2 as cv

def parse_args():
    parser = argparse.ArgumentParser(description='Use this script to run GPT-2 inference in OpenCV',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, required=True, help='Path to the GPT-2 ONNX model file.')
    parser.add_argument("--max_seq_len", type=int, default=30, help="Number of tokens to continue.")
    parser.add_argument("--batch_size", type=int, default=5, help="Number of sequences to generate (must match the batch size used at export).")
    parser.add_argument("--prompt", type=str, default="Hello, I'm a language model,", help="Prompt to start with.")
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    return parser.parse_args()

def stable_softmax(logits):
    exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
    return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)


def gpt2_inference(net, tokens, max_length, num_return_sequences=5):

    print("Running GPT-2 inference...")
    x = np.array(tokens)
    x = np.tile(x, (num_return_sequences, 1))

    output_buffer = deepcopy(x)
    counter = x.shape[1]
    while counter < max_length:

        net.setInput(x)
        logits = net.forward()

        # logits has shape (B, seq_length, vocab_size); keep only the last token's logits
        logits = logits[:, -1, :] # (B, vocab_size)

        # Get the probabilities using softmax
        probs = stable_softmax(logits)

        # Top-k sampling with k=50
        topk_indices = np.argpartition(probs, -50, axis=-1)[:, -50:]
        topk_probs = np.take_along_axis(probs, topk_indices, axis=-1)

        # Normalize top-k probabilities
        topk_probs /= np.sum(topk_probs, axis=-1, keepdims=True)

        # Select a token from the top-k probabilities
        sampled_indices = [np.random.choice(topk_indices[i], p=topk_probs[i]) for i in range(len(topk_probs))]
        sampled_indices = np.array(sampled_indices).reshape(-1, 1)

        # Append the sampled token to the sequence
        x = np.concatenate((x, sampled_indices), axis=1)
        x = x[:, 1:]  # drop the oldest token: OpenCV currently requires a fixed-size input window

        output_buffer = np.concatenate((output_buffer, sampled_indices), axis=1)
        counter += 1
    print("Inference done!")
    return output_buffer


if __name__ == '__main__':

    args = parse_args()
    np.random.seed(args.seed)
    max_length = args.max_seq_len
    num_return_sequences = args.batch_size
    prompt = args.prompt

    net = cv.dnn.readNet(args.model)
    input_token_size = net.getLayerShapes([], 0, 0)[0][0][1]

    enc = tiktoken.get_encoding('gpt2')
    tokens = enc.encode(prompt)

    # The prompt must match the model's fixed input length:
    # truncate it if it is longer, pad it if it is shorter
    if len(tokens) > input_token_size:
        tokens = tokens[:input_token_size]
    elif len(tokens) < input_token_size:
        tokens2pad = input_token_size - len(tokens)
        # pad with the GPT-2 <space> token (id 220)
        tokens += [220] * tokens2pad

    output_buffer = gpt2_inference(net, tokens, max_length, num_return_sequences)

    for i in range(num_return_sequences):
        tokens = output_buffer[i, :max_length].tolist()
        decoded = enc.decode(tokens)
        print(">>>>", decoded)