mirror of
https://github.com/opencv/opencv.git
synced 2025-07-25 22:57:53 +08:00
Merge pull request #24350 from dkurt:py_return_non_utf8_string
Encode QR code data to UTF-8 #24350 ### Pull Request Readiness Checklist **Merge with extra**: https://github.com/opencv/opencv_extra/pull/1105 resolves https://github.com/opencv/opencv/issues/23728 This is the first PR in a series. Here we just return raw Unicode. Later I will try to expand the QR code decoding methods to use the ECI assignment number and return a string with the proper encoding, not only UTF-8 or raw Unicode. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
3859ac921a
commit
5ddf3de4ce
@ -2727,6 +2727,58 @@ bool QRDecode::samplingForVersion()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true when none of the `size` bytes starting at `str` has its
// most significant bit set, i.e. every byte is in the range 0x00..0x7f.
// An empty range (size == 0) yields true.
static bool checkASCIIcompatible(const uint8_t* str, const size_t size) {
    size_t pos = 0;
    while (pos < size) {
        // A set high bit means the byte value is 0x80 or greater.
        if ((str[pos] & 0x80) != 0)
            return false;
        ++pos;
    }
    return true;
}
|
||||||
|
|
||||||
|
// Checks whether the `size` bytes starting at `str` form a well-formed
// UTF-8 sequence as defined by RFC 3629.
//
// Besides the lead/continuation bit patterns, the following ill-formed
// inputs are rejected:
//   - overlong encodings (lead bytes 0xc0/0xc1, 0xe0 followed by < 0xa0,
//     0xf0 followed by < 0x90),
//   - UTF-16 surrogates U+D800..U+DFFF (0xed followed by > 0x9f),
//   - code points above U+10FFFF (0xf4 followed by > 0x8f, lead bytes 0xf5+),
//   - truncated multi-byte sequences and stray continuation bytes.
//
// Returns true for an empty range (size == 0).
static bool checkUTF8(const uint8_t* str, const size_t size) {
    size_t i = 0;
    while (i < size) {
        const uint8_t lead = str[i];
        if (lead < 0x80) {
            // Single-byte (ASCII) symbol.
            ++i;
            continue;
        }

        // Determine the sequence length from the lead byte together with
        // the allowed range of the FIRST continuation byte. That range is
        // narrower than 0x80..0xbf for a few lead bytes, which is what
        // excludes overlong forms, surrogates and values > U+10FFFF.
        size_t numBytesPerSymbol;
        uint8_t firstLo = 0x80;
        uint8_t firstHi = 0xbf;
        if (lead >= 0xc2 && lead <= 0xdf) {
            numBytesPerSymbol = 2;
        } else if (lead >= 0xe0 && lead <= 0xef) {
            numBytesPerSymbol = 3;
            if (lead == 0xe0)
                firstLo = 0xa0;  // below that the symbol would fit in 2 bytes
            else if (lead == 0xed)
                firstHi = 0x9f;  // above that is the surrogate range
        } else if (lead >= 0xf0 && lead <= 0xf4) {
            numBytesPerSymbol = 4;
            if (lead == 0xf0)
                firstLo = 0x90;  // below that the symbol would fit in 3 bytes
            else if (lead == 0xf4)
                firstHi = 0x8f;  // above that exceeds U+10FFFF
        } else {
            // Stray continuation byte (0x80..0xbf), overlong 2-byte lead
            // (0xc0, 0xc1) or a lead byte that can never occur (0xf5..0xff).
            return false;
        }

        // The whole symbol must be present in the buffer.
        if (size - i < numBytesPerSymbol)
            return false;

        const uint8_t first = str[i + 1];
        if (first < firstLo || first > firstHi)
            return false;

        // Remaining continuation bytes must match the 10xxxxxx pattern.
        for (size_t j = 2; j < numBytesPerSymbol; ++j) {
            if ((str[i + j] & 0xc0) != 0x80)
                return false;
        }

        i += numBytesPerSymbol;
    }
    return true;
}
|
||||||
|
|
||||||
|
// Converts a byte array to a UTF-8 string, treating every input byte as
// one code point in the range U+0000..U+00FF (one byte per symbol).
//
// Bytes below 0x80 are copied unchanged; bytes 0x80..0xff are emitted as the
// two-byte UTF-8 sequence 110000xx 10xxxxxx carrying the byte's value.
// Returns an empty string when size == 0.
static std::string encodeUTF8_bytesarray(const uint8_t* str, const size_t size) {
    std::string res;
    // At least `size` bytes are produced; every byte >= 0x80 adds one more.
    res.reserve(size);
    for (size_t i = 0; i < size; ++i) {
        const uint8_t byte = str[i];
        if (byte >= 0x80) {
            res.push_back(static_cast<char>(0xc0 | (byte >> 6)));    // top 2 bits
            res.push_back(static_cast<char>(0x80 | (byte & 0x3f)));  // low 6 bits
        } else {
            res.push_back(static_cast<char>(byte));
        }
    }
    return res;
}
|
||||||
|
|
||||||
bool QRDecode::decodingProcess()
|
bool QRDecode::decodingProcess()
|
||||||
{
|
{
|
||||||
#ifdef HAVE_QUIRC
|
#ifdef HAVE_QUIRC
|
||||||
@ -2756,11 +2808,58 @@ bool QRDecode::decodingProcess()
|
|||||||
|
|
||||||
if (errorCode != 0) { return false; }
|
if (errorCode != 0) { return false; }
|
||||||
|
|
||||||
for (int i = 0; i < qr_code_data.payload_len; i++)
|
CV_LOG_INFO(NULL, "QR: decoded with .version=" << qr_code_data.version << " .data_type=" << qr_code_data.data_type << " .eci=" << qr_code_data.eci << " .payload_len=" << qr_code_data.payload_len)
|
||||||
|
|
||||||
|
switch (qr_code_data.data_type)
|
||||||
{
|
{
|
||||||
result_info += qr_code_data.payload[i];
|
case QUIRC_DATA_TYPE_NUMERIC:
|
||||||
|
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: DATA_TYPE_NUMERIC payload must be ACSII compatible string");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
return true;
|
||||||
|
case QUIRC_DATA_TYPE_ALPHA:
|
||||||
|
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: DATA_TYPE_ALPHA payload must be ASCII compatible string");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
return true;
|
||||||
|
case QUIRC_DATA_TYPE_BYTE:
|
||||||
|
// https://en.wikipedia.org/wiki/Extended_Channel_Interpretation
|
||||||
|
if (qr_code_data.eci == QUIRC_ECI_UTF_8) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: payload ECI is UTF-8");
|
||||||
|
if (!checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||||
|
CV_LOG_INFO(NULL, "QUIRC_DATA_TYPE_BYTE with UTF-8 ECI must be UTF-8 compatible string");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
} else if (qr_code_data.eci == 25/*ECI_UTF_16BE*/) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: UTF-16BE ECI is not supported");
|
||||||
|
return false;
|
||||||
|
} else if (checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: payload is ASCII compatible (special handling for symbols encoding is not needed)");
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
} else {
|
||||||
|
if (checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||||
|
CV_LOG_INFO(NULL, "QR: payload QUIRC_DATA_TYPE_BYTE is UTF-8 compatible, return as-is");
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
} else {
|
||||||
|
CV_LOG_INFO(NULL, "QR: assume 1-byte per symbol encoding");
|
||||||
|
result_info = encodeUTF8_bytesarray(qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
case QUIRC_DATA_TYPE_KANJI:
|
||||||
|
// FIXIT BUG: we must return UTF-8 compatible string
|
||||||
|
CV_LOG_WARNING(NULL, "QR: Kanji is not supported properly");
|
||||||
|
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
|
CV_LOG_WARNING(NULL, "QR: unsupported QR data type");
|
||||||
|
return false;
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
|
@ -637,4 +637,25 @@ TEST_P(Objdetect_QRCode_detectAndDecodeMulti, decode_9_qrcodes_version7)
|
|||||||
|
|
||||||
#endif // UPDATE_QRCODE_TEST_DATA
|
#endif // UPDATE_QRCODE_TEST_DATA
|
||||||
|
|
||||||
|
// Decodes the "qrcode/umlaut.png" test image and verifies that the returned
// text contains "M\xc3\xbcllheimstrasse", i.e. the non-ASCII letter is present
// as the two-byte UTF-8 sequence 0xc3 0xbc. Skipped when built without Quirc.
TEST(Objdetect_QRCode_detectAndDecode, utf8_output)
{
#ifndef HAVE_QUIRC
    throw SkipTestException("Quirc is required for decoding");
#else
    const std::string data_dir = "qrcode/";
    const std::string file_name = "umlaut.png";

    // Locate and load the input image; abort the test if it cannot be read.
    std::string image_path = findDataFile(data_dir + file_name);
    Mat input = imread(image_path);
    ASSERT_FALSE(input.empty()) << "Can't read image: " << image_path;

    // Outputs of the detector besides the decoded text itself.
    Mat straight_code;
    std::vector<Point> points;

    QRCodeDetector detector;
    std::string text = detector.detectAndDecode(input, points, straight_code);

    EXPECT_FALSE(text.empty());
    EXPECT_NE(text.find("M\xc3\xbcllheimstrasse"), std::string::npos);
#endif  // HAVE_QUIRC
}
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
Loading…
Reference in New Issue
Block a user