mirror of
https://github.com/opencv/opencv.git
synced 2025-07-24 14:06:27 +08:00
Merge pull request #24350 from dkurt:py_return_non_utf8_string
Encode QR code data to UTF-8 #24350 ### Pull Request Readiness Checklist **Merge with extra**: https://github.com/opencv/opencv_extra/pull/1105 resolves https://github.com/opencv/opencv/issues/23728 This is the first PR in a series. Here we just return raw Unicode. Later I will try to expand the QR code decoding methods to use the ECI assignment number and return a string with the proper encoding, not only UTF-8 or raw Unicode. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is an accuracy test, performance test and test data in opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
3859ac921a
commit
5ddf3de4ce
@ -2727,6 +2727,58 @@ bool QRDecode::samplingForVersion()
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports whether every byte of the buffer is a 7-bit value (below 0x80).
 *
 * @param str  pointer to the first byte to inspect
 * @param size number of bytes to inspect
 * @return true when no byte has its most significant bit set (an empty
 *         range yields true), false as soon as one such byte is found
 */
static bool checkASCIIcompatible(const uint8_t* str, const size_t size) {
    const uint8_t* const end = str + size;
    for (const uint8_t* cursor = str; cursor != end; ++cursor) {
        // A set top bit means the value is 0x80 or above.
        if ((*cursor & 0x80) != 0)
            return false;
    }
    return true;
}
|
||||
|
||||
/**
 * Checks whether a byte buffer is a well-formed UTF-8 sequence.
 *
 * Besides the lead/continuation bit patterns, the decoded code point of every
 * multi-byte sequence is validated, so input that strict UTF-8 decoders refuse
 * is rejected here as well:
 *  - overlong encodings (a code point stored in more bytes than necessary),
 *  - UTF-16 surrogates U+D800..U+DFFF,
 *  - values above U+10FFFF,
 *  - truncated sequences and stray continuation bytes.
 *
 * @param str  pointer to the first byte to inspect
 * @param size number of bytes to inspect
 * @return true when the whole range is valid UTF-8 (an empty range is valid)
 */
static bool checkUTF8(const uint8_t* str, const size_t size) {
    size_t i = 0;
    while (i < size) {
        const uint8_t lead = str[i];
        if (lead < 0x80) {
            // Single-byte (ASCII) symbol.
            ++i;
            continue;
        }

        // The number of leading one bits of the first byte gives the sequence
        // length; the remaining low bits are the top bits of the code point.
        size_t numBytesPerSymbol;
        uint32_t codePoint;
        uint32_t minCodePoint;  // smallest value that really needs this length
        if ((lead & 0xe0) == 0xc0) {
            numBytesPerSymbol = 2;
            codePoint = lead & 0x1f;
            minCodePoint = 0x80;
        } else if ((lead & 0xf0) == 0xe0) {
            numBytesPerSymbol = 3;
            codePoint = lead & 0x0f;
            minCodePoint = 0x800;
        } else if ((lead & 0xf8) == 0xf0) {
            numBytesPerSymbol = 4;
            codePoint = lead & 0x07;
            minCodePoint = 0x10000;
        } else {
            // Stray continuation byte (10xxxxxx) or an invalid lead (0xf8..0xff).
            return false;
        }

        // The sequence must fit into the remaining part of the buffer.
        if (size - i < numBytesPerSymbol)
            return false;

        for (size_t j = 1; j < numBytesPerSymbol; ++j) {
            const uint8_t continuation = str[i + j];
            if ((continuation & 0xc0) != 0x80)
                return false;
            codePoint = (codePoint << 6) | (continuation & 0x3f);
        }

        if (codePoint < minCodePoint)
            return false;  // overlong encoding
        if (codePoint >= 0xd800 && codePoint <= 0xdfff)
            return false;  // UTF-16 surrogate, not a valid scalar value
        if (codePoint > 0x10ffff)
            return false;  // outside of the Unicode range

        i += numBytesPerSymbol;
    }
    return true;
}
|
||||
|
||||
/**
 * Converts a buffer of single-byte symbols to UTF-8, interpreting each byte
 * value as the code point with the same number (U+0000..U+00FF).
 *
 * Bytes below 0x80 are copied unchanged; bytes 0x80..0xFF are expanded to the
 * two-byte form 110000xx 10xxxxxx.
 *
 * @param str  pointer to the first input byte
 * @param size number of input bytes
 * @return the UTF-8 encoded string (between size and 2 * size bytes long)
 */
static std::string encodeUTF8_bytesarray(const uint8_t* str, const size_t size) {
    std::string res;
    // At least one output byte per input byte; avoids most reallocations
    // without the overhead of a string stream.
    res.reserve(size);
    for (size_t i = 0; i < size; ++i) {
        const uint8_t byte = str[i];
        if (byte >= 0x80) {
            // Lead byte carries the two top bits, continuation byte the low six.
            res.push_back(static_cast<char>(0xc0 | (byte >> 6)));
            res.push_back(static_cast<char>(0x80 | (byte & 0x3f)));
        } else {
            res.push_back(static_cast<char>(byte));
        }
    }
    return res;
}
|
||||
|
||||
bool QRDecode::decodingProcess()
|
||||
{
|
||||
#ifdef HAVE_QUIRC
|
||||
@ -2756,11 +2808,58 @@ bool QRDecode::decodingProcess()
|
||||
|
||||
if (errorCode != 0) { return false; }
|
||||
|
||||
for (int i = 0; i < qr_code_data.payload_len; i++)
|
||||
CV_LOG_INFO(NULL, "QR: decoded with .version=" << qr_code_data.version << " .data_type=" << qr_code_data.data_type << " .eci=" << qr_code_data.eci << " .payload_len=" << qr_code_data.payload_len)
|
||||
|
||||
switch (qr_code_data.data_type)
|
||||
{
|
||||
result_info += qr_code_data.payload[i];
|
||||
case QUIRC_DATA_TYPE_NUMERIC:
|
||||
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||
CV_LOG_INFO(NULL, "QR: DATA_TYPE_NUMERIC payload must be ACSII compatible string");
|
||||
return false;
|
||||
}
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
return true;
|
||||
case QUIRC_DATA_TYPE_ALPHA:
|
||||
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||
CV_LOG_INFO(NULL, "QR: DATA_TYPE_ALPHA payload must be ASCII compatible string");
|
||||
return false;
|
||||
}
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
return true;
|
||||
case QUIRC_DATA_TYPE_BYTE:
|
||||
// https://en.wikipedia.org/wiki/Extended_Channel_Interpretation
|
||||
if (qr_code_data.eci == QUIRC_ECI_UTF_8) {
|
||||
CV_LOG_INFO(NULL, "QR: payload ECI is UTF-8");
|
||||
if (!checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||
CV_LOG_INFO(NULL, "QUIRC_DATA_TYPE_BYTE with UTF-8 ECI must be UTF-8 compatible string");
|
||||
return false;
|
||||
}
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
} else if (qr_code_data.eci == 25/*ECI_UTF_16BE*/) {
|
||||
CV_LOG_INFO(NULL, "QR: UTF-16BE ECI is not supported");
|
||||
return false;
|
||||
} else if (checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||
CV_LOG_INFO(NULL, "QR: payload is ASCII compatible (special handling for symbols encoding is not needed)");
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
} else {
|
||||
if (checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
|
||||
CV_LOG_INFO(NULL, "QR: payload QUIRC_DATA_TYPE_BYTE is UTF-8 compatible, return as-is");
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
} else {
|
||||
CV_LOG_INFO(NULL, "QR: assume 1-byte per symbol encoding");
|
||||
result_info = encodeUTF8_bytesarray(qr_code_data.payload, qr_code_data.payload_len);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
case QUIRC_DATA_TYPE_KANJI:
|
||||
// FIXIT BUG: we must return UTF-8 compatible string
|
||||
CV_LOG_WARNING(NULL, "QR: Kanji is not supported properly");
|
||||
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
|
||||
CV_LOG_WARNING(NULL, "QR: unsupported QR data type");
|
||||
return false;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
|
@ -637,4 +637,25 @@ TEST_P(Objdetect_QRCode_detectAndDecodeMulti, decode_9_qrcodes_version7)
|
||||
|
||||
#endif // UPDATE_QRCODE_TEST_DATA
|
||||
|
||||
// Decodes the "qrcode/umlaut.png" test image and expects the decoded text to
// contain "M\xc3\xbcllheimstrasse", i.e. the u-umlaut (U+00FC) encoded as UTF-8.
// The test is skipped when OpenCV is built without Quirc.
TEST(Objdetect_QRCode_detectAndDecode, utf8_output)
{
#ifndef HAVE_QUIRC
    throw SkipTestException("Quirc is required for decoding");
#else
    const std::string data_dir = "qrcode/";
    const std::string file_name = "umlaut.png";
    const std::string image_path = findDataFile(data_dir + file_name);

    Mat input = imread(image_path);
    ASSERT_FALSE(input.empty()) << "Can't read image: " << image_path;

    QRCodeDetector detector;
    std::vector<Point> points;
    Mat rectified;
    const std::string text = detector.detectAndDecode(input, points, rectified);

    EXPECT_FALSE(text.empty());
    // "\xc3\xbc" is the two-byte UTF-8 form of U+00FC.
    EXPECT_NE(text.find("M\xc3\xbcllheimstrasse"), std::string::npos);
#endif // HAVE_QUIRC
}
|
||||
|
||||
}} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user