2019-12-10 03:57:59 +08:00
|
|
|
using System;
|
2019-12-03 22:02:59 +08:00
|
|
|
using System.Collections.Generic;
|
2016-04-24 07:37:25 +08:00
|
|
|
using System.Linq;
|
2019-09-29 12:24:38 +08:00
|
|
|
using System.Text;
|
2016-04-24 07:37:25 +08:00
|
|
|
using Wox.Infrastructure.Logger;
|
2019-09-29 12:24:38 +08:00
|
|
|
using Wox.Infrastructure.UserSettings;
|
2019-12-03 22:02:59 +08:00
|
|
|
using static Wox.Infrastructure.StringMatcher;
|
2016-04-23 06:29:38 +08:00
|
|
|
|
2019-12-30 07:13:33 +08:00
|
|
|
namespace Wox.Infrastructure
|
2015-02-07 16:53:33 +08:00
|
|
|
{
|
2016-04-24 07:37:25 +08:00
|
|
|
public static class StringMatcher
|
2015-02-07 16:53:33 +08:00
|
|
|
{
|
2019-10-20 20:45:06 +08:00
|
|
|
/// <summary>
/// Match options passed to the three-argument FuzzySearch by the members of this class
/// that do not take an explicit <see cref="MatchOption"/> (Score, IsMatch and the
/// two-argument FuzzySearch).
/// </summary>
public static MatchOption DefaultMatchOption = new MatchOption();
|
2019-10-17 18:37:09 +08:00
|
|
|
|
2020-01-07 19:30:36 +08:00
|
|
|
/// <summary>
/// Minimum score a match has to reach. <see cref="MatchResult"/> compares scores against
/// the integer value of this setting and reports 0 as the final score when it is not met.
/// </summary>
public static SearchPrecisionScore UserSettingSearchPrecision { get; set; }
|
2019-12-30 07:13:33 +08:00
|
|
|
|
2019-12-10 03:57:59 +08:00
|
|
|
/// <summary>
/// Switch for pinyin scoring: while this is false, ScoreForPinyin returns 0 without
/// inspecting its arguments.
/// </summary>
public static bool ShouldUsePinyin { get; set; }
|
2019-09-29 12:24:38 +08:00
|
|
|
|
|
|
|
/// <summary>
/// Legacy scoring helper. Runs the fuzzy search with <paramref name="target"/> as the query and
/// <paramref name="source"/> as the string to compare, using <see cref="DefaultMatchOption"/>.
/// </summary>
/// <param name="source">The string that is searched in.</param>
/// <param name="target">The query that is searched for.</param>
/// <returns>The filtered score of the match, or 0 when either argument is null or empty.</returns>
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
public static int Score(string source, string target)
{
    // Nothing to compare: report "no match" without running the matcher.
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0;
    }

    // Argument order is swapped on purpose: FuzzySearch expects the query first.
    var match = FuzzySearch(target, source, DefaultMatchOption);
    return match.Score;
}
|
2015-02-07 16:53:33 +08:00
|
|
|
|
2019-09-29 12:24:38 +08:00
|
|
|
/// <summary>
/// Legacy matching helper. Runs the fuzzy search with <paramref name="target"/> as the query and
/// <paramref name="source"/> as the string to compare, using <see cref="DefaultMatchOption"/>.
/// </summary>
/// <param name="source">The string that is searched in.</param>
/// <param name="target">The query that is searched for.</param>
/// <returns>True when the filtered score of the match is greater than zero.</returns>
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
public static bool IsMatch(string source, string target)
{
    // Argument order is swapped on purpose: FuzzySearch expects the query first.
    var match = FuzzySearch(target, source, DefaultMatchOption);
    return match.Score > 0;
}
|
|
|
|
|
2019-09-29 13:03:30 +08:00
|
|
|
/// <summary>
/// Fuzzy-matches <paramref name="query"/> against <paramref name="stringToCompare"/>
/// using <see cref="DefaultMatchOption"/>.
/// </summary>
/// <param name="query">The text typed by the user.</param>
/// <param name="stringToCompare">The candidate string the query is matched against.</param>
/// <returns>The result produced by the three-argument overload.</returns>
public static MatchResult FuzzySearch(string query, string stringToCompare)
    => FuzzySearch(query, stringToCompare, DefaultMatchOption);
|
|
|
|
|
|
|
|
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">The text typed by the user; it is trimmed before matching.</param>
/// <param name="stringToCompare">The candidate string the query is matched against.</param>
/// <param name="opt">Match options; only <see cref="MatchOption.IgnoreCase"/> is read here.</param>
/// <returns>
/// A result with Success = false when either string is null or empty, when the query contains
/// nothing but whitespace, or when not every query substring could be matched. Otherwise a
/// result with Success = true, the matched character indices in MatchData and the larger of
/// the search score and the pinyin score in RawScore.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult { Success = false };

    query = query.Trim();

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;

    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // A whitespace-only query trims to "" and therefore splits into zero substrings.
    // There is nothing to match, and reading querySubstrings[0] below would throw.
    if (querySubstrings.Length == 0) return new MatchResult { Success = false };

    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    // One past the index of the most recent matched character, so that
    // lastMatchIndex - firstMatchIndex is the length of the matched span.
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
            if (allQuerySubstringsMatched)
                break;

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
        var pinyinScore = ScoreForPinyin(stringToCompare, query);

        var result = new MatchResult
        {
            Success = true,
            MatchData = indexList,
            RawScore = Math.Max(score, pinyinScore),
            AllSubstringsContainedInCompareString = allSubstringsContainedInCompareString
        };

        return result;
    }

    return new MatchResult { Success = false };
}
|
|
|
|
|
2020-01-03 05:02:02 +08:00
|
|
|
/// <summary>
/// Checks whether the first <paramref name="currentQuerySubstringCharacterIndex"/> characters of
/// <paramref name="currentQuerySubstring"/> appear consecutively in
/// <paramref name="fullStringToCompareWithoutCase"/> starting at <paramref name="startIndexToVerify"/>.
/// </summary>
/// <param name="startIndexToVerify">Index in the compare string where the verification starts.</param>
/// <param name="currentQuerySubstringCharacterIndex">Number of leading query substring characters to verify.</param>
/// <param name="fullStringToCompareWithoutCase">The string being searched in.</param>
/// <param name="currentQuerySubstring">The query substring whose leading characters are verified.</param>
/// <returns>True when every verified character is equal; also true when there is nothing to verify.</returns>
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
    string fullStringToCompareWithoutCase, string currentQuerySubstring)
{
    for (var offset = 0; offset < currentQuerySubstringCharacterIndex; offset++)
    {
        // The answer is settled at the first difference, so stop scanning there.
        if (fullStringToCompareWithoutCase[startIndexToVerify + offset] != currentQuerySubstring[offset])
        {
            return false;
        }
    }

    return true;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Rebuilds the list of matched indices after a better position for the current query substring
/// was found: entries at or after <paramref name="firstMatchIndexInWord"/> are dropped and the
/// consecutive indices starting at <paramref name="startIndexToVerify"/> are appended.
/// </summary>
/// <param name="startIndexToVerify">First index of the newly verified run.</param>
/// <param name="currentQuerySubstringCharacterIndex">Length of the newly verified run.</param>
/// <param name="firstMatchIndexInWord">Index where the current query substring was first matched.</param>
/// <param name="indexList">Indices collected so far; the stale entries are removed from this list in place.</param>
/// <returns>A new list holding the kept entries followed by the indices of the verified run.</returns>
private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList)
{
    // Discard what was recorded for the current word so far (this mutates the caller's list).
    indexList.RemoveAll(index => index >= firstMatchIndexInWord);

    var rebuilt = new List<int>(indexList);

    var offset = 0;
    while (offset < currentQuerySubstringCharacterIndex)
    {
        rebuilt.Add(startIndexToVerify + offset);
        offset++;
    }

    return rebuilt;
}
|
|
|
|
|
2020-01-06 17:51:27 +08:00
|
|
|
/// <summary>
/// Tells whether the index of the query substring being processed has moved past the last substring.
/// </summary>
/// <param name="currentQuerySubstringIndex">Index of the query substring that would be processed next.</param>
/// <param name="querySubstringsLength">Total number of query substrings.</param>
/// <returns>True when no query substring is left to match.</returns>
private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
    => !(currentQuerySubstringIndex < querySubstringsLength);
|
|
|
|
|
2020-01-07 05:28:27 +08:00
|
|
|
/// <summary>
/// Computes the raw score of a successful match.
/// </summary>
/// <param name="query">The trimmed query.</param>
/// <param name="stringToCompare">The string the query was matched against.</param>
/// <param name="firstIndex">Index of the first matched character.</param>
/// <param name="matchLen">Length of the span between the first and the last matched character.</param>
/// <param name="allSubstringsContainedInCompareString">Whether the caller flagged all query substrings as contained in the compare string.</param>
/// <returns>The base score plus the length and containment bonuses.</returns>
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen, bool allSubstringsContainedInCompareString)
{
    // The base score shrinks as the match starts later in the string (firstIndex)
    // and as the matched characters are spread over a longer span (matchLen).
    var spread = (1 + firstIndex) + (matchLen + 1);
    var total = 100 * (query.Length + 1) / spread;

    // The closer the compared string is in length to the query, the larger the bonus.
    var lengthGap = stringToCompare.Length - query.Length;
    if (lengthGap < 5)
    {
        total += 20;
    }
    else if (lengthGap < 10)
    {
        total += 10;
    }

    if (allSubstringsContainedInCompareString)
    {
        // Ten points for every non-whitespace character of the query.
        var nonWhitespaceCharacters = query.Count(character => !char.IsWhiteSpace(character));
        total += 10 * nonWhitespaceCharacters;
    }

    return total;
}
|
|
|
|
|
2019-09-29 13:03:30 +08:00
|
|
|
/// <summary>
/// Search precision levels. The integer value of a level is the minimum score a match
/// has to reach for that level to be considered met.
/// </summary>
public enum SearchPrecisionScore
{
    /// <summary>Threshold of 0.</summary>
    None = 0,

    /// <summary>Threshold of 20.</summary>
    Low = 20,

    /// <summary>Threshold of 50.</summary>
    Regular = 50
}
|
|
|
|
|
2016-04-24 07:37:25 +08:00
|
|
|
/// <summary>
/// Scores <paramref name="target"/> against the pinyin spellings of <paramref name="source"/>.
/// </summary>
/// <param name="source">The string that may contain Chinese characters.</param>
/// <param name="target">The query to match against the pinyin forms of <paramref name="source"/>.</param>
/// <returns>
/// 0 when pinyin scoring is switched off, when either argument is null or empty, or when
/// <paramref name="source"/> contains no Chinese characters; otherwise the best filtered score
/// among all full pinyin combinations and their acronyms.
/// </returns>
public static int ScoreForPinyin(string source, string target)
{
    if (!ShouldUsePinyin)
    {
        return 0;
    }

    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0;
    }

    if (!Alphabet.ContainsChinese(source))
    {
        return 0;
    }

    var combination = Alphabet.PinyinComination(source);

    // Best score when each combination is joined into one full pinyin string.
    var bestFullPinyinScore = combination
        .Select(syllables => FuzzySearch(target, string.Join("", syllables)).Score)
        .Max();

    // Best score when each combination is reduced to its acronym.
    var bestAcronymScore = combination.Select(Alphabet.Acronym)
        .Select(acronym => FuzzySearch(target, acronym).Score)
        .Max();

    return Math.Max(bestFullPinyinScore, bestAcronymScore);
}
|
2019-09-29 12:24:38 +08:00
|
|
|
}
|
2015-02-07 16:53:33 +08:00
|
|
|
|
2019-09-29 12:24:38 +08:00
|
|
|
/// <summary>
/// Outcome of a fuzzy search: whether it succeeded, its scores and the matched character indices.
/// </summary>
public class MatchResult
{
    /// <summary>
    /// Whether the search that produced this result was successful.
    /// </summary>
    public bool Success { get; set; }

    /// <summary>
    /// The final score of the match result with all search precision filters applied.
    /// It is recalculated every time <see cref="RawScore"/> is assigned.
    /// </summary>
    public int Score { get; private set; }

    // Backing field of RawScore.
    private int _rawScore;

    /// <summary>
    /// The raw calculated search score without any search precision filtering applied.
    /// Assigning it also updates <see cref="Score"/>.
    /// </summary>
    public int RawScore
    {
        get { return _rawScore; }
        set
        {
            _rawScore = value;
            Score = ApplySearchPrecisionFilter(value);
        }
    }

    /// <summary>
    /// Indicates if all query's substrings are contained in the string to compare
    /// </summary>
    public bool AllSubstringsContainedInCompareString { get; set; }

    /// <summary>
    /// Matched data to highlight.
    /// </summary>
    public List<int> MatchData { get; set; }

    /// <summary>
    /// Checks the final <see cref="Score"/> against the user's search precision setting.
    /// </summary>
    /// <returns>True when the score is at least the configured precision value.</returns>
    public bool IsSearchPrecisionScoreMet() => IsSearchPrecisionScoreMet(Score);

    // True when the given score reaches the integer value of the user's precision setting.
    private bool IsSearchPrecisionScoreMet(int score) => score >= (int)UserSettingSearchPrecision;

    // Keeps a score that meets the precision setting and replaces any other score with 0.
    private int ApplySearchPrecisionFilter(int score)
    {
        if (IsSearchPrecisionScoreMet(score))
        {
            return score;
        }

        return 0;
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// Options that control how a fuzzy search is performed.
/// </summary>
public class MatchOption
{
    /// <summary>
    /// Prefix of a matched character, meant for highlighting.
    /// </summary>
    [Obsolete("this is never used")]
    public string Prefix { get; set; } = string.Empty;

    /// <summary>
    /// Suffix of a matched character, meant for highlighting.
    /// </summary>
    [Obsolete("this is never used")]
    public string Suffix { get; set; } = string.Empty;

    /// <summary>
    /// Whether letter case is ignored when matching; enabled by default.
    /// </summary>
    public bool IgnoreCase { get; set; } = true;
}
|
2019-12-30 07:13:33 +08:00
|
|
|
}
|