PowerToys/Wox.Infrastructure/StringMatcher.cs

300 lines
11 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Wox.Infrastructure.Logger;
using Wox.Infrastructure.UserSettings;
using static Wox.Infrastructure.StringMatcher;
namespace Wox.Infrastructure
2015-02-07 16:53:33 +08:00
{
public static class StringMatcher
2015-02-07 16:53:33 +08:00
{
2019-10-20 20:45:06 +08:00
/// <summary>
/// Options passed to FuzzySearch by Score, IsMatch and the two-argument FuzzySearch overload.
/// </summary>
public static MatchOption DefaultMatchOption = new MatchOption();
2019-10-17 18:37:09 +08:00
/// <summary>
/// Threshold applied by MatchResult: a raw score below this value is reported as a Score of 0.
/// </summary>
public static int UserSettingSearchPrecision { get; set; }
/// <summary>
/// When false, ScoreForPinyin returns 0 without inspecting its arguments.
/// </summary>
public static bool ShouldUsePinyin { get; set; }
/// <summary>
/// Legacy wrapper around FuzzySearch. Note the argument order: <paramref name="target"/> is used as the query
/// and <paramref name="source"/> as the string that is searched.
/// </summary>
/// <param name="source">The string to search in.</param>
/// <param name="target">The query to look for.</param>
/// <returns>The precision-filtered score of the match, or 0 when either argument is null or empty.</returns>
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
public static int Score(string source, string target)
{
    // Nothing to compare: report "no match" without running the matcher.
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0;
    }

    var match = FuzzySearch(target, source, DefaultMatchOption);
    return match.Score;
}
2015-02-07 16:53:33 +08:00
/// <summary>
/// Legacy wrapper around FuzzySearch that only reports whether a match was found. As with Score,
/// <paramref name="target"/> is the query and <paramref name="source"/> is the string that is searched.
/// </summary>
/// <param name="source">The string to search in.</param>
/// <param name="target">The query to look for.</param>
/// <returns>True when the precision-filtered score of the match is greater than 0.</returns>
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
public static bool IsMatch(string source, string target)
{
    var filteredScore = FuzzySearch(target, source, DefaultMatchOption).Score;
    return filteredScore > 0;
}
2019-09-29 13:03:30 +08:00
/// <summary>
/// Runs the fuzzy matcher using DefaultMatchOption.
/// </summary>
/// <param name="query">The text to look for.</param>
/// <param name="stringToCompare">The string to search in.</param>
/// <returns>The result produced by the three-argument overload.</returns>
public static MatchResult FuzzySearch(string query, string stringToCompare)
    => FuzzySearch(query, stringToCompare, DefaultMatchOption);
/// <summary>
/// Fuzzy-matches <paramref name="query"/> against <paramref name="stringToCompare"/>. The query is split on
/// spaces into words, and the characters of every word must be found, in order, while scanning the compared
/// string from left to right.
/// refer to https://github.com/mattyork/fuzzy
/// </summary>
/// <param name="query">The text to look for. Leading and trailing whitespace is trimmed.</param>
/// <param name="stringToCompare">The string to search in.</param>
/// <param name="opt">Match options; only IgnoreCase is read here. Null falls back to DefaultMatchOption.</param>
/// <returns>
/// A result with Success = false when either input is null/empty, the query is only whitespace, or not every
/// query character was found. Otherwise Success = true, MatchData holds the matched indexes of
/// <paramref name="stringToCompare"/>, and RawScore is the larger of the calculated score and the pinyin score.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    // A whitespace-only query trims down to "" and leaves no character to match. Reject it up front;
    // previously it reached the loop below and indexed into an empty word (IndexOutOfRangeException).
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query)) return new MatchResult { Success = false };

    // Tolerate a missing options object instead of failing with a NullReferenceException.
    opt = opt ?? DefaultMatchOption;

    query = query.Trim();

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    // Consecutive spaces would otherwise yield empty words, and an empty word cannot be indexed below.
    var separatedqueryStrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int currentSeparatedQueryStringIndex = 0;
    var currentSeparatedQueryString = separatedqueryStrings[currentSeparatedQueryStringIndex];

    var queryIndex = 0; // position inside the current query word
    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allMatched = false;
    bool isFullWordMatched = false;
    bool allWordsFullyMatched = true;
    var indexList = new List<int>();

    for (var index = 0; index < fullStringToCompareWithoutCase.Length; index++)
    {
        if (fullStringToCompareWithoutCase[index] == currentSeparatedQueryString[queryIndex])
        {
            if (firstMatchIndex < 0)
            { // first matched char will become the start of the compared string
                firstMatchIndex = index;
            }

            if (queryIndex == 0)
            { // first letter of current word
                isFullWordMatched = true;
                firstMatchIndexInWord = index;
            }
            else if (!isFullWordMatched)
            { // we want to verify that there is not a better match if this is not a full word
                // in order to do so we need to verify all previous chars are part of the pattern
                int startIndexToVerify = index - queryIndex;
                bool allMatch = true;

                for (int indexToCheck = 0; indexToCheck < queryIndex; indexToCheck++)
                {
                    if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] !=
                        currentSeparatedQueryString[indexToCheck])
                    {
                        allMatch = false;
                        break; // one mismatch is enough, no need to scan the rest
                    }
                }

                if (allMatch)
                { // update to this as a full word
                    isFullWordMatched = true;

                    if (currentSeparatedQueryStringIndex == 0)
                    { // first word so we need to update start index
                        firstMatchIndex = startIndexToVerify;
                    }

                    // drop the scattered indexes recorded for this word and record the contiguous run instead
                    indexList.RemoveAll(x => x >= firstMatchIndexInWord);

                    for (int indexToCheck = 0; indexToCheck < queryIndex; indexToCheck++)
                    { // update the index list
                        indexList.Add(startIndexToVerify + indexToCheck);
                    }
                }
            }

            lastMatchIndex = index + 1;
            indexList.Add(index);

            // increase the pattern matched index and check if everything was matched
            queryIndex++;

            if (queryIndex == currentSeparatedQueryString.Length)
            {
                currentSeparatedQueryStringIndex++;
                if (currentSeparatedQueryStringIndex >= separatedqueryStrings.Length)
                { // moved over all the words
                    allMatched = true;
                    break;
                }

                // otherwise move to the next word
                currentSeparatedQueryString = separatedqueryStrings[currentSeparatedQueryStringIndex];
                queryIndex = 0;

                // NOTE(review): this check only runs when advancing to a following word, so the last
                // word's full-match state never clears allWordsFullyMatched - confirm this is intended.
                if (!isFullWordMatched)
                { // if any of the words was not fully matched all are not fully matched
                    allWordsFullyMatched = false;
                }
            }
        }
        else
        {
            isFullWordMatched = false;
        }
    }

    // return rendered string if we have a match for every char or all substring without whitespaces matched
    if (allMatched)
    {
        // check if all query string was contained in string to compare
        bool containedFully = lastMatchIndex - firstMatchIndex == queryWithoutCase.Length;
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, containedFully, allWordsFullyMatched);
        var pinyinScore = ScoreForPinyin(stringToCompare, query);

        var result = new MatchResult
        {
            Success = true,
            MatchData = indexList,
            RawScore = Math.Max(score, pinyinScore)
        };

        return result;
    }

    return new MatchResult { Success = false };
}
/// <summary>
/// Turns the position and extent of a match into an integer score.
/// </summary>
/// <param name="query">The query text; only its length is used.</param>
/// <param name="stringToCompare">The compared string; only its length is used.</param>
/// <param name="firstIndex">Index of the first matched character.</param>
/// <param name="matchLen">Distance from the first matched character to the end of the last one.</param>
/// <param name="isFullyContained">Adds 20 when true.</param>
/// <param name="allWordsFullyMatched">Adds 20 when true.</param>
/// <returns>The base score plus all bonuses that apply.</returns>
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, int matchLen,
    bool isFullyContained, bool allWordsFullyMatched)
{
    // Base score (integer division): higher for matches that start early and whose characters
    // sit close together, lower for late or spread-out matches.
    int total = 100 * (query.Length + 1) / ((1 + firstIndex) + (matchLen + 1));

    // The fewer characters the compared string has beyond the query, the bigger the bonus.
    int extraChars = stringToCompare.Length - query.Length;
    total += extraChars < 5 ? 20 : (extraChars < 10 ? 10 : 0);

    if (isFullyContained)
    {
        total += 20;
    }

    if (allWordsFullyMatched)
    {
        total += 20;
    }

    return total;
}
2019-09-29 13:03:30 +08:00
/// <summary>
/// Named score levels with the values 50, 20 and 0.
/// NOTE(review): presumably these are the values stored in UserSettingSearchPrecision; no use of this enum
/// is visible in this file - confirm against callers.
/// </summary>
public enum SearchPrecisionScore
{
Regular = 50,
Low = 20,
None = 0
}
/// <summary>
/// Scores <paramref name="target"/> (the query) against the pinyin forms that Alphabet produces for
/// <paramref name="source"/>.
/// </summary>
/// <param name="source">The string whose pinyin forms are searched.</param>
/// <param name="target">The query to look for.</param>
/// <returns>
/// 0 when ShouldUsePinyin is false, when either argument is null or empty, or when Alphabet.ContainsChinese
/// rejects <paramref name="source"/>; otherwise the best precision-filtered FuzzySearch score over the joined
/// pinyin candidates and their acronyms.
/// </returns>
public static int ScoreForPinyin(string source, string target)
{
    if (!ShouldUsePinyin)
    {
        return 0;
    }

    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0;
    }

    if (!Alphabet.ContainsChinese(source))
    {
        return 0;
    }

    var candidates = Alphabet.PinyinComination(source);

    // Best score when every candidate is joined into a single string.
    var bestJoinedScore = candidates
        .Select(parts => FuzzySearch(target, string.Join("", parts)).Score)
        .Max();

    // Best score when every candidate is reduced by Alphabet.Acronym first.
    var bestAcronymScore = candidates
        .Select(Alphabet.Acronym)
        .Select(acronym => FuzzySearch(target, acronym).Score)
        .Max();

    return Math.Max(bestJoinedScore, bestAcronymScore);
}
}
2015-02-07 16:53:33 +08:00
/// <summary>
/// Outcome of a fuzzy search: whether it matched, its scores, and the matched character indexes.
/// </summary>
public class MatchResult
{
    // Backing store for RawScore.
    private int _rawScore;

    public bool Success { get; set; }

    /// <summary>
    /// The final score of the match result with all search precision filters applied.
    /// It is recomputed every time <see cref="RawScore"/> is assigned.
    /// </summary>
    public int Score { get; private set; }

    /// <summary>
    /// The raw calculated search score without any search precision filtering applied.
    /// </summary>
    public int RawScore
    {
        get
        {
            return _rawScore;
        }

        set
        {
            _rawScore = value;

            // A raw score below the user's precision threshold is reported as 0.
            Score = value >= UserSettingSearchPrecision ? value : 0;
        }
    }

    /// <summary>
    /// Matched data to highlight.
    /// </summary>
    public List<int> MatchData { get; set; }

    /// <summary>
    /// Tells whether the filtered <see cref="Score"/> reaches the current precision threshold.
    /// </summary>
    public bool IsSearchPrecisionScoreMet() => Score >= UserSettingSearchPrecision;
}
/// <summary>
/// Options accepted by StringMatcher.FuzzySearch.
/// </summary>
public class MatchOption
{
    /// <summary>
    /// Prefix of a matched char, used for highlighting.
    /// </summary>
    [Obsolete("this is never used")]
    public string Prefix { get; set; } = string.Empty;

    /// <summary>
    /// Suffix of a matched char, used for highlighting.
    /// </summary>
    [Obsolete("this is never used")]
    public string Suffix { get; set; } = string.Empty;

    /// <summary>
    /// When true (the default) FuzzySearch lower-cases both strings before comparing them.
    /// </summary>
    public bool IgnoreCase { get; set; } = true;
}
}