From de13ad1cd5013faa8e172ae0852afeca97e7b8a8 Mon Sep 17 00:00:00 2001 From: cxfksword Date: Mon, 24 Mar 2014 21:14:10 +0800 Subject: [PATCH] Optimize fuzzy match --- Wox.Infrastructure/FuzzyMatcher.cs | 140 ++++++++++++++++++++++------- Wox.Plugin.System/Programs.cs | 8 +- Wox.Test/FuzzyMatcherTest.cs | 2 +- 3 files changed, 114 insertions(+), 36 deletions(-) diff --git a/Wox.Infrastructure/FuzzyMatcher.cs b/Wox.Infrastructure/FuzzyMatcher.cs index 1d3d025c8d..5b0e144c8d 100644 --- a/Wox.Infrastructure/FuzzyMatcher.cs +++ b/Wox.Infrastructure/FuzzyMatcher.cs @@ -6,45 +6,123 @@ using System.Text.RegularExpressions; namespace Wox.Infrastructure { - //From:http://crossplatform.net/sublime-text-ctrl-p-fuzzy-matching-in-python/ + /// + /// Fuzzy matcher that checks whether the query's chars occur in order within a string; see https://github.com/mattyork/fuzzy + /// public class FuzzyMatcher { - private Regex reg = null; - private string rawQuery = ""; + private string query; + private MatchOption opt; - private FuzzyMatcher(string query) + private FuzzyMatcher(string query, MatchOption opt) { - this.rawQuery = query.Trim(); - this.reg = GetPattern(query); - } - - private Regex GetPattern(string query) - { - var pattern = string.Join(".*?", query.ToCharArray().Select(x => Regex.Escape(x.ToString())).ToArray()); - return new Regex(pattern, RegexOptions.IgnoreCase); - } - - public int Score(string str) - { - var match = reg.Match(str); - if (!match.Success) - return 0; - - //a match found near the beginning of a string is scored more than a match found near the end - //a match is scored more if the characters in the patterns are closer to each other, while the score is lower if they are more spread out - var score = 100 * (this.rawQuery.Length + 1) / ((1 + match.Index) + (match.Length + 1)); - //a match with less characters assigning more weights - if (str.Length - this.rawQuery.Length < 5) - score = score + 20; - else if (str.Length - this.rawQuery.Length < 10) - score = score + 10; - - return score; + this.query = query.Trim(); + this.opt = opt; } 
public static FuzzyMatcher Create(string query) { - return new FuzzyMatcher(query); + return new FuzzyMatcher(query, new MatchOption()); + } + + public static FuzzyMatcher Create(string query, MatchOption opt) + { + return new FuzzyMatcher(query, opt); + } + + public MatchResult Evaluate(string str) + { + var len = str.Length; + var compareString = opt.IgnoreCase ? str.ToLower() : str; + var pattern = opt.IgnoreCase ? query.ToLower() : query; + + var sb = new StringBuilder(str.Length + (query.Length * (opt.Prefix.Length + opt.Suffix.Length))); + var patternIdx = 0; + var firstMatchIndex = -1; + var lastMatchIndex = 0; + char ch; + for (var idx = 0; idx < len; idx++) + { + ch = str[idx]; + if (compareString[idx] == pattern[patternIdx]) + { + if (firstMatchIndex < 0) + firstMatchIndex = idx; + lastMatchIndex = idx + 1; + + sb.Append(opt.Prefix + ch + opt.Suffix); + patternIdx += 1; + } + else + { + sb.Append(ch); + } + + // every pattern char has matched: append the rest of str unchanged and stop scanning + if (patternIdx == pattern.Length && (idx + 1) != compareString.Length) + { + sb.Append(str.Substring(idx + 1)); + break; + } + } + + // report success (with the highlighted string) only if every pattern char was matched + if (patternIdx == pattern.Length) + { + return new MatchResult() + { + Success = true, + Value = sb.ToString(), + Score = CalScore(str, firstMatchIndex, lastMatchIndex - firstMatchIndex) + }; + } + + return new MatchResult() { Success = false }; + } + + private int CalScore(string str, int firstIndex, int matchLen) + { + // a match that starts nearer the beginning of the string scores higher than one that starts nearer the end + // a match whose chars are closer together scores higher; the score drops as they spread out + var score = 100 * (query.Length + 1) / ((1 + firstIndex) + (matchLen + 1)); + // bonus when the string is not much longer than the query: adds 20 when fewer than 5 extra chars, 10 when fewer than 10 + if (str.Length - query.Length < 5) + score = score + 20; + else if (str.Length - query.Length < 10) + score = score + 10; + + return score; 
} } + + public class MatchResult + { + public bool Success { get; set; } + public int Score { get; set; } + /// + /// the input string with each matched char wrapped in the configured prefix and suffix (for highlighting) + /// + public string Value { get; set; } + } + + public class MatchOption + { + public MatchOption() + { + this.Prefix = ""; + this.Suffix = ""; + this.IgnoreCase = true; + } + + /// + /// text inserted before each matched char, used for highlighting + /// + public string Prefix { get; set; } + /// + /// text inserted after each matched char, used for highlighting + /// + public string Suffix { get; set; } + + public bool IgnoreCase { get; set; } + } } diff --git a/Wox.Plugin.System/Programs.cs b/Wox.Plugin.System/Programs.cs index ca78c00a2b..f50128ea42 100644 --- a/Wox.Plugin.System/Programs.cs +++ b/Wox.Plugin.System/Programs.cs @@ -84,10 +84,10 @@ namespace Wox.Plugin.System private bool MatchProgram(Program program, FuzzyMatcher matcher) { - if (program.AbbrTitle != null && (program.Score = matcher.Score(program.AbbrTitle)) > 0) return true; - if ((program.Score = matcher.Score(program.Title)) > 0) return true; - if ((program.Score = matcher.Score(program.PinyinTitle)) > 0) return true; - if (program.ExecuteName != null && (program.Score = matcher.Score(program.ExecuteName)) > 0) return true; + if (program.AbbrTitle != null && (program.Score = matcher.Evaluate(program.AbbrTitle).Score) > 0) return true; + if ((program.Score = matcher.Evaluate(program.Title).Score) > 0) return true; + if ((program.Score = matcher.Evaluate(program.PinyinTitle).Score) > 0) return true; + if (program.ExecuteName != null && (program.Score = matcher.Evaluate(program.ExecuteName).Score) > 0) return true; return false; } diff --git a/Wox.Test/FuzzyMatcherTest.cs b/Wox.Test/FuzzyMatcherTest.cs index 8f92de72bd..72ae160e41 100644 --- a/Wox.Test/FuzzyMatcherTest.cs +++ b/Wox.Test/FuzzyMatcherTest.cs @@ -29,7 +29,7 @@ namespace Wox.Test results.Add(new Plugin.Result() { Title = str, - Score = FuzzyMatcher.Create("inst").Score(str) + Score = FuzzyMatcher.Create("inst").Evaluate(str).Score }); }