2016-04-24 07:37:25 +08:00
|
|
|
|
using System;
|
2017-01-13 04:46:40 +08:00
|
|
|
|
using System.Collections.Concurrent;
|
2016-04-24 07:37:25 +08:00
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Linq;
|
2020-01-20 07:06:16 +08:00
|
|
|
|
using System.Text;
|
2016-04-24 07:37:25 +08:00
|
|
|
|
using hyjiacan.util.p4n;
|
2017-01-12 10:16:53 +08:00
|
|
|
|
using hyjiacan.util.p4n.format;
|
2020-01-20 07:06:16 +08:00
|
|
|
|
using JetBrains.Annotations;
|
2017-02-13 00:57:41 +08:00
|
|
|
|
using Wox.Infrastructure.Logger;
|
|
|
|
|
using Wox.Infrastructure.Storage;
|
2019-11-16 06:34:27 +08:00
|
|
|
|
using Wox.Infrastructure.UserSettings;
|
2016-04-24 07:37:25 +08:00
|
|
|
|
|
|
|
|
|
namespace Wox.Infrastructure
|
|
|
|
|
{
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Contract for a component that translates a string into another textual representation.
/// </summary>
public interface IAlphabet
{
    /// <summary>
    /// Translates the given string.
    /// </summary>
    /// <param name="stringToTranslate">The text to translate.</param>
    /// <returns>The translated text.</returns>
    string Translate(string stringToTranslate);
}
|
|
|
|
|
|
|
|
|
|
public class Alphabet : IAlphabet
|
|
|
|
|
{
|
|
|
|
|
// Settings handed to Initialize; consulted for the ShouldUsePinyin switch.
private Settings _settings;

// Storage created under the name "Pinyin" in InitializePinyinHelpers and used by Save.
private BinaryStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;

// Input text -> pinyin combinations, loaded from _pinyinStorage during initialization.
private ConcurrentDictionary<string, string[][]> PinyinCache;

// Output format passed to PinyinHelper; set to WITHOUT_TONE in InitializePinyinHelpers.
private readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat();
|
2019-12-03 21:55:22 +08:00
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Stores the supplied settings and then prepares the pinyin helpers.
/// </summary>
/// <param name="settings">Settings instance to use; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="settings"/> is null.</exception>
public void Initialize([NotNull] Settings settings)
{
    if (settings is null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    _settings = settings;
    InitializePinyinHelpers();
}
|
2017-01-12 10:16:53 +08:00
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Configures the pinyin output format, loads the pinyin cache from storage
/// and logs how many cached entries were loaded.
/// </summary>
private void InitializePinyinHelpers()
{
    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
    {
        var storage = new BinaryStorage<ConcurrentDictionary<string, string[][]>>("Pinyin");
        _pinyinStorage = storage;

        // A fresh, empty dictionary is passed to TryLoad as its default argument.
        var fallback = new ConcurrentDictionary<string, string[][]>();
        PinyinCache = storage.TryLoad(fallback);

        // force pinyin library static constructor initialize
        PinyinHelper.toHanyuPinyinStringArray('T', Format);
    });

    Log.Info($"|Wox.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{PinyinCache.Count}>");
}
|
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Translates <paramref name="str"/> by delegating to <see cref="ConvertChineseCharactersToPinyin"/>.
/// </summary>
/// <param name="str">The text to translate.</param>
/// <returns>Whatever <see cref="ConvertChineseCharactersToPinyin"/> returns for the input.</returns>
public string Translate(string str) => ConvertChineseCharactersToPinyin(str);
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a single string from the pinyin combinations of <paramref name="source"/>:
/// the distinct acronyms first, followed by every full pinyin spelling, with no separator.
/// </summary>
/// <param name="source">The text to convert.</param>
/// <returns>
/// <paramref name="source"/> itself when pinyin is disabled, when it is null or empty,
/// or when <see cref="ContainsChinese"/> reports false; otherwise the concatenated string.
/// </returns>
public string ConvertChineseCharactersToPinyin(string source)
{
    // Guards are evaluated in the same order as three separate early returns would be.
    var nothingToConvert = !_settings.ShouldUsePinyin
                           || string.IsNullOrEmpty(source)
                           || !ContainsChinese(source);
    if (nothingToConvert)
    {
        return source;
    }

    var combinations = PinyinCombination(source);
    var acronyms = combinations.Select(Acronym).Distinct();
    var spellings = combinations.Select(syllables => string.Join("", syllables));

    // Acronyms come first, then the full spellings.
    return string.Concat(acronyms.Concat(spellings));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Saves the pinyin cache through the pinyin storage; does nothing when pinyin is disabled.
/// </summary>
public void Save()
{
    if (_settings.ShouldUsePinyin)
    {
        _pinyinStorage.Save(PinyinCache);
    }
}
|
|
|
|
|
|
2019-11-16 06:34:27 +08:00
|
|
|
|
// Shared empty results returned by the pinyin methods.
// readonly: these are sentinels and must never be reassigned.
private static readonly string[] EmptyStringArray = new string[0];
private static readonly string[][] Empty2DStringArray = new string[0][];
|
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Replaces each Chinese character with its first pinyin reading; non-Chinese characters are kept unchanged.
/// </summary>
/// <param name="word">Should be a word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// One entry per character of <paramref name="word"/>, or an empty array when pinyin is disabled
/// or <paramref name="word"/> is null or empty.
/// </returns>
[Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")]
public string[] Pinyin(string word)
{
    if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(word))
    {
        return EmptyStringArray;
    }

    return word.Select(c =>
    {
        var readings = PinyinHelper.toHanyuPinyinStringArray(c);

        // No reading available: keep the character itself. Only the first reading is used,
        // which is why this method is marked obsolete for characters with several readings.
        return readings == null || readings.Length == 0 ? c.ToString() : readings[0];
    }).ToArray();
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Replaces each Chinese character with pinyin; non-Chinese characters are kept unchanged.
/// Because no word dictionary is available, every possible pinyin combination is returned,
/// e.g. 音乐 returns both yinyue and yinle. Results are stored in the pinyin cache.
/// </summary>
/// <param name="characters">Should be a word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// One array per combination, each holding one entry per input character; an empty array
/// when pinyin is disabled or <paramref name="characters"/> is null or empty.
/// </returns>
public string[][] PinyinCombination(string characters)
{
    if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
    {
        return Empty2DStringArray;
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    if (PinyinCache.TryGetValue(characters, out var cached))
    {
        return cached;
    }

    // Candidate readings for every input character, in input order.
    var readingsPerCharacter = new List<string[]>(characters.Length);
    foreach (var c in characters)
    {
        var pinyins = PinyinHelper.toHanyuPinyinStringArray(c, Format);
        readingsPerCharacter.Add(pinyins != null
            ? pinyins.Distinct().ToArray()
            : new[] { c.ToString() });
    }

    // Cartesian product built directly on arrays. Joining the parts with ';' and splitting
    // them again would break inputs that themselves contain ';' (empty entries, lost ';').
    // The earliest character varies slowest, so the order matches a left-to-right fold.
    var sequences = new List<string[]> { new string[0] };
    foreach (var readings in readingsPerCharacter)
    {
        var extended = new List<string[]>();
        foreach (var prefix in sequences)
        {
            foreach (var reading in readings)
            {
                var sequence = new string[prefix.Length + 1];
                prefix.CopyTo(sequence, 0);
                sequence[prefix.Length] = reading;
                extended.Add(sequence);
            }
        }

        sequences = extended;
    }

    var combination = sequences.ToArray();
    PinyinCache[characters] = combination;
    return combination;
}
|
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Builds an acronym from the first character of every entry in <paramref name="pinyin"/>.
/// </summary>
/// <param name="pinyin">The pinyin entries of one combination.</param>
/// <returns>The concatenated first characters; null or empty entries contribute nothing.</returns>
public string Acronym(string[] pinyin)
{
    // Empty entries have no first character and would make p[0] throw, so they are skipped.
    // They can appear when a combination was produced by splitting on ';'.
    var initials = pinyin
        .Where(p => !string.IsNullOrEmpty(p))
        .Select(p => p[0]);

    return string.Join("", initials);
}
|
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Checks whether any character of <paramref name="word"/> has a pinyin reading.
/// </summary>
/// <param name="word">The text to inspect.</param>
/// <returns>
/// true when PinyinHelper returns a reading for at least one character; false when pinyin is
/// disabled, when <paramref name="word"/> is null or empty, or when it is longer than 40 characters.
/// </returns>
public bool ContainsChinese(string word)
{
    // Strings longer than this are skipped rather than inspected.
    const int maxInspectedLength = 40;

    // A null word used to throw on word.Length; it contains no Chinese, so report false.
    if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(word))
    {
        return false;
    }

    if (word.Length > maxInspectedLength)
    {
        // NOTE(review): the log tag names StringMatcher.ScoreForPinyin although this is
        // Alphabet.ContainsChinese - left unchanged; confirm whether that is intended.
        Log.Debug($"|Wox.Infrastructure.StringMatcher.ScoreForPinyin|skip too long string: {word}");
        return false;
    }

    return word.Any(c => PinyinHelper.toHanyuPinyinStringArray(c) != null);
}
|
|
|
|
|
|
2020-01-20 07:06:16 +08:00
|
|
|
|
/// <summary>
/// Pairs every entry of <paramref name="array1"/> with every entry of <paramref name="array2"/>,
/// joining each pair with ';'.
/// </summary>
/// <param name="array1">Left-hand entries; these vary slowest in the result.</param>
/// <param name="array2">Right-hand entries.</param>
/// <returns>All "left;right" pairs, or an empty array when pinyin is disabled.</returns>
private string[] Combination(string[] array1, string[] array2)
{
    if (_settings.ShouldUsePinyin)
    {
        return array1
            .SelectMany(first => array2, (first, second) => $"{first};{second}")
            .ToArray();
    }

    return EmptyStringArray;
}
|
|
|
|
|
}
|
|
|
|
|
}
|