PowerToys/Wox.Infrastructure/Alphabet.cs

159 lines
5.3 KiB
C#
Raw Normal View History

using System;
2017-01-13 04:46:40 +08:00
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using hyjiacan.util.p4n;
2017-01-12 10:16:53 +08:00
using hyjiacan.util.p4n.format;
using Wox.Infrastructure.Logger;
using Wox.Infrastructure.Storage;
2019-11-16 06:34:27 +08:00
using Wox.Infrastructure.UserSettings;
namespace Wox.Infrastructure
{
public static class Alphabet
{
2017-01-13 04:46:40 +08:00
// Output format passed to PinyinHelper calls in this class; its tone type is set in InitializePinyinHelpers.
private static readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat();
// Maps an input string to the pinyin combinations computed for it; assigned from _pinyinStorage in InitializePinyinHelpers.
private static ConcurrentDictionary<string, string[][]> PinyinCache;
// Storage object used to load and save PinyinCache; created with the name "Pinyin" in InitializePinyinHelpers.
private static BinaryStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;
// Switch checked by the pinyin methods of this class; defaults to true and is overwritten by Initialize.
private static bool _shouldUsePinyin = true;
/// <summary>
/// Records whether pinyin support is wanted and, when it is, prepares the pinyin helpers.
/// </summary>
/// <param name="shouldUsePinyin">False turns the pinyin features of this class off.</param>
public static void Initialize(bool shouldUsePinyin = true)
{
    _shouldUsePinyin = shouldUsePinyin;

    if (!_shouldUsePinyin)
    {
        // Pinyin is switched off: nothing has to be loaded.
        return;
    }

    InitializePinyinHelpers();
}
2017-01-12 10:16:53 +08:00
/// <summary>
/// Sets the shared output format to tone-less pinyin, then (timed through Stopwatch.Normal)
/// creates the "Pinyin" storage, loads the cache from it and makes one call into the pinyin library.
/// </summary>
private static void InitializePinyinHelpers()
{
    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
    {
        var storage = new BinaryStorage<ConcurrentDictionary<string, string[][]>>("Pinyin");
        _pinyinStorage = storage;

        // Start from an empty cache when nothing could be loaded.
        var fallback = new ConcurrentDictionary<string, string[][]>();
        PinyinCache = storage.TryLoad(fallback);

        // The result is discarded on purpose: the call only forces the pinyin
        // library's static constructor to run now.
        PinyinHelper.toHanyuPinyinStringArray('T', Format);
    });

    Log.Info($"|Wox.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{PinyinCache.Count}>");
}
/// <summary>
/// Writes the in-memory pinyin cache to its storage.
/// Does nothing when pinyin is disabled or when the cache/storage have not been created yet.
/// </summary>
public static void Save()
{
    // _shouldUsePinyin defaults to true while the storage and the cache are only
    // assigned by Initialize, so both must be checked to avoid a NullReferenceException
    // when Save is reached before Initialize.
    if (!_shouldUsePinyin || _pinyinStorage == null || PinyinCache == null)
    {
        return;
    }

    _pinyinStorage.Save(PinyinCache);
}
2019-11-16 06:34:27 +08:00
// Shared empty results returned by the pinyin methods of this class.
// readonly: these are sentinels and must never be re-pointed at another array.
private static readonly string[] EmptyStringArray = new string[0];
private static readonly string[][] Empty2DStringArray = new string[0][];
/// <summary>
/// Replaces every Chinese character with its first pinyin reading; characters without
/// a pinyin reading are kept unchanged.
/// </summary>
/// <param name="word">Should be a word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// One string per character of <paramref name="word"/>; an empty array when pinyin is
/// disabled or <paramref name="word"/> is null.
/// </returns>
public static string[] Pinyin(string word)
{
    if (!_shouldUsePinyin || word == null)
    {
        return EmptyStringArray;
    }

    return word.Select(c =>
    {
        // NOTE(review): unlike PinyinComination this call does not pass Format - confirm that is intended.
        var pinyins = PinyinHelper.toHanyuPinyinStringArray(c);

        // No reading available (null, or defensively an empty array): keep the character itself.
        return pinyins == null || pinyins.Length == 0 ? c.ToString() : pinyins[0];
    }).ToArray();
}
/// <summary>
/// Replaces Chinese characters with pinyin; non-Chinese characters are not modified.
/// Because there is no word dictionary, every possible pinyin combination is returned,
/// e.g. 音乐 returns yinyue and yinle.
/// </summary>
/// <param name="characters">Should be a word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// One array per combination, each holding one entry per input character; an empty array
/// when pinyin is disabled or <paramref name="characters"/> is null or empty.
/// Results are stored in and served from the pinyin cache.
/// </returns>
public static string[][] PinyinComination(string characters)
{
    if (!_shouldUsePinyin || string.IsNullOrEmpty(characters))
    {
        return Empty2DStringArray;
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    string[][] cached;
    if (PinyinCache.TryGetValue(characters, out cached))
    {
        return cached;
    }

    // Build the cartesian product directly as arrays. Joining the syllables with ';'
    // and splitting afterwards breaks as soon as the input itself contains ';'
    // (it yields empty syllables), so no delimiter is used here.
    var combinations = new List<string[]> { new string[0] };
    foreach (var c in characters)
    {
        var pinyins = PinyinHelper.toHanyuPinyinStringArray(c, Format);

        // Characters without a reading stand for themselves.
        var candidates = pinyins == null || pinyins.Length == 0
            ? new[] { c.ToString() }
            : pinyins.Distinct().ToArray();

        // Existing prefixes vary slowest and the new candidates vary fastest.
        var extended = new List<string[]>(combinations.Count * candidates.Length);
        foreach (var prefix in combinations)
        {
            foreach (var candidate in candidates)
            {
                var next = new string[prefix.Length + 1];
                prefix.CopyTo(next, 0);
                next[prefix.Length] = candidate;
                extended.Add(next);
            }
        }

        combinations = extended;
    }

    var combination = combinations.ToArray();
    PinyinCache[characters] = combination;
    return combination;
}
/// <summary>
/// Builds an acronym from the first character of every syllable.
/// </summary>
/// <param name="pinyin">Syllables to abbreviate; null or empty entries are skipped.</param>
/// <returns>The concatenated first characters; an empty string for a null or empty array.</returns>
public static string Acronym(string[] pinyin)
{
    if (pinyin == null)
    {
        return string.Empty;
    }

    // Indexing p[0] on an empty syllable would throw IndexOutOfRangeException,
    // so entries without any character are left out.
    var initials = pinyin.Where(p => !string.IsNullOrEmpty(p)).Select(p => p[0]);
    return string.Join("", initials);
}
/// <summary>
/// Tells whether <paramref name="word"/> contains at least one character for which
/// the pinyin library returns a reading.
/// </summary>
/// <param name="word">Text to inspect; may be null.</param>
/// <returns>
/// False when pinyin is disabled, when <paramref name="word"/> is null, or when it is
/// longer than 40 characters (such strings are skipped and logged); otherwise whether
/// any character has a pinyin reading.
/// </returns>
public static bool ContainsChinese(string word)
{
    // Null is treated like "no Chinese" instead of failing with a NullReferenceException.
    if (!_shouldUsePinyin || word == null)
    {
        return false;
    }

    if (word.Length > 40)
    {
        Log.Debug($"|Wox.Infrastructure.StringMatcher.ScoreForPinyin|skip too long string: {word}");
        return false;
    }

    // Stops at the first character that has a reading.
    return word.Any(c => PinyinHelper.toHanyuPinyinStringArray(c) != null);
}
/// <summary>
/// Pairs every entry of <paramref name="array1"/> with every entry of <paramref name="array2"/>,
/// joining each pair with ';'. Entries of array1 vary slowest.
/// Returns an empty array when pinyin is disabled.
/// </summary>
private static string[] Combination(string[] array1, string[] array2)
{
    if (_shouldUsePinyin)
    {
        return array1
            .SelectMany(a1 => array2, (a1, a2) => $"{a1};{a2}")
            .ToArray();
    }

    return EmptyStringArray;
}
}
}