Advanced Paste v2 improvements

---------

    Co-authored-by: Stefan Markovic <stefan@janeasystems.com>
    Co-authored-by: Niels Laute <niels.laute@live.nl>
    Co-authored-by: Jordi Adoumie <98557455+joadoumie@users.noreply.github.com>
    Co-authored-by: Stefan Markovic <57057282+stefansjfw@users.noreply.github.com>
    Co-authored-by: Jaime Bernardo <jaime@janeasystems.com>
This commit is contained in:
Craig Loewen 2024-05-20 20:03:29 -04:00
parent 483f7aa464
commit 8dc95c991e
28 changed files with 2033 additions and 258 deletions

View File

@ -35,7 +35,10 @@
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Hosting.WindowsServices" Version="8.0.0" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime.Extensions" Version="0.10.0" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageVersion Include="Microsoft.Extensions.ObjectPool" Version="8.0.0" />
<PackageVersion Include="Microsoft.Toolkit.Uwp.Notifications" Version="7.1.2" />
<PackageVersion Include="Microsoft.Web.WebView2" Version="1.0.2365.46" />
<!-- Package Microsoft.Win32.SystemEvents added as a hack for being able to exclude the runtime assets so they don't conflict with 8.0.1. This is a dependency of System.Drawing.Common but the 8.0.1 version wasn't published to nuget. -->
@ -58,6 +61,7 @@
<PackageVersion Include="NLog" Version="5.0.4" />
<PackageVersion Include="NLog.Extensions.Logging" Version="5.3.8" />
<PackageVersion Include="NLog.Schema" Version="5.2.8" />
<PackageVersion Include="NReco.VideoConverter" Version="1.2.1" />
<PackageVersion Include="ReverseMarkdown" Version="4.1.0" />
<PackageVersion Include="ScipBe.Common.Office.OneNote" Version="3.0.1" />
<PackageVersion Include="SharpCompress" Version="0.37.2" />
@ -75,10 +79,11 @@
<PackageVersion Include="System.Diagnostics.EventLog" Version="8.0.0" />
<!-- Package System.Diagnostics.PerformanceCounter added as a hack for being able to exclude the runtime assets so they don't conflict with 8.0.1. This is a dependency of System.Data.OleDb but the 8.0.1 version wasn't published to nuget. -->
<PackageVersion Include="System.Diagnostics.PerformanceCounter" Version="8.0.0" />
<PackageVersion Include="System.Drawing.Common" Version="8.0.5" />
<PackageVersion Include="System.Drawing.Common" Version="8.0.4" />
<PackageVersion Include="System.IO.Abstractions" Version="17.2.3" />
<PackageVersion Include="System.IO.Abstractions.TestingHelpers" Version="17.2.3" />
<PackageVersion Include="System.Management" Version="8.0.0" />
<PackageVersion Include="System.Management.Automation" Version="7.4.0" />
<PackageVersion Include="System.Reactive" Version="6.0.0-preview.9" />
<PackageVersion Include="System.Runtime.Caching" Version="8.0.0" />
<!-- Package System.Security.Cryptography.ProtectedData added as a hack for being able to exclude the runtime assets so they don't conflict with 8.0.1. This is a dependency of System.Data.OleDb but the 8.0.1 version wasn't published to nuget. -->

View File

@ -0,0 +1 @@
*.onnx

View File

@ -0,0 +1,49 @@
## Setting up the ML models
PowerToys Advanced Paste uses ML models for on device AI actions such as transcribing an audio or video file. Before you can use these features, you will need to download the models and place them in the right directory before building the project.
Here is how your directory structure should look like with the models included
```
src\modules\AdvancedPaste\AdvancedPaste
├── AIModelAssets
│ ├── whisper
│ | ├── silero_vad.onnx
│ │ ├── whisper-small.onnx
```
There are two models that are used in this project.
1. Silero VAD - a voice activity detection model that is used to detect speech in an audio file and used to chunk a long audio file into smaller segments
2. Whisper Small - an automatic speech recognition model that is used to transcribe the audio or audio file segments
### Silero VAD
1. Download the pre-trained model from this [GitHub repo](https://github.com/snakers4/silero-vad). The model is available in the `files` directory, named `silero_vad.onnx`.
2. Place the `silero_vad.onnx` file in the `src\modules\AdvancedPaste\AdvancedPaste\AIModelAssets\whisper` directory. Create the directory if it does not exist.
### Whisper
1. Generate an optimized model with Olive [following these instructions](https://github.com/microsoft/Olive/blob/main/examples/whisper/README.md). Here the commands we used to generate the model (assuming you already have python installed):
``` bash
# Clone the Olive repository and navigate to the whisper example folder
git clone https://github.com/microsoft/Olive
cd Olive/examples/whisper
# Install the required packages
pip install olive-ai
python -m pip install -r requirements.txt
pip install onnxruntime onnxruntime_extensions
# prepare the whisper model (note, you can use other whisper variants as well, e.g. whisper-tiny)
python prepare_whisper_configs.py --model_name openai/whisper-small --multilingual --enable_timestamps
# Run the Olive workflow to generate the optimized model
olive run --config whisper_cpu_int8.json --setup
olive run --config whisper_cpu_int8.json
```
The generated model will be in the `.\models\conversion-transformers_optimization-onnx_dynamic_quantization-insert_beam_search-prepost` folder.
2. Rename the `whisper_cpu_int8_cpu-cpu_model.onnx` file that was generated to `whisper-small.onnx`and place it in the `src\modules\AdvancedPaste\AdvancedPaste\AIModelAssets\whisper` directory. Create the directory if it does not exist.

View File

@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace AdvancedPaste.AIModels.Whisper
{
public class DetectionResult
{
public string Type { get; set; }
public double Seconds { get; set; }
}
}

View File

@ -0,0 +1,148 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
namespace AdvancedPaste.AIModels.Whisper
{
public class SlieroVadDetector : IDisposable
{
private readonly SlieroVadOnnxModel model;
private readonly float startThreshold;
private readonly float endThreshold;
private readonly int samplingRate;
private readonly float minSilenceSamples;
private readonly float speechPadSamples;
private bool triggered;
private int tempEnd;
private int currentSample;
public SlieroVadDetector(
float startThreshold,
float endThreshold,
int samplingRate,
int minSilenceDurationMs,
int speechPadMs)
{
if (samplingRate != 8000 && samplingRate != 16000)
{
throw new ArgumentException("does not support sampling rates other than [8000, 16000]");
}
this.model = new SlieroVadOnnxModel();
this.startThreshold = startThreshold;
this.endThreshold = endThreshold;
this.samplingRate = samplingRate;
this.minSilenceSamples = samplingRate * minSilenceDurationMs / 1000f;
this.speechPadSamples = samplingRate * speechPadMs / 1000f;
Reset();
}
public void Reset()
{
model.ResetStates();
triggered = false;
tempEnd = 0;
currentSample = 0;
}
public Dictionary<string, double> Apply(byte[] data, bool returnSeconds)
{
float[] audioData = new float[data.Length / 2];
for (int i = 0; i < audioData.Length; i++)
{
audioData[i] = ((data[i * 2] & 0xff) | (data[(i * 2) + 1] << 8)) / 32767.0f;
}
int windowSizeSamples = audioData.Length;
currentSample += windowSizeSamples;
float speechProb = 0;
try
{
speechProb = model.Call(new float[][] { audioData }, samplingRate)[0];
}
catch (Exception ex)
{
throw new InvalidOperationException("An error occurred while calling the model", ex);
}
if (speechProb >= startThreshold && tempEnd != 0)
{
tempEnd = 0;
}
if (speechProb >= startThreshold && !triggered)
{
triggered = true;
int speechStart = (int)(currentSample - speechPadSamples);
speechStart = Math.Max(speechStart, 0);
Dictionary<string, double> result = new Dictionary<string, double>();
if (returnSeconds)
{
double speechStartSeconds = speechStart / (double)samplingRate;
double roundedSpeechStart = Math.Round(speechStartSeconds, 1, MidpointRounding.AwayFromZero);
result["start"] = roundedSpeechStart;
}
else
{
result["start"] = speechStart;
}
return result;
}
if (speechProb < endThreshold && triggered)
{
if (tempEnd == 0)
{
tempEnd = currentSample;
}
if (currentSample - tempEnd < minSilenceSamples)
{
return new Dictionary<string, double>();
}
else
{
int speechEnd = (int)(tempEnd + speechPadSamples);
tempEnd = 0;
triggered = false;
Dictionary<string, double> result = new Dictionary<string, double>();
if (returnSeconds)
{
double speechEndSeconds = speechEnd / (double)samplingRate;
double roundedSpeechEnd = Math.Round(speechEndSeconds, 1, MidpointRounding.AwayFromZero);
result["end"] = roundedSpeechEnd;
}
else
{
result["end"] = speechEnd;
}
return result;
}
}
return new Dictionary<string, double>();
}
public void Close()
{
Reset();
model.Close();
}
public void Dispose()
{
GC.SuppressFinalize(this);
this.model.Dispose();
}
}
}

View File

@ -0,0 +1,154 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
namespace AdvancedPaste.AIModels.Whisper
{
public class SlieroVadOnnxModel : IDisposable
{
private readonly InferenceSession session;
private OrtValue h;
private OrtValue c;
private int lastSr;
private int lastBatchSize;
private static readonly List<int> SampleRates = new List<int> { 8000, 16000 };
public SlieroVadOnnxModel()
{
var modelPath = $@"{AppDomain.CurrentDomain.BaseDirectory}AIModelAssets\whisper\silero_vad.onnx";
var options = new SessionOptions();
options.InterOpNumThreads = 1;
options.IntraOpNumThreads = 1;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
session = new InferenceSession(modelPath, options);
ResetStates();
}
public void ResetStates()
{
try
{
var hTensor = new DenseTensor<float>(new[] { 2, 1, 64 });
var cTensor = new DenseTensor<float>(new[] { 2, 1, 64 });
h = OrtValue.CreateTensorValueFromMemory<float>(OrtMemoryInfo.DefaultInstance, hTensor.Buffer, [2, 1, 64]);
c = OrtValue.CreateTensorValueFromMemory<float>(OrtMemoryInfo.DefaultInstance, cTensor.Buffer, [2, 1, 64]);
lastSr = 0;
lastBatchSize = 0;
}
catch (Exception)
{
}
}
public void Close()
{
session.Dispose();
}
public class ValidationResult
{
public float[][] X { get; private set; }
public int Sr { get; private set; }
public ValidationResult(float[][] x, int sr)
{
X = x;
Sr = sr;
}
}
private ValidationResult ValidateInput(float[][] x, int sr)
{
if (x.Length == 1)
{
x = new float[][] { x[0] };
}
if (x.Length > 2)
{
throw new ArgumentException($"Incorrect audio data dimension: {x.Length}");
}
if (sr != 16000 && sr % 16000 == 0)
{
int step = sr / 16000;
float[][] reducedX = x.Select(row => row.Where((_, i) => i % step == 0).ToArray()).ToArray();
x = reducedX;
sr = 16000;
}
if (!SampleRates.Contains(sr))
{
throw new ArgumentException($"Only supports sample rates {string.Join(", ", SampleRates)} (or multiples of 16000)");
}
if ((float)sr / x[0].Length > 31.25)
{
throw new ArgumentException("Input audio is too short");
}
return new ValidationResult(x, sr);
}
public float[] Call(float[][] x, int sr)
{
var result = ValidateInput(x, sr);
x = result.X;
sr = result.Sr;
int batchSize = x.Length;
int sampleSize = x[0].Length; // Assuming all subarrays have identical length
if (lastBatchSize == 0 || lastSr != sr || lastBatchSize != batchSize)
{
ResetStates();
}
// Flatten the jagged array and create the tensor with the correct shape
var flatArray = x.SelectMany(inner => inner).ToArray();
var input = new Dictionary<string, OrtValue>
{
{ "input", OrtValue.CreateTensorValueFromMemory(flatArray, [batchSize, sampleSize]) },
{ "sr", OrtValue.CreateTensorValueFromMemory(new long[] { sr }, [1]) },
{ "h", h },
{ "c", c },
};
var runOptions = new RunOptions();
try
{
using (var results = session.Run(runOptions, input, session.OutputNames))
{
var output = results[0].GetTensorDataAsSpan<float>().ToArray();
h = OrtValue.CreateTensorValueFromMemory(results.ElementAt(1).GetTensorDataAsSpan<float>().ToArray(), [2, 1, 64]);
c = OrtValue.CreateTensorValueFromMemory(results.ElementAt(2).GetTensorDataAsSpan<float>().ToArray(), [2, 1, 64]);
lastSr = sr;
lastBatchSize = batchSize;
return output;
}
}
catch (Exception ex)
{
throw new InvalidOperationException("An error occurred while calling the model", ex);
}
}
public void Dispose()
{
session?.Dispose();
GC.SuppressFinalize(this);
}
}
}

View File

@ -0,0 +1,21 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace AdvancedPaste.AIModels.Whisper
{
public class WhisperChunk
{
public double Start { get; set; }
public double End { get; set; }
public WhisperChunk(double start, double end)
{
this.Start = start;
this.End = end;
}
public double Length => End - Start;
}
}

View File

@ -0,0 +1,114 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace AdvancedPaste.AIModels.Whisper
{
public static class WhisperChunking
{
private static readonly int SAMPLERATE = 16000;
private static readonly float STARTTHRESHOLD = 0.25f;
private static readonly float ENDTHRESHOLD = 0.25f;
private static readonly int MINSILENCEDURATIONMS = 1000;
private static readonly int SPEECHPADMS = 400;
private static readonly int WINDOWSIZESAMPLES = 3200;
private static readonly double MAXCHUNKS = 29;
private static readonly double MINCHUNKS = 5;
public static List<WhisperChunk> SmartChunking(byte[] audioBytes)
{
SlieroVadDetector vadDetector;
vadDetector = new SlieroVadDetector(STARTTHRESHOLD, ENDTHRESHOLD, SAMPLERATE, MINSILENCEDURATIONMS, SPEECHPADMS);
int bytesPerSample = 2;
int bytesPerWindow = WINDOWSIZESAMPLES * bytesPerSample;
float totalSeconds = audioBytes.Length / (SAMPLERATE * 2);
var result = new List<DetectionResult>();
for (int offset = 0; offset + bytesPerWindow <= audioBytes.Length; offset += bytesPerWindow)
{
byte[] data = new byte[bytesPerWindow];
Array.Copy(audioBytes, offset, data, 0, bytesPerWindow);
// Simulating the process as if data was being read in chunks
try
{
var detectResult = vadDetector.Apply(data, true);
// iterate over detectResult and apply the data to result:
foreach (var (key, value) in detectResult)
{
result.Add(new DetectionResult { Type = key, Seconds = value });
}
}
catch (Exception e)
{
// Depending on the need, you might want to break out of the loop or just report the error
Console.Error.WriteLine($"Error applying VAD detector: {e.Message}");
}
}
var stamps = GetTimeStamps(result, totalSeconds, MAXCHUNKS, MINCHUNKS);
return stamps;
}
private static List<WhisperChunk> GetTimeStamps(List<DetectionResult> voiceAreas, double totalSeconds, double maxChunkLength, double minChunkLength)
{
if (totalSeconds <= maxChunkLength)
{
return new List<WhisperChunk> { new WhisperChunk(0, totalSeconds) };
}
voiceAreas = voiceAreas.OrderBy(va => va.Seconds).ToList();
List<WhisperChunk> chunks = new List<WhisperChunk>();
double nextChunkStart = 0.0;
while (nextChunkStart < totalSeconds)
{
double idealChunkEnd = nextChunkStart + maxChunkLength;
double chunkEnd = idealChunkEnd > totalSeconds ? totalSeconds : idealChunkEnd;
var validVoiceAreas = voiceAreas.Where(va => va.Seconds > nextChunkStart && va.Seconds <= chunkEnd).ToList();
if (validVoiceAreas.Count != 0)
{
chunkEnd = validVoiceAreas.Last().Seconds;
}
chunks.Add(new WhisperChunk(nextChunkStart, chunkEnd));
nextChunkStart = chunkEnd + 0.1;
}
return MergeSmallChunks(chunks, maxChunkLength, minChunkLength);
}
private static List<WhisperChunk> MergeSmallChunks(List<WhisperChunk> chunks, double maxChunkLength, double minChunkLength)
{
for (int i = 1; i < chunks.Count; i++)
{
// Check if current chunk is small and can be merged with previous
if (chunks[i].Length < minChunkLength)
{
double prevChunkLength = chunks[i - 1].Length;
double combinedLength = prevChunkLength + chunks[i].Length;
if (combinedLength <= maxChunkLength)
{
chunks[i - 1].End = chunks[i].End; // Merge with previous chunk
chunks.RemoveAt(i); // Remove current chunk
i--; // Adjust index to recheck current position now pointing to next chunk
}
}
}
return chunks;
}
}
}

View File

@ -0,0 +1,212 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using NReco.VideoConverter;
using Windows.Storage;
namespace AdvancedPaste.AIModels.Whisper
{
public static class Whisper
{
private static InferenceSession _inferenceSession;
private static InferenceSession InitializeModel()
{
// model generated from https://github.com/microsoft/Olive/blob/main/examples/whisper/README.md
// var modelPath = $@"{AppDomain.CurrentDomain.BaseDirectory}AIModelAssets\whisper\whisper_tiny.onnx";
var modelPath = $@"{AppDomain.CurrentDomain.BaseDirectory}AIModelAssets\whisper\whisper_small.onnx";
SessionOptions options = new SessionOptions();
options.RegisterOrtExtensions();
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
var session = new InferenceSession(modelPath, options);
return session;
}
private static List<WhisperTranscribedChunk> TranscribeChunkAsync(byte[] pcmAudioData, string inputLanguage, WhisperTaskType taskType, int offsetSeconds = 30)
{
#pragma warning disable CA1861 // Avoid constant arrays as arguments
if (_inferenceSession == null)
{
_inferenceSession = InitializeModel();
}
var audioTensor = new DenseTensor<byte>(pcmAudioData, [1, pcmAudioData.Length]);
var timestampsEnableTensor = new DenseTensor<int>(1);
timestampsEnableTensor.Fill(1);
int task = (int)taskType;
int langCode = WhisperUtils.GetLangId(inputLanguage);
var decoderInputIds = new int[] { 50258, langCode, task };
var langAndModeTensor = new DenseTensor<int>(decoderInputIds, [1, 3]);
var minLengthTensor = new DenseTensor<int>(1);
minLengthTensor.Fill(0);
var maxLengthTensor = new DenseTensor<int>(1);
maxLengthTensor.Fill(448);
var numBeamsTensor = new DenseTensor<int>(1);
numBeamsTensor.Fill(1);
var numReturnSequencesTensor = new DenseTensor<int>(1);
numReturnSequencesTensor.Fill(1);
var lengthPenaltyTensor = new DenseTensor<float>(1);
lengthPenaltyTensor.Fill(1.0f);
var repetitionPenaltyTensor = new DenseTensor<float>(1);
repetitionPenaltyTensor.Fill(1.2f);
var inputs = new List<NamedOnnxValue>
{
NamedOnnxValue.CreateFromTensor("audio_stream", audioTensor),
NamedOnnxValue.CreateFromTensor("min_length", minLengthTensor),
NamedOnnxValue.CreateFromTensor("max_length", maxLengthTensor),
NamedOnnxValue.CreateFromTensor("num_beams", numBeamsTensor),
NamedOnnxValue.CreateFromTensor("num_return_sequences", numReturnSequencesTensor),
NamedOnnxValue.CreateFromTensor("length_penalty", lengthPenaltyTensor),
NamedOnnxValue.CreateFromTensor("repetition_penalty", repetitionPenaltyTensor),
NamedOnnxValue.CreateFromTensor("logits_processor", timestampsEnableTensor),
NamedOnnxValue.CreateFromTensor("decoder_input_ids", langAndModeTensor),
};
#pragma warning restore CA1861 // Avoid constant arrays as arguments
// for multithread need to try AsyncRun
try
{
using var results = _inferenceSession.Run(inputs);
var result = results[0].AsTensor<string>().GetValue(0);
return WhisperUtils.ProcessTranscriptionWithTimestamps(result, offsetSeconds);
}
catch (Exception)
{
// return empty list in case of exception
return new List<WhisperTranscribedChunk>();
}
}
public static List<WhisperTranscribedChunk> TranscribeAsync(StorageFile audioFile, int startSeconds, int durationSeconds, EventHandler<float> progress = null)
{
var transcribedChunks = new List<WhisperTranscribedChunk>();
var sw = Stopwatch.StartNew();
var audioBytes = LoadAudioBytes(audioFile.Path, startSeconds, durationSeconds);
sw.Stop();
Debug.WriteLine($"Loading took {sw.ElapsedMilliseconds} ms");
sw.Start();
var dynamicChunks = WhisperChunking.SmartChunking(audioBytes);
sw.Stop();
Debug.WriteLine($"Chunking took {sw.ElapsedMilliseconds} ms");
for (var i = 0; i < dynamicChunks.Count; i++)
{
var chunk = dynamicChunks[i];
var audioSegment = ExtractAudioSegment(audioFile.Path, chunk.Start, chunk.End - chunk.Start);
var transcription = TranscribeChunkAsync(audioSegment, "en", WhisperTaskType.Transcribe, (int)chunk.Start);
transcribedChunks.AddRange(transcription);
progress?.Invoke(null, (float)i / dynamicChunks.Count);
}
return transcribedChunks;
}
private static byte[] LoadAudioBytes(string file, int startSeconds, int durationSeconds)
{
var ffmpeg = new FFMpegConverter();
var output = new MemoryStream();
var extension = Path.GetExtension(file).Substring(1);
// Convert to PCM
if (startSeconds == 0 && durationSeconds == 0)
{
ffmpeg.ConvertMedia(
inputFile: file,
inputFormat: null,
outputStream: output,
outputFormat: "s16le",
new ConvertSettings()
{
AudioCodec = "pcm_s16le",
AudioSampleRate = 16000,
CustomOutputArgs = "-ac 1",
});
}
else
{
ffmpeg.ConvertMedia(
inputFile: file,
inputFormat: null,
outputStream: output,
outputFormat: "s16le",
new ConvertSettings()
{
Seek = (float?)startSeconds,
MaxDuration = (float?)durationSeconds,
AudioCodec = "pcm_s16le",
AudioSampleRate = 16000,
CustomOutputArgs = "-ac 1",
});
}
return output.ToArray();
}
private static byte[] ExtractAudioSegment(string inPath, double startTimeInSeconds, double segmentDurationInSeconds)
{
try
{
var extension = System.IO.Path.GetExtension(inPath).Substring(1);
var output = new MemoryStream();
var convertSettings = new ConvertSettings
{
Seek = (float?)startTimeInSeconds,
MaxDuration = (float?)segmentDurationInSeconds,
AudioSampleRate = 16000,
CustomOutputArgs = "-vn -ac 1",
};
var ffMpegConverter = new FFMpegConverter();
ffMpegConverter.ConvertMedia(
inputFile: inPath,
inputFormat: null,
outputStream: output,
outputFormat: "wav",
convertSettings);
return output.ToArray();
}
catch (Exception ex)
{
Console.WriteLine("Error during the audio extraction: " + ex.Message);
return Array.Empty<byte>(); // Return an empty array in case of exception
}
}
}
internal enum WhisperTaskType
{
Translate = 50358,
Transcribe = 50359,
}
}

View File

@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
namespace AdvancedPaste.AIModels.Whisper
{
public class WhisperTranscribedChunk
{
public string Text { get; set; }
public double Start { get; set; }
public double End { get; set; }
public double Length => End - Start;
}
}

View File

@ -0,0 +1,206 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace AdvancedPaste.AIModels.Whisper
{
internal static class WhisperUtils
{
private static Dictionary<string, string> languageCodes = new()
{
{ "English", "en" },
{ "Serbian", "sr" },
{ "Hindi", "hi" },
{ "Spanish", "es" },
{ "Russian", "ru" },
{ "Korean", "ko" },
{ "French", "fr" },
{ "Japanese", "ja" },
{ "Portuguese", "pt" },
{ "Turkish", "tr" },
{ "Polish", "pl" },
{ "Catalan", "ca" },
{ "Dutch", "nl" },
{ "Arabic", "ar" },
{ "Swedish", "sv" },
{ "Italian", "it" },
{ "Indonesian", "id" },
{ "Macedonian", "mk" },
{ "Mandarin", "zh" },
};
public static int GetLangId(string languageString)
{
int langId = 50259;
Dictionary<string, int> langToId = new Dictionary<string, int>
{
{ "af", 50327 },
{ "am", 50334 },
{ "ar", 50272 },
{ "as", 50350 },
{ "az", 50304 },
{ "ba", 50355 },
{ "be", 50330 },
{ "bg", 50292 },
{ "bn", 50302 },
{ "bo", 50347 },
{ "br", 50309 },
{ "bs", 50315 },
{ "ca", 50270 },
{ "cs", 50283 },
{ "cy", 50297 },
{ "da", 50285 },
{ "de", 50261 },
{ "el", 50281 },
{ "en", 50259 },
{ "es", 50262 },
{ "et", 50307 },
{ "eu", 50310 },
{ "fa", 50300 },
{ "fi", 50277 },
{ "fo", 50338 },
{ "fr", 50265 },
{ "gl", 50319 },
{ "gu", 50333 },
{ "haw", 50352 },
{ "ha", 50354 },
{ "he", 50279 },
{ "hi", 50276 },
{ "hr", 50291 },
{ "ht", 50339 },
{ "hu", 50286 },
{ "hy", 50312 },
{ "id", 50275 },
{ "is", 50311 },
{ "it", 50274 },
{ "ja", 50266 },
{ "jw", 50356 },
{ "ka", 50329 },
{ "kk", 50316 },
{ "km", 50323 },
{ "kn", 50306 },
{ "ko", 50264 },
{ "la", 50294 },
{ "lb", 50345 },
{ "ln", 50353 },
{ "lo", 50336 },
{ "lt", 50293 },
{ "lv", 50301 },
{ "mg", 50349 },
{ "mi", 50295 },
{ "mk", 50308 },
{ "ml", 50296 },
{ "mn", 50314 },
{ "mr", 50320 },
{ "ms", 50282 },
{ "mt", 50343 },
{ "my", 50346 },
{ "ne", 50313 },
{ "nl", 50271 },
{ "nn", 50342 },
{ "no", 50288 },
{ "oc", 50328 },
{ "pa", 50321 },
{ "pl", 50269 },
{ "ps", 50340 },
{ "pt", 50267 },
{ "ro", 50284 },
{ "ru", 50263 },
{ "sa", 50344 },
{ "sd", 50332 },
{ "si", 50322 },
{ "sk", 50298 },
{ "sl", 50305 },
{ "sn", 50324 },
{ "so", 50326 },
{ "sq", 50317 },
{ "sr", 50303 },
{ "su", 50357 },
{ "sv", 50273 },
{ "sw", 50318 },
{ "ta", 50287 },
{ "te", 50299 },
{ "tg", 50331 },
{ "th", 50289 },
{ "tk", 50341 },
{ "tl", 50325 },
{ "tr", 50268 },
{ "tt", 50335 },
{ "ug", 50348 },
{ "uk", 50260 },
{ "ur", 50337 },
{ "uz", 50351 },
{ "vi", 50278 },
{ "xh", 50322 },
{ "yi", 50305 },
{ "yo", 50324 },
{ "zh", 50258 },
{ "zu", 50321 },
};
if (languageCodes.TryGetValue(languageString, out string langCode))
{
langId = langToId[langCode];
}
return langId;
}
public static List<WhisperTranscribedChunk> ProcessTranscriptionWithTimestamps(string transcription, double offsetSeconds = 0)
{
Regex pattern = new Regex(@"<\|([\d.]+)\|>([^<]+)<\|([\d.]+)\|>");
MatchCollection matches = pattern.Matches(transcription);
List<WhisperTranscribedChunk> list = new();
for (int i = 0; i < matches.Count; i++)
{
// Parse the original start and end times
#pragma warning disable CA1305 // Specify IFormatProvider
double start = double.Parse(matches[i].Groups[1].Value);
double end = double.Parse(matches[i].Groups[3].Value);
#pragma warning restore CA1305 // Specify IFormatProvider
string subtitle = string.IsNullOrEmpty(matches[i].Groups[2].Value) ? string.Empty : matches[i].Groups[2].Value.Trim();
WhisperTranscribedChunk chunk = new()
{
Text = subtitle,
Start = start + offsetSeconds,
End = end + offsetSeconds,
};
list.Add(chunk);
}
return list;
}
public static List<WhisperTranscribedChunk> MergeTranscribedChunks(List<WhisperTranscribedChunk> chunks)
{
List<WhisperTranscribedChunk> list = new();
WhisperTranscribedChunk transcribedChunk = chunks[0];
for (int i = 1; i < chunks.Count; i++)
{
char lastCharOfPrev = transcribedChunk.Text[transcribedChunk.Text.Length - 1];
char firstCharOfNext = chunks[i].Text[0];
// Approach 1: Get full sentences together
// Approach 2: Sliding window of desired duration
if (char.IsLower(firstCharOfNext) || (lastCharOfPrev != '.' && lastCharOfPrev != '?' && lastCharOfPrev != '!'))
{
transcribedChunk.End = chunks[i].End;
transcribedChunk.Text += " " + chunks[i].Text;
}
else
{
list.Add(transcribedChunk);
transcribedChunk = chunks[i];
}
}
list.Add(transcribedChunk);
return list;
}
}
}

View File

@ -3,9 +3,9 @@
<PropertyGroup>
<OutputType>WinExe</OutputType>
<TargetFramework>net8.0-windows10.0.20348.0</TargetFramework>
<TargetFramework>net8.0-windows10.0.22621.0</TargetFramework>
<TargetPlatformMinVersion>10.0.19041.0</TargetPlatformMinVersion>
<SupportedOSPlatformVersion>10.0.19041.0</SupportedOSPlatformVersion>
<SupportedOSPlatformVersion>10.0.22621.0</SupportedOSPlatformVersion>
<OutputPath>..\..\..\..\$(Platform)\$(Configuration)\WinUI3Apps</OutputPath>
<UseWinUI>true</UseWinUI>
<ApplicationIcon>Assets\AdvancedPaste\AdvancedPaste.ico</ApplicationIcon>
@ -66,13 +66,18 @@
<PackageReference Include="CommunityToolkit.WinUI.Extensions" />
<PackageReference Include="CommunityToolkit.WinUI.Controls.Primitives" />
<PackageReference Include="Microsoft.Extensions.Hosting" />
<PackageReference Include="Microsoft.ML.OnnxRuntime" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.Extensions" />
<PackageReference Include="Microsoft.WindowsAppSDK" />
<PackageReference Include="Microsoft.Windows.CsWin32" />
<PackageReference Include="Microsoft.Windows.CsWinRT" />
<PackageReference Include="Microsoft.Windows.SDK.BuildTools" />
<PackageReference Include="NReco.VideoConverter" />
<PackageReference Include="ReverseMarkdown" />
<!-- HACK: To align Microsoft.Bcl.AsyncInterfaces.dll version with PowerToys.Settings.csproj. -->
<PackageReference Include="StreamJsonRpc" />
<!-- HACK: To align deps versions. -->
<PackageReference Include="System.Drawing.Common" />
<PackageReference Include="WinUIEx" />
<Manifest Include="$(ApplicationManifest)" />
@ -102,6 +107,13 @@
<ProjectReference Include="..\..\..\settings-ui\Settings.UI.Library\Settings.UI.Library.csproj" />
</ItemGroup>
<!-- Copy the model over -->
<ItemGroup>
<None Update="AIModelAssets\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<!--
Defining the "HasPackageAndPublishMenuAddedByProject" property here allows the Solution
Explorer "Package and Publish" context menu entry to be enabled for this project even if

View File

@ -5,6 +5,7 @@
using System;
using System.Threading.Tasks;
using AdvancedPaste.Helpers;
using AdvancedPaste.Pages;
using AdvancedPaste.ViewModels;
using ManagedCommon;
using Microsoft.Extensions.DependencyInjection;
@ -100,14 +101,12 @@ namespace AdvancedPaste
private void OnAdvancedPasteJsonHotkey()
{
viewModel.GetClipboardData();
viewModel.ToJsonFunction(true);
viewModel.ToJson();
}
private void OnAdvancedPasteMarkdownHotkey()
{
viewModel.GetClipboardData();
viewModel.ToMarkdownFunction(true);
viewModel.ToMarkdown();
}
private void OnAdvancedPasteHotkey()

View File

@ -1,4 +1,4 @@
<UserControl
<UserControl
x:Class="AdvancedPaste.Controls.PromptBox"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"

View File

@ -137,6 +137,10 @@ namespace AdvancedPaste.Controls
private void InputTxtBox_TextChanging(Microsoft.UI.Xaml.Controls.TextBox sender, TextBoxTextChangingEventArgs args)
{
SendBtn.Visibility = InputTxtBox.Text.Length > 0 ? Visibility.Visible : Visibility.Collapsed;
// Sort available options
string input = InputTxtBox.Text;
ViewModel.FilterOptionsFromInput(input);
}
private void InputTxtBox_KeyDown(object sender, Microsoft.UI.Xaml.Input.KeyRoutedEventArgs e)

View File

@ -7,8 +7,8 @@
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:pages="using:AdvancedPaste.Pages"
xmlns:winuiex="using:WinUIEx"
Width="420"
Height="308"
Width="600"
Height="320"
MinWidth="420"
MinHeight="308"
Closed="WindowEx_Closed"

View File

@ -115,6 +115,7 @@
ItemClick="PasteOptionsListView_ItemClick"
ItemContainerTransitions="{x:Null}"
ItemsSource="{x:Bind pasteFormats, Mode=OneWay}"
ContainerContentChanging="PasteFormatListContentChanging"
SelectionMode="None"
TabIndex="1">
<ListView.ItemTemplate>

View File

@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using AdvancedPaste.Helpers;
@ -24,28 +25,39 @@ namespace AdvancedPaste.Pages
public sealed partial class MainPage : Page
{
private readonly ObservableCollection<ClipboardItem> clipboardHistory;
private readonly ObservableCollection<PasteFormat> pasteFormats;
private readonly Microsoft.UI.Dispatching.DispatcherQueue _dispatcherQueue = Microsoft.UI.Dispatching.DispatcherQueue.GetForCurrentThread();
private string _filterText;
private ObservableCollection<PasteFormat> pasteFormats = new();
private bool _pasteAsPlainEnabled;
private bool _pasteAsMarkdownEnabled;
private bool _pasteAsJsonEnabled;
private bool _pasteAudioToTextEnabled;
private bool _pasteAsFileEnabled;
public OptionsViewModel ViewModel { get; private set; }
public MainPage()
{
this.InitializeComponent();
pasteFormats =
[
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE8E9" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsPlainText"), Format = PasteFormats.PlainText },
new PasteFormat { Icon = new FontIcon() { Glyph = "\ue8a5" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsMarkdown"), Format = PasteFormats.Markdown },
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE943" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsJson"), Format = PasteFormats.Json },
];
ViewModel = App.GetService<OptionsViewModel>();
clipboardHistory = new ObservableCollection<ClipboardItem>();
LoadClipboardHistoryEvent(null, null);
LoadClipboardHistoryAsync();
Clipboard.HistoryChanged += LoadClipboardHistoryEvent;
ViewModel.FormatsChanged += FormatsChangedHandler;
ViewModel.WindowShown += WindowShownHandler;
this.EnablePasteOptions();
}
private bool WindowShownHandler()
{
EnablePasteOptions();
return true;
}
private void LoadClipboardHistoryEvent(object sender, object e)
@ -56,6 +68,72 @@ namespace AdvancedPaste.Pages
});
}
private void GenerateFormatList()
{
List<PasteFormat> pasteFormatFullList =
[
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE8AC" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsPlainText"), Format = PasteFormats.PlainText, Enabled = _pasteAsPlainEnabled },
new PasteFormat { Icon = new FontIcon() { Glyph = "\ue8a5" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsMarkdown"), Format = PasteFormats.Markdown, Enabled = _pasteAsMarkdownEnabled },
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE943" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsJson"), Format = PasteFormats.Json, Enabled = _pasteAsJsonEnabled },
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE943" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAudioToText"), Format = PasteFormats.AudioToText, Enabled = _pasteAudioToTextEnabled },
new PasteFormat { Icon = new FontIcon() { Glyph = "\uE943" }, Name = ResourceLoaderInstance.ResourceLoader.GetString("PasteAsFile"), Format = PasteFormats.File, Enabled = _pasteAsFileEnabled },
];
ObservableCollection<PasteFormat> toAddFormats;
if (_filterText != null)
{
toAddFormats = new ObservableCollection<PasteFormat>(pasteFormatFullList.Where(pasteFormat => pasteFormat.Name.Contains(_filterText, StringComparison.OrdinalIgnoreCase)).OrderByDescending(pasteFormat => pasteFormat.Enabled));
}
else
{
toAddFormats = new ObservableCollection<PasteFormat>(pasteFormatFullList.OrderByDescending(pasteFormat => pasteFormat.Enabled));
}
pasteFormats.Clear();
foreach (var format in toAddFormats)
{
pasteFormats.Add(format);
}
}
private void EnablePasteOptions()
{
Logger.LogInfo("Enabling paste options");
_pasteAsPlainEnabled = false;
_pasteAsMarkdownEnabled = false;
_pasteAsJsonEnabled = false;
_pasteAudioToTextEnabled = false;
_pasteAsFileEnabled = false;
if (ViewModel.ClipboardHasText)
{
_pasteAsJsonEnabled = true;
_pasteAsPlainEnabled = true;
_pasteAsFileEnabled = true;
}
if (ViewModel.ClipboardHasHtml)
{
_pasteAsMarkdownEnabled = true;
_pasteAsFileEnabled = true;
}
if (ViewModel.ClipboardHasImage)
{
_pasteAsFileEnabled = true;
}
if (ViewModel.ClipboardHasAudio)
{
_pasteAudioToTextEnabled = true;
}
GenerateFormatList();
}
public async void LoadClipboardHistoryAsync()
{
try
@ -131,17 +209,27 @@ namespace AdvancedPaste.Pages
private void PasteAsPlain()
{
ViewModel.ToPlainTextFunction();
ViewModel.ToPlainText();
}
private void PasteAsMarkdown()
{
ViewModel.ToMarkdownFunction();
ViewModel.ToMarkdown();
}
private void PasteAsJson()
{
ViewModel.ToJsonFunction();
ViewModel.ToJson();
}
private void AudioToText()
{
ViewModel.AudioToText();
}
private void PasteAsFile()
{
ViewModel.ToFile();
}
private void PasteOptionsListView_ItemClick(object sender, ItemClickEventArgs e)
@ -170,6 +258,20 @@ namespace AdvancedPaste.Pages
PowerToysTelemetry.Log.WriteEvent(new Telemetry.AdvancedPasteFormatClickedEvent(PasteFormats.Json));
break;
}
case PasteFormats.AudioToText:
{
AudioToText();
PowerToysTelemetry.Log.WriteEvent(new Telemetry.AdvancedPasteFormatClickedEvent(PasteFormats.AudioToText));
return;
}
case PasteFormats.File:
{
PasteAsFile();
PowerToysTelemetry.Log.WriteEvent(new Telemetry.AdvancedPasteFormatClickedEvent(PasteFormats.File));
break;
}
}
}
}
@ -237,5 +339,24 @@ namespace AdvancedPaste.Pages
}
}
}
private void PasteFormatListContentChanging(ListViewBase sender, ContainerContentChangingEventArgs args)
{
var listViewItem = args.ItemContainer;
if (listViewItem != null)
{
var model = (PasteFormat)args.Item;
listViewItem.IsEnabled = model.Enabled;
}
}
private bool FormatsChangedHandler(string input)
{
_filterText = input;
GenerateFormatList();
return true;
}
}
}

View File

@ -6,11 +6,13 @@ using System;
using System.Globalization;
using System.IO;
using System.Net;
using AdvancedPaste.Models;
using Azure;
using Azure.AI.OpenAI;
using ManagedCommon;
using Microsoft.PowerToys.Settings.UI.Library;
using Microsoft.PowerToys.Telemetry;
using Windows.ApplicationModel.DataTransfer;
using Windows.Security.Credentials;
namespace AdvancedPaste.Helpers
@ -93,21 +95,8 @@ namespace AdvancedPaste.Helpers
return response.Value.Choices[0].Text;
}
public AICompletionsResponse AIFormatString(string inputInstructions, string inputString)
private AICompletionsResponse TryAICompletion(string systemInstructions, string userMessage)
{
string systemInstructions = $@"You are tasked with reformatting user's clipboard data. Use the user's instructions, and the content of their clipboard below to edit their clipboard content as they have requested it.
Do not output anything else besides the reformatted clipboard content.";
string userMessage = $@"User instructions:
{inputInstructions}
Clipboard Content:
{inputString}
Output:
";
string aiResponse = null;
int apiRequestStatus = (int)HttpStatusCode.OK;
try
@ -129,5 +118,170 @@ Output:
return new AICompletionsResponse(aiResponse, apiRequestStatus);
}
public AICompletionsResponse AIFormatString(string inputInstructions, string inputString)
{
string systemInstructions = $@"You are tasked with reformatting user's clipboard data. Use the user's instructions, and the content of their clipboard below to edit their clipboard content as they have requested it.
Ensure that you do all that is requested of you in the instructions. If the user has multiple instructions in their prompt be sure that both are all completed.
Your output can include HTML if necessary, but it is not required.";
string userMessage = $@"User instructions:
{inputInstructions}
Clipboard Content:
{inputString}
Output:
";
return TryAICompletion(systemInstructions, userMessage);
}
public string AIFormatStringAsHTML(string inputInstructions, string inputString)
{
string systemInstructions = $@"You are tasked with reformatting user's clipboard data. Use the user's instructions, and the content of their clipboard below to reformat their clipboard content as they have requested it.
Ensure that you do all that is requested of you in the instructions. If the user has multiple instructions in their prompt be sure that both are all completed.
Do not use <code> blocks or classes to style the HTML, instead format directly into the HTML with inline styles wherever possible.
Your output needs to be in HTML format.";
string userMessage = $@"User instructions:
{inputInstructions}
Clipboard Content:
{inputString}
Output:
";
return TryAICompletion(systemInstructions, userMessage).Response;
}
public string AIGetHTMLOrPlainTextOutput(string inputInstructions, string inputString)
{
string systemInstructions = $@"You are tasked with determining the output format for a user's request to reformat the clipboard data.
You can choose between the output of 'HTML' or 'PlainText'. Your answer can only be those two options, do not put any other output.
Use these examples below to inform you.
Example user instructions:
Make this pretty
Example clipboard content:
var x = 5;
Example output:
HTML
Example user instructions:
Change to a pirate speaking in markdown
Example clipboard content:
Hello my good friend.
Example output:
PlainText
Example user instructions:
Show this data as a table.
Example clipboard content:
T-Rex, 5, 10
Velociraptor, 7, 15
Example output:
HTML
Now output the real answer.";
string userMessage = $@"User instructions:
{inputInstructions}
Clipboard Content:
{inputString}
Output:
";
return TryAICompletion(systemInstructions, userMessage).Response;
}
public string GetOperationsFromAI(string inputInstructions, bool hasText, bool hasImage, bool hasHtml, bool hasFile, bool hasAudio)
{
string availableFormatString = "(string inputInstructions";
if (hasText)
{
availableFormatString += ", string clipboardText";
}
if (hasImage)
{
availableFormatString += ", Image clipboardImage";
}
if (hasHtml)
{
availableFormatString += ", HtmlData clipboardHTML";
}
if (hasFile)
{
availableFormatString += ", File clipboardFile";
}
if (hasAudio)
{
availableFormatString += ", Audio clipboardAudio";
}
availableFormatString += ")";
string systemInstructions = $@"You are tasked with determining what operations are needed to reformat a user's clipboard data. Use the user's instructions, available functions, and clipboard data content to output the list of operations needed.
You will output youre response as a function in C# ONLY using the functions provided (Do not use any other C# functions other than what is provided below!)
Available functions:
- string ToJSON(string clipboardText)
- Returns a string formatted into JSON from the clipboard content, only accepts text
- Only to be used if the user explicitly requests JSON.
- string ToPlainText(string clipboardText)
- Returns a string with the clipboard content formatted into plain text, only accepts text
- string ToCustomWithAI(string inputInstructions, string clipboardText)
- Returns a string with the clipboard content formatted according to the input instructions, only accepts text.
- Use this function to do custom processing of the text if another function above does not meet the requirements. Feel free to modify the user's instructions as needed to input to this function.
- string ToFile(string clipboardText)
- Returns a string of the filename of the file created from the input clipboard text
- string ToFile(Image clipboardImage)
- Returns a string of the filename of the file created from the input clipboard image
- string AudioToText(Audio clipboardAudio, int seekSeconds, int maxDurationSeconds)
- Returns a string with the clipboard audio content formatted into text, only accepts audio
- seekSeconds is the number of seconds to skip from the start of the audio file
- maxDurationSeconds is the maximum number of seconds to process from the audio file
- If seekSeconds and maxDurationSeconds are 0 and 0 the entire file will be processed.
Example available arguments:
(string inputInstructions, Audio clipboardAudio)
Example user instructions:
To text, convert to Python, and highlight syntax with VS Code highlighting
Example output:
public string ReformatClipboard(string inputInstructions, Audio clipboardAudio)
{{
string audioText = AudioToText(clipboardAudio, 0, 0);
string customFormattedText = ToCustomWithAI('Convert to Python', imageText);
string customFormattedText2 = ToCustomWithAI('Highlight syntax with VS Code highlighting', imageText);
return customFormattedText2;
}}";
string userMessage = $@"Available arguments:
{availableFormatString}
User instructions:
{inputInstructions}
Output:
";
return TryAICompletion(systemInstructions, userMessage).Response;
}
}
}

View File

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation
// The Microsoft Corporation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Linq;
using System.Threading.Tasks;
using AdvancedPaste.AIModels.Whisper;
using Windows.Storage;
namespace AdvancedPaste.Helpers
{
public class AILocalModelsHelper
{
public Task<string> DoWhisperInference(StorageFile file)
{
return Task.Run(() =>
{
var results = Whisper.TranscribeAsync(file, 0, 0);
return string.Join("\n", results.Select(r => r.Text));
});
}
public Task<string> DoWhisperInference(StorageFile file, int startSeconds, int durationSeconds)
{
return Task.Run(() =>
{
var results = Whisper.TranscribeAsync(file, startSeconds, durationSeconds);
return string.Join("\n", results.Select(r => r.Text));
});
}
}
}

View File

@ -3,11 +3,16 @@
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Specialized;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using ManagedCommon;
using Microsoft.UI.Xaml.Media.Imaging;
using Windows.ApplicationModel.DataTransfer;
using Windows.Graphics.Imaging;
using Windows.Storage;
using Windows.Storage.Streams;
using Windows.System;
@ -15,6 +20,16 @@ namespace AdvancedPaste.Helpers
{
internal static class ClipboardHelper
{
public enum ClipboardContentFormats
{
Text,
Image,
File,
HTML,
Audio,
Invalid,
}
internal static void SetClipboardTextContent(string text)
{
Logger.LogTrace();
@ -89,6 +104,81 @@ namespace AdvancedPaste.Helpers
}
}
internal static string ConvertHTMLToPlainText(string inputHTML)
{
return System.Net.WebUtility.HtmlDecode(System.Text.RegularExpressions.Regex.Replace(inputHTML, "<.*?>", string.Empty));
}
internal static async Task<bool> SetClipboardFile(string fileName)
{
Logger.LogTrace();
if (fileName != null)
{
StorageFile storageFile = await StorageFile.GetFileFromPathAsync(fileName).AsTask();
DataPackage output = new();
output.SetStorageItems(new[] { storageFile });
Clipboard.SetContent(output);
// TODO(stefan): For some reason Flush() fails from time to time when directly activated via hotkey.
// Calling inside a loop makes it work.
for (int i = 0; i < 5; i++)
{
try
{
Clipboard.Flush();
}
catch (Exception ex)
{
Logger.LogError("Clipboard.Flush() failed", ex);
}
}
}
else
{
return false;
}
return true;
}
internal static void SetClipboardHTMLContent(string htmlContent)
{
Logger.LogTrace();
if (htmlContent != null)
{
// Set htmlContent to output
DataPackage output = new();
output.SetHtmlFormat(HtmlFormatHelper.CreateHtmlFormat(htmlContent));
// Extract plain text from HTML
string plainText = ConvertHTMLToPlainText(htmlContent);
output.SetText(plainText);
Clipboard.SetContent(output);
// TODO(stefan): For some reason Flush() fails from time to time when directly activated via hotkey.
// Calling inside a loop makes it work.
for (int i = 0; i < 5; i++)
{
try
{
Clipboard.Flush();
}
catch (Exception ex)
{
Logger.LogError("Clipboard.Flush() failed", ex);
}
}
}
else
{
Console.WriteLine("Error");
}
}
// Function to send a single key event
private static void SendSingleKeyboardInput(short keyCode, uint keyStatus)
{
@ -135,5 +225,91 @@ namespace AdvancedPaste.Helpers
Logger.LogInfo("Paste sent");
}
internal static async Task<string> GetClipboardTextContent(DataPackageView clipboardData)
{
if (clipboardData != null)
{
if (clipboardData.Contains(StandardDataFormats.Text))
{
return await Task.Run(async () =>
{
string plainText = await clipboardData.GetTextAsync() as string;
return plainText;
});
}
}
return string.Empty;
}
internal static async Task<string> GetClipboardHTMLContent(DataPackageView clipboardData)
{
if (clipboardData != null)
{
if (clipboardData.Contains(StandardDataFormats.Html))
{
return await Task.Run(async () =>
{
string htmlText = await clipboardData.GetHtmlFormatAsync() as string;
return htmlText;
});
}
}
return string.Empty;
}
internal static async Task<string> GetClipboardFileName(DataPackageView clipboardData)
{
if (clipboardData != null)
{
if (clipboardData.Contains(StandardDataFormats.StorageItems))
{
return await Task.Run(async () =>
{
var storageItems = await clipboardData.GetStorageItemsAsync();
var file = storageItems[0] as StorageFile;
return file.Path;
});
}
}
return string.Empty;
}
internal static async Task<SoftwareBitmap> GetClipboardImageContent(DataPackageView clipboardData)
{
SoftwareBitmap softwareBitmap = null;
// Check if the clipboard contains a file reference
if (clipboardData.Contains(StandardDataFormats.StorageItems))
{
var storageItems = await clipboardData.GetStorageItemsAsync();
var file = storageItems[0] as StorageFile;
if (file != null)
{
using (var stream = await file.OpenReadAsync())
{
// Get image stream and create a software bitmap
BitmapDecoder decoder = await BitmapDecoder.CreateAsync(stream);
softwareBitmap = await decoder.GetSoftwareBitmapAsync();
}
}
}
else
{
if (clipboardData.Contains(StandardDataFormats.Bitmap))
{
// If it's not a file reference, get bitmap directly
var imageStreamReference = await clipboardData.GetBitmapAsync();
var imageStream = await imageStreamReference.OpenReadAsync();
BitmapDecoder decoder = await BitmapDecoder.CreateAsync(imageStream);
softwareBitmap = await decoder.GetSoftwareBitmapAsync();
}
}
return softwareBitmap;
}
}
}

View File

@ -14,31 +14,17 @@ namespace AdvancedPaste.Helpers
{
internal static class JsonHelper
{
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
internal static string ToJsonFromXmlOrCsv(string inputText)
{
Logger.LogTrace();
if (clipboardData == null || !clipboardData.Contains(StandardDataFormats.Text))
{
Logger.LogWarning("Clipboard does not contain text data");
return string.Empty;
}
#pragma warning disable VSTHRD002 // Avoid problematic synchronous waits
string text = Task.Run(async () =>
{
string plainText = await clipboardData.GetTextAsync() as string;
return plainText;
}).Result;
#pragma warning restore VSTHRD002 // Avoid problematic synchronous waits
string jsonText = string.Empty;
// Try convert XML
try
{
XmlDocument doc = new XmlDocument();
doc.LoadXml(text);
doc.LoadXml(inputText);
jsonText = JsonConvert.SerializeXmlNode(doc, Newtonsoft.Json.Formatting.Indented);
}
catch (Exception ex)
@ -53,7 +39,7 @@ namespace AdvancedPaste.Helpers
{
var csv = new List<string[]>();
foreach (var line in text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
foreach (var line in inputText.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
{
csv.Add(line.Split(","));
}
@ -66,7 +52,7 @@ namespace AdvancedPaste.Helpers
Logger.LogError("Failed parsing input as csv", ex);
}
return string.IsNullOrEmpty(jsonText) ? text : jsonText;
return string.IsNullOrEmpty(jsonText) ? inputText : jsonText;
}
}
}

View File

@ -14,69 +14,6 @@ namespace AdvancedPaste.Helpers
{
internal static class MarkdownHelper
{
public static string ToMarkdown(DataPackageView clipboardData)
{
Logger.LogTrace();
if (clipboardData == null)
{
Logger.LogWarning("Clipboard does not contain data");
return string.Empty;
}
string data = string.Empty;
if (clipboardData.Contains(StandardDataFormats.Html))
{
data = Task.Run(async () =>
{
string data = await clipboardData.GetHtmlFormatAsync() as string;
return data;
}).Result;
}
else if (clipboardData.Contains(StandardDataFormats.Text))
{
data = Task.Run(async () =>
{
string plainText = await clipboardData.GetTextAsync() as string;
return plainText;
}).Result;
}
if (!string.IsNullOrEmpty(data))
{
string cleanedHtml = CleanHtml(data);
return ConvertHtmlToMarkdown(cleanedHtml);
}
return string.Empty;
}
public static string PasteAsPlainTextFromClipboard(DataPackageView clipboardData)
{
Logger.LogTrace();
if (clipboardData != null)
{
if (!clipboardData.Contains(StandardDataFormats.Text))
{
Logger.LogWarning("Clipboard does not contain text data");
return string.Empty;
}
return Task.Run(async () =>
{
string plainText = await clipboardData.GetTextAsync() as string;
return plainText;
}).Result;
}
return string.Empty;
}
private static string CleanHtml(string html)
{
Logger.LogTrace();
@ -158,13 +95,15 @@ namespace AdvancedPaste.Helpers
}
}
private static string ConvertHtmlToMarkdown(string html)
internal static string ConvertHtmlToMarkdown(string data)
{
Logger.LogTrace();
string cleanedHtml = CleanHtml(data);
// Perform the conversion from HTML to Markdown using your chosen library or method
var converter = new ReverseMarkdown.Converter();
string markdown = converter.Convert(html);
string markdown = converter.Convert(cleanedHtml);
return markdown;
}
}

View File

@ -13,5 +13,7 @@ namespace AdvancedPaste.Models
public string Name { get; set; }
public PasteFormats Format { get; set; }
public bool Enabled { get; set; }
}
}

View File

@ -10,5 +10,7 @@ namespace AdvancedPaste.Models
Markdown,
Json,
Custom,
AudioToText,
File,
}
}

View File

@ -165,6 +165,12 @@
<data name="PasteAsPlainText" xml:space="preserve">
<value>Paste as plain text</value>
</data>
<data name="PasteAudioToText" xml:space="preserve">
<value>Paste audio to text</value>
</data>
<data name="PasteAsFile" xml:space="preserve">
<value>Paste as file</value>
</data>
<data name="PasteButtonAutomation.[using:Microsoft.UI.Xaml.Automation]AutomationProperties.Name" xml:space="preserve">
<value>Paste</value>
</data>

View File

@ -3,10 +3,21 @@
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Collections.Specialized;
using System.Formats.Tar;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Media.Imaging;
using System.Windows.Xps.Packaging;
using AdvancedPaste.Helpers;
using AdvancedPaste.Models;
using AdvancedPaste.Settings;
@ -19,12 +30,44 @@ using Microsoft.UI.Dispatching;
using Microsoft.UI.Xaml;
using Microsoft.Win32;
using Windows.ApplicationModel.DataTransfer;
using Windows.Graphics.Imaging;
using Windows.Storage;
using Windows.Storage.Streams;
using WinUIEx;
using static AdvancedPaste.Helpers.NativeMethods;
using Application = Microsoft.UI.Xaml.Application;
using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
using BitmapEncoder = Windows.Graphics.Imaging.BitmapEncoder;
using Clipboard = Windows.ApplicationModel.DataTransfer.Clipboard;
namespace AdvancedPaste.ViewModels
{
public partial class OptionsViewModel : ObservableObject
{
internal struct SavedClipboardItem
{
public ClipboardHelper.ClipboardContentFormats Format { get; set; }
public string Text { get; set; }
public string HTML { get; set; }
public string Filename { get; set; }
public SoftwareBitmap Image { get; set; }
}
private static readonly string[] FunctionNames =
{
"ToCustomWithAI",
"RemoveBackground",
"ToJSON",
"ToPlainText",
"ToMarkdown",
"ToFile",
"AudioToText",
};
private readonly DispatcherQueue _dispatcherQueue = DispatcherQueue.GetForCurrentThread();
private App app = App.Current as App;
@ -33,13 +76,6 @@ namespace AdvancedPaste.ViewModels
private UserSettings _userSettings;
public DataPackageView ClipboardData { get; set; }
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(InputTxtBoxPlaceholderText))]
[NotifyPropertyChangedFor(nameof(IsCustomAIEnabled))]
private bool _isClipboardDataText;
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(InputTxtBoxPlaceholderText))]
private bool _isCustomAIEnabled;
@ -51,16 +87,114 @@ namespace AdvancedPaste.ViewModels
[NotifyPropertyChangedFor(nameof(InputTxtBoxErrorText))]
private int _apiRequestStatus;
[ObservableProperty]
private string _customFormatResult;
[ObservableProperty]
private bool _customFormatIsHTML;
[ObservableProperty]
private DataPackageView _clipboardContent;
[ObservableProperty]
private bool _clipboardHasText;
[ObservableProperty]
private bool _clipboardHasHtml;
[ObservableProperty]
private bool _clipboardHasImage;
[ObservableProperty]
private bool _clipboardHasFile;
[ObservableProperty]
private bool _clipboardHasAudio;
// List to store generated responses
internal ObservableCollection<SavedClipboardItem> GeneratedResponses { get; set; } = new ObservableCollection<SavedClipboardItem>();
// Index to keep track of the current response
private int _currentResponseIndex;
internal int CurrentResponseIndex
{
get => _currentResponseIndex;
set
{
if (value >= 0 && value < GeneratedResponses.Count)
{
SetProperty(ref _currentResponseIndex, value);
CustomFormatResult = GeneratedResponses[_currentResponseIndex].Text;
OnPropertyChanged(nameof(CurrentIndexDisplay));
}
}
}
public bool HasMultipleResponses
{
get => GeneratedResponses.Count > 1;
}
public string CurrentIndexDisplay => $"{CurrentResponseIndex + 1}/{GeneratedResponses.Count}";
public string InputTxtBoxPlaceholderText
{
get
{
app.GetMainWindow().ClearInputText();
if (!aiHelper.IsAIEnabled)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAINotConfigured");
}
else
{
return ResourceLoaderInstance.ResourceLoader.GetString("CustomFormatTextBox/PlaceholderText");
}
}
}
public string InputTxtBoxErrorText
{
get
{
if (ApiRequestStatus != (int)HttpStatusCode.OK)
{
if (ApiRequestStatus == (int)HttpStatusCode.TooManyRequests)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIAPIKeyTooManyRequests");
}
else if (ApiRequestStatus == (int)HttpStatusCode.Unauthorized)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIAPIKeyUnauthorized");
}
else
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIAPIKeyError") + ApiRequestStatus.ToString(CultureInfo.InvariantCulture);
}
}
return string.Empty;
}
}
private AILocalModelsHelper aiLocalModelsHelper;
public event Func<string, bool> FormatsChanged;
public event Func<bool> WindowShown;
public OptionsViewModel()
{
aiHelper = new AICompletionsHelper();
_userSettings = new UserSettings();
IsCustomAIEnabled = IsClipboardDataText && aiHelper.IsAIEnabled;
IsCustomAIEnabled = aiHelper.IsAIEnabled;
ApiRequestStatus = (int)HttpStatusCode.OK;
GeneratedResponses = new ObservableCollection<string>();
GeneratedResponses = new ObservableCollection<SavedClipboardItem>();
GeneratedResponses.CollectionChanged += (s, e) =>
{
OnPropertyChanged(nameof(HasMultipleResponses));
@ -68,13 +202,73 @@ namespace AdvancedPaste.ViewModels
};
ClipboardHistoryEnabled = IsClipboardHistoryEnabled();
GetClipboardData();
aiLocalModelsHelper = new AILocalModelsHelper();
}
public void GetClipboardData()
{
ClipboardData = Clipboard.GetContent();
IsClipboardDataText = ClipboardData.Contains(StandardDataFormats.Text);
ClipboardContent = Clipboard.GetContent();
ClipboardHasText = false;
ClipboardHasHtml = false;
ClipboardHasImage = false;
ClipboardHasFile = false;
ClipboardHasAudio = false;
if (ClipboardContent == null)
{
Logger.LogWarning("Clipboard does not contain any data");
return;
}
if (ClipboardContent.Contains(StandardDataFormats.Text))
{
ClipboardHasText = true;
}
if (ClipboardContent.Contains(StandardDataFormats.Html))
{
ClipboardHasHtml = true;
}
if (ClipboardContent.Contains(StandardDataFormats.Bitmap))
{
ClipboardHasImage = true;
}
if (ClipboardContent.Contains(StandardDataFormats.StorageItems))
{
// Get storage items and iterate through their file names to find endings
// to enable audio and image to text
ClipboardHasFile = true;
try
{
var storageItemsAwaiter = ClipboardContent.GetStorageItemsAsync();
storageItemsAwaiter.AsTask().Wait();
var storageItems = storageItemsAwaiter.GetResults();
foreach (var storageItem in storageItems)
{
if (storageItem is Windows.Storage.StorageFile file)
{
if (file.ContentType.Contains("audio") || file.Name.EndsWith("waptt", StringComparison.InvariantCulture))
{
if (file.ContentType.Contains("audio"))
{
ClipboardHasAudio = true;
}
else if (file.ContentType.Contains("image"))
{
ClipboardHasImage = true;
}
}
}
}
}
catch (Exception e)
{
Logger.LogError("Error getting storage items", e);
}
}
}
public void OnShow()
@ -99,101 +293,30 @@ namespace AdvancedPaste.ViewModels
{
app.GetMainWindow().FinishLoading(aiHelper.IsAIEnabled);
OnPropertyChanged(nameof(InputTxtBoxPlaceholderText));
IsCustomAIEnabled = IsClipboardDataText && aiHelper.IsAIEnabled;
IsCustomAIEnabled = aiHelper.IsAIEnabled;
});
},
TaskScheduler.Default);
}
else
{
IsCustomAIEnabled = IsClipboardDataText && aiHelper.IsAIEnabled;
IsCustomAIEnabled = aiHelper.IsAIEnabled;
}
ClipboardHistoryEnabled = IsClipboardHistoryEnabled();
GeneratedResponses.Clear();
WindowShown?.Invoke();
}
// List to store generated responses
public ObservableCollection<string> GeneratedResponses { get; set; } = new ObservableCollection<string>();
// Index to keep track of the current response
private int _currentResponseIndex;
public int CurrentResponseIndex
private void HideWindow()
{
get => _currentResponseIndex;
set
if (app.GetMainWindow() != null)
{
if (value >= 0 && value < GeneratedResponses.Count)
{
SetProperty(ref _currentResponseIndex, value);
CustomFormatResult = GeneratedResponses[_currentResponseIndex];
OnPropertyChanged(nameof(CurrentIndexDisplay));
}
Windows.Win32.Foundation.HWND hwnd = (Windows.Win32.Foundation.HWND)app.GetMainWindow().GetWindowHandle();
Windows.Win32.PInvoke.ShowWindow(hwnd, Windows.Win32.UI.WindowsAndMessaging.SHOW_WINDOW_CMD.SW_HIDE);
}
}
public bool HasMultipleResponses
{
get => GeneratedResponses.Count > 1;
}
public string CurrentIndexDisplay => $"{CurrentResponseIndex + 1}/{GeneratedResponses.Count}";
public string InputTxtBoxPlaceholderText
{
get
{
app.GetMainWindow().ClearInputText();
if (!aiHelper.IsAIEnabled)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAINotConfigured");
}
else if (!IsClipboardDataText)
{
return ResourceLoaderInstance.ResourceLoader.GetString("ClipboardDataTypeMismatchWarning");
}
else
{
return ResourceLoaderInstance.ResourceLoader.GetString("CustomFormatTextBox/PlaceholderText");
}
}
}
public string InputTxtBoxErrorText
{
get
{
if (ApiRequestStatus != (int)HttpStatusCode.OK)
{
if (ApiRequestStatus == (int)HttpStatusCode.TooManyRequests)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIApiKeyTooManyRequests");
}
else if (ApiRequestStatus == (int)HttpStatusCode.Unauthorized)
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIApiKeyUnauthorized");
}
else
{
return ResourceLoaderInstance.ResourceLoader.GetString("OpenAIApiKeyError") + ApiRequestStatus.ToString(CultureInfo.InvariantCulture);
}
}
return string.Empty;
}
}
[ObservableProperty]
private string _customFormatResult;
[RelayCommand]
public void PasteCustom()
{
PasteCustomFunction(GeneratedResponses[CurrentResponseIndex]);
}
// Command to select the previous custom format
[RelayCommand]
public void PreviousCustomFormat()
@ -222,6 +345,12 @@ namespace AdvancedPaste.ViewModels
(App.Current as App).GetMainWindow().Close();
}
[RelayCommand]
public void PasteCustom()
{
_ = PasteCustomFunction(GeneratedResponses[CurrentResponseIndex]);
}
private void SetClipboardContentAndHideWindow(string content)
{
if (!string.IsNullOrEmpty(content))
@ -229,20 +358,16 @@ namespace AdvancedPaste.ViewModels
ClipboardHelper.SetClipboardTextContent(content);
}
if (app.GetMainWindow() != null)
{
Windows.Win32.Foundation.HWND hwnd = (Windows.Win32.Foundation.HWND)app.GetMainWindow().GetWindowHandle();
Windows.Win32.PInvoke.ShowWindow(hwnd, Windows.Win32.UI.WindowsAndMessaging.SHOW_WINDOW_CMD.SW_HIDE);
}
HideWindow();
}
internal void ToPlainTextFunction()
internal async void ToPlainText()
{
try
{
Logger.LogTrace();
string outputString = MarkdownHelper.PasteAsPlainTextFromClipboard(ClipboardData);
string outputString = await ClipboardHelper.GetClipboardTextContent(ClipboardContent);
SetClipboardContentAndHideWindow(outputString);
@ -256,13 +381,24 @@ namespace AdvancedPaste.ViewModels
}
}
internal void ToMarkdownFunction(bool pasteAlways = false)
internal async void ToMarkdown(bool pasteAlways = false)
{
try
{
Logger.LogTrace();
string outputString = MarkdownHelper.ToMarkdown(ClipboardData);
string inputString = string.Empty;
if (ClipboardHasHtml)
{
inputString = await ClipboardHelper.GetClipboardHTMLContent(ClipboardContent);
}
else if (ClipboardHasText)
{
inputString = await ClipboardHelper.GetClipboardTextContent(ClipboardContent);
}
string outputString = ToMarkdownFunction(inputString);
SetClipboardContentAndHideWindow(outputString);
@ -276,13 +412,20 @@ namespace AdvancedPaste.ViewModels
}
}
internal void ToJsonFunction(bool pasteAlways = false)
internal string ToMarkdownFunction(string inputHTML)
{
return MarkdownHelper.ConvertHtmlToMarkdown(inputHTML);
}
internal async void ToJson(bool pasteAlways = false)
{
try
{
Logger.LogTrace();
string jsonText = JsonHelper.ToJsonFromXmlOrCsv(ClipboardData);
string inputText = await ClipboardHelper.GetClipboardTextContent(ClipboardContent);
string jsonText = ToJsonFunction(inputText);
SetClipboardContentAndHideWindow(jsonText);
@ -296,61 +439,313 @@ namespace AdvancedPaste.ViewModels
}
}
internal string ToJsonFunction(string inputString, bool pasteAlways = false)
{
return JsonHelper.ToJsonFromXmlOrCsv(inputString);
}
internal async void AudioToText()
{
try
{
Logger.LogTrace();
var fileContent = Windows.ApplicationModel.DataTransfer.Clipboard.GetContent();
string outputText = await AudioToTextFunction(fileContent);
ClipboardHelper.SetClipboardTextContent(outputText);
SetClipboardContentAndHideWindow(outputText);
if (_userSettings.SendPasteKeyCombination)
{
ClipboardHelper.SendPasteKeyCombination();
}
}
catch
{
}
}
internal async Task<string> AudioToTextFunction(DataPackageView fileContent)
{
var fileList = await fileContent.GetStorageItemsAsync();
var outputText = string.Empty;
StorageFile file = null;
if (fileList.Count > 0)
{
file = fileList[0] as StorageFile;
outputText = await aiLocalModelsHelper.DoWhisperInference(file);
return outputText;
}
else
{
// TODO: Add error handling
Console.WriteLine("Hit error");
return string.Empty;
}
}
internal async Task<string> AudioToTextFunction(string fileName, int startSeconds, int durationSeconds)
{
// Get StorageFile from fileName
var file = await StorageFile.GetFileFromPathAsync(fileName);
var outputText = await aiLocalModelsHelper.DoWhisperInference(file, startSeconds, durationSeconds);
return outputText;
}
internal async Task<string> CustomWithAIFunction(string inputInstructions, string inputContent)
{
var aiOutput = await Task.Run(() => aiHelper.AIFormatString(inputInstructions, inputContent));
return aiOutput.Response;
}
internal async void ToFile()
{
try
{
Logger.LogTrace();
// Determine the type of content in the clipboard
string fileName = null;
if (ClipboardHasText)
{
string clipboardText = await ClipboardContent.GetTextAsync();
fileName = await ToFileFunction(clipboardText);
}
else if (ClipboardHasImage)
{
SoftwareBitmap softwareBitmap = await ClipboardHelper.GetClipboardImageContent(ClipboardContent);
fileName = await ToFileFunction(softwareBitmap);
}
// Set the clipboard data
_ = await ClipboardHelper.SetClipboardFile(fileName);
HideWindow();
if (_userSettings.SendPasteKeyCombination)
{
ClipboardHelper.SendPasteKeyCombination();
}
}
catch
{
}
}
internal async Task<string> ToFileFunction(string inputContent)
{
// Create a local file in the temp directory
string tempFileName = Path.Combine(Path.GetTempPath(), "clipboard.txt");
// Write the content to the file
await File.WriteAllTextAsync(tempFileName, inputContent);
return tempFileName;
}
internal async Task<string> ToFileFunction(SoftwareBitmap softwareBitmap)
{
// Create a local file in the temp directory
string tempFileName = Path.Combine(Path.GetTempPath(), "clipboard.png");
using (var stream = new InMemoryRandomAccessStream())
{
// Encode the SoftwareBitmap to the stream
var encoder = await BitmapEncoder.CreateAsync(BitmapEncoder.PngEncoderId, stream);
encoder.SetSoftwareBitmap(softwareBitmap);
await encoder.FlushAsync();
// Set the stream position to the beginning
stream.Seek(0);
// Create a new file in the temporary directory with a .png extension
using (var fileStream = File.Create(tempFileName))
{
await stream.AsStream().CopyToAsync(fileStream);
}
}
return tempFileName;
}
internal async Task<string> GenerateCustomFunction(string inputInstructions)
{
Logger.LogTrace();
if (string.IsNullOrWhiteSpace(inputInstructions))
// Get what operations are needed from the AI
// For whatever operation is returned do that
string aiOperationsOutput = await Task.Run(() => aiHelper.GetOperationsFromAI(inputInstructions, ClipboardHasText, ClipboardHasImage, ClipboardHasHtml, ClipboardHasFile, ClipboardHasAudio));
// Define in loop variables to hold values
string currentClipboardText = await ClipboardHelper.GetClipboardTextContent(ClipboardContent);
string currentClipboardHTML = await ClipboardHelper.GetClipboardHTMLContent(ClipboardContent);
string currentFileName = await ClipboardHelper.GetClipboardFileName(ClipboardContent);
SoftwareBitmap currentClipboardImage = null;
if (ClipboardHasImage)
{
return string.Empty;
currentClipboardImage = await ClipboardHelper.GetClipboardImageContent(ClipboardContent);
}
if (ClipboardData == null || !ClipboardData.Contains(StandardDataFormats.Text))
{
Logger.LogWarning("Clipboard does not contain text data");
return string.Empty;
}
ClipboardHelper.ClipboardContentFormats returnFormat = ClipboardHelper.ClipboardContentFormats.Invalid;
string currentClipboardText = await Task.Run(async () =>
string[] lines = aiOperationsOutput.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
foreach (string line in lines)
{
try
foreach (string functionName in OptionsViewModel.FunctionNames)
{
string text = await ClipboardData.GetTextAsync() as string;
return text;
if (line.Contains(functionName + "("))
{
switch (functionName)
{
case "ToCustomWithAI":
// Get the input instructions seen after 'CustomWithAI(' using regex to account for either the " or ' character
string pattern = @"CustomWithAI\(['""](.+?)['""]";
string customInputInstructions = string.Empty;
Match match = Regex.Match(line, pattern);
if (match.Success)
{
customInputInstructions = match.Groups[1].Value;
}
string result = await CustomWithAIFunction(customInputInstructions, currentClipboardText);
currentClipboardHTML = result;
currentClipboardText = ClipboardHelper.ConvertHTMLToPlainText(currentClipboardHTML);
returnFormat = ClipboardHelper.ClipboardContentFormats.HTML;
break;
case "ToJSON":
break;
case "ToPlainText":
break;
case "ToMarkdown":
break;
case "ToFile":
if (currentClipboardText != null)
{
currentFileName = await ToFileFunction(currentClipboardText);
}
else if (currentClipboardHTML != null)
{
currentFileName = await ToFileFunction(currentClipboardHTML);
}
else if (currentClipboardImage != null)
{
currentFileName = await ToFileFunction(currentClipboardImage);
}
returnFormat = ClipboardHelper.ClipboardContentFormats.File;
break;
case "AudioToText":
// Use regex and get the input instructions after AudioToText( and split them by the comma
string audioToTextPattern = @"AudioToText\((.+?)\)";
string audioToTextFileName = string.Empty;
int seekSeconds = 0;
int maxDurationSeconds = 0;
Match audioToTextMatch = Regex.Match(line, audioToTextPattern);
if (audioToTextMatch.Success)
{
audioToTextFileName = audioToTextMatch.Groups[1].Value.Split(',')[0];
seekSeconds = int.Parse(audioToTextMatch.Groups[1].Value.Split(',')[1], CultureInfo.InvariantCulture);
maxDurationSeconds = int.Parse(audioToTextMatch.Groups[1].Value.Split(',')[2], CultureInfo.InvariantCulture);
}
currentClipboardText = await AudioToTextFunction(currentFileName, seekSeconds, maxDurationSeconds);
returnFormat = ClipboardHelper.ClipboardContentFormats.Text;
break;
default:
break;
}
break; // No need to check other function names for this line
}
}
catch (Exception)
{
// Couldn't get text from the clipboard. Resume with empty text.
}
var resultSavedClipboardItem = new SavedClipboardItem
{
Format = returnFormat,
};
// DO return logic with enum
switch (returnFormat)
{
case ClipboardHelper.ClipboardContentFormats.HTML:
resultSavedClipboardItem.HTML = currentClipboardHTML;
GeneratedResponses.Add(resultSavedClipboardItem);
CurrentResponseIndex = GeneratedResponses.Count - 1;
return currentClipboardHTML;
// Other formats not yet supported
case ClipboardHelper.ClipboardContentFormats.Image:
return "Image not implemented";
case ClipboardHelper.ClipboardContentFormats.File:
resultSavedClipboardItem.Filename = currentFileName;
GeneratedResponses.Add(resultSavedClipboardItem);
CurrentResponseIndex = GeneratedResponses.Count - 1;
return "Paste as file.";
case ClipboardHelper.ClipboardContentFormats.Audio:
return "Audio not implemented";
case ClipboardHelper.ClipboardContentFormats.Text:
resultSavedClipboardItem.Text = currentClipboardText;
GeneratedResponses.Add(resultSavedClipboardItem);
CurrentResponseIndex = GeneratedResponses.Count - 1;
return currentClipboardText;
default:
return string.Empty;
}
});
if (string.IsNullOrWhiteSpace(currentClipboardText))
{
Logger.LogWarning("Clipboard has no usable text data");
return string.Empty;
}
var aiResponse = await Task.Run(() => aiHelper.AIFormatString(inputInstructions, currentClipboardText));
string aiOutput = aiResponse.Response;
ApiRequestStatus = aiResponse.ApiRequestStatus;
GeneratedResponses.Add(aiOutput);
CurrentResponseIndex = GeneratedResponses.Count - 1;
return aiOutput;
}
internal void PasteCustomFunction(string text)
internal async Task<bool> PasteCustomFunction(SavedClipboardItem inItem)
{
Logger.LogTrace();
SetClipboardContentAndHideWindow(text);
if (_userSettings.SendPasteKeyCombination)
try
{
ClipboardHelper.SendPasteKeyCombination();
Logger.LogTrace();
switch (inItem.Format)
{
case ClipboardHelper.ClipboardContentFormats.HTML:
ClipboardHelper.SetClipboardHTMLContent(inItem.HTML);
break;
case ClipboardHelper.ClipboardContentFormats.Image:
break;
case ClipboardHelper.ClipboardContentFormats.File:
await ClipboardHelper.SetClipboardFile(inItem.Filename);
break;
case ClipboardHelper.ClipboardContentFormats.Audio:
break;
case ClipboardHelper.ClipboardContentFormats.Text:
ClipboardHelper.SetClipboardTextContent(inItem.Text);
break;
default:
break;
}
HideWindow();
if (_userSettings.SendPasteKeyCombination)
{
ClipboardHelper.SendPasteKeyCombination();
}
}
catch
{
}
return true;
}
internal CustomQuery RecallPreviousCustomQuery()
@ -406,5 +801,11 @@ namespace AdvancedPaste.ViewModels
return false;
}
}
internal void FilterOptionsFromInput(string input)
{
// Generate event
FormatsChanged?.Invoke(input);
}
}
}

View File

@ -89,7 +89,7 @@
<controls:ShortcutControl MinWidth="{StaticResource SettingActionControlMinWidth}" HotkeySettings="{x:Bind Path=ViewModel.AdvancedPasteUIShortcut, Mode=TwoWay}" />
<tkcontrols:SettingsExpander.Items>
<tkcontrols:SettingsCard Visibility="Collapsed">
<!-- There's a bug that makes it so that the first shortcut control inside an expander doesn't work. We add this dummy one so the other entries aren't affected. -->
<!-- There's a bug that makes it so that the first shortcut control inside an expander doesn't work. We add this dummy one so the other entries aren't affected. -->
<TextBox />
</tkcontrols:SettingsCard>
<tkcontrols:SettingsCard x:Uid="PasteAsPlainText_Shortcut">