From 7f5a7625209c7a97aade4e681ee397d57f9f6638 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Fri, 12 Apr 2024 17:46:41 +0200 Subject: [PATCH 01/11] WIP: Implement new streaming ask endpoint --- clients/dotnet/WebClient/MemoryWebClient.cs | 47 ++++++++ service/Abstractions/Constants.cs | 1 + service/Abstractions/IKernelMemory.cs | 18 +++ service/Abstractions/Search/ISearchClient.cs | 16 +++ service/Core/MemoryServerless.cs | 25 ++++ service/Core/MemoryService.cs | 25 ++++ service/Core/Search/SearchClient.cs | 116 ++++++++++++++++++- service/Service/WebAPIEndpoints.cs | 25 ++++ 8 files changed, 272 insertions(+), 1 deletion(-) diff --git a/clients/dotnet/WebClient/MemoryWebClient.cs b/clients/dotnet/WebClient/MemoryWebClient.cs index 730968374..8c4cdcea7 100644 --- a/clients/dotnet/WebClient/MemoryWebClient.cs +++ b/clients/dotnet/WebClient/MemoryWebClient.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Net; using System.Net.Http; +using System.Runtime.CompilerServices; using System.Text; using System.Text.Json; using System.Threading; @@ -299,6 +300,52 @@ public async Task AskAsync( return JsonSerializer.Deserialize(json, s_caseInsensitiveJsonOptions) ?? new MemoryAnswer(); } + /// + public async IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + MemoryQuery request = new() + { + Index = index, + Question = question, + Filters = (filters is { Count: > 0 }) ? filters.ToList() : new(), + MinRelevance = minRelevance + }; + using StringContent content = new(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"); + + using var httpRequest = new HttpRequestMessage(HttpMethod.Post, Constants.HttpAskStreamEndpoint); + httpRequest.Content = content; + + HttpResponseMessage response = await this._client.SendAsync(httpRequest, cancellationToken).ConfigureAwait(false); + response.EnsureSuccessStatusCode(); + + using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false); + using var reader = new StreamReader(stream, Encoding.UTF8); + + while (!reader.EndOfStream) + { + var line = await reader.ReadLineAsync().ConfigureAwait(false); + if (line is null || line.Length <= 0) + { + continue; + } + + yield return line; + } + } + #region private /// Document ID diff --git a/service/Abstractions/Constants.cs b/service/Abstractions/Constants.cs index 5dfb4ad2e..6ddcc0bb2 100644 --- a/service/Abstractions/Constants.cs +++ b/service/Abstractions/Constants.cs @@ -49,6 +49,7 @@ public static class Constants // Endpoints public const string HttpAskEndpoint = "/ask"; + public const string HttpAskStreamEndpoint = "/ask/stream"; public const string HttpSearchEndpoint = "/search"; public const string HttpUploadEndpoint = "/upload"; public const string HttpUploadStatusEndpoint = "/upload-status"; diff --git a/service/Abstractions/IKernelMemory.cs b/service/Abstractions/IKernelMemory.cs index a2b0c8d7e..a7e83fee6 100644 --- a/service/Abstractions/IKernelMemory.cs +++ b/service/Abstractions/IKernelMemory.cs @@ -197,4 +197,22 @@ public Task AskAsync( ICollection? filters = null, double minRelevance = 0, CancellationToken cancellationToken = default); + + /// + /// Search the given index for an answer to the given query. + /// + /// Question to answer + /// Optional index name + /// Filter to match + /// Filters to match (using inclusive OR logic). If 'filter' is provided too, the value is merged into this list. + /// Minimum Cosine Similarity required + /// Async task cancellation token + /// A stream that contains an answer to the query, or an empty list + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default); } diff --git a/service/Abstractions/Search/ISearchClient.cs b/service/Abstractions/Search/ISearchClient.cs index 9925ba052..b6d76e1d7 100644 --- a/service/Abstractions/Search/ISearchClient.cs +++ b/service/Abstractions/Search/ISearchClient.cs @@ -45,6 +45,22 @@ Task AskAsync( double minRelevance = 0, CancellationToken cancellationToken = default); + /// + /// Answer the given question, if possible. + /// + /// Index (aka collection) to search for grounding information + /// Question to answer + /// Filtering criteria to select memories to consider + /// Minimum relevance of the memories considered + /// Async task cancellation token + /// Answer to the given question + IAsyncEnumerable AskStreamingAsync( + string index, + string question, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default); + /// /// List the available memory indexes (aka collections). /// diff --git a/service/Core/MemoryServerless.cs b/service/Core/MemoryServerless.cs index 4d90aaab2..faee00c02 100644 --- a/service/Core/MemoryServerless.cs +++ b/service/Core/MemoryServerless.cs @@ -243,4 +243,29 @@ public Task AskAsync( minRelevance: minRelevance, cancellationToken: cancellationToken); } + + /// + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + index = IndexName.CleanName(index, this._defaultIndexName); + return this._searchClient.AskStreamingAsync( + index: index, + question: question, + filters: filters, + minRelevance: minRelevance, + cancellationToken: cancellationToken); + } } diff --git a/service/Core/MemoryService.cs b/service/Core/MemoryService.cs index b7267c060..c8ba0cdd1 100644 --- a/service/Core/MemoryService.cs +++ b/service/Core/MemoryService.cs @@ -220,4 +220,29 @@ public Task AskAsync( minRelevance: minRelevance, cancellationToken: cancellationToken); } + + /// + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + index = IndexName.CleanName(index, this._defaultIndexName); + return this._searchClient.AskStreamingAsync( + index: index, + question: question, + filters: filters, + minRelevance: minRelevance, + cancellationToken: cancellationToken); + } } diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 29d114523..7bde4b081 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -331,7 +332,120 @@ public async Task AskAsync( return answer; } - private IAsyncEnumerable GenerateAnswerAsync(string question, string facts) + /// + public async IAsyncEnumerable AskStreamingAsync( + string index, + string question, + ICollection? filters = null, + double minRelevance = 0, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (string.IsNullOrEmpty(question)) + { + this._log.LogWarning("No question provided"); + yield return this._config.EmptyAnswer; + yield break; + } + + var facts = new StringBuilder(); + var maxTokens = this._config.MaxAskPromptSize > 0 + ? this._config.MaxAskPromptSize + : this._textGenerator.MaxTokenTotal; + var tokensAvailable = maxTokens + - this._textGenerator.CountTokens(this._answerPrompt) + - this._textGenerator.CountTokens(question) + - this._config.AnswerTokens; + + var factsUsedCount = 0; + var factsAvailableCount = 0; + + this._log.LogTrace("Fetching relevant memories"); + IAsyncEnumerable<(MemoryRecord, double)> matches = this._memoryDb.GetSimilarListAsync( + index: index, + text: question, + filters: filters, + minRelevance: minRelevance, + limit: this._config.MaxMatchesCount, + withEmbeddings: false, + cancellationToken: cancellationToken); + + // Memories are sorted by relevance, starting from the most relevant + await foreach ((MemoryRecord memory, double relevance) in matches.ConfigureAwait(false)) + { + string fileName = memory.GetFileName(this._log); + + var partitionText = memory.GetPartitionText(this._log).Trim(); + if (string.IsNullOrEmpty(partitionText)) + { + this._log.LogError("The document partition is empty, doc: {0}", memory.Id); + continue; + } + + factsAvailableCount++; + + // TODO: add file age in days, to push relevance of newer documents + var fact = $"==== [File:{fileName};Relevance:{relevance:P1}]:\n{partitionText}\n"; + + // Use the partition/chunk only if there's room for it + var size = this._textGenerator.CountTokens(fact); + if (size >= tokensAvailable) + { + // Stop after reaching the max number of tokens + break; + } + + factsUsedCount++; + this._log.LogTrace("Adding text {0} with relevance {1}", factsUsedCount, relevance); + + facts.Append(fact); + tokensAvailable -= size; + } + + if (factsAvailableCount > 0 && factsUsedCount == 0) + { + this._log.LogError("Unable to inject memories in the prompt, not enough tokens available"); + yield return this._config.EmptyAnswer; + yield break; + } + + if (factsUsedCount == 0) + { + this._log.LogWarning("No memories available"); + yield return this._config.EmptyAnswer; + yield break; + } + + var charsGenerated = 0; + var watch = Stopwatch.StartNew(); + await foreach (var x in this.GenerateAnswerAsync(question, facts.ToString()) + .WithCancellation(cancellationToken).ConfigureAwait(false)) + { + if (x is null || x.Length == 0) + { + continue; + } + + if (charsGenerated == 0 && ValueIsEquivalentTo(x, this._config.EmptyAnswer)) + { + this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); + yield return this._config.EmptyAnswer; + yield break; + } + + charsGenerated += x.Length; + yield return x; + + if (this._log.IsEnabled(LogLevel.Trace) && charsGenerated >= 30) + { + this._log.LogTrace("{0} chars generated", charsGenerated); + } + } + + watch.Stop(); + this._log.LogTrace("Answer generated in {0} msecs", watch.ElapsedMilliseconds); + } + + private IAsyncEnumerable GenerateAnswerAsync(string question, string facts) { var prompt = this._answerPrompt; prompt = prompt.Replace("{{$facts}}", facts.Trim(), StringComparison.OrdinalIgnoreCase); diff --git a/service/Service/WebAPIEndpoints.cs b/service/Service/WebAPIEndpoints.cs index 2a0f74df0..180da030c 100644 --- a/service/Service/WebAPIEndpoints.cs +++ b/service/Service/WebAPIEndpoints.cs @@ -31,6 +31,7 @@ public static void ConfigureMinimalAPI(this WebApplication app, KernelMemoryConf app.UseDeleteIndexesEndpoint(authFilter); app.UseDeleteDocumentsEndpoint(authFilter); app.UseAskEndpoint(authFilter); + app.UseAskStreamEndpoint(authFilter); app.UseSearchEndpoint(authFilter); app.UseUploadStatusEndpoint(authFilter); } @@ -223,6 +224,30 @@ async Task ( if (authFilter != null) { route.AddEndpointFilter(authFilter); } } + public static void UseAskStreamEndpoint(this IEndpointRouteBuilder app, IEndpointFilter? authFilter = null) + { + // Ask streaming endpoint + var route = app.MapPost(Constants.HttpAskStreamEndpoint, IAsyncEnumerable ( + MemoryQuery query, + IKernelMemory service, + ILogger log, + CancellationToken cancellationToken) => + { + log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + return service.AskStreamingAsync( + question: query.Question, + index: query.Index, + filters: query.Filters, + minRelevance: query.MinRelevance, + cancellationToken: cancellationToken); + }) + .Produces>(StatusCodes.Status200OK) + .Produces(StatusCodes.Status401Unauthorized) + .Produces(StatusCodes.Status403Forbidden); + + if (authFilter != null) { route.AddEndpointFilter(authFilter); } + } + public static void UseSearchEndpoint(this IEndpointRouteBuilder app, IEndpointFilter? authFilter = null) { // Search endpoint From 69b022fb6358405722e199a54be1ddb13357566d Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 16:14:01 +0200 Subject: [PATCH 02/11] Remove useless charsGenerated checking --- service/Core/Search/SearchClient.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 7bde4b081..fa6a6e62f 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -415,7 +415,7 @@ public async IAsyncEnumerable AskStreamingAsync( yield break; } - var charsGenerated = 0; + StringBuilder bufferedAnswer = new(); var watch = Stopwatch.StartNew(); await foreach (var x in this.GenerateAnswerAsync(question, facts.ToString()) .WithCancellation(cancellationToken).ConfigureAwait(false)) @@ -425,19 +425,19 @@ public async IAsyncEnumerable AskStreamingAsync( continue; } - if (charsGenerated == 0 && ValueIsEquivalentTo(x, this._config.EmptyAnswer)) + bufferedAnswer.Append(x); + yield return x; + + int currentLength = bufferedAnswer.Length; + if (currentLength <= this._config.EmptyAnswer.Length && ValueIsEquivalentTo(bufferedAnswer.ToString(), this._config.EmptyAnswer)) { this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); - yield return this._config.EmptyAnswer; - yield break; + break; } - charsGenerated += x.Length; - yield return x; - - if (this._log.IsEnabled(LogLevel.Trace) && charsGenerated >= 30) + if (this._log.IsEnabled(LogLevel.Trace) && currentLength >= 30) { - this._log.LogTrace("{0} chars generated", charsGenerated); + this._log.LogTrace("{0} chars generated", currentLength); } } From f10b59ca1e28a58c4b3a79fee822473b335ddaf8 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 16:16:28 +0200 Subject: [PATCH 03/11] replace break with yield break --- service/Core/Search/SearchClient.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index fa6a6e62f..7fcc89a29 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -432,7 +432,7 @@ public async IAsyncEnumerable AskStreamingAsync( if (currentLength <= this._config.EmptyAnswer.Length && ValueIsEquivalentTo(bufferedAnswer.ToString(), this._config.EmptyAnswer)) { this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); - break; + yield break; } if (this._log.IsEnabled(LogLevel.Trace) && currentLength >= 30) From 123e3e2ff4dd75b1073386c756635b27c4d8250f Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 18:34:03 +0200 Subject: [PATCH 04/11] sync --- service/Service.AspNetCore/WebAPIEndpoints.cs | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index 9831783bb..71f65131d 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -219,31 +219,34 @@ async Task ( if (authFilter != null) { route.AddEndpointFilter(authFilter); } } - - public static void UseAskStreamEndpoint(this IEndpointRouteBuilder app, IEndpointFilter? authFilter = null) + public static void AddAskStreamEndpoint( + this IEndpointRouteBuilder builder, string apiPrefix = "/", IEndpointFilter? authFilter = null) { - // Ask streaming endpoint - var route = app.MapPost(Constants.HttpAskStreamEndpoint, IAsyncEnumerable ( - MemoryQuery query, - IKernelMemory service, - ILogger log, - CancellationToken cancellationToken) => - { - log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); - return service.AskStreamingAsync( - question: query.Question, - index: query.Index, - filters: query.Filters, - minRelevance: query.MinRelevance, - cancellationToken: cancellationToken); - }) + RouteGroupBuilder group = builder.MapGroup(apiPrefix); + + // Ask endpoint + var route = group.MapPost(Constants.HttpAskStreamEndpoint, + async IAsyncEnumerable ( + MemoryQuery query, + IKernelMemory service, + ILogger log, + CancellationToken cancellationToken) => + { + log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + return service.AskStreamingAsync( + question: query.Question, + index: query.Index, + filters: query.Filters, + minRelevance: query.MinRelevance, + cancellationToken: cancellationToken); + }) .Produces>(StatusCodes.Status200OK) .Produces(StatusCodes.Status401Unauthorized) .Produces(StatusCodes.Status403Forbidden); if (authFilter != null) { route.AddEndpointFilter(authFilter); } } - + public static void AddSearchEndpoint( this IEndpointRouteBuilder builder, string apiPrefix = "/", IEndpointFilter? authFilter = null) { From f4f046cfb34e3438b0425d0e00e27af076c9f499 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 18:34:19 +0200 Subject: [PATCH 05/11] change comment --- service/Service.AspNetCore/WebAPIEndpoints.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index 71f65131d..10c11a212 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -224,7 +224,7 @@ public static void AddAskStreamEndpoint( { RouteGroupBuilder group = builder.MapGroup(apiPrefix); - // Ask endpoint + // Ask streaming endpoint var route = group.MapPost(Constants.HttpAskStreamEndpoint, async IAsyncEnumerable ( MemoryQuery query, From 0d9e47e2ba2efbdf497448c5a18eb999ab2b8266 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 20:50:42 +0200 Subject: [PATCH 06/11] Fix buffering in WebClient --- clients/dotnet/WebClient/MemoryWebClient.cs | 12 ++++---- examples/001-dotnet-WebClient/Program.cs | 30 +++++++++++++++++++ service/Service.AspNetCore/WebAPIEndpoints.cs | 30 +++++++++---------- 3 files changed, 51 insertions(+), 21 deletions(-) diff --git a/clients/dotnet/WebClient/MemoryWebClient.cs b/clients/dotnet/WebClient/MemoryWebClient.cs index 8c4cdcea7..87b9e1d08 100644 --- a/clients/dotnet/WebClient/MemoryWebClient.cs +++ b/clients/dotnet/WebClient/MemoryWebClient.cs @@ -328,21 +328,21 @@ public async IAsyncEnumerable AskStreamingAsync( using var httpRequest = new HttpRequestMessage(HttpMethod.Post, Constants.HttpAskStreamEndpoint); httpRequest.Content = content; - HttpResponseMessage response = await this._client.SendAsync(httpRequest, cancellationToken).ConfigureAwait(false); + using HttpResponseMessage response = await this._client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); response.EnsureSuccessStatusCode(); using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false); - using var reader = new StreamReader(stream, Encoding.UTF8); + using var reader = new StreamReader(stream); while (!reader.EndOfStream) { - var line = await reader.ReadLineAsync().ConfigureAwait(false); - if (line is null || line.Length <= 0) + var character = reader.Read(); + if (character == -1) { - continue; + break; } - yield return line; + yield return ((char)character).ToString(); } } diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs index 5ca0332a0..80010fd6a 100644 --- a/examples/001-dotnet-WebClient/Program.cs +++ b/examples/001-dotnet-WebClient/Program.cs @@ -59,6 +59,7 @@ public static async Task Main() // ======================= await AskSimpleQuestion(); + await AskSimpleQuestionWithStreaming(); await AskSimpleQuestionAndShowSources(); await AskQuestionAboutImageContent(); await AskQuestionUsingFilter(); @@ -258,6 +259,35 @@ due to the speed of light being a very large number when squared. This concept i */ } + // Question without filters + private static async Task AskSimpleQuestionWithStreaming() + { + var question = "Any news from NASA about Orion?"; + Console.WriteLine($"Question: {question}"); + var answer = s_memory.AskStreamingAsync(question, filter: MemoryFilters.ByTag("user", "Taylor")); + + Console.WriteLine("\nAnswer:\n"); + await foreach (var answerPart in answer) + { + Console.Write(answerPart); + } + + Console.WriteLine("\n====================================\n"); + + /* OUTPUT + + Question: Any news from NASA about Orion? + + Answer: Yes, NASA has invited media to see the new test version of the Orion spacecraft and the hardware teams will use to recover the capsule and astronauts upon their return from space during the Artemis II mission. + The event will take place at Naval Base San Diego on August 2. + Personnel involved in recovery operations from NASA, the U.S. Navy, and the U.S. Air Force will be available to speak with media. + Teams are currently conducting tests in the Pacific Ocean to demonstrate and evaluate the processes, procedures, and hardware for recovery operations for crewed Artemis missions. + The tests will help prepare the team for Artemis II, NASA's first crewed mission under Artemis that will send four astronauts in Orion around the Moon to checkout systems ahead of future lunar missions. + The Artemis II crew will participate in recovery testing at sea next year. + + */ + } + // Another question without filters and show sources private static async Task AskSimpleQuestionAndShowSources() { diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index 10c11a212..a153ae958 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -219,27 +219,27 @@ async Task ( if (authFilter != null) { route.AddEndpointFilter(authFilter); } } + public static void AddAskStreamEndpoint( this IEndpointRouteBuilder builder, string apiPrefix = "/", IEndpointFilter? authFilter = null) { RouteGroupBuilder group = builder.MapGroup(apiPrefix); // Ask streaming endpoint - var route = group.MapPost(Constants.HttpAskStreamEndpoint, - async IAsyncEnumerable ( - MemoryQuery query, - IKernelMemory service, - ILogger log, - CancellationToken cancellationToken) => - { - log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); - return service.AskStreamingAsync( - question: query.Question, - index: query.Index, - filters: query.Filters, - minRelevance: query.MinRelevance, - cancellationToken: cancellationToken); - }) + var route = group.MapPost(Constants.HttpAskStreamEndpoint, IAsyncEnumerable ( + MemoryQuery query, + IKernelMemory service, + ILogger log, + CancellationToken cancellationToken) => + { + log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + return service.AskStreamingAsync( + question: query.Question, + index: query.Index, + filters: query.Filters, + minRelevance: query.MinRelevance, + cancellationToken: cancellationToken); + }) .Produces>(StatusCodes.Status200OK) .Produces(StatusCodes.Status401Unauthorized) .Produces(StatusCodes.Status403Forbidden); From 038f97115097c606cb724a525252b0cc93fda1da Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Wed, 24 Apr 2024 22:43:28 +0200 Subject: [PATCH 07/11] Add System.Net.Http.Json and use ReadFromJsonAsAsyncEnumerable --- Directory.Packages.props | 1 + clients/dotnet/WebClient/MemoryWebClient.cs | 13 +++++-------- clients/dotnet/WebClient/WebClient.csproj | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index bf1895ce2..6aaf554a2 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -30,6 +30,7 @@ + diff --git a/clients/dotnet/WebClient/MemoryWebClient.cs b/clients/dotnet/WebClient/MemoryWebClient.cs index 87b9e1d08..9c006c335 100644 --- a/clients/dotnet/WebClient/MemoryWebClient.cs +++ b/clients/dotnet/WebClient/MemoryWebClient.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Net; using System.Net.Http; +using System.Net.Http.Json; using System.Runtime.CompilerServices; using System.Text; using System.Text.Json; @@ -331,18 +332,14 @@ public async IAsyncEnumerable AskStreamingAsync( using HttpResponseMessage response = await this._client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); response.EnsureSuccessStatusCode(); - using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false); - using var reader = new StreamReader(stream); - - while (!reader.EndOfStream) + await foreach (var responsePart in response.Content.ReadFromJsonAsAsyncEnumerable(cancellationToken)) { - var character = reader.Read(); - if (character == -1) + if (responsePart is null || responsePart.Length == 0) { - break; + continue; } - yield return ((char)character).ToString(); + yield return responsePart; } } diff --git a/clients/dotnet/WebClient/WebClient.csproj b/clients/dotnet/WebClient/WebClient.csproj index b69d40942..c3f0655d2 100644 --- a/clients/dotnet/WebClient/WebClient.csproj +++ b/clients/dotnet/WebClient/WebClient.csproj @@ -9,6 +9,7 @@ + From c21f78ef80b1dfcbc1025558a478be039fc63622 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Fri, 17 May 2024 15:05:28 +0200 Subject: [PATCH 08/11] Refactor SearchClient.cs to return MemoryAnswer in stead of string --- Directory.Packages.props | 1 - clients/dotnet/WebClient/MemoryWebClient.cs | 6 +- clients/dotnet/WebClient/WebClient.csproj | 1 - examples/001-dotnet-WebClient/Program.cs | 30 ++- examples/002-dotnet-Serverless/Program.cs | 50 +++++ service/Abstractions/IKernelMemory.cs | 2 +- service/Abstractions/Models/MemoryAnswer.cs | 6 +- service/Abstractions/Search/ISearchClient.cs | 5 +- service/Core/MemoryServerless.cs | 2 +- service/Core/MemoryService.cs | 2 +- service/Core/Search/SearchClient.cs | 205 ++++++++++-------- service/Service.AspNetCore/WebAPIEndpoints.cs | 23 +- 12 files changed, 217 insertions(+), 116 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 4f19344ec..fc6097e42 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -45,7 +45,6 @@ - diff --git a/clients/dotnet/WebClient/MemoryWebClient.cs b/clients/dotnet/WebClient/MemoryWebClient.cs index 27e314252..b0df6c5d2 100644 --- a/clients/dotnet/WebClient/MemoryWebClient.cs +++ b/clients/dotnet/WebClient/MemoryWebClient.cs @@ -361,7 +361,7 @@ public async Task AskAsync( } /// - public async IAsyncEnumerable AskStreamingAsync( + public async IAsyncEnumerable AskStreamingAsync( string question, string? index = null, MemoryFilter? filter = null, @@ -391,9 +391,9 @@ public async IAsyncEnumerable AskStreamingAsync( using HttpResponseMessage response = await this._client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); response.EnsureSuccessStatusCode(); - await foreach (var responsePart in response.Content.ReadFromJsonAsAsyncEnumerable(cancellationToken)) + await foreach (var responsePart in response.Content.ReadFromJsonAsAsyncEnumerable(cancellationToken)) { - if (responsePart is null || responsePart.Length == 0) + if (responsePart is null) { continue; } diff --git a/clients/dotnet/WebClient/WebClient.csproj b/clients/dotnet/WebClient/WebClient.csproj index 5722be483..64008c999 100644 --- a/clients/dotnet/WebClient/WebClient.csproj +++ b/clients/dotnet/WebClient/WebClient.csproj @@ -10,7 +10,6 @@ - diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs index 46fdd7d53..511b05cc3 100644 --- a/examples/001-dotnet-WebClient/Program.cs +++ b/examples/001-dotnet-WebClient/Program.cs @@ -58,7 +58,7 @@ public static async Task Main() // ======================= await AskSimpleQuestion(); - await AskSimpleQuestionWithStreaming(); + await AskSimpleQuestionWithStreamingAndShowSources(); await AskSimpleQuestionAndShowSources(); await AskQuestionAboutImageContent(); await AskQuestionUsingFilter(); @@ -276,17 +276,32 @@ due to the speed of light being a very large number when squared. This concept i */ } - // Question without filters - private static async Task AskSimpleQuestionWithStreaming() + private static async Task AskSimpleQuestionWithStreamingAndShowSources() { var question = "Any news from NASA about Orion?"; Console.WriteLine($"Question: {question}"); var answer = s_memory.AskStreamingAsync(question, filter: MemoryFilters.ByTag("user", "Taylor")); - Console.WriteLine("\nAnswer:\n"); + + List? citations = []; + bool isFirstPart = true; await foreach (var answerPart in answer) { - Console.Write(answerPart); + if (isFirstPart) + { + citations = answerPart.RelevantSources; + isFirstPart = false; + } + + Console.Write(answerPart.Result); + } + + Console.WriteLine("\n\nSources:\n"); + foreach (var x in citations) + { + Console.WriteLine(x.SourceUrl != null + ? $" - {x.SourceUrl} [{x.Partitions.First().LastUpdate:D}]" + : $" - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]"); } Console.WriteLine("\n====================================\n"); @@ -295,13 +310,16 @@ private static async Task AskSimpleQuestionWithStreaming() Question: Any news from NASA about Orion? - Answer: Yes, NASA has invited media to see the new test version of the Orion spacecraft and the hardware teams will use to recover the capsule and astronauts upon their return from space during the Artemis II mission. + Answer: + Yes, NASA has invited media to see the new test version of the Orion spacecraft and the hardware teams will use to recover the capsule and astronauts upon their return from space during the Artemis II mission. The event will take place at Naval Base San Diego on August 2. Personnel involved in recovery operations from NASA, the U.S. Navy, and the U.S. Air Force will be available to speak with media. Teams are currently conducting tests in the Pacific Ocean to demonstrate and evaluate the processes, procedures, and hardware for recovery operations for crewed Artemis missions. The tests will help prepare the team for Artemis II, NASA's first crewed mission under Artemis that will send four astronauts in Orion around the Moon to checkout systems ahead of future lunar missions. The Artemis II crew will participate in recovery testing at sea next year. + Sources: + - /download?index=default&documentId=doc003&filename=file5-NASA-news.pdf [Friday, 17 May 2024] */ } diff --git a/examples/002-dotnet-Serverless/Program.cs b/examples/002-dotnet-Serverless/Program.cs index af0e8b01d..dae989b2c 100644 --- a/examples/002-dotnet-Serverless/Program.cs +++ b/examples/002-dotnet-Serverless/Program.cs @@ -86,6 +86,7 @@ public static async Task Main() // ======================= await AskSimpleQuestion(); + await AskSimpleQuestionWithStreamingAndShowSources(); await AskSimpleQuestionAndShowSources(); await AskQuestionAboutImageContent(); await AskQuestionUsingFilter(); @@ -305,6 +306,55 @@ due to the speed of light being a very large number when squared. This concept i */ } + // Question without filters and show sources, with streaming + private static async Task AskSimpleQuestionWithStreamingAndShowSources() + { + var question = "What's E = m*c^2?"; + Console.WriteLine($"Question: {question}"); + + var answer = s_memory.AskStreamingAsync(question, minRelevance: 0.76); + Console.WriteLine("\nAnswer:\n"); + + List? citations = []; + bool isFirstPart = true; + await foreach (var answerPart in answer) + { + if (isFirstPart) + { + citations = answerPart.RelevantSources; + isFirstPart = false; + } + + Console.Write(answerPart.Result); + } + + Console.WriteLine("\n\nSources:\n"); + foreach (var x in citations) + { + Console.WriteLine(x.SourceUrl != null + ? $" - {x.SourceUrl} [{x.Partitions.First().LastUpdate:D}]" + : $" - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]"); + } + + Console.WriteLine("\n====================================\n"); + + /* OUTPUT + + Question: What's E = m*c^2? + + Answer: E = m*c^2 is the formula representing the principle of mass-energy equivalence, which was introduced by Albert Einstein. In this equation, + E stands for energy, m represents mass, and c is the speed of light in a vacuum, which is approximately 299,792,458 meters per second (m/s). + The equation states that the energy (E) of a system in its rest frame is equal to its mass (m) multiplied by the square of the speed of light (c^2). + This implies that mass and energy are interchangeable; a small amount of mass can be converted into a large amount of energy and vice versa, + due to the speed of light being a very large number when squared. This concept is a fundamental principle in physics and has important implications + in various fields, including nuclear physics and cosmology. + + Sources: + - /download?index=default&documentId=doc003&filename=file5-NASA-news.pdf [Friday, 17 May 2024] + + */ + } + // Another question without filters and show sources private static async Task AskSimpleQuestionAndShowSources() { diff --git a/service/Abstractions/IKernelMemory.cs b/service/Abstractions/IKernelMemory.cs index b6355cabb..21fa59bc0 100644 --- a/service/Abstractions/IKernelMemory.cs +++ b/service/Abstractions/IKernelMemory.cs @@ -222,7 +222,7 @@ public Task AskAsync( /// Minimum Cosine Similarity required /// Async task cancellation token /// A stream that contains an answer to the query, or an empty list - public IAsyncEnumerable AskStreamingAsync( + public IAsyncEnumerable AskStreamingAsync( string question, string? index = null, MemoryFilter? filter = null, diff --git a/service/Abstractions/Models/MemoryAnswer.cs b/service/Abstractions/Models/MemoryAnswer.cs index 5033ade63..b689e8ad1 100644 --- a/service/Abstractions/Models/MemoryAnswer.cs +++ b/service/Abstractions/Models/MemoryAnswer.cs @@ -17,11 +17,11 @@ public class MemoryAnswer /// [JsonPropertyName("question")] [JsonPropertyOrder(1)] - public string Question { get; set; } = string.Empty; + public string? Question { get; set; } = string.Empty; [JsonPropertyName("noResult")] [JsonPropertyOrder(2)] - public bool NoResult { get; set; } = true; + public bool? NoResult { get; set; } = true; /// /// Content of the answer. @@ -45,7 +45,7 @@ public class MemoryAnswer /// [JsonPropertyName("relevantSources")] [JsonPropertyOrder(20)] - public List RelevantSources { get; set; } = new(); + public List? RelevantSources { get; set; } = new(); /// /// Serialize using .NET JSON serializer, e.g. to avoid ambiguity diff --git a/service/Abstractions/Search/ISearchClient.cs b/service/Abstractions/Search/ISearchClient.cs index b6d76e1d7..33c5724bd 100644 --- a/service/Abstractions/Search/ISearchClient.cs +++ b/service/Abstractions/Search/ISearchClient.cs @@ -46,7 +46,8 @@ Task AskAsync( CancellationToken cancellationToken = default); /// - /// Answer the given question, if possible. + /// Answer the given question, if possible, grounding the response with relevant memories matching the given criteria. + /// First result in the stream contains metadata about the result, subsequent results only contain answer tokens. /// /// Index (aka collection) to search for grounding information /// Question to answer @@ -54,7 +55,7 @@ Task AskAsync( /// Minimum relevance of the memories considered /// Async task cancellation token /// Answer to the given question - IAsyncEnumerable AskStreamingAsync( + IAsyncEnumerable AskStreamingAsync( string index, string question, ICollection? filters = null, diff --git a/service/Core/MemoryServerless.cs b/service/Core/MemoryServerless.cs index 14c2b632b..ccb8d6640 100644 --- a/service/Core/MemoryServerless.cs +++ b/service/Core/MemoryServerless.cs @@ -259,7 +259,7 @@ public Task AskAsync( } /// - public IAsyncEnumerable AskStreamingAsync( + public IAsyncEnumerable AskStreamingAsync( string question, string? index = null, MemoryFilter? filter = null, diff --git a/service/Core/MemoryService.cs b/service/Core/MemoryService.cs index 5bafe350c..95b9079dd 100644 --- a/service/Core/MemoryService.cs +++ b/service/Core/MemoryService.cs @@ -236,7 +236,7 @@ public Task AskAsync( } /// - public IAsyncEnumerable AskStreamingAsync( + public IAsyncEnumerable AskStreamingAsync( string question, string? index = null, MemoryFilter? filter = null, diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 04013449c..9ea30d914 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Runtime.CompilerServices; using System.Text; using System.Threading; @@ -119,15 +118,6 @@ public async Task SearchAsync( // Memories are sorted by relevance, starting from the most relevant foreach ((MemoryRecord memory, double relevance) in list) { - // Note: a document can be composed by multiple files - string documentId = memory.GetDocumentId(this._log); - - // Identify the file in case there are multiple files - string fileId = memory.GetFileId(this._log); - - // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. - string linkToFile = $"{index}/{documentId}/{fileId}"; - var partitionText = memory.GetPartitionText(this._log).Trim(); if (string.IsNullOrEmpty(partitionText)) { @@ -138,32 +128,7 @@ public async Task SearchAsync( // Relevance is `float.MinValue` when search uses only filters and no embeddings (see code above) if (relevance > float.MinValue) { this._log.LogTrace("Adding result with relevance {0}", relevance); } - // If the file is already in the list of citations, only add the partition - var citation = result.Results.FirstOrDefault(x => x.Link == linkToFile); - if (citation == null) - { - citation = new Citation(); - result.Results.Add(citation); - } - - // Add the partition to the list of citations - citation.Index = index; - citation.DocumentId = documentId; - citation.FileId = fileId; - citation.Link = linkToFile; - citation.SourceContentType = memory.GetFileContentType(this._log); - citation.SourceName = memory.GetFileName(this._log); - citation.SourceUrl = memory.GetWebPageUrl(index); - - citation.Partitions.Add(new Citation.Partition - { - Text = partitionText, - Relevance = (float)relevance, - PartitionNumber = memory.GetPartitionNumber(this._log), - SectionNumber = memory.GetSectionNumber(), - LastUpdate = memory.GetLastUpdate(), - Tags = memory.Tags, - }); + this.MapMatchToCitation(index, result.Results, memory, relevance); // In cases where a buggy storage connector is returning too many records if (result.Results.Count >= this._config.MaxMatchesCount) @@ -228,15 +193,6 @@ public async Task AskAsync( // Memories are sorted by relevance, starting from the most relevant await foreach ((MemoryRecord memory, double relevance) in matches.ConfigureAwait(false)) { - // Note: a document can be composed by multiple files - string documentId = memory.GetDocumentId(this._log); - - // Identify the file in case there are multiple files - string fileId = memory.GetFileId(this._log); - - // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. - string linkToFile = $"{index}/{documentId}/{fileId}"; - string fileName = memory.GetFileName(this._log); var partitionText = memory.GetPartitionText(this._log).Trim(); @@ -248,8 +204,7 @@ public async Task AskAsync( factsAvailableCount++; - // TODO: add file age in days, to push relevance of newer documents - var fact = $"==== [File:{fileName};Relevance:{relevance:P1}]:\n{partitionText}\n"; + var fact = GenerateFactString(fileName, relevance, partitionText); // Use the partition/chunk only if there's room for it var size = this._textGenerator.CountTokens(fact); @@ -265,32 +220,7 @@ public async Task AskAsync( facts.Append(fact); tokensAvailable -= size; - // If the file is already in the list of citations, only add the partition - var citation = answer.RelevantSources.FirstOrDefault(x => x.Link == linkToFile); - if (citation == null) - { - citation = new Citation(); - answer.RelevantSources.Add(citation); - } - - // Add the partition to the list of citations - citation.Index = index; - citation.DocumentId = documentId; - citation.FileId = fileId; - citation.Link = linkToFile; - citation.SourceContentType = memory.GetFileContentType(this._log); - citation.SourceName = fileName; - citation.SourceUrl = memory.GetWebPageUrl(index); - - citation.Partitions.Add(new Citation.Partition - { - Text = partitionText, - Relevance = (float)relevance, - PartitionNumber = memory.GetPartitionNumber(this._log), - SectionNumber = memory.GetSectionNumber(), - LastUpdate = memory.GetLastUpdate(), - Tags = memory.Tags, - }); + this.MapMatchToCitation(index, answer.RelevantSources, memory, relevance); // In cases where a buggy storage connector is returning too many records if (factsUsedCount >= this._config.MaxMatchesCount) @@ -332,8 +262,9 @@ public async Task AskAsync( watch.Stop(); answer.Result = text.ToString(); - answer.NoResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); - if (answer.NoResult) + var noResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); + answer.NoResult = noResult; + if (noResult) { answer.NoResultReason = "No relevant memories found"; this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); @@ -347,17 +278,25 @@ public async Task AskAsync( } /// - public async IAsyncEnumerable AskStreamingAsync( + public async IAsyncEnumerable AskStreamingAsync( string index, string question, ICollection? filters = null, double minRelevance = 0, [EnumeratorCancellation] CancellationToken cancellationToken = default) { + var noAnswerFound = new MemoryAnswer + { + Question = question, + NoResult = true, + Result = this._config.EmptyAnswer, + }; + if (string.IsNullOrEmpty(question)) { this._log.LogWarning("No question provided"); - yield return this._config.EmptyAnswer; + noAnswerFound.NoResultReason = "No question provided"; + yield return noAnswerFound; yield break; } @@ -372,6 +311,7 @@ public async IAsyncEnumerable AskStreamingAsync( var factsUsedCount = 0; var factsAvailableCount = 0; + var answer = noAnswerFound; this._log.LogTrace("Fetching relevant memories"); IAsyncEnumerable<(MemoryRecord, double)> matches = this._memoryDb.GetSimilarListAsync( @@ -397,8 +337,7 @@ public async IAsyncEnumerable AskStreamingAsync( factsAvailableCount++; - // TODO: add file age in days, to push relevance of newer documents - var fact = $"==== [File:{fileName};Relevance:{relevance:P1}]:\n{partitionText}\n"; + var fact = GenerateFactString(fileName, relevance, partitionText); // Use the partition/chunk only if there's room for it var size = this._textGenerator.CountTokens(fact); @@ -413,40 +352,73 @@ public async IAsyncEnumerable AskStreamingAsync( facts.Append(fact); tokensAvailable -= size; + + this.MapMatchToCitation(index, answer.RelevantSources, memory, relevance); + + // In cases where a buggy storage connector is returning too many records + if (factsUsedCount >= this._config.MaxMatchesCount) + { + break; + } } if (factsAvailableCount > 0 && factsUsedCount == 0) { this._log.LogError("Unable to inject memories in the prompt, not enough tokens available"); - yield return this._config.EmptyAnswer; + noAnswerFound.NoResultReason = "Unable to use memories"; + yield return noAnswerFound; yield break; } if (factsUsedCount == 0) { this._log.LogWarning("No memories available"); - yield return this._config.EmptyAnswer; + noAnswerFound.NoResultReason = "No memories available"; + yield return noAnswerFound; yield break; } StringBuilder bufferedAnswer = new(); + bool finishedRequiredBuffering = false; var watch = Stopwatch.StartNew(); - await foreach (var x in this.GenerateAnswerAsync(question, facts.ToString()) - .WithCancellation(cancellationToken).ConfigureAwait(false)) + await foreach (var token in this.GenerateAnswerAsync(question, facts.ToString()).WithCancellation(cancellationToken).ConfigureAwait(false)) { - if (x is null || x.Length == 0) + if (token is null || token.Length == 0) { continue; } - bufferedAnswer.Append(x); - yield return x; + bufferedAnswer.Append(token); int currentLength = bufferedAnswer.Length; - if (currentLength <= this._config.EmptyAnswer.Length && ValueIsEquivalentTo(bufferedAnswer.ToString(), this._config.EmptyAnswer)) + + if (!finishedRequiredBuffering) + { + // Adding 5 to the length to ensure that the extra tokens in ValueIsEquivalentTo can be checked (non-text tokens) + if (currentLength <= this._config.EmptyAnswer.Length + 5 && ValueIsEquivalentTo(bufferedAnswer.ToString(), this._config.EmptyAnswer)) + { + this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); + noAnswerFound.NoResultReason = "No relevant memories found"; + yield return noAnswerFound; + yield break; + } + else if (currentLength > this._config.EmptyAnswer.Length) + { + finishedRequiredBuffering = true; + answer.NoResult = false; + answer.Result = bufferedAnswer.ToString(); + yield return answer; + } + } + else { - this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); - yield break; + yield return new MemoryAnswer + { + Result = token, + NoResult = null, + Question = null, + RelevantSources = null, + }; } if (this._log.IsEnabled(LogLevel.Trace) && currentLength >= 30) @@ -455,6 +427,14 @@ public async IAsyncEnumerable AskStreamingAsync( } } + //Edge case when the generated answer is shorter than the configured empty answer + if (!finishedRequiredBuffering) + { + answer.NoResult = false; + answer.Result = bufferedAnswer.ToString(); + yield return answer; + } + watch.Stop(); this._log.LogTrace("Answer generated in {0} msecs", watch.ElapsedMilliseconds); } @@ -497,4 +477,51 @@ private static bool ValueIsEquivalentTo(string value, string target) target = target.Trim().Trim('.', '"', '\'', '`', '~', '!', '?', '@', '#', '$', '%', '^', '+', '*', '_', '-', '=', '|', '\\', '/', '(', ')', '[', ']', '{', '}', '<', '>'); return string.Equals(value, target, StringComparison.OrdinalIgnoreCase); } + + private static string GenerateFactString(string fileName, double relevance, string partitionText) + { + // TODO: add file age in days, to push relevance of newer documents + return $"==== [File:{fileName};Relevance:{relevance:P1}]:\n{partitionText}\n"; + } + + private void MapMatchToCitation(string index, List citations, MemoryRecord memory, double relevance) + { + string partitionText = memory.GetPartitionText(this._log).Trim(); + + // Note: a document can be composed by multiple files + string documentId = memory.GetDocumentId(this._log); + + // Identify the file in case there are multiple files + string fileId = memory.GetFileId(this._log); + + // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. + string linkToFile = $"{index}/{documentId}/{fileId}"; + + // If the file is already in the list of citations, only add the partition + Citation? citation = citations.Find(x => x.Link == linkToFile); + if (citation == null) + { + citation = new Citation(); + citations.Add(citation); + } + + // Add the partition to the list of citations + citation.Index = index; + citation.DocumentId = documentId; + citation.FileId = fileId; + citation.Link = linkToFile; + citation.SourceContentType = memory.GetFileContentType(this._log); + citation.SourceName = memory.GetFileName(this._log); + citation.SourceUrl = memory.GetWebPageUrl(index); + + citation.Partitions.Add(new Citation.Partition + { + Text = partitionText, + Relevance = (float)relevance, + PartitionNumber = memory.GetPartitionNumber(this._log), + SectionNumber = memory.GetSectionNumber(), + LastUpdate = memory.GetLastUpdate(), + Tags = memory.Tags, + }); + } } diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index 767ff8b69..875e32d8a 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -16,6 +16,8 @@ using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Service.AspNetCore.Models; using System.IO; +using System.Text.Json; +using System.Text.Json.Serialization; using Microsoft.AspNetCore.Http.HttpResults; namespace Microsoft.KernelMemory.Service.AspNetCore; @@ -235,21 +237,26 @@ public static void AddAskStreamEndpoint( RouteGroupBuilder group = builder.MapGroup(apiPrefix); // Ask streaming endpoint - var route = group.MapPost(Constants.HttpAskStreamEndpoint, IAsyncEnumerable ( + var route = group.MapPost(Constants.HttpAskStreamEndpoint, IResult ( MemoryQuery query, IKernelMemory service, ILogger log, CancellationToken cancellationToken) => { log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); - return service.AskStreamingAsync( - question: query.Question, - index: query.Index, - filters: query.Filters, - minRelevance: query.MinRelevance, - cancellationToken: cancellationToken); + return Results.Json( + service.AskStreamingAsync( + question: query.Question, + index: query.Index, + filters: query.Filters, + minRelevance: query.MinRelevance, + cancellationToken: cancellationToken), + new JsonSerializerOptions + { + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull + }); }) - .Produces>(StatusCodes.Status200OK) + .Produces>(StatusCodes.Status200OK) .Produces(StatusCodes.Status401Unauthorized) .Produces(StatusCodes.Status403Forbidden); From e5009200e20f41d8645ddaa04fea26bcc823ccd6 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Mon, 10 Jun 2024 00:11:47 +0200 Subject: [PATCH 09/11] Fix null reference errors --- service/Abstractions/Models/MemoryAnswer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/Abstractions/Models/MemoryAnswer.cs b/service/Abstractions/Models/MemoryAnswer.cs index b689e8ad1..b19948527 100644 --- a/service/Abstractions/Models/MemoryAnswer.cs +++ b/service/Abstractions/Models/MemoryAnswer.cs @@ -45,7 +45,7 @@ public class MemoryAnswer /// [JsonPropertyName("relevantSources")] [JsonPropertyOrder(20)] - public List? RelevantSources { get; set; } = new(); + public List RelevantSources { get; set; } = new(); /// /// Serialize using .NET JSON serializer, e.g. to avoid ambiguity From 16969ebcd53baa1c9aacb88bef19d91a8203c118 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Mon, 10 Jun 2024 00:11:47 +0200 Subject: [PATCH 10/11] Fix null reference errors --- service/Abstractions/Models/MemoryAnswer.cs | 2 +- service/Core/Search/SearchClient.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/service/Abstractions/Models/MemoryAnswer.cs b/service/Abstractions/Models/MemoryAnswer.cs index b689e8ad1..b19948527 100644 --- a/service/Abstractions/Models/MemoryAnswer.cs +++ b/service/Abstractions/Models/MemoryAnswer.cs @@ -45,7 +45,7 @@ public class MemoryAnswer /// [JsonPropertyName("relevantSources")] [JsonPropertyOrder(20)] - public List? RelevantSources { get; set; } = new(); + public List RelevantSources { get; set; } = new(); /// /// Serialize using .NET JSON serializer, e.g. to avoid ambiguity diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index f91d9159d..1f09556b6 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -419,7 +419,7 @@ public async IAsyncEnumerable AskStreamingAsync( Result = token, NoResult = null, Question = null, - RelevantSources = null, + RelevantSources = [], }; } From 7eb5093e66b7ef834ff89f526f2c177106c612e1 Mon Sep 17 00:00:00 2001 From: Jonathan Velkeneers Date: Mon, 10 Jun 2024 00:20:14 +0200 Subject: [PATCH 11/11] Undo null changes --- service/Abstractions/Models/MemoryAnswer.cs | 4 ++-- service/Core/Search/SearchClient.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/service/Abstractions/Models/MemoryAnswer.cs b/service/Abstractions/Models/MemoryAnswer.cs index b19948527..5033ade63 100644 --- a/service/Abstractions/Models/MemoryAnswer.cs +++ b/service/Abstractions/Models/MemoryAnswer.cs @@ -17,11 +17,11 @@ public class MemoryAnswer /// [JsonPropertyName("question")] [JsonPropertyOrder(1)] - public string? Question { get; set; } = string.Empty; + public string Question { get; set; } = string.Empty; [JsonPropertyName("noResult")] [JsonPropertyOrder(2)] - public bool? NoResult { get; set; } = true; + public bool NoResult { get; set; } = true; /// /// Content of the answer. diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 1f09556b6..ad6027185 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -417,8 +417,8 @@ public async IAsyncEnumerable AskStreamingAsync( yield return new MemoryAnswer { Result = token, - NoResult = null, - Question = null, + NoResult = false, + Question = "", RelevantSources = [], }; }