using System.Text;
using System.Text.RegularExpressions;
namespace Connected.Components;
public static class Splitter
{
private const string NextBoundary = ".*?\\b";
private static StringBuilder s_stringBuilderCached;
///
/// Splits the text into fragments, according to the
/// text to be highlighted
///
/// The whole text
/// The text to be highlighted
/// The texts to be highlighted
/// Regex expression that was used to split fragments.
/// Whether it's case sensitive or not
/// If true, splits until the next regex boundary
///
public static Memory GetFragments(string text,
string highlightedText,
IEnumerable highlightedTexts,
out string regex,
bool caseSensitive = false,
bool untilNextBoundary = false)
{
if (string.IsNullOrEmpty(text))
{
regex = "";
return Memory.Empty;
}
var builder = Interlocked.Exchange(ref s_stringBuilderCached, null) ?? new();
//the first brace in the pattern is to keep the patten when splitting,
//the `(?:` in the pattern is to accept multiple highlightedTexts but not capture them.
builder.Append("((?:");
//this becomes true if `AppendPattern` was called at least once.
bool hasAtLeastOnePattern = false;
if (!string.IsNullOrEmpty(highlightedText))
{
AppendPattern(highlightedText);
}
if (highlightedTexts is not null)
{
foreach (var substring in highlightedTexts)
{
if (string.IsNullOrEmpty(substring))
continue;
//split pattern if we already added an string to search.
if (hasAtLeastOnePattern)
{
builder.Append(")|(?:");
}
AppendPattern(substring);
}
}
if (hasAtLeastOnePattern)
{
//close the last pattern group and the capture group.
builder.Append("))");
}
else
{
builder.Clear();
s_stringBuilderCached = builder;
//all patterns were empty or null.
regex = "";
return new string[] { text };
}
regex = builder.ToString();
builder.Clear();
s_stringBuilderCached = builder;
var splits = Regex
.Split(text,
regex,
caseSensitive
? RegexOptions.None
: RegexOptions.IgnoreCase);
var length = 0;
for (var i = 0; i < splits.Length; i++)
{
var s = splits[i];
if (!string.IsNullOrEmpty(s))
{
splits[length++] = s;
}
}
Array.Clear(splits, length, splits.Length - length);
return splits.AsMemory(0, length);
void AppendPattern(string value)
{
hasAtLeastOnePattern = true;
//escapes the text for regex
value = Regex.Escape(value);
builder.Append(value);
if (untilNextBoundary)
{
builder.Append(NextBoundary);
}
}
}
}