Skip to content

Commit

Permalink
Add new and extend existing til::string helpers (#16772)
Browse files Browse the repository at this point in the history
`wstring_case_insensitive_compare` is not a great name for what it
does as it's incorrect to use for regular (human readable) strings.
This PR thus renames it to `env_key_sorter`.

`compare_string_ordinal` was renamed to `compare_ordinal_insensitive`
to make sure callers know that the comparison is insensitive
(this may otherwise be incorrect in certain contexts after all).
The return value was changed to match `memcmp` so that the API
is detached from its underlying implementation (= NLS).

`compare_linguistic_insensitive` and `contains_linguistic_insensitive`
were added to sort and filter human-readable strings respectively.

`prefix_split` was extended to allow for needles that are just a
single character. This significantly improves the generated assembly
and is also usually what someone would want to actually use.
I've left the string-as-needle variant in just in case.

This PR is prep-work for #2664
  • Loading branch information
lhecker authored Feb 28, 2024
1 parent e7796e7 commit badc00e
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 58 deletions.
2 changes: 1 addition & 1 deletion src/cascadia/TerminalConnection/ConptyConnection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation
if (_environment)
{
// Order the environment variable names so that resolution order is consistent
std::set<std::wstring, til::wstring_case_insensitive_compare> keys{};
std::set<std::wstring, til::env_key_sorter> keys{};
for (const auto item : _environment)
{
keys.insert(item.Key().c_str());
Expand Down
8 changes: 0 additions & 8 deletions src/cascadia/TerminalSettingsEditor/Appearances.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ Author(s):

namespace winrt::Microsoft::Terminal::Settings::Editor::implementation
{
struct FontComparator
{
bool operator()(const Font& lhs, const Font& rhs) const
{
return lhs.LocalizedName() < rhs.LocalizedName();
}
};

struct Font : FontT<Font>
{
public:
Expand Down
11 changes: 9 additions & 2 deletions src/cascadia/TerminalSettingsEditor/ProfileViewModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "ProfileViewModel.h"
#include "ProfileViewModel.g.cpp"
#include "EnumEntry.h"
#include "Appearances.h"

#include <LibraryResources.h>
#include "../WinRTUtils/inc/Utils.h"
Expand Down Expand Up @@ -154,11 +155,17 @@ namespace winrt::Microsoft::Terminal::Settings::Editor::implementation
CATCH_LOG();
}

const auto comparator = [&](const Editor::Font& lhs, const Editor::Font& rhs) {
const auto a = lhs.LocalizedName();
const auto b = rhs.LocalizedName();
return til::compare_linguistic_insensitive(a, b) < 0;
};

// sort and save the lists
std::sort(begin(fontList), end(fontList), FontComparator());
std::sort(begin(fontList), end(fontList), comparator);
_FontList = single_threaded_observable_vector<Editor::Font>(std::move(fontList));

std::sort(begin(monospaceFontList), end(monospaceFontList), FontComparator());
std::sort(begin(monospaceFontList), end(monospaceFontList), comparator);
_MonospaceFontList = single_threaded_observable_vector<Editor::Font>(std::move(monospaceFontList));
}
CATCH_LOG();
Expand Down
1 change: 0 additions & 1 deletion src/cascadia/TerminalSettingsEditor/ProfileViewModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include "ProfileViewModel.g.h"
#include "Utils.h"
#include "ViewModelHelpers.h"
#include "Appearances.h"

namespace winrt::Microsoft::Terminal::Settings::Editor::implementation
{
Expand Down
5 changes: 2 additions & 3 deletions src/cascadia/TerminalSettingsModel/CascadiaSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
#include <WtExeUtils.h>

#include <shellapi.h>
#include <shlwapi.h>
#include <til/latch.h>
#include <til/string.h>
#include <til/env.h>

using namespace winrt::Microsoft::Terminal;
using namespace winrt::Microsoft::Terminal::Settings;
Expand Down Expand Up @@ -555,7 +554,7 @@ void CascadiaSettings::_validateProfileEnvironmentVariables()
{
for (const auto& profile : _allProfiles)
{
std::set<std::wstring, til::wstring_case_insensitive_compare> envVarNames{};
std::set<std::wstring, til::env_key_sorter> envVarNames{};
if (profile.EnvironmentVariables() == nullptr)
{
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ static KeyChord _fromString(std::wstring_view wstr)

while (!wstr.empty())
{
const auto part = til::prefix_split(wstr, L"+");
const auto part = til::prefix_split(wstr, L'+');

if (til::equals_insensitive_ascii(part, CTRL_KEY))
{
Expand Down
28 changes: 22 additions & 6 deletions src/inc/til/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@ class EnvTests;

namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
// A case-insensitive wide-character map is used to store environment variables
// due to documented requirements:
//
// "All strings in the environment block must be sorted alphabetically by name.
// The sort is case-insensitive, Unicode order, without regard to locale.
// Because the equal sign is a separator, it must not be used in the name of
// an environment variable."
// https://docs.microsoft.com/en-us/windows/desktop/ProcThread/changing-environment-variables
struct env_key_sorter
{
[[nodiscard]] bool operator()(const std::wstring& lhs, const std::wstring& rhs) const noexcept
{
return compare_ordinal_insensitive(lhs, rhs) < 0;
}
};

namespace details
{

Expand Down Expand Up @@ -161,7 +177,7 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
friend class ::EnvTests;
#endif

std::map<std::wstring, std::wstring, til::wstring_case_insensitive_compare> _envMap{};
std::map<std::wstring, std::wstring, til::env_key_sorter> _envMap{};

// We make copies of the environment variable names to ensure they are null terminated.
void get(wil::zwstring_view variable)
Expand Down Expand Up @@ -348,8 +364,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
static constexpr std::wstring_view temp{ L"temp" };
static constexpr std::wstring_view tmp{ L"tmp" };
if (til::compare_string_ordinal(var, temp) == CSTR_EQUAL ||
til::compare_string_ordinal(var, tmp) == CSTR_EQUAL)
if (til::compare_ordinal_insensitive(var, temp) == 0 ||
til::compare_ordinal_insensitive(var, tmp) == 0)
{
return til::details::wil_env::GetShortPathNameW<std::wstring, 256>(value.data());
}
Expand All @@ -364,9 +380,9 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
static constexpr std::wstring_view path{ L"Path" };
static constexpr std::wstring_view libPath{ L"LibPath" };
static constexpr std::wstring_view os2LibPath{ L"Os2LibPath" };
return til::compare_string_ordinal(input, path) == CSTR_EQUAL ||
til::compare_string_ordinal(input, libPath) == CSTR_EQUAL ||
til::compare_string_ordinal(input, os2LibPath) == CSTR_EQUAL;
return til::compare_ordinal_insensitive(input, path) == 0 ||
til::compare_ordinal_insensitive(input, libPath) == 0 ||
til::compare_ordinal_insensitive(input, os2LibPath) == 0;
}

void parse(const wchar_t* lastCh)
Expand Down
124 changes: 88 additions & 36 deletions src/inc/til/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,65 +315,117 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
// * return "foo"
// If the needle cannot be found the "str" argument is returned as is.
template<typename T, typename Traits>
std::basic_string_view<T, Traits> prefix_split(std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& needle) noexcept
constexpr std::basic_string_view<T, Traits> prefix_split(std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& needle) noexcept
{
using view_type = std::basic_string_view<T, Traits>;

const auto idx = str.find(needle);
// > If the needle cannot be found the "str" argument is returned as is.
// ...but if needle is empty, idx will always be npos, forcing us to return str.
if (idx == view_type::npos || needle.empty())
{
return std::exchange(str, {});
}
const auto needleLen = needle.size();
const auto idx = needleLen == 0 ? str.size() : str.find(needle);
const auto prefixIdx = std::min(str.size(), idx);
const auto suffixIdx = std::min(str.size(), prefixIdx + needle.size());

const auto suffixIdx = idx + needle.size();
const view_type result{ str.data(), idx };
const view_type result{ str.data(), prefixIdx };
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead
str = { str.data() + suffixIdx, str.size() - suffixIdx };
return result;
}

inline std::string_view prefix_split(std::string_view& str, const std::string_view& needle) noexcept
constexpr std::string_view prefix_split(std::string_view& str, const std::string_view& needle) noexcept
{
return prefix_split<>(str, needle);
}

inline std::wstring_view prefix_split(std::wstring_view& str, const std::wstring_view& needle) noexcept
constexpr std::wstring_view prefix_split(std::wstring_view& str, const std::wstring_view& needle) noexcept
{
return prefix_split<>(str, needle);
}

//
// A case-insensitive wide-character map is used to store environment variables
// due to documented requirements:
//
// "All strings in the environment block must be sorted alphabetically by name.
// The sort is case-insensitive, Unicode order, without regard to locale.
// Because the equal sign is a separator, it must not be used in the name of
// an environment variable."
// https://docs.microsoft.com/en-us/windows/desktop/ProcThread/changing-environment-variables
//
// - Returns CSTR_LESS_THAN, CSTR_EQUAL or CSTR_GREATER_THAN
[[nodiscard]] inline int compare_string_ordinal(const std::wstring_view& lhs, const std::wstring_view& rhs) noexcept
{
const auto result = CompareStringOrdinal(
lhs.data(),
::base::saturated_cast<int>(lhs.size()),
rhs.data(),
::base::saturated_cast<int>(rhs.size()),
TRUE);
FAIL_FAST_LAST_ERROR_IF(!result);
// Give the arguments ("foo bar baz", " "), this method will
// * modify the first argument to "bar baz"
// * return "foo"
// If the needle cannot be found the "str" argument is returned as is.
template<typename T, typename Traits>
constexpr std::basic_string_view<T, Traits> prefix_split(std::basic_string_view<T, Traits>& str, T ch) noexcept
{
using view_type = std::basic_string_view<T, Traits>;

const auto idx = str.find(ch);
const auto prefixIdx = std::min(str.size(), idx);
const auto suffixIdx = std::min(str.size(), prefixIdx + 1);

const view_type result{ str.data(), prefixIdx };
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead
str = { str.data() + suffixIdx, str.size() - suffixIdx };
return result;
}

struct wstring_case_insensitive_compare
template<typename T, typename Traits>
constexpr std::basic_string_view<T, Traits> trim(const std::basic_string_view<T, Traits>& str, const T ch) noexcept
{
[[nodiscard]] bool operator()(const std::wstring& lhs, const std::wstring& rhs) const noexcept
auto beg = str.data();
auto end = beg + str.size();

for (; beg != end && *beg == ch; ++beg)
{
return compare_string_ordinal(lhs, rhs) == CSTR_LESS_THAN;
}
};

for (; beg != end && end[-1] == ch; --end)
{
}

return { beg, end };
}

// This function is appropriate for case-insensitive equivalence testing of file paths and other "system" strings.
// Similar to memcmp, this returns <0, 0 or >0.
inline int compare_ordinal_insensitive(const std::wstring_view& lhs, const std::wstring_view& rhs) noexcept
{
const auto lhsLen = ::base::saturated_cast<int>(lhs.size());
const auto rhsLen = ::base::saturated_cast<int>(rhs.size());
// MSDN:
// > To maintain the C runtime convention of comparing strings,
// > the value 2 can be subtracted from a nonzero return value.
// > [...]
// > The function returns 0 if it does not succeed. [...] following error codes:
// > * ERROR_INVALID_PARAMETER. Any of the parameter values was invalid.
// -> We can just subtract 2.
return CompareStringOrdinal(lhs.data(), lhsLen, rhs.data(), rhsLen, TRUE) - 2;
}

// This function is appropriate for sorting strings primarily used for human consumption, like a list of file names.
// Similar to memcmp, this returns <0, 0 or >0.
inline int compare_linguistic_insensitive(const std::wstring_view& lhs, const std::wstring_view& rhs) noexcept
{
const auto lhsLen = ::base::saturated_cast<int>(lhs.size());
const auto rhsLen = ::base::saturated_cast<int>(rhs.size());
// MSDN:
// > To maintain the C runtime convention of comparing strings,
// > the value 2 can be subtracted from a nonzero return value.
// > [...]
// > The function returns 0 if it does not succeed. [...] following error codes:
// > * ERROR_INVALID_FLAGS. The values supplied for flags were invalid.
// > * ERROR_INVALID_PARAMETER. Any of the parameter values was invalid.
// -> We can just subtract 2.
#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47).
return CompareStringEx(LOCALE_NAME_USER_DEFAULT, LINGUISTIC_IGNORECASE, lhs.data(), lhsLen, rhs.data(), rhsLen, nullptr, nullptr, 0) - 2;
}

// This function is appropriate for strings primarily used for human consumption, like a list of file names.
inline bool contains_linguistic_insensitive(const std::wstring_view& str, const std::wstring_view& needle) noexcept
{
const auto strLen = ::base::saturated_cast<int>(str.size());
const auto needleLen = ::base::saturated_cast<int>(needle.size());
// MSDN:
// > Returns a 0-based index into the source string indicated by lpStringSource if successful.
// > [...]
// > The function returns -1 if it does not succeed.
// > * ERROR_INVALID_FLAGS. The values supplied for flags were not valid.
// > * ERROR_INVALID_PARAMETER. Any of the parameter values was invalid.
// > * ERROR_SUCCESS. The action completed successfully but yielded no results.
// -> We can just check for -1.
#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47).
return FindNLSStringEx(LOCALE_NAME_USER_DEFAULT, LINGUISTIC_IGNORECASE, str.data(), strLen, needle.data(), needleLen, nullptr, nullptr, nullptr, 0) != -1;
}

// Implement to_int in terms of to_ulong by negating its result. to_ulong does not expect
// to be passed signed numbers and will return an error accordingly. That error when
Expand Down
18 changes: 18 additions & 0 deletions src/til/ut_til/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,24 @@ class StringTests
}
}

TEST_METHOD(prefix_split_char)
{
{
std::string_view s{ "" };
VERIFY_ARE_EQUAL("", til::prefix_split(s, ' '));
VERIFY_ARE_EQUAL("", s);
}
{
std::string_view s{ "foo bar baz" };
VERIFY_ARE_EQUAL("foo", til::prefix_split(s, ' '));
VERIFY_ARE_EQUAL("bar baz", s);
VERIFY_ARE_EQUAL("bar", til::prefix_split(s, ' '));
VERIFY_ARE_EQUAL("baz", s);
VERIFY_ARE_EQUAL("baz", til::prefix_split(s, ' '));
VERIFY_ARE_EQUAL("", s);
}
}

TEST_METHOD(CleanPathAndFilename)
{
VERIFY_ARE_EQUAL(LR"(CUsersGeddyMusicAnalog Man)", til::clean_filename(LR"(C:\Users\Geddy\Music\"Analog Man")"));
Expand Down

0 comments on commit badc00e

Please sign in to comment.