Skip to content

Commit

Permalink
[libc++] Optimize std::getline
Browse files Browse the repository at this point in the history
```
-----------------------------------------------
Benchmark                   old             new
-----------------------------------------------
BM_getline_string        318 ns         32.4 ns
```
  • Loading branch information
philnik777 committed Feb 5, 2025
1 parent 8cb3d7b commit 310f910
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 106 deletions.
2 changes: 2 additions & 0 deletions libcxx/docs/ReleaseNotes/20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ Improvements and New Features

- Added :ref:`hardening mode <hardening>` support for ``forward_list`` and ``bitset``.

- The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.

Deprecations and Removals
-------------------------

Expand Down
77 changes: 51 additions & 26 deletions libcxx/include/istream
Original file line number Diff line number Diff line change
Expand Up @@ -1265,41 +1265,66 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
ios_base::iostate __state = ios_base::goodbit;
typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
if (__sen) {
if (!__sen)
return __is;
# if _LIBCPP_HAS_EXCEPTIONS
try {
try {
# endif
__str.clear();
streamsize __extr = 0;
while (true) {
typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
if (_Traits::eq_int_type(__i, _Traits::eof())) {
__state |= ios_base::eofbit;
break;
__str.clear();

auto& __buffer = *__is.rdbuf();

auto __next = __buffer.sgetc();
for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) {
const auto* __first = __buffer.gptr();
const auto* __last = __buffer.egptr();
_CharT __1buf;

if (__first == __last) {
__1buf = __next;
__first = &__1buf;
__last = &__1buf + 1;
}

auto __bump_stream = [&](ptrdiff_t __diff) {
if (__first == &__1buf) {
_LIBCPP_ASSERT_INTERNAL(__diff == 1, "trying to bump stream further than buffer size");
__buffer.sbumpc();
} else {
__buffer.__gbump_ptrdiff(__diff);
}
++__extr;
_CharT __ch = _Traits::to_char_type(__i);
if (_Traits::eq(__ch, __dlm))
break;
__str.push_back(__ch);
if (__str.size() == __str.max_size()) {
};

const auto* __match = _Traits::find(__first, __last - __first, __dlm);
if (__match) {
if (auto __cap = __str.max_size() - __str.size(); __cap > static_cast<size_t>(__match - __first)) {
__str.append(__first, __match);
__bump_stream(__match - __first + 1);
} else {
__str.append(__first, __cap);
__bump_stream(__cap);
__state |= ios_base::failbit;
break;
}
break;
}
if (__extr == 0)
__state |= ios_base::failbit;

__str.append(__first, __last);
__bump_stream(__last - __first);
}

if (_Traits::eq_int_type(__next, _Traits::eof()))
__state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit);

# if _LIBCPP_HAS_EXCEPTIONS
} catch (...) {
__state |= ios_base::badbit;
__is.__setstate_nothrow(__state);
if (__is.exceptions() & ios_base::badbit) {
throw;
}
} catch (...) {
__state |= ios_base::badbit;
__is.__setstate_nothrow(__state);
if (__is.exceptions() & ios_base::badbit) {
throw;
}
# endif
__is.setstate(__state);
}
# endif
__is.setstate(__state);
return __is;
}

Expand Down
7 changes: 7 additions & 0 deletions libcxx/include/streambuf
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ protected:

inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }

// gbump takes an int, so it might not be able to represent the offset we want to add.
_LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }

inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
Expand Down Expand Up @@ -370,6 +373,10 @@ private:
char_type* __bout_ = nullptr;
char_type* __nout_ = nullptr;
char_type* __eout_ = nullptr;

template <class _CharT2, class _Traits2, class _Allocator>
_LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
};

extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_streambuf<char>;
Expand Down
35 changes: 35 additions & 0 deletions libcxx/test/benchmarks/streams/getline.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03

#include <istream>
#include <sstream>

#include <benchmark/benchmark.h>

void BM_getline_string(benchmark::State& state) {
std::istringstream iss;

std::string str;
str.reserve(128);
iss.str("A long string to let getline do some more work, making sure that longer strings are parsed fast enough");

for (auto _ : state) {
benchmark::DoNotOptimize(iss);

std::getline(iss, str);
benchmark::DoNotOptimize(str);
iss.seekg(0);
}
}

BENCHMARK(BM_getline_string);

BENCHMARK_MAIN();
Original file line number Diff line number Diff line change
Expand Up @@ -13,55 +13,42 @@
// getline(basic_istream<charT,traits>& is,
// basic_string<charT,traits,Allocator>& str);

#include <string>
#include <sstream>
#include <cassert>
#include <sstream>
#include <string>

#include "make_string.h"
#include "min_allocator.h"
#include "stream_types.h"
#include "test_macros.h"

template <template <class> class Alloc>
void test_string() {
using S = std::basic_string<char, std::char_traits<char>, Alloc<char> >;
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
using WS = std::basic_string<wchar_t, std::char_traits<wchar_t>, Alloc<wchar_t> >;
#endif
{
std::istringstream in(" abc\n def\n ghij");
S s("initial text");
std::getline(in, s);
assert(in.good());
assert(s == " abc");
std::getline(in, s);
assert(in.good());
assert(s == " def");
std::getline(in, s);
assert(in.eof());
assert(s == " ghij");
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
template <class CharT, class Alloc, class Stream, class Streambuf>
void test() {
using string_type = std::basic_string<CharT, std::char_traits<CharT>, Alloc>;
using stream_type = std::basic_istream<CharT>;
using streambuf_type = Streambuf;

{
std::wistringstream in(L" abc\n def\n ghij");
WS s(L"initial text");
streambuf_type sb(MAKE_CSTRING(CharT, " abc\n def\n ghij"));
stream_type in(&sb);
string_type s(MAKE_CSTRING(CharT, "initial text"));
std::getline(in, s);
assert(in.good());
assert(s == L" abc");
assert(s == MAKE_CSTRING(CharT, " abc"));
std::getline(in, s);
assert(in.good());
assert(s == L" def");
assert(s == MAKE_CSTRING(CharT, " def"));
std::getline(in, s);
assert(in.eof());
assert(s == L" ghij");
assert(s == MAKE_CSTRING(CharT, " ghij"));
}
#endif

#ifndef TEST_HAS_NO_EXCEPTIONS
{
std::basic_stringbuf<char> sb("hello");
std::basic_istream<char> is(&sb);
streambuf_type sb(MAKE_CSTRING(CharT, "hello"));
stream_type is(&sb);
is.exceptions(std::ios_base::eofbit);

S s;
string_type s;
bool threw = false;
try {
std::getline(is, s);
Expand All @@ -73,15 +60,14 @@ void test_string() {
assert(!is.fail());
assert(is.eof());
assert(threw);
assert(s == "hello");
assert(s == MAKE_CSTRING(CharT, "hello"));
}
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
std::basic_stringbuf<wchar_t> sb(L"hello");
std::basic_istream<wchar_t> is(&sb);
is.exceptions(std::ios_base::eofbit);
streambuf_type sb(MAKE_CSTRING(CharT, ""));
stream_type is(&sb);
is.exceptions(std::ios_base::failbit);

WS s;
string_type s;
bool threw = false;
try {
std::getline(is, s);
Expand All @@ -90,61 +76,52 @@ void test_string() {
}

assert(!is.bad());
assert(!is.fail());
assert(is.fail());
assert(is.eof());
assert(threw);
assert(s == L"hello");
assert(s == MAKE_CSTRING(CharT, ""));
}
# endif
#endif // TEST_HAS_NO_EXCEPTIONS
}

template <template <class> class Alloc>
void test_alloc() {
test<char, Alloc<char>, std::basic_istringstream<char>, std::basic_stringbuf<char> >();
test<char, Alloc<char>, std::basic_istringstream<char>, non_buffering_streambuf<char> >();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, std::basic_stringbuf<wchar_t> >();
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, non_buffering_streambuf<wchar_t> >();
#endif
}
/*
void test_tiny_allocator() {
{
std::basic_stringbuf<char> sb;
std::basic_istream<char> is(&sb);
is.exceptions(std::ios_base::failbit);

S s;
bool threw = false;
try {
std::getline(is, s);
} catch (std::ios::failure const&) {
threw = true;
}

assert(!is.bad());
assert(is.fail());
assert(is.eof());
assert(threw);
assert(s == "");
std::string in_str =
"this is a too long line for the string that has to be longer because the implementation is broken\n";
std::istringstream iss(in_str);
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char>> str;
std::getline(iss, str);
assert(str == std::string_view{in_str}.substr(0, str.max_size()));
assert(iss.rdstate() & std::ios::failbit);
}
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
std::basic_stringbuf<wchar_t> sb;
std::basic_istream<wchar_t> is(&sb);
is.exceptions(std::ios_base::failbit);

WS s;
bool threw = false;
try {
std::getline(is, s);
} catch (std::ios::failure const&) {
threw = true;
}

assert(!is.bad());
assert(is.fail());
assert(is.eof());
assert(threw);
assert(s == L"");
std::string in_str =
"this is a too long line for the string that has to be longer because the implementation is broken";
std::istringstream iss(in_str);
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char>> str;
std::getline(iss, str);
assert(str == std::string_view{in_str}.substr(0, str.max_size()));
assert(iss.rdstate() & std::ios::failbit);
}
# endif
#endif // TEST_HAS_NO_EXCEPTIONS
}
*/

int main(int, char**) {
test_string<std::allocator>();
test_alloc<std::allocator>();
#if TEST_STD_VER >= 11
test_string<min_allocator>();
test_alloc<min_allocator>();
#endif
// test_tiny_allocator();

return 0;
}
44 changes: 44 additions & 0 deletions libcxx/test/support/stream_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef TEST_SUPPORT_STREAM_TYPE_H
#define TEST_SUPPORT_STREAM_TYPE_H

#include <streambuf>
#include <string>
#include <utility>

template <class CharT>
class non_buffering_streambuf : public std::basic_streambuf<CharT> {
using char_type = CharT;
using traits_type = std::char_traits<CharT>;
using int_type = typename traits_type::int_type;

public:
non_buffering_streambuf(std::basic_string<char_type> underlying_data)
: underlying_data_(std::move(underlying_data)), index_(0) {}

protected:
int_type underflow() override {
if (index_ != underlying_data_.size())
return underlying_data_[index_];
return traits_type::eof();
}

int_type uflow() override {
if (index_ != underlying_data_.size())
return underlying_data_[index_++];
return traits_type::eof();
}

private:
std::basic_string<char_type> underlying_data_;
size_t index_;
};

#endif // TEST_SUPPORT_STREAM_TYPE_H

0 comments on commit 310f910

Please sign in to comment.