// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "google/protobuf/json/internal/lexer.h"

// NOTE(review): this copy of the file carried ten `#include` directives whose
// angle-bracket targets were lost. The standard-library and GoogleTest headers
// below are reconstructed from the names this file actually uses; confirm the
// exact list against the upstream file.
#include <cstddef>
#include <functional>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/algorithm/container.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/string_view.h"
#include "absl/types/variant.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
#include "google/protobuf/json/internal/test_input_stream.h"
#include "google/protobuf/stubs/status_macros.h"

// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace json_internal {
namespace {
using ::testing::_;
using ::testing::ElementsAre;
using ::testing::Field;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::Pair;
using ::testing::SizeIs;
using ::testing::VariantWith;

// TODO(b/234474291): Use the gtest versions once that's available in OSS.
//
// Matches a value `arg` for which `arg.ok()` is true and whose payload `*arg`
// matches `inner`. On a non-OK `arg`, streams `arg.status()` to the listener
// and fails the match.
MATCHER_P(IsOkAndHolds, inner,
          absl::StrCat("is OK and holds ", testing::PrintToString(inner))) {
  if (!arg.ok()) {
    *result_listener << arg.status();
    return false;
  }
  return testing::ExplainMatchResult(inner, *arg, result_listener);
}

// Extracts the absl::Status carried by a StatusOr so StatusIs can inspect it.
// absl::Status GetStatus(const absl::Status& s) { return s; }
template <typename T>
absl::Status GetStatus(const absl::StatusOr<T>& s) {
  return s.status();
}

// Matches when `GetStatus(arg).code()` compares equal to `status`.
MATCHER_P(StatusIs, status,
          absl::StrCat(".status() is ", testing::PrintToString(status))) {
  return GetStatus(arg).code() == status;
}

// Expect/assert that `x` carries absl::StatusCode::kOk.
#define EXPECT_OK(x) EXPECT_THAT(x, StatusIs(absl::StatusCode::kOk))
#define ASSERT_OK(x) ASSERT_THAT(x, StatusIs(absl::StatusCode::kOk))

// TODO(b/234868512): There are several tests that validate non-standard
// behavior that is assumed to be present in the wild due to Hyrum's Law. These
// tests are grouped under the `NonStandard` suite.
These tests ensure the // non-standard syntax is accepted, and that disabling legacy mode rejects them. // // All other tests are strictly-conforming. // A generic JSON value, which is gtest-matcher friendly and stream-printable. struct Value { static absl::StatusOr Parse(io::ZeroCopyInputStream* stream, ParseOptions options = {}) { JsonLexer lex(stream, options); return Parse(lex); } static absl::StatusOr Parse(JsonLexer& lex) { absl::StatusOr kind = lex.PeekKind(); RETURN_IF_ERROR(kind.status()); switch (*kind) { case JsonLexer::kNull: RETURN_IF_ERROR(lex.Expect("null")); return Value{Null{}}; case JsonLexer::kFalse: RETURN_IF_ERROR(lex.Expect("false")); return Value{false}; case JsonLexer::kTrue: RETURN_IF_ERROR(lex.Expect("true")); return Value{true}; case JsonLexer::kNum: { absl::StatusOr> num = lex.ParseNumber(); RETURN_IF_ERROR(num.status()); return Value{num->value}; } case JsonLexer::kStr: { absl::StatusOr> str = lex.ParseUtf8(); RETURN_IF_ERROR(str.status()); return Value{str->value.ToString()}; } case JsonLexer::kArr: { std::vector arr; absl::Status s = lex.VisitArray([&arr, &lex]() -> absl::Status { absl::StatusOr val = Value::Parse(lex); RETURN_IF_ERROR(val.status()); arr.emplace_back(*std::move(val)); return absl::OkStatus(); }); RETURN_IF_ERROR(s); return Value{std::move(arr)}; } case JsonLexer::kObj: { std::vector> obj; absl::Status s = lex.VisitObject( [&obj, &lex](LocationWith& key) -> absl::Status { absl::StatusOr val = Value::Parse(lex); RETURN_IF_ERROR(val.status()); obj.emplace_back(std::move(key.value.ToString()), *std::move(val)); return absl::OkStatus(); }); RETURN_IF_ERROR(s); return Value{std::move(obj)}; } } return absl::InternalError("Unrecognized kind in lexer"); } friend std::ostream& operator<<(std::ostream& os, const Value& v) { if (absl::holds_alternative(v.value)) { os << "null"; } else if (const auto* x = absl::get_if(&v.value)) { os << "bool:" << (*x ? 
"true" : "false"); } else if (const auto* x = absl::get_if(&v.value)) { os << "num:" << *x; } else if (const auto* x = absl::get_if(&v.value)) { os << "str:" << absl::CHexEscape(*x); } else if (const auto* x = absl::get_if(&v.value)) { os << "arr:["; bool first = true; for (const auto& val : *x) { if (!first) { os << ", "; } os << val; } os << "]"; } else if (const auto* x = absl::get_if(&v.value)) { os << "obj:["; bool first = true; for (const auto& kv : *x) { if (!first) { os << ", "; first = false; } os << kv.first << ":" << kv.second; } os << "]"; } return os; } struct Null {}; using Array = std::vector; using Object = std::vector>; absl::variant value; }; template testing::Matcher ValueIs(M inner) { return Field(&Value::value, VariantWith(inner)); } // Executes `test` once for each three-segment split of `json`. void Do(absl::string_view json, std::function test, bool verify_all_consumed = true) { SCOPED_TRACE(absl::StrCat("json: ", absl::CHexEscape(json))); for (size_t i = 0; i < json.size(); ++i) { for (size_t j = 0; j < json.size() - i + 1; ++j) { SCOPED_TRACE(absl::StrFormat("json[0:%d], json[%d:%d], json[%d:%d]", i, i, i + j, i + j, json.size())); std::string first(json.substr(0, i)); std::string second(json.substr(i, j)); std::string third(json.substr(i + j)); TestInputStream in = {first, second, third}; test(&in); if (testing::Test::HasFailure()) { return; } if (verify_all_consumed) { if (!absl::c_all_of(third, [](char c) { return absl::ascii_isspace(c); })) { ASSERT_GE(in.Consumed(), 3); } else if (!absl::c_all_of( second, [](char c) { return absl::ascii_isspace(c); })) { ASSERT_GE(in.Consumed(), 2); } else { ASSERT_GE(in.Consumed(), 1); } } } } } void BadInner(absl::string_view json, ParseOptions opts = {}) { Do( json, [=](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream, opts), StatusIs(absl::StatusCode::kInvalidArgument)); }, false); } // Like Do, but runs a legacy syntax test twice: once with legacy settings, once // without. 
For the latter, the test is expected to fail; for the former, // `test` is called so it can run expectations. void DoLegacy(absl::string_view json, std::function test) { Do(json, [&](io::ZeroCopyInputStream* stream) { ParseOptions options; options.allow_legacy_syntax = true; auto value = Value::Parse(stream, options); ASSERT_OK(value); test(*value); }); BadInner(json); } // Like Bad, but ensures json fails to parse in both modes. void Bad(absl::string_view json) { ParseOptions options; options.allow_legacy_syntax = true; BadInner(json, options); BadInner(json); } TEST(LexerTest, Null) { Do("null", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(_))); }); } TEST(LexerTest, False) { Do("false", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(false))); }); } TEST(LexerTest, True) { Do("true", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(true))); }); } TEST(LexerTest, Typos) { Bad("-"); Bad("-foo"); Bad("nule"); } TEST(LexerTest, UnknownCharacters) { Bad("*"); Bad("[*]"); Bad("{key: *}"); } TEST(LexerTest, EmptyString) { Do(R"json("")json", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(IsEmpty()))); }); } TEST(LexerTest, SimpleString) { Do(R"json("My String")json", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs("My String"))); }); } TEST(NonStandard, SingleQuoteString) { DoLegacy(R"json('My String')json", [=](const Value& value) { EXPECT_THAT(value, ValueIs("My String")); }); } TEST(NonStandard, ControlCharsInString) { DoLegacy("\"\1\2\3\4\5\6\7\b\n\f\r\"", [=](const Value& value) { EXPECT_THAT(value, ValueIs("\1\2\3\4\5\6\7\b\n\f\r")); }); } TEST(LexerTest, Latin) { Do(R"json("Pokémon")json", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs("Pokémon"))); }); } TEST(LexerTest, Cjk) { 
Do(R"json("施氏食獅史")json", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs("施氏食獅史"))); }); } TEST(LexerTest, BrokenString) { Bad(R"json("broken)json"); Bad(R"json("broken')json"); Bad(R"json("broken\")json"); } TEST(NonStandard, BrokenString) { Bad(R"json('broken)json"); Bad(R"json('broken")json"); } TEST(LexerTest, BrokenEscape) { Bad(R"json("\)json"); Bad(R"json("\a")json"); Bad(R"json("\u")json"); Bad(R"json("\u123")json"); Bad(R"json("\u{1f36f}")json"); Bad(R"json("\u123$$$")json"); Bad(R"json("\ud800\udcfg")json"); } void GoodNumber(absl::string_view json, double value) { Do(json, [value](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(value))); }); } TEST(LexerTest, Zero) { GoodNumber("0", 0); GoodNumber("0.0", 0); GoodNumber("0.000", 0); GoodNumber("-0", -0.0); GoodNumber("-0.0", -0.0); Bad("00"); Bad("-00"); } TEST(LexerTest, Integer) { GoodNumber("123456", 123456); GoodNumber("-79497823553162768", -79497823553162768); GoodNumber("11779497823553163264", 11779497823553163264u); Bad("0777"); } TEST(LexerTest, Overflow) { GoodNumber("18446744073709551616", 18446744073709552000.0); GoodNumber("-18446744073709551616", -18446744073709551616.0); Bad("1.89769e308"); Bad("-1.89769e308"); } TEST(LexerTest, Double) { GoodNumber("42.5", 42.5); GoodNumber("42.50", 42.50); GoodNumber("-1045.235", -1045.235); GoodNumber("-0.235", -0.235); Bad("42."); Bad("01.3"); Bad(".5"); Bad("-.5"); } TEST(LexerTest, Scientific) { GoodNumber("1.2345e+10", 1.2345e+10); GoodNumber("1.2345e-10", 1.2345e-10); GoodNumber("1.2345e10", 1.2345e10); GoodNumber("1.2345E+10", 1.2345e+10); GoodNumber("1.2345E-10", 1.2345e-10); GoodNumber("1.2345E10", 1.2345e10); GoodNumber("0e0", 0); GoodNumber("9E9", 9e9); Bad("1.e5"); Bad("-e5"); Bad("1e"); Bad("1e-"); Bad("1e+"); } TEST(LexerTest, EmptyArray) { Do("[]", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(IsEmpty()))); 
}); } TEST(LexerTest, PrimitiveArray) { absl::string_view json = R"json( [true, false, null, "string"] )json"; Do(json, [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(ElementsAre( ValueIs(true), // ValueIs(false), // ValueIs(_), // ValueIs("string") // )))); }); } TEST(LexerTest, BrokenArray) { Bad("["); Bad("[["); Bad("[true, null}"); } TEST(LexerTest, BrokenStringInArray) { Bad(R"json(["Unterminated])json"); } TEST(LexerTest, NestedArray) { absl::string_view json = R"json( [ [22, -127, 45.3, -1056.4, 11779497823553162765], {"key": true} ] )json"; Do(json, [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(ElementsAre( ValueIs(ElementsAre( ValueIs(22), // ValueIs(-127), // ValueIs(45.3), // ValueIs(-1056.4), // ValueIs(11779497823553162765u) // )), ValueIs( ElementsAre(Pair("key", ValueIs(true)))))))); }); } TEST(LexerTest, EmptyObject) { Do("{}", [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(IsEmpty()))); }); } TEST(LexerTest, BrokenObject) { Bad("{"); Bad("{{"); Bad(R"json({"key": true])json"); Bad(R"json({"key")json"); Bad(R"json({"key":})json"); } TEST(LexerTest, BrokenStringInObject) { Bad(R"json({"oops": "Unterminated})json"); } TEST(LexerTest, NonPairInObject) { Bad("{null}"); Bad("{true}"); Bad("{false}"); Bad("{42}"); Bad("{[null]}"); Bad(R"json({{"nest_pas": true}})json"); Bad(R"json({"missing colon"})json"); } TEST(NonStandard, NonPairInObject) { Bad("{'missing colon'}"); Bad("{missing_colon}"); } TEST(LexerTest, WrongCommas) { Bad("[null null]"); Bad("[null,, null]"); Bad(R"json({"a": 0 "b": true})json"); Bad(R"json({"a": 0,, "b": true})json"); } TEST(NonStandard, Keys) { DoLegacy(R"json({'s': true})json", [](const Value& value) { EXPECT_THAT(value, ValueIs( ElementsAre(Pair("s", ValueIs(true))))); }); DoLegacy(R"json({key: null})json", [](const Value& value) { EXPECT_THAT(value, ValueIs( ElementsAre(Pair("key", 
ValueIs(_))))); }); DoLegacy(R"json({snake_key: []})json", [](const Value& value) { EXPECT_THAT(value, ValueIs(ElementsAre(Pair( "snake_key", ValueIs(IsEmpty()))))); }); DoLegacy(R"json({camelKey: {}})json", [](const Value& value) { EXPECT_THAT(value, ValueIs(ElementsAre(Pair( "camelKey", ValueIs(IsEmpty()))))); }); } TEST(NonStandard, KeywordPrefixedKeys) { DoLegacy(R"json({nullkey: "a"})json", [](const Value& value) { EXPECT_THAT(value, ValueIs(ElementsAre( Pair("nullkey", ValueIs("a"))))); }); DoLegacy(R"json({truekey: "b"})json", [](const Value& value) { EXPECT_THAT(value, ValueIs(ElementsAre( Pair("truekey", ValueIs("b"))))); }); DoLegacy(R"json({falsekey: "c"})json", [](const Value& value) { EXPECT_THAT(value, ValueIs(ElementsAre( Pair("falsekey", ValueIs("c"))))); }); } TEST(LexerTest, BadKeys) { Bad("{null: 0}"); Bad("{true: 0}"); Bad("{false: 0}"); Bad("{lisp-kebab: 0}"); Bad("{42: true}"); } TEST(LexerTest, NestedObject) { absl::string_view json = R"json( { "t": true, "f": false, "n": null, "s": "a string", "pi": 22, "ni": -127, "pd": 45.3, "nd": -1056.4, "pl": 11779497823553162765, "l": [ [ ] ], "o": { "key": true } } )json"; Do(json, [](io::ZeroCopyInputStream* stream) { EXPECT_THAT(Value::Parse(stream), IsOkAndHolds(ValueIs(ElementsAre( Pair("t", ValueIs(true)), // Pair("f", ValueIs(false)), // Pair("n", ValueIs(_)), // Pair("s", ValueIs("a string")), // Pair("pi", ValueIs(22)), // Pair("ni", ValueIs(-127)), // Pair("pd", ValueIs(45.3)), // Pair("nd", ValueIs(-1056.4)), // Pair("pl", ValueIs(11779497823553162765u)), // Pair("l", ValueIs(ElementsAre( ValueIs(IsEmpty())))), // Pair("o", ValueIs(ElementsAre( Pair("key", ValueIs(true))))) // )))); }); } TEST(LexerTest, RejectNonUtf8) { absl::string_view json = R"json( { "address": x"施氏食獅史" } )json"; Bad(absl::StrReplaceAll(json, {{"x", "\xff"}})); } TEST(LexerTest, RejectNonUtf8String) { absl::string_view json = R"json( { "address": "施氏x食獅史" } )json"; Bad(absl::StrReplaceAll(json, {{"x", "\xff"}})); } 
TEST(LexerTest, RejectNonUtf8Prefix) { Bad("\xff{}"); }

TEST(LexerTest, SurrogateEscape) {
  absl::string_view json =
      R"json( [ "\ud83d\udc08\u200D\u2b1B\ud83d\uDdA4" ] )json";
  Do(json, [](io::ZeroCopyInputStream* stream) {
    EXPECT_THAT(Value::Parse(stream),
                IsOkAndHolds(ValueIs<Value::Array>(
                    ElementsAre(ValueIs<std::string>("🐈‍⬛🖤")))));
  });
}

TEST(LexerTest, InvalidCodePoint) { Bad(R"json(["\ude36"])json"); }

TEST(LexerTest, LonelyHighSurrogate) {
  Bad(R"json(["\ud83d"])json");
  Bad(R"json(["\ud83d|trailing"])json");
  Bad(R"json(["\ud83d\ude--"])json");
  Bad(R"json(["\ud83d\ud83d"])json");
}

TEST(LexerTest, AsciiEscape) {
  absl::string_view json =
      R"json( ["\b", "\ning", "test\f", "\r\t", "test\\\"\/ing"] )json";
  Do(json, [](io::ZeroCopyInputStream* stream) {
    EXPECT_THAT(Value::Parse(stream),
                IsOkAndHolds(ValueIs<Value::Array>(ElementsAre(
                    ValueIs<std::string>("\b"),            //
                    ValueIs<std::string>("\ning"),         //
                    ValueIs<std::string>("test\f"),        //
                    ValueIs<std::string>("\r\t"),          //
                    ValueIs<std::string>("test\\\"/ing")   //
                    ))));
  });
}

TEST(NonStandard, AsciiEscape) {
  DoLegacy(R"json(["\'", '\''])json", [](const Value& value) {
    EXPECT_THAT(value, ValueIs<Value::Array>(ElementsAre(
                           ValueIs<std::string>("'"),  //
                           ValueIs<std::string>("'")   //
                           )));
  });
}

TEST(NonStandard, TrailingCommas) {
  DoLegacy(R"json({"foo": 42,})json", [](const Value& value) {
    EXPECT_THAT(value, ValueIs<Value::Object>(
                           ElementsAre(Pair("foo", ValueIs<double>(42)))));
  });
  DoLegacy(R"json({"foo": [42,],})json", [](const Value& value) {
    EXPECT_THAT(
        value,
        ValueIs<Value::Object>(ElementsAre(Pair(
            "foo", ValueIs<Value::Array>(ElementsAre(ValueIs<double>(42)))))));
  });
  DoLegacy(R"json([42,])json", [](const Value& value) {
    EXPECT_THAT(value,
                ValueIs<Value::Array>(ElementsAre(ValueIs<double>(42))));
  });
  DoLegacy(R"json([{},])json", [](const Value& value) {
    EXPECT_THAT(value, ValueIs<Value::Array>(
                           ElementsAre(ValueIs<Value::Object>(IsEmpty()))));
  });
}

// These strings are enormous; so that the test actually finishes in a
// reasonable time, we skip using Do().
TEST(LexerTest, ArrayRecursion) {
  // kDefaultDepth nested arrays: expected to parse.
  std::string ok = std::string(ParseOptions::kDefaultDepth, '[') +
                   std::string(ParseOptions::kDefaultDepth, ']');

  {
    io::ArrayInputStream stream(ok.data(), static_cast<int>(ok.size()));
    auto value = Value::Parse(&stream);
    ASSERT_OK(value);

    // Walk down the nesting: every level but the innermost holds exactly one
    // element, and the innermost array is empty.
    Value* v = &*value;
    for (int i = 0; i < ParseOptions::kDefaultDepth - 1; ++i) {
      ASSERT_THAT(*v, ValueIs<Value::Array>(SizeIs(1)));
      v = &absl::get<Value::Array>(v->value)[0];
    }
    ASSERT_THAT(*v, ValueIs<Value::Array>(IsEmpty()));
  }

  {
    // One extra level of nesting: expected to be rejected.
    std::string evil = absl::StrFormat("[%s]", ok);
    io::ArrayInputStream stream(evil.data(), static_cast<int>(evil.size()));
    ASSERT_THAT(Value::Parse(&stream),
                StatusIs(absl::StatusCode::kInvalidArgument));
  }
}

TEST(LexerTest, ObjectRecursion) {
  // kDefaultDepth nested objects, each outer one holding a single key "k":
  // expected to parse.
  std::string ok;
  for (int i = 0; i < ParseOptions::kDefaultDepth - 1; ++i) {
    absl::StrAppend(&ok, "{\"k\":");
  }
  absl::StrAppend(&ok, "{");
  ok += std::string(ParseOptions::kDefaultDepth, '}');

  {
    io::ArrayInputStream stream(ok.data(), static_cast<int>(ok.size()));
    auto value = Value::Parse(&stream);
    ASSERT_OK(value);

    // Walk down through the "k" entries; the innermost object is empty.
    Value* v = &*value;
    for (int i = 0; i < ParseOptions::kDefaultDepth - 1; ++i) {
      ASSERT_THAT(*v, ValueIs<Value::Object>(ElementsAre(Pair("k", _))));
      v = &absl::get<Value::Object>(v->value)[0].second;
    }
    ASSERT_THAT(*v, ValueIs<Value::Object>(IsEmpty()));
  }

  {
    // One extra level of nesting: expected to be rejected.
    std::string evil = absl::StrFormat("{\"k\":%s}", ok);
    io::ArrayInputStream stream(evil.data(), static_cast<int>(evil.size()));
    ASSERT_THAT(Value::Parse(&stream),
                StatusIs(absl::StatusCode::kInvalidArgument));
  }
}
}  // namespace
}  // namespace json_internal
}  // namespace protobuf
}  // namespace google