first commit

This commit is contained in:
sanya
2025-09-01 14:20:39 +00:00
committed by ExternPointer
commit 490fc11f6a
4328 changed files with 1796224 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
/bin/*
!/bin/data
!/bin/encodings
!/bin/jsonchecker
/build
/doc/html
/doc/doxygen_*.db
*.a
# Temporary files created during CMake build
CMakeCache.txt
CMakeFiles
cmake_install.cmake
CTestTestfile.cmake
Makefile
RapidJSON*.cmake
RapidJSON.pc
Testing
/googletest
install_manifest.txt
Doxyfile
DartConfiguration.tcl

View File

@@ -0,0 +1,3 @@
[submodule "thirdparty/gtest"]
path = thirdparty/gtest
url = https://chromium.googlesource.com/external/googletest.git

View File

@@ -0,0 +1,44 @@
language: cpp
compiler:
- clang
- gcc
env:
matrix:
- CONF=debug ARCH=x86_64
- CONF=release ARCH=x86_64
- CONF=debug ARCH=x86
- CONF=release ARCH=x86
global:
- ARCH_FLAGS_x86='-m32' # #266: don't use SSE on 32-bit
- ARCH_FLAGS_x86_64='-msse4.2' # use SSE4.2 on 64-bit
- GITHUB_REPO='miloyip/rapidjson'
- secure: "HrsaCb+N66EG1HR+LWH1u51SjaJyRwJEDzqJGYMB7LJ/bfqb9mWKF1fLvZGk46W5t7TVaXRDD5KHFx9DPWvKn4gRUVkwTHEy262ah5ORh8M6n/6VVVajeV/AYt2C0sswdkDBDO4Xq+xy5gdw3G8s1A4Inbm73pUh+6vx+7ltBbk="
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq cmake doxygen valgrind
- if [ "$ARCH" = "x86" ]; then sudo apt-get install -qq g++-multilib libc6-dbg:i386; fi
install: true
before_script:
# hack to avoid Valgrind bug (https://bugs.kde.org/show_bug.cgi?id=326469),
# exposed by merging PR#163 (using -march=native)
- sed -i "s/-march=native//" CMakeLists.txt
- mkdir build
- >
eval "ARCH_FLAGS=\${ARCH_FLAGS_${ARCH}}" ;
(cd build && cmake
-DRAPIDJSON_HAS_STDSTRING=ON
-DCMAKE_VERBOSE_MAKEFILE=ON
-DCMAKE_BUILD_TYPE=$CONF
-DCMAKE_CXX_FLAGS="$ARCH_FLAGS" ..)
script:
- cd build
- make tests
- make examples
- ctest -V `[ "$CONF" = "release" ] || echo "-E perftest"`
- make travis_doc

View File

@@ -0,0 +1,112 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
SET(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules)
PROJECT(RapidJSON CXX)
set(LIB_MAJOR_VERSION "0")
set(LIB_MINOR_VERSION "12")
set(LIB_PATCH_VERSION "0")
set(LIB_VERSION_STRING "${LIB_MAJOR_VERSION}.${LIB_MINOR_VERSION}.${LIB_PATCH_VERSION}")
# compile in release with debug info mode by default
SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Build Type")
# Build all binaries in a separate directory
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
option(RAPIDJSON_BUILD_DOC "Build rapidjson documentation." ON)
option(RAPIDJSON_BUILD_EXAMPLES "Build rapidjson examples." ON)
option(RAPIDJSON_BUILD_TESTS "Build rapidjson perftests and unittests." ON)
option(RAPIDJSON_HAS_STDSTRING "" OFF)
if(RAPIDJSON_HAS_STDSTRING)
add_definitions(-DRAPIDJSON_HAS_STDSTRING)
endif()
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Wextra")
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Wextra")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_definitions(-D_CRT_SECURE_NO_WARNINGS=1)
endif()
#add extra search paths for libraries and includes
SET(INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "The directory the headers are installed in")
SET(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE STRING "Directory where lib will install")
SET(DOC_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/share/doc/${PROJECT_NAME}" CACHE PATH "Path to the documentation")
IF(UNIX OR CYGWIN)
SET(_CMAKE_INSTALL_DIR "${LIB_INSTALL_DIR}/cmake/${PROJECT_NAME}")
ELSEIF(WIN32)
SET(_CMAKE_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/cmake")
ENDIF()
SET(CMAKE_INSTALL_DIR "${_CMAKE_INSTALL_DIR}" CACHE PATH "The directory cmake fiels are installed in")
include_directories(${CMAKE_SOURCE_DIR}/include)
if(RAPIDJSON_BUILD_DOC)
add_subdirectory(doc)
endif()
add_custom_target(travis_doc)
add_custom_command(TARGET travis_doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/travis-doxygen.sh)
if(RAPIDJSON_BUILD_EXAMPLES)
add_subdirectory(example)
endif()
if(RAPIDJSON_BUILD_TESTS)
if(MSVC11)
# required for VS2012 due to missing support for variadic templates
add_definitions(-D_VARIADIC_MAX=10)
endif(MSVC11)
add_subdirectory(test)
include(CTest)
endif()
# pkg-config
IF (UNIX OR CYGWIN)
CONFIGURE_FILE (${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.pc.in
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
@ONLY)
INSTALL (FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
DESTINATION "${LIB_INSTALL_DIR}/pkgconfig"
COMPONENT pkgconfig)
ENDIF()
install(FILES readme.md
DESTINATION "${DOC_INSTALL_DIR}"
COMPONENT doc)
install(DIRECTORY include/rapidjson
DESTINATION "${INCLUDE_INSTALL_DIR}"
COMPONENT dev)
install(DIRECTORY example/
DESTINATION "${DOC_INSTALL_DIR}/examples"
COMPONENT examples
# Following patterns are for excluding the intermediate/object files
# from an install of in-source CMake build.
PATTERN "CMakeFiles" EXCLUDE
PATTERN "Makefile" EXCLUDE
PATTERN "cmake_install.cmake" EXCLUDE)
# Provide config and version files to be used by other applications
# ===============================
export(PACKAGE ${PROJECT_NAME})
# cmake-modules
CONFIGURE_FILE(${PROJECT_NAME}Config.cmake.in
${PROJECT_NAME}Config.cmake
@ONLY)
CONFIGURE_FILE(${PROJECT_NAME}ConfigVersion.cmake.in
${PROJECT_NAME}ConfigVersion.cmake
@ONLY)
INSTALL(FILES
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
DESTINATION "${CMAKE_INSTALL_DIR}"
COMPONENT dev)

View File

@@ -0,0 +1,22 @@
SET(GTEST_SEARCH_PATH
"${GTEST_SOURCE_DIR}"
"${CMAKE_SOURCE_DIR}/thirdparty/gtest")
IF(UNIX)
LIST(INSERT GTEST_SEARCH_PATH 1 "/usr/src/gtest")
ENDIF()
FIND_PATH(GTEST_SOURCE_DIR
NAMES CMakeLists.txt src/gtest_main.cc
PATHS ${GTEST_SEARCH_PATH})
# Debian installs gtest include directory in /usr/include, thus need to look
# for include directory separately from source directory.
FIND_PATH(GTEST_INCLUDE_DIR
NAMES gtest/gtest.h
PATH_SUFFIXES include
PATHS ${GTEST_SEARCH_PATH})
find_package_handle_standard_args(GTestSrc DEFAULT_MSG
GTEST_SOURCE_DIR
GTEST_INCLUDE_DIR)

View File

@@ -0,0 +1,7 @@
includedir=@INCLUDE_INSTALL_DIR@
Name: @PROJECT_NAME@
Description: A fast JSON parser/generator for C++ with both SAX/DOM style API
Version: @LIB_VERSION_STRING@
URL: https://github.com/miloyip/rapidjson
Cflags: -I${includedir}

View File

@@ -0,0 +1,3 @@
get_filename_component(RAPIDJSON_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
set(RAPIDJSON_INCLUDE_DIRS "@INCLUDE_INSTALL_DIR@")
message(STATUS "RapidJSON found. Headers: ${RAPIDJSON_INCLUDE_DIRS}")

View File

@@ -0,0 +1,10 @@
SET(PACKAGE_VERSION "@LIB_VERSION_STRING@")
IF (PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
SET(PACKAGE_VERSION_EXACT "true")
ENDIF (PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
IF (NOT PACKAGE_FIND_VERSION VERSION_GREATER PACKAGE_VERSION)
SET(PACKAGE_VERSION_COMPATIBLE "true")
ELSE (NOT PACKAGE_FIND_VERSION VERSION_GREATER PACKAGE_VERSION)
SET(PACKAGE_VERSION_UNSUITABLE "true")
ENDIF (NOT PACKAGE_FIND_VERSION VERSION_GREATER PACKAGE_VERSION)

View File

@@ -0,0 +1,28 @@
version: 0.12.{build}
configuration:
- Debug
- Release
environment:
matrix:
- VS_VERSION: 11
VS_PLATFORM: win32
- VS_VERSION: 11
VS_PLATFORM: x64
- VS_VERSION: 12
VS_PLATFORM: win32
- VS_VERSION: 12
VS_PLATFORM: x64
before_build:
- git submodule update --init --recursive
- cmake -H. -BBuild/VS -G "Visual Studio %VS_VERSION%" -DCMAKE_GENERATOR_PLATFORM=%VS_PLATFORM% -DBUILD_SHARED_LIBS=true -Wno-dev
build:
project: Build\VS\RapidJSON.sln
parallel: true
verbosity: minimal
test_script:
- cd Build\VS && if %CONFIGURATION%==Debug (ctest --verbose -E perftest --build-config %CONFIGURATION%) else (ctest --verbose --build-config %CONFIGURATION%)

View File

@@ -0,0 +1,22 @@
{
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
}

View File

@@ -0,0 +1,27 @@
{"menu": {
"header": "SVG Viewer",
"items": [
{"id": "Open"},
{"id": "OpenNew", "label": "Open New"},
null,
{"id": "ZoomIn", "label": "Zoom In"},
{"id": "ZoomOut", "label": "Zoom Out"},
{"id": "OriginalView", "label": "Original View"},
null,
{"id": "Quality"},
{"id": "Pause"},
{"id": "Mute"},
null,
{"id": "Find", "label": "Find..."},
{"id": "FindAgain", "label": "Find Again"},
{"id": "Copy"},
{"id": "CopyAgain", "label": "Copy Again"},
{"id": "CopySVG", "label": "Copy SVG"},
{"id": "ViewSVG", "label": "View SVG"},
{"id": "ViewSource", "label": "View Source"},
{"id": "SaveAs", "label": "Save As"},
null,
{"id": "Help"},
{"id": "About", "label": "About Adobe CVG Viewer..."}
]
}}

View File

@@ -0,0 +1 @@
sample.json is obtained from http://code.google.com/p/json-test-suite/downloads/detail?name=sample.zip

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,88 @@
{"web-app": {
"servlet": [
{
"servlet-name": "cofaxCDS",
"servlet-class": "org.cofax.cds.CDSServlet",
"init-param": {
"configGlossary:installationAt": "Philadelphia, PA",
"configGlossary:adminEmail": "ksm@pobox.com",
"configGlossary:poweredBy": "Cofax",
"configGlossary:poweredByIcon": "/images/cofax.gif",
"configGlossary:staticPath": "/content/static",
"templateProcessorClass": "org.cofax.WysiwygTemplate",
"templateLoaderClass": "org.cofax.FilesTemplateLoader",
"templatePath": "templates",
"templateOverridePath": "",
"defaultListTemplate": "listTemplate.htm",
"defaultFileTemplate": "articleTemplate.htm",
"useJSP": false,
"jspListTemplate": "listTemplate.jsp",
"jspFileTemplate": "articleTemplate.jsp",
"cachePackageTagsTrack": 200,
"cachePackageTagsStore": 200,
"cachePackageTagsRefresh": 60,
"cacheTemplatesTrack": 100,
"cacheTemplatesStore": 50,
"cacheTemplatesRefresh": 15,
"cachePagesTrack": 200,
"cachePagesStore": 100,
"cachePagesRefresh": 10,
"cachePagesDirtyRead": 10,
"searchEngineListTemplate": "forSearchEnginesList.htm",
"searchEngineFileTemplate": "forSearchEngines.htm",
"searchEngineRobotsDb": "WEB-INF/robots.db",
"useDataStore": true,
"dataStoreClass": "org.cofax.SqlDataStore",
"redirectionClass": "org.cofax.SqlRedirection",
"dataStoreName": "cofax",
"dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver",
"dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon",
"dataStoreUser": "sa",
"dataStorePassword": "dataStoreTestQuery",
"dataStoreTestQuery": "SET NOCOUNT ON;select test='test';",
"dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log",
"dataStoreInitConns": 10,
"dataStoreMaxConns": 100,
"dataStoreConnUsageLimit": 100,
"dataStoreLogLevel": "debug",
"maxUrlLength": 500}},
{
"servlet-name": "cofaxEmail",
"servlet-class": "org.cofax.cds.EmailServlet",
"init-param": {
"mailHost": "mail1",
"mailHostOverride": "mail2"}},
{
"servlet-name": "cofaxAdmin",
"servlet-class": "org.cofax.cds.AdminServlet"},
{
"servlet-name": "fileServlet",
"servlet-class": "org.cofax.cds.FileServlet"},
{
"servlet-name": "cofaxTools",
"servlet-class": "org.cofax.cms.CofaxToolsServlet",
"init-param": {
"templatePath": "toolstemplates/",
"log": 1,
"logLocation": "/usr/local/tomcat/logs/CofaxTools.log",
"logMaxSize": "",
"dataLog": 1,
"dataLogLocation": "/usr/local/tomcat/logs/dataLog.log",
"dataLogMaxSize": "",
"removePageCache": "/content/admin/remove?cache=pages&id=",
"removeTemplateCache": "/content/admin/remove?cache=templates&id=",
"fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder",
"lookInContext": 1,
"adminGroupID": 4,
"betaServer": true}}],
"servlet-mapping": {
"cofaxCDS": "/",
"cofaxEmail": "/cofaxutil/aemail/*",
"cofaxAdmin": "/admin/*",
"fileServlet": "/static/*",
"cofaxTools": "/tools/*"},
"taglib": {
"taglib-uri": "cofax.tld",
"taglib-location": "/WEB-INF/tlds/cofax.tld"}}}

View File

@@ -0,0 +1,26 @@
{"widget": {
"debug": "on",
"window": {
"title": "Sample Konfabulator Widget",
"name": "main_window",
"width": 500,
"height": 500
},
"image": {
"src": "Images/Sun.png",
"name": "sun1",
"hOffset": 250,
"vOffset": 250,
"alignment": "center"
},
"text": {
"data": "Click Here",
"size": 36,
"style": "bold",
"name": "text1",
"hOffset": 250,
"vOffset": 100,
"alignment": "center",
"onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
}
}}

View File

@@ -0,0 +1,7 @@
{
"en":"I can eat glass and it doesn't hurt me.",
"zh-Hant":"我能吞下玻璃而不傷身體。",
"zh-Hans":"我能吞下玻璃而不伤身体。",
"ja":"私はガラスを食べられます。それは私を傷つけません。",
"ko":"나는 유리를 먹을 수 있어요. 그래도 아프지 않아요"
}

View File

@@ -0,0 +1,7 @@
{
"en":"I can eat glass and it doesn't hurt me.",
"zh-Hant":"我能吞下玻璃而不傷身體。",
"zh-Hans":"我能吞下玻璃而不伤身体。",
"ja":"私はガラスを食べられます。それは私を傷つけません。",
"ko":"나는 유리를 먹을 수 있어요. 그래도 아프지 않아요"
}

View File

@@ -0,0 +1 @@
"A JSON payload should be an object or array, not a string."

View File

@@ -0,0 +1 @@
{"Extra value after close": true} "misplaced quoted value"

View File

@@ -0,0 +1 @@
{"Illegal expression": 1 + 2}

View File

@@ -0,0 +1 @@
{"Illegal invocation": alert()}

View File

@@ -0,0 +1 @@
{"Numbers cannot have leading zeroes": 013}

View File

@@ -0,0 +1 @@
{"Numbers cannot be hex": 0x14}

View File

@@ -0,0 +1 @@
["Illegal backslash escape: \x15"]

View File

@@ -0,0 +1 @@
[\naked]

View File

@@ -0,0 +1 @@
["Illegal backslash escape: \017"]

View File

@@ -0,0 +1 @@
[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]

View File

@@ -0,0 +1 @@
{"Missing colon" null}

View File

@@ -0,0 +1 @@
["Unclosed array"

View File

@@ -0,0 +1 @@
{"Double colon":: null}

View File

@@ -0,0 +1 @@
{"Comma instead of colon", null}

View File

@@ -0,0 +1 @@
["Colon instead of comma": false]

View File

@@ -0,0 +1 @@
["Bad value", truth]

View File

@@ -0,0 +1 @@
['single quote']

View File

@@ -0,0 +1 @@
[" tab character in string "]

View File

@@ -0,0 +1 @@
["tab\ character\ in\ string\ "]

View File

@@ -0,0 +1,2 @@
["line
break"]

View File

@@ -0,0 +1,2 @@
["line\
break"]

View File

@@ -0,0 +1 @@
[0e]

View File

@@ -0,0 +1 @@
{unquoted_key: "keys must be quoted"}

View File

@@ -0,0 +1 @@
[0e+]

View File

@@ -0,0 +1 @@
[0e+-1]

View File

@@ -0,0 +1 @@
{"Comma instead if closing brace": true,

View File

@@ -0,0 +1 @@
["mismatch"}

View File

@@ -0,0 +1 @@
["extra comma",]

View File

@@ -0,0 +1 @@
["double extra comma",,]

View File

@@ -0,0 +1 @@
[ , "<-- missing value"]

View File

@@ -0,0 +1 @@
["Comma after the close"],

View File

@@ -0,0 +1 @@
["Extra close"]]

View File

@@ -0,0 +1 @@
{"Extra comma": true,}

View File

@@ -0,0 +1,58 @@
[
"JSON Test Pattern pass1",
{"object with 1 member":["array with 1 element"]},
{},
[],
-42,
true,
false,
null,
{
"integer": 1234567890,
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
"": 23456789012E66,
"zero": 0,
"one": 1,
"space": " ",
"quote": "\"",
"backslash": "\\",
"controls": "\b\f\n\r\t",
"slash": "/ & \/",
"alpha": "abcdefghijklmnopqrstuvwyz",
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
"digit": "0123456789",
"0123456789": "digit",
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
"true": true,
"false": false,
"null": null,
"array":[ ],
"object":{ },
"address": "50 St. James Street",
"url": "http://www.JSON.org/",
"comment": "// /* <!-- --",
"# -- --> */": " ",
" s p a c e d " :[1,2 , 3
,
4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
},
0.5 ,98.6
,
99.44
,
1066,
1e1,
0.1e1,
1e-1,
1e00,2e+00,2e-00
,"rosebud"]

View File

@@ -0,0 +1 @@
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]

View File

@@ -0,0 +1,6 @@
{
"JSON Test Pattern pass3": {
"The outermost value": "must be an object or array.",
"In this test": "It is an object."
}
}

View File

@@ -0,0 +1,3 @@
Test suite from http://json.org/JSON_checker/.
If the JSON_checker is working correctly, it must accept all of the pass*.json files and reject all of the fail*.json files.

View File

@@ -0,0 +1,23 @@
find_package(Doxygen)
IF(NOT DOXYGEN_FOUND)
MESSAGE(STATUS "No Doxygen found. Documentation won't be built")
ELSE()
file(GLOB SOURCES ${CMAKE_SOURCE_DIR}/include/*)
file(GLOB MARKDOWN_DOC ${CMAKE_SOURCE_DIR}/doc/*.md)
list(APPEND MARKDOWN_DOC ${CMAKE_SOURCE_DIR}/readme.md)
CONFIGURE_FILE(Doxyfile.in Doxyfile @ONLY)
add_custom_command(OUTPUT html
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/html
DEPENDS ${MARKDOWN_DOC} ${SOURCES} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
)
add_custom_target(doc ALL DEPENDS html)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html
DESTINATION ${DOC_INSTALL_DIR}
COMPONENT doc)
ENDIF()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,65 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
{
node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray]
oldjson [label="\{|\"|m|s|g|\"|:|\"|H|e|l|l|o|\\|n|W|o|r|l|d|!|\"|,|\"|\\|u|0|0|7|3|t|a|r|s|\"|:|1|0|\}", xlabel="Before Parsing"]
//newjson [label="\{|\"|<a>m|s|g|\\0|:|\"|<b>H|e|l|l|o|\\n|W|o|r|l|d|!|\\0|\"|,|\"|<c>s|t|a|r|s|\\0|t|a|r|s|:|1|0|\}", xlabel="After Parsing"]
newjson [shape=plaintext, label=<
<table BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="2"><tr>
<td>{</td>
<td>"</td><td port="a">m</td><td>s</td><td>g</td><td bgcolor="yellow">\\0</td>
<td>:</td>
<td>"</td><td port="b">H</td><td>e</td><td>l</td><td>l</td><td>o</td><td bgcolor="yellow">\\n</td><td bgcolor="yellow">W</td><td bgcolor="yellow">o</td><td bgcolor="yellow">r</td><td bgcolor="yellow">l</td><td bgcolor="yellow">d</td><td bgcolor="yellow">!</td><td bgcolor="yellow">\\0</td><td>"</td>
<td>,</td>
<td>"</td><td port="c" bgcolor="yellow">s</td><td bgcolor="yellow">t</td><td bgcolor="yellow">a</td><td bgcolor="yellow">r</td><td bgcolor="yellow">s</td><td bgcolor="yellow">\\0</td><td>t</td><td>a</td><td>r</td><td>s</td>
<td>:</td>
<td>1</td><td>0</td>
<td>}</td>
</tr></table>
>, xlabel="After Parsing"]
}
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Document by In situ Parsing"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
root [label="{object|}", fillcolor=3]
{
msg [label="{string|<a>}", fillcolor=5]
helloworld [label="{string|<a>}", fillcolor=5]
stars [label="{string|<a>}", fillcolor=5]
ten [label="{number|10}", fillcolor=6]
}
}
oldjson -> root [label=" ParseInsitu()" lhead="cluster1"]
edge [arrowhead=vee]
root -> { msg; stars }
edge [arrowhead="none"]
msg -> helloworld
stars -> ten
{
edge [arrowhead=vee, arrowtail=dot, arrowsize=0.5, dir=both, tailclip=false]
msg:a:c -> newjson:a
helloworld:a:c -> newjson:b
stars:a:c -> newjson:c
}
//oldjson -> newjson [style=invis]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -0,0 +1,62 @@
digraph {
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
penwidth=0.0
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
node [shape = doublecircle]; Start; Finish;
node [shape = box; style = "rounded, filled"; fillcolor=white ];
Start -> ArrayInitial [label=" ["];
Start -> ObjectInitial [label=" {"];
subgraph clusterArray {
margin="10,10"
style=filled
fillcolor=gray95
label = "Array"
ArrayInitial; Element; ElementDelimiter; ArrayFinish;
}
subgraph clusterObject {
margin="10,10"
style=filled
fillcolor=gray95
label = "Object"
ObjectInitial; MemberKey; KeyValueDelimiter; MemberValue; MemberDelimiter; ObjectFinish;
}
ArrayInitial -> ArrayInitial [label="["];
ArrayInitial -> ArrayFinish [label=" ]"];
ArrayInitial -> ObjectInitial [label="{", constraint=false];
ArrayInitial -> Element [label="string\nfalse\ntrue\nnull\nnumber"];
Element -> ArrayFinish [label="]"];
Element -> ElementDelimiter [label=","];
ElementDelimiter -> ArrayInitial [label=" ["];
ElementDelimiter -> ObjectInitial [label="{"];
ElementDelimiter -> Element [label="string\nfalse\ntrue\nnull\nnumber"];
ObjectInitial -> ObjectFinish [label=" }"];
ObjectInitial -> MemberKey [label=" string "];
MemberKey -> KeyValueDelimiter [label=":"];
KeyValueDelimiter -> ArrayInitial [label="["];
KeyValueDelimiter -> ObjectInitial [label=" {"];
KeyValueDelimiter -> MemberValue [label=" string\n false\n true\n null\n number"];
MemberValue -> ObjectFinish [label="}"];
MemberValue -> MemberDelimiter [label=","];
MemberDelimiter -> MemberKey [label=" string "];
ArrayFinish -> Finish;
ObjectFinish -> Finish;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

View File

@@ -0,0 +1,47 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Before"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
{
rank = same
b1 [label="{b:number|456}", fillcolor=6]
a1 [label="{a:number|123}", fillcolor=6]
}
a1 -> b1 [style="dashed", label="Move", dir=back]
}
subgraph cluster2 {
margin="10,10"
labeljust="left"
label = "After"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
{
rank = same
b2 [label="{b:null|}", fillcolor=1]
a2 [label="{a:number|456}", fillcolor=6]
}
a2 -> b2 [style=invis, dir=back]
}
b1 -> b2 [style=invis]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

@@ -0,0 +1,62 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Before Copying (Hypothetic)"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
c1 [label="{contact:array|}", fillcolor=4]
c11 [label="{|}"]
c12 [label="{|}"]
c13 [shape="none", label="...", style="solid"]
o1 [label="{o:object|}", fillcolor=3]
ghost [label="{o:object|}", style=invis]
c1 -> o1 [style="dashed", label="AddMember", constraint=false]
edge [arrowhead=vee]
c1 -> { c11; c12; c13 }
o1 -> ghost [style=invis]
}
subgraph cluster2 {
margin="10,10"
labeljust="left"
label = "After Copying (Hypothetic)"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
c2 [label="{contact:array|}", fillcolor=4]
c3 [label="{array|}", fillcolor=4]
c21 [label="{|}"]
c22 [label="{|}"]
c23 [shape=none, label="...", style="solid"]
o2 [label="{o:object|}", fillcolor=3]
cs [label="{string|\"contact\"}", fillcolor=5]
c31 [label="{|}"]
c32 [label="{|}"]
c33 [shape="none", label="...", style="solid"]
edge [arrowhead=vee]
c2 -> { c21; c22; c23 }
o2 -> cs
cs -> c3 [arrowhead=none]
c3 -> { c31; c32; c33 }
}
ghost -> o2 [style=invis]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,60 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
forcelabels=true
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Before Moving"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
c1 [label="{contact:array|}", fillcolor=4]
c11 [label="{|}"]
c12 [label="{|}"]
c13 [shape=none, label="...", style="solid"]
o1 [label="{o:object|}", fillcolor=3]
ghost [label="{o:object|}", style=invis]
c1 -> o1 [style="dashed", constraint=false, label="AddMember"]
edge [arrowhead=vee]
c1 -> { c11; c12; c13 }
o1 -> ghost [style=invis]
}
subgraph cluster2 {
margin="10,10"
labeljust="left"
label = "After Moving"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
c2 [label="{contact:null|}", fillcolor=1]
c3 [label="{array|}", fillcolor=4]
c21 [label="{|}"]
c22 [label="{|}"]
c23 [shape="none", label="...", style="solid"]
o2 [label="{o:object|}", fillcolor=3]
cs [label="{string|\"contact\"}", fillcolor=5]
c2 -> o2 [style="dashed", constraint=false, label="AddMember", style=invis]
edge [arrowhead=vee]
c3 -> { c21; c22; c23 }
o2 -> cs
cs -> c3 [arrowhead=none]
}
ghost -> o2 [style=invis]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

View File

@@ -0,0 +1,56 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
{
node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray]
normaljson [label="\{|\"|m|s|g|\"|:|\"|H|e|l|l|o|\\|n|W|o|r|l|d|!|\"|,|\"|\\|u|0|0|7|3|t|a|r|s\"|:|1|0|\}"]
{
rank = same
msgstring [label="m|s|g|\\0"]
helloworldstring [label="H|e|l|l|o|\\n|W|o|r|l|d|!|\\0"]
starsstring [label="s|t|a|r|s\\0"]
}
}
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Document by Normal Parsing"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
root [label="{object|}", fillcolor=3]
{
msg [label="{string|<a>}", fillcolor=5]
helloworld [label="{string|<a>}", fillcolor=5]
stars [label="{string|<a>}", fillcolor=5]
ten [label="{number|10}", fillcolor=6]
}
}
normaljson -> root [label=" Parse()" lhead="cluster1"]
edge [arrowhead=vee]
root -> { msg; stars }
edge [arrowhead="none"]
msg -> helloworld
stars -> ten
edge [arrowhead=vee, arrowtail=dot, arrowsize=0.5, dir=both, tailclip=false]
msg:a:c -> msgstring:w
helloworld:a:c -> helloworldstring:w
stars:a:c -> starsstring:w
msgstring -> helloworldstring -> starsstring [style=invis]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

View File

@@ -0,0 +1,54 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal]
{
node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray]
srcjson [label="\{|p|r|o|j|e|c|t|\"|:|\"|r|a|p|i|d|j|s|o|n|\"|,|\"|s|t|a|r|s|\"|:|1|0|\}"]
dstjson [label="\{|p|r|o|j|e|c|t|\"|:|\"|r|a|p|i|d|j|s|o|n|\"|,|\"|s|t|a|r|s|\"|:|1|1|\}"]
}
{
node [shape="box", style="filled", fillcolor="gray95"]
Document2 [label="(Modified) Document"]
Writer
}
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Document"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
root [label="{object|}", fillcolor=3]
{
project [label="{string|\"project\"}", fillcolor=5]
rapidjson [label="{string|\"rapidjson\"}", fillcolor=5]
stars [label="{string|\"stars\"}", fillcolor=5]
ten [label="{number|10}", fillcolor=6]
}
edge [arrowhead=vee]
root -> { project; stars }
edge [arrowhead="none"]
project -> rapidjson
stars -> ten
}
srcjson -> root [label=" Parse()", lhead="cluster1"]
ten -> Document2 [label=" Increase \"stars\"", ltail="cluster1" ]
Document2 -> Writer [label=" Traverse DOM by Accept()"]
Writer -> dstjson [label=" Output to StringBuffer"]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

View File

@@ -0,0 +1,58 @@
digraph {
compound=true
fontname="Inconsolata, Consolas"
fontsize=10
margin="0,0"
ranksep=0.2
penwidth=0.5
node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5]
edge [fontname="Inconsolata, Consolas", fontsize=10]
subgraph cluster1 {
margin="10,10"
labeljust="left"
label = "Document"
style=filled
fillcolor=gray95
node [shape=Mrecord, style=filled, colorscheme=spectral7]
root [label="{object|}", fillcolor=3]
{
hello [label="{string|\"hello\"}", fillcolor=5]
t [label="{string|\"t\"}", fillcolor=5]
f [label="{string|\"f\"}", fillcolor=5]
n [label="{string|\"n\"}", fillcolor=5]
i [label="{string|\"i\"}", fillcolor=5]
pi [label="{string|\"pi\"}", fillcolor=5]
a [label="{string|\"a\"}", fillcolor=5]
world [label="{string|\"world\"}", fillcolor=5]
true [label="{true|}", fillcolor=7]
false [label="{false|}", fillcolor=2]
null [label="{null|}", fillcolor=1]
i1 [label="{number|123}", fillcolor=6]
pi1 [label="{number|3.1416}", fillcolor=6]
array [label="{array|size=4}", fillcolor=4]
a1 [label="{number|1}", fillcolor=6]
a2 [label="{number|2}", fillcolor=6]
a3 [label="{number|3}", fillcolor=6]
a4 [label="{number|4}", fillcolor=6]
}
edge [arrowhead=vee]
root -> { hello; t; f; n; i; pi; a }
array -> { a1; a2; a3; a4 }
edge [arrowhead=none]
hello -> world
t -> true
f -> false
n -> null
i -> i1
pi -> pi1
a -> array
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

View File

@@ -0,0 +1,272 @@
# DOM
Document Object Model(DOM) is an in-memory representation of JSON for query and manipulation. The basic usage of DOM is described in [Tutorial](doc/tutorial.md). This section will describe some details and more advanced usages.
[TOC]
# Template {#Template}
In the tutorial, `Value` and `Document` was used. Similarly to `std::string`, these are actually `typedef` of template classes:
~~~~~~~~~~cpp
namespace rapidjson {
template <typename Encoding, typename Allocator = MemoryPoolAllocator<> >
class GenericValue {
// ...
};
template <typename Encoding, typename Allocator = MemoryPoolAllocator<> >
class GenericDocument : public GenericValue<Encoding, Allocator> {
// ...
};
typedef GenericValue<UTF8<> > Value;
typedef GenericDocument<UTF8<> > Document;
} // namespace rapidjson
~~~~~~~~~~
User can customize these template parameters.
## Encoding {#Encoding}
The `Encoding` parameter specifies the encoding of JSON String value in memory. Possible options are `UTF8`, `UTF16`, `UTF32`. Note that, these 3 types are also template class. `UTF8<>` is `UTF8<char>`, which means using char to store the characters. You may refer to [Encoding](doc/encoding.md) for details.
Suppose a Windows application would query localization strings stored in JSON files. Unicode-enabled functions in Windows use UTF-16 (wide character) encoding. No matter what encoding was used in JSON files, we can store the strings in UTF-16 in memory.
~~~~~~~~~~cpp
using namespace rapidjson;
typedef GenericDocument<UTF16<> > WDocument;
typedef GenericValue<UTF16<> > WValue;
FILE* fp = fopen("localization.json", "rb"); // non-Windows use "r"
char readBuffer[256];
FileReadStream bis(fp, readBuffer, sizeof(readBuffer));
AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // wraps bis into eis
WDocument d;
d.ParseStream<0, AutoUTF<unsigned> >(eis);
const WValue locale(L"ja"); // Japanese
MessageBoxW(hWnd, d[locale].GetString(), L"Test", MB_OK);
~~~~~~~~~~
## Allocator {#Allocator}
The `Allocator` defines which allocator class is used when allocating/deallocating memory for `Document`/`Value`. `Document` owns, or references to an `Allocator` instance. On the other hand, `Value` does not do so, in order to reduce memory consumption.
The default allocator used in `GenericDocument` is `MemoryPoolAllocator`. This allocator actually allocate memory sequentially, and cannot deallocate one by one. This is very suitable when parsing a JSON to generate a DOM tree.
Another allocator is `CrtAllocator`, of which CRT is short for C RunTime library. This allocator simply calls the standard `malloc()`/`realloc()`/`free()`. When there is a lot of add and remove operations, this allocator may be preferred. But this allocator is far less efficient than `MemoryPoolAllocator`.
# Parsing {#Parsing}
`Document` provides several functions for parsing. In below, (1) is the fundamental function, while the others are helpers which call (1).
~~~~~~~~~~cpp
using namespace rapidjson;
// (1) Fundamental
template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
GenericDocument& GenericDocument::ParseStream(InputStream& is);
// (2) Using the same Encoding for stream
template <unsigned parseFlags, typename InputStream>
GenericDocument& GenericDocument::ParseStream(InputStream& is);
// (3) Using default parse flags
template <typename InputStream>
GenericDocument& GenericDocument::ParseStream(InputStream& is);
// (4) In situ parsing
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& GenericDocument::ParseInsitu(Ch* str);
// (5) In situ parsing, using same Encoding for stream
template <unsigned parseFlags>
GenericDocument& GenericDocument::ParseInsitu(Ch* str);
// (6) In situ parsing, using default parse flags
GenericDocument& GenericDocument::ParseInsitu(Ch* str);
// (7) Normal parsing of a string
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& GenericDocument::Parse(const Ch* str);
// (8) Normal parsing of a string, using same Encoding for stream
template <unsigned parseFlags>
GenericDocument& GenericDocument::Parse(const Ch* str);
// (9) Normal parsing of a string, using default parse flags
GenericDocument& GenericDocument::Parse(const Ch* str);
~~~~~~~~~~
The examples of [tutorial](doc/tutorial.md) uses (9) for normal parsing of string. The examples of [stream](doc/stream.md) uses the first three. *In situ* parsing will be described soon.
The `parseFlags` are combination of the following bit-flags:
Parse flags | Meaning
------------------------------|-----------------------------------
`kParseNoFlags` | No flag is set.
`kParseDefaultFlags` | Default parse flags. It is equal to macro `RAPIDJSON_PARSE_DEFAULT_FLAGS`, which is defined as `kParseNoFlags`.
`kParseInsituFlag` | In-situ(destructive) parsing.
`kParseValidateEncodingFlag` | Validate encoding of JSON strings.
`kParseIterativeFlag` | Iterative(constant complexity in terms of function call stack size) parsing.
`kParseStopWhenDoneFlag` | After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate `kParseErrorDocumentRootNotSingular` error. Using this flag for parsing multiple JSONs in the same stream.
`kParseFullPrecisionFlag` | Parse number in full precision (slower). If this flag is not set, the normal precision (faster) is used. Normal precision has maximum 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) error.
By using a non-type template parameter, instead of a function parameter, C++ compiler can generate code which is optimized for specified combinations, improving speed, and reducing code size (if only using a single specialization). The downside is the flags needed to be determined in compile-time.
The `SourceEncoding` parameter defines what encoding is in the stream. This can be differed to the `Encoding` of the `Document`. See [Transcoding and Validation](#TranscodingAndValidation) section for details.
And the `InputStream` is type of input stream.
## Parse Error {#ParseError}
When the parse processing succeeded, the `Document` contains the parse results. When there is an error, the original DOM is *unchanged*. And the error state of parsing can be obtained by `bool HasParseError()`, `ParseErrorCode GetParseError()` and `size_t GetParseOffet()`.
Parse Error Code | Description
--------------------------------------------|---------------------------------------------------
`kParseErrorNone` | No error.
`kParseErrorDocumentEmpty` | The document is empty.
`kParseErrorDocumentRootNotSingular` | The document root must not follow by other values.
`kParseErrorValueInvalid` | Invalid value.
`kParseErrorObjectMissName` | Missing a name for object member.
`kParseErrorObjectMissColon` | Missing a colon after a name of object member.
`kParseErrorObjectMissCommaOrCurlyBracket` | Missing a comma or `}` after an object member.
`kParseErrorArrayMissCommaOrSquareBracket` | Missing a comma or `]` after an array element.
`kParseErrorStringUnicodeEscapeInvalidHex` | Incorrect hex digit after `\\u` escape in string.
`kParseErrorStringUnicodeSurrogateInvalid` | The surrogate pair in string is invalid.
`kParseErrorStringEscapeInvalid` | Invalid escape character in string.
`kParseErrorStringMissQuotationMark` | Missing a closing quotation mark in string.
`kParseErrorStringInvalidEncoding` | Invalid encoding in string.
`kParseErrorNumberTooBig` | Number too big to be stored in `double`.
`kParseErrorNumberMissFraction` | Miss fraction part in number.
`kParseErrorNumberMissExponent` | Miss exponent in number.
The offset of error is defined as the character number from beginning of stream. Currently RapidJSON does not keep track of line number.
To get an error message, RapidJSON provided a English messages in `rapidjson/error/en.h`. User can customize it for other locales, or use a custom localization system.
Here shows an example of parse error handling.
~~~~~~~~~~cpp
// TODO: example
~~~~~~~~~~
## In Situ Parsing {#InSituParsing}
From [Wikipedia](http://en.wikipedia.org/wiki/In_situ):
> *In situ* ... is a Latin phrase that translates literally to "on site" or "in position". It means "locally", "on site", "on the premises" or "in place" to describe an event where it takes place, and is used in many different contexts.
> ...
> (In computer science) An algorithm is said to be an in situ algorithm, or in-place algorithm, if the extra amount of memory required to execute the algorithm is O(1), that is, does not exceed a constant no matter how large the input. For example, heapsort is an in situ sorting algorithm.
In normal parsing process, a large overhead is to decode JSON strings and copy them to other buffers. *In situ* parsing decodes those JSON string at the place where it is stored. It is possible in JSON because the decoded string is always shorter than the one in JSON. In this context, decoding a JSON string means to process the escapes, such as `"\n"`, `"\u1234"`, etc., and add a null terminator (`'\0'`)at the end of string.
The following diagrams compare normal and *in situ* parsing. The JSON string values contain pointers to the decoded string.
![normal parsing](diagram/normalparsing.png)
In normal parsing, the decoded string are copied to freshly allocated buffers. `"\\n"` (2 characters) is decoded as `"\n"` (1 character). `"\\u0073"` (6 characters) is decoded as "s" (1 character).
![instiu parsing](diagram/insituparsing.png)
*In situ* parsing just modified the original JSON. Updated characters are highlighted in the diagram. If the JSON string does not contain escape character, such as `"msg"`, the parsing process merely replace the closing double quotation mark with a null character.
Since *in situ* parsing modify the input, the parsing API needs `char*` instead of `const char*`.
~~~~~~~~~~cpp
// Read whole file into a buffer
FILE* fp = fopen("test.json", "r");
fseek(fp, 0, SEEK_END);
size_t filesize = (size_t)ftell(fp);
fseek(fp, 0, SEEK_SET);
char* buffer = (char*)malloc(filesize + 1);
size_t readLength = fread(buffer, 1, filesize, fp);
buffer[readLength] = '\0';
fclose(fp);
// In situ parsing the buffer into d, buffer will also be modified
Document d;
d.ParseInsitu(buffer);
// Query/manipulate the DOM here...
free(buffer);
// Note: At this point, d may have dangling pointers pointed to the deallocated buffer.
~~~~~~~~~~
The JSON strings are marked as constant-string. But they may not be really "constant". The life cycle of it depends on the JSON buffer.
In situ parsing minimizes allocation overheads and memory copying. Generally this improves cache coherence, which is an important factor of performance in modern computer.
There are some limitations of *in situ* parsing:
1. The whole JSON is in memory.
2. The source encoding in stream and target encoding in document must be the same.
3. The buffer need to be retained until the document is no longer used.
4. If the DOM need to be used for long period after parsing, and there are few JSON strings in the DOM, retaining the buffer may be a memory waste.
*In situ* parsing is mostly suitable for short-term JSON that only need to be processed once, and then be released from memory. In practice, these situation is very common, for example, deserializing JSON to C++ objects, processing web requests represented in JSON, etc.
## Transcoding and Validation {#TranscodingAndValidation}
RapidJSON supports conversion between Unicode formats (officially termed UCS Transformation Format) internally. During DOM parsing, the source encoding of the stream can be different from the encoding of the DOM. For example, the source stream contains a UTF-8 JSON, while the DOM is using UTF-16 encoding. There is an example code in [EncodedInputStream](doc/stream.md#EncodedInputStream).
When writing a JSON from DOM to output stream, transcoding can also be used. An example is in [EncodedOutputStream](stream.md##EncodedOutputStream).
During transcoding, the source string is decoded to into Unicode code points, and then the code points are encoded in the target format. During decoding, it will validate the byte sequence in the source string. If it is not a valid sequence, the parser will be stopped with `kParseErrorStringInvalidEncoding` error.
When the source encoding of stream is the same as encoding of DOM, by default, the parser will *not* validate the sequence. User may use `kParseValidateEncodingFlag` to force validation.
# Techniques {#Techniques}
Some techniques about using DOM API is discussed here.
## DOM as SAX Event Publisher
In RapidJSON, stringifying a DOM with `Writer` may be look a little bit weired.
~~~~~~~~~~cpp
// ...
Writer<StringBuffer> writer(buffer);
d.Accept(writer);
~~~~~~~~~~
Actually, `Value::Accept()` is responsible for publishing SAX events about the value to the handler. With this design, `Value` and `Writer` are decoupled. `Value` can generate SAX events, and `Writer` can handle those events.
User may create customer handlers for transforming the DOM into other formats. For example, a handler which converts the DOM into XML.
~~~~~~~~~~cpp
// TODO: example
~~~~~~~~~~
For more about SAX events and handler, please refer to [SAX](doc/sax.md).
## User Buffer {#UserBuffer}
Some applications may try to avoid memory allocations whenever possible.
`MemoryPoolAllocator` can support this by letting user to provide a buffer. The buffer can be on the program stack, or a "scratch buffer" which is statically allocated (a static/global array) for storing temporary data.
`MemoryPoolAllocator` will use the user buffer to satisfy allocations. When the user buffer is used up, it will allocate a chunk of memory from the base allocator (by default the `CrtAllocator`).
Here is an example of using stack memory.
~~~~~~~~~~cpp
char buffer[1024];
MemoryPoolAllocator allocator(buffer, sizeof(buffer));
Document d(&allocator);
d.Parse(json);
~~~~~~~~~~
If the total size of allocation is less than 1024 during parsing, this code does not invoke any heap allocation (via `new` or `malloc()`) at all.
User can query the current memory consumption in bytes via `MemoryPoolAllocator::Size()`. And then user can determine a suitable size of user buffer.

View File

@@ -0,0 +1,146 @@
# Encoding
According to [ECMA-404](http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf),
> (in Introduction) JSON text is a sequence of Unicode code points.
The earlier [RFC4627](http://www.ietf.org/rfc/rfc4627.txt) stated that,
> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is UTF-8.
> (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible. When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used.
RapidJSON supports various encodings. It can also validate the encodings of JSON, and transconding JSON among encodings. All these features are implemented internally, without the need for external libraries (e.g. [ICU](http://site.icu-project.org/)).
[TOC]
# Unicode {#Unicode}
From [Unicode's official website](http://www.unicode.org/standard/WhatIsUnicode.html):
> Unicode provides a unique number for every character,
> no matter what the platform,
> no matter what the program,
> no matter what the language.
Those unique numbers are called code points, which is in the range `0x0` to `0x10FFFF`.
## Unicode Transformation Format {#UTF}
There are various encodings for storing Unicode code points. These are called Unicode Transformation Format (UTF). RapidJSON supports the most commonly used UTFs, including
* UTF-8: 8-bit variable-width encoding. It maps a code point to 14 bytes.
* UTF-16: 16-bit variable-width encoding. It maps a code point to 12 16-bit code units (i.e., 24 bytes).
* UTF-32: 32-bit fixed-width encoding. It directly maps a code point to a single 32-bit code unit (i.e. 4 bytes).
For UTF-16 and UTF-32, the byte order (endianness) does matter. Within computer memory, they are often stored in the computer's endianness. However, when it is stored in file or transferred over network, we need to state the byte order of the byte sequence, either little-endian (LE) or big-endian (BE).
RapidJSON provide these encodings via the structs in `rapidjson/encodings.h`:
~~~~~~~~~~cpp
namespace rapidjson {
template<typename CharType = char>
struct UTF8;
template<typename CharType = wchar_t>
struct UTF16;
template<typename CharType = wchar_t>
struct UTF16LE;
template<typename CharType = wchar_t>
struct UTF16BE;
template<typename CharType = unsigned>
struct UTF32;
template<typename CharType = unsigned>
struct UTF32LE;
template<typename CharType = unsigned>
struct UTF32BE;
} // namespace rapidjson
~~~~~~~~~~
For processing text in memory, we normally use `UTF8`, `UTF16` or `UTF32`. For processing text via I/O, we may use `UTF8`, `UTF16LE`, `UTF16BE`, `UTF32LE` or `UTF32BE`.
When using the DOM-style API, the `Encoding` template parameter in `GenericValue<Encoding>` and `GenericDocument<Encoding>` indicates the encoding to be used to represent JSON string in memory. So normally we will use `UTF8`, `UTF16` or `UTF32` for this template parameter. The choice depends on operating systems and other libraries that the application is using. For example, Windows API represents Unicode characters in UTF-16, while most Linux distributions and applications prefer UTF-8.
Example of UTF-16 DOM declaration:
~~~~~~~~~~cpp
typedef GenericDocument<UTF16<> > WDocument;
typedef GenericValue<UTF16<> > WValue;
~~~~~~~~~~
For a detail example, please check the example in [DOM's Encoding](doc/stream.md#Encoding) section.
## Character Type {#CharacterType}
As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 14 code units for UTF-8.
For `UTF16(LE|BE)`, `UTF32(LE|BE)`, the `CharType` must be integer type of at least 2 and 4 bytes respectively.
Note that C++11 introduces `char16_t` and `char32_t`, which can be used for `UTF16` and `UTF32` respectively.
## AutoUTF {#AutoUTF}
Previous encodings are statically bound in compile-time. In other words, user must know exactly which encodings will be used in the memory or streams. However, sometimes we may need to read/write files of different encodings. The encoding needed to be decided in runtime.
`AutoUTF` is an encoding designed for this purpose. It chooses which encoding to be used according to the input or output stream. Currently, it should be used with `EncodedInputStream` and `EncodedOutputStream`.
## ASCII {#ASCII}
Although the JSON standards did not mention about [ASCII](http://en.wikipedia.org/wiki/ASCII), sometimes we would like to write 7-bit ASCII JSON for applications that cannot handle UTF-8. Since any JSON can represent unicode characters in escaped sequence `\uXXXX`, JSON can always be encoded in ASCII.
Here is an example for writing a UTF-8 DOM into ASCII:
~~~~~~~~~~cpp
using namespace rapidjson;
Document d; // UTF8<>
// ...
StringBuffer buffer;
Writer<StringBuffer, Document::EncodingType, ASCII<> > writer(buffer);
d.Accept(writer);
std::cout << buffer.GetString();
~~~~~~~~~~
ASCII can be used in input stream. If the input stream contains bytes with values above 127, it will cause `kParseErrorStringInvalidEncoding` error.
ASCII *cannot* be used in memory (encoding of `Document` or target encoding of `Reader`), as it cannot represent Unicode code points.
# Validation & Transcoding {#ValidationTranscoding}
When RapidJSON parses a JSON, it can validate the input JSON, whether it is a valid sequence of a specified encoding. This option can be turned on by adding `kParseValidateEncodingFlag` in `parseFlags` template parameter.
If the input encoding and output encoding is different, `Reader` and `Writer` will automatically transcode (convert) the text. In this case, `kParseValidateEncodingFlag` is not necessary, as it must decode the input sequence. And if the sequence was unable to be decoded, it must be invalid.
## Transcoder {#Transcoder}
Although the encoding functions in RapidJSON are designed for JSON parsing/generation, user may abuse them for transcoding of non-JSON strings.
Here is an example for transcoding a string from UTF-8 to UTF-16:
~~~~~~~~~~cpp
#include "rapidjson/encodings.h"
using namespace rapidjson;
const char* s = "..."; // UTF-8 string
StringStream source(s);
GenericStringBuffer<UTF16<> > target;
bool hasError = false;
while (source.Peak() != '\0')
if (!Transcoder::Transcode<UTF8<>, UTF16<> >(source, target)) {
hasError = true;
break;
}
if (!hasError) {
const wchar_t* t = target.GetString();
// ...
}
~~~~~~~~~~
You may also use `AutoUTF` and the associated streams for setting source/target encoding in runtime.

View File

@@ -0,0 +1,146 @@
# FAQ
## General
1. What is RapidJSON?
RapidJSON is a C++ library for parsing and generating JSON. You may check all [features](doc/features.md) of it.
2. Why is RapidJSON named so?
It is inspired by [RapidXML](http://rapidxml.sourceforge.net/), which is a fast XML DOM parser.
3. Is RapidJSON similar to RapidXML?
RapidJSON borrowed some designs of RapidXML, including *in situ* parsing, header-only library. But the two APIs are completely different. Also RapidJSON provide many features that are not in RapidXML.
4. Is RapidJSON free?
Yes, it is free under MIT license. It can be used in commercial applications. Please check the details in [license.txt](https://github.com/miloyip/rapidjson/blob/master/license.txt).
5. Is RapidJSON small? What are its dependencies?
Yes. A simple executable which parses a JSON and prints its statistics is less than 30KB on Windows.
RapidJSON depends on C++ standard library only.
6. How to install RapidJSON?
Check [Installation section](http://miloyip.github.io/rapidjson/).
7. Can RapidJSON run on my platform?
RapidJSON has been tested in many combinations of operating systems, compilers and CPU architecture by the community. But we cannot ensure that it can be run on your particular platform. Building and running the unit test suite will give you the answer.
8. Does RapidJSON support C++03? C++11?
RapidJSON was firstly implemented for C++03. Later it added optional support of some C++11 features (e.g., move constructor, `noexcept`). RapidJSON shall be compatible with C++03 or C++11 compliant compilers.
9. Does RapidJSON really work in real applications?
Yes. It is deployed in both client and server real applications. A community member reported that RapidJSON in their system parses 50 million JSONs daily.
10. How RapidJSON is tested?
RapidJSON contains a unit test suite for automatic testing. [Travis](https://travis-ci.org/miloyip/rapidjson/) will compile and run the unit test suite for all modifications. The test process also uses Valgrind to detect memory leaks.
11. Is RapidJSON well documented?
RapidJSON provides [user guide and API documentationn](http://miloyip.github.io/rapidjson/index.html).
12. Are there alternatives?
Yes, there are a lot alternatives. For example, [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) has a listing of open-source C/C++ JSON libraries. [json.org](http://www.json.org/) also has a list.
## JSON
1. What is JSON?
JSON (JavaScript Object Notation) is a lightweight data-interchange format. It uses human readable text format. More details of JSON can be referred to [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) and [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm).
2. What are applications of JSON?
JSON are commonly used in web applications for transferring structured data. It is also used as a file format for data persistence.
2. Does RapidJSON conform to the JSON standard?
Yes. RapidJSON is fully compliance with [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) and [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm). It can handle corner cases, such as supporting null character and surrogate pairs in JSON strings.
3. Does RapidJSON support relaxed syntax?
Currently no. RapidJSON only support the strict standardized format. Support on related syntax is under discussion in this [issue](https://github.com/miloyip/rapidjson/issues/36).
## DOM and SAX
1. What is DOM style API?
Document Object Model (DOM) is an in-memory representation of JSON for query and manipulation.
2. What is SAX style API?
SAX is an event-driven API for parsing and generation.
3. Should I choose DOM or SAX?
DOM is easy for query and manipulation. SAX is very fast and memory-saving but often more difficult to be applied.
4. What is *in situ* parsing?
*in situ* parsing decodes the JSON strings directly into the input JSON. This is an optimization which can reduce memory consumption and improve performance, but the input JSON will be modified. Check [in-situ parsing](http://miloyip.github.io/rapidjson/md_doc_dom.html#InSituParsing) for details.
5. When does parsing generate an error?
The parser generates an error when the input JSON contains invalid syntax, or a value can not be represented (a number is too big), or the handler of parsers terminate the parsing. Check [parse error](http://miloyip.github.io/rapidjson/md_doc_dom.html#ParseError) for details.
6. What error information is provided?
The error is stored in `ParseResult`, which includes the error code and offset (number of characters from the beginning of JSON). The error code can be translated into human-readable error message.
7. Why not just using `double` to represent JSON number?
Some applications use 64-bit unsigned/signed integers. And these integers cannot be converted into `double` without loss of precision. So the parsers detects whether a JSON number is convertible to different types of integers and/or `double`.
## Document/Value (DOM)
1. What is move semantics? Why?
2. How to copy a value?
3. Why do I need to provide the length of string?
4. Why do I need to provide allocator in many DOM manipulation API?
5. Does it convert between numerical types?
## Reader/Writer (SAX)
1. Why not just `printf` a JSON? Why need a `Writer`?
2. Why can't I parse a JSON which is just a number?
3. Can I pause the parsing process and resume it later?
## Unicode
1. Does it support UTF-8, UTF-16 and other format?
2. Can it validate the encoding?
3. What is surrogate pair? Does RapidJSON support it?
4. Can it handle '\u0000' (null character) in JSON string?
5. Can I output '\uxxxx' for all non-ASCII character?
## Stream
1. I have a big JSON file. Should I load the whole file to memory?
2. Can I parse JSON while it is streamed from network?
3. I don't know what format will the JSON be. How to handle them?
4. What is BOM? How RapidJSON handle it?
5. Why little/big endian is related?
## Performance
1. Is RapidJSON really fast?
2. Why is it fast?
3. What is SIMD? How it is applied in RapidJSON?
4. Does it consume a lot of memory?
5. What is the purpose of being high performance?
## Gossip
1. Who are the developers of RapidJSON?
2. Why do you develop RapidJSON?
3. Why there is a long empty period of development?
4. Why did the repository move from Google Code to GitHub?

View File

@@ -0,0 +1,96 @@
# Features
## General
* Cross-platform
* Compilers: Visual Studio, gcc, clang, etc.
* Architectures: x86, x64, ARM, etc.
* Operating systems: Windows, Mac OS X, Linux, iOS, Android, etc.
* Easy installation
* Header files only library. Just copy the headers to your project.
* Self-contained, minimal dependences
* No STL, BOOST, etc.
* Only included `<cstdio>`, `<cstdlib>`, `<cstring>`, `<inttypes.h>`, `<new>`, `<stdint.h>`.
* Without C++ exception, RTTI
* High performance
* Use template and inline functions to reduce function call overheads.
* Internal optimized Grisu2 and floating point parsing implementations.
* Optional SSE2/SSE4.1 support.
## Standard compliance
* RapidJSON should be fully RFC4627/ECMA-404 compliance.
* Support Unicode surrogate.
* Support null character (`"\u0000"`)
* For example, `["Hello\u0000World"]` can be parsed and handled gracefully. There is API for getting/setting lengths of string.
## Unicode
* Support UTF-8, UTF-16, UTF-32 encodings, including little endian and big endian.
* These encodings are used in input/output streams and in-memory representation.
* Support automatic detection of encodings in input stream.
* Support transcoding between encodings internally.
* For example, you can read a UTF-8 file and let RapidJSON transcode the JSON strings into UTF-16 in the DOM.
* Support encoding validation internally.
* For example, you can read a UTF-8 file, and let RapidJSON check whether all JSON strings are valid UTF-8 byte sequence.
* Support custom character types.
* By default the character types are `char` for UTF8, `wchar_t` for UTF16, `uint32_t` for UTF32.
* Support custom encodings.
## API styles
* SAX (Simple API for XML) style API
* Similar to [SAX](http://en.wikipedia.org/wiki/Simple_API_for_XML), RapidJSON provides a event sequential access parser API (`rapidjson::GenericReader`). It also provides a generator API (`rapidjson::Writer`) which consumes the same set of events.
* DOM (Document Object Model) style API
* Similar to [DOM](http://en.wikipedia.org/wiki/Document_Object_Model) for HTML/XML, RapidJSON can parse JSON into a DOM representation (`rapidjson::GenericDocument`), for easy manipulation, and finally stringify back to JSON if needed.
* The DOM style API (`rapidjson::GenericDocument`) is actually implemented with SAX style API (`rapidjson::GenericReader`). SAX is faster but sometimes DOM is easier. Users can pick their choices according to scenarios.
## Parsing
* Recursive (default) and iterative parser
* Recursive parser is faster but prone to stack overflow in extreme cases.
* Iterative parser use custom stack to keep parsing state.
* Support *in situ* parsing.
* Parse JSON string values in-place at the source JSON, and then the DOM points to addresses of those strings.
* Faster than convention parsing: no allocation for strings, no copy (if string does not contain escapes), cache-friendly.
* Support 32-bit/64-bit signed/unsigned integer and `double` for JSON number type.
* Support parsing multiple JSONs in input stream (`kParseStopWhenDoneFlag`).
* Error Handling
* Support comprehensive error code if parsing failed.
* Support error message localization.
## DOM (Document)
* RapidJSON checks range of numerical values for conversions.
* Optimization for string literal
* Only store pointer instead of copying
* Optimization for "short" strings
* Store short string in `Value` internally without additional allocation.
* For UTF-8 string: maximum 11 characters in 32-bit, 15 characters in 64-bit.
* Optionally support `std::string` (define `RAPIDJSON_HAS_STDSTRING=1`)
## Generation
* Support `rapidjson::PrettyWriter` for adding newlines and indentations.
## Stream
* Support `rapidjson::GenericStringBuffer` for storing the output JSON as string.
* Support `rapidjson::FileReadStream` and `rapidjson::FileWriteStream` for input/output `FILE` object.
* Support custom streams.
## Memory
* Minimize memory overheads for DOM.
* Each JSON value occupies exactly 16/20 bytes for most 32/64-bit machines (excluding text string).
* Support fast default allocator.
* A stack-based allocator (allocate sequentially, prohibit to free individual allocations, suitable for parsing).
* User can provide a pre-allocated buffer. (Possible to parse a number of JSONs without any CRT allocation)
* Support standard CRT(C-runtime) allocator.
* Support custom allocators.
## Miscellaneous
* Some C++11 support (optional)
* Rvalue reference
* `noexcept` specifier

View File

@@ -0,0 +1,127 @@
# Internals
This section records some design and implementation details.
[TOC]
# Value {#Value}
## Data Layout {#DataLayout}
## Flags {#Flags}
# Allocator {#Allocator}
## MemoryPoolAllocator {#MemoryPoolAllocator}
# Parsing Optimization {#ParsingOptimization}
## Skip Whitespace with SIMD {#SkipwhitespaceWithSIMD}
## Pow10() {#Pow10}
## Local Stream Copy {#LocalStreamCopy}
# Parser {#Parser}
## Iterative Parser {#IterativeParser}
The iterative parser is a recursive descent LL(1) parser
implemented in a non-recursive manner.
### Grammar {#IterativeParserGrammar}
The grammar used for this parser is based on strict JSON syntax:
~~~~~~~~~~
S -> array | object
array -> [ values ]
object -> { members }
values -> non-empty-values | ε
non-empty-values -> value addition-values
addition-values -> ε | , non-empty-values
members -> non-empty-members | ε
non-empty-members -> member addition-members
addition-members -> ε | , non-empty-members
member -> STRING : value
value -> STRING | NUMBER | NULL | BOOLEAN | object | array
~~~~~~~~~~
Note that left factoring is applied to non-terminals `values` and `members`
to make the grammar be LL(1).
### Parsing Table {#IterativeParserParsingTable}
Based on the grammar, we can construct the FIRST and FOLLOW set.
The FIRST set of non-terminals is listed below:
| NON-TERMINAL | FIRST |
|:-----------------:|:--------------------------------:|
| array | [ |
| object | { |
| values | ε STRING NUMBER NULL BOOLEAN { [ |
| addition-values | ε COMMA |
| members | ε STRING |
| addition-members | ε COMMA |
| member | STRING |
| value | STRING NUMBER NULL BOOLEAN { [ |
| S | [ { |
| non-empty-members | STRING |
| non-empty-values | STRING NUMBER NULL BOOLEAN { [ |
The FOLLOW set is listed below:
| NON-TERMINAL | FOLLOW |
|:-----------------:|:-------:|
| S | $ |
| array | , $ } ] |
| object | , $ } ] |
| values | ] |
| non-empty-values | ] |
| addition-values | ] |
| members | } |
| non-empty-members | } |
| addition-members | } |
| member | , } |
| value | , } ] |
Finally the parsing table can be constructed from FIRST and FOLLOW set:
| NON-TERMINAL | [ | { | , | : | ] | } | STRING | NUMBER | NULL | BOOLEAN |
|:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:|
| S | array | object | | | | | | | | |
| array | [ values ] | | | | | | | | | |
| object | | { members } | | | | | | | | |
| values | non-empty-values | non-empty-values | | | ε | | non-empty-values | non-empty-values | non-empty-values | non-empty-values |
| non-empty-values | value addition-values | value addition-values | | | | | value addition-values | value addition-values | value addition-values | value addition-values |
| addition-values | | | , non-empty-values | | ε | | | | | |
| members | | | | | | ε | non-empty-members | | | |
| non-empty-members | | | | | | | member addition-members | | | |
| addition-members | | | , non-empty-members | | | ε | | | | |
| member | | | | | | | STRING : value | | | |
| value | array | object | | | | | STRING | NUMBER | NULL | BOOLEAN |
There is a great [tool](http://hackingoff.com/compilers/predict-first-follow-set) for above grammar analysis.
### Implementation {#IterativeParserImplementation}
Based on the parsing table, a direct(or conventional) implementation
that pushes the production body in reverse order
while generating a production could work.
In RapidJSON, several modifications(or adaptations to current design) are made to a direct implementation.
First, the parsing table is encoded in a state machine in RapidJSON.
States are constructed by the head and body of production.
State transitions are constructed by production rules.
Besides, extra states are added for productions involved with `array` and `object`.
In this way the generation of array values or object members would be a single state transition,
rather than several pop/push operations in the direct implementation.
This also makes the estimation of stack size more easier.
The state diagram is shown as follows:
![State Diagram](diagram/iterative-parser-states-diagram.png)
Second, the iterative parser also keeps track of array's value count and object's member count
in its internal stack, which may be different from a conventional implementation.

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

View File

@@ -0,0 +1,119 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="217.15039"
height="60.831055"
id="svg2"
version="1.1"
inkscape:version="0.48.4 r9939"
sodipodi:docname="rapidjson.svg">
<defs
id="defs4">
<linearGradient
id="linearGradient3801">
<stop
style="stop-color:#000000;stop-opacity:1;"
offset="0"
id="stop3803" />
<stop
style="stop-color:#000000;stop-opacity:0;"
offset="1"
id="stop3805" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3801"
id="linearGradient3807"
x1="81.25"
y1="52.737183"
x2="122.25"
y2="52.737183"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.2378503,0,0,1.1662045,-226.99279,64.427324)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3801"
id="linearGradient3935"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(-1.4768835,0,0,2.2904698,246.48785,81.630301)"
x1="81.25"
y1="52.737183"
x2="115.96579"
y2="48.439766" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3801"
id="linearGradient3947"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.2378503,0,0,1.1662045,-226.99279,-10.072676)"
x1="81.25"
y1="52.737183"
x2="122.25"
y2="52.737183" />
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="2"
inkscape:cx="207.8959"
inkscape:cy="-3.2283687"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1137"
inkscape:window-x="-8"
inkscape:window-y="-8"
inkscape:window-maximized="1"
fit-margin-top="10"
fit-margin-left="10"
fit-margin-right="10"
fit-margin-bottom="10" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-39.132812,-38.772339)">
<text
sodipodi:linespacing="125%"
id="text3939"
y="79.862183"
x="147.5"
style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Microsoft JhengHei;-inkscape-font-specification:Microsoft JhengHei"
xml:space="preserve"><tspan
style="font-size:48px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Inconsolata;-inkscape-font-specification:Inconsolata"
y="79.862183"
x="147.5"
id="tspan3941"
sodipodi:role="line"><tspan
id="tspan3943"
style="font-size:42px;font-style:oblique;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Segoe UI;-inkscape-font-specification:Segoe UI Oblique">Rapid</tspan><tspan
id="tspan3945"
style="font-weight:bold;-inkscape-font-specification:Inconsolata Bold">JSON</tspan></tspan></text>
</g>
</svg>

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

@@ -0,0 +1,194 @@
<doxygenlayout version="1.0">
<!-- Generated by doxygen 1.8.7 -->
<!-- Navigation index tabs for HTML output -->
<navindex>
<tab type="mainpage" visible="yes" title=""/>
<tab type="pages" visible="yes" title="" intro=""/>
<tab type="modules" visible="yes" title="" intro=""/>
<tab type="namespaces" visible="yes" title="">
<tab type="namespacelist" visible="yes" title="" intro=""/>
<tab type="namespacemembers" visible="yes" title="" intro=""/>
</tab>
<tab type="classes" visible="yes" title="">
<tab type="classlist" visible="yes" title="" intro=""/>
<tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/>
<tab type="hierarchy" visible="yes" title="" intro=""/>
<tab type="classmembers" visible="yes" title="" intro=""/>
</tab>
<tab type="files" visible="yes" title="">
<tab type="filelist" visible="yes" title="" intro=""/>
<tab type="globals" visible="yes" title="" intro=""/>
</tab>
<tab type="examples" visible="yes" title="" intro=""/>
</navindex>
<!-- Layout definition for a class page -->
<class>
<briefdescription visible="yes"/>
<includes visible="$SHOW_INCLUDE_FILES"/>
<inheritancegraph visible="$CLASS_GRAPH"/>
<collaborationgraph visible="$COLLABORATION_GRAPH"/>
<memberdecl>
<nestedclasses visible="yes" title=""/>
<publictypes title=""/>
<services title=""/>
<interfaces title=""/>
<publicslots title=""/>
<signals title=""/>
<publicmethods title=""/>
<publicstaticmethods title=""/>
<publicattributes title=""/>
<publicstaticattributes title=""/>
<protectedtypes title=""/>
<protectedslots title=""/>
<protectedmethods title=""/>
<protectedstaticmethods title=""/>
<protectedattributes title=""/>
<protectedstaticattributes title=""/>
<packagetypes title=""/>
<packagemethods title=""/>
<packagestaticmethods title=""/>
<packageattributes title=""/>
<packagestaticattributes title=""/>
<properties title=""/>
<events title=""/>
<privatetypes title=""/>
<privateslots title=""/>
<privatemethods title=""/>
<privatestaticmethods title=""/>
<privateattributes title=""/>
<privatestaticattributes title=""/>
<friends title=""/>
<related title="" subtitle=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<typedefs title=""/>
<enums title=""/>
<services title=""/>
<interfaces title=""/>
<constructors title=""/>
<functions title=""/>
<related title=""/>
<variables title=""/>
<properties title=""/>
<events title=""/>
</memberdef>
<allmemberslink visible="yes"/>
<usedfiles visible="$SHOW_USED_FILES"/>
<authorsection visible="yes"/>
</class>
<!-- Layout definition for a namespace page -->
<namespace>
<briefdescription visible="yes"/>
<memberdecl>
<nestednamespaces visible="yes" title=""/>
<constantgroups visible="yes" title=""/>
<classes visible="yes" title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
</memberdef>
<authorsection visible="yes"/>
</namespace>
<!-- Layout definition for a file page -->
<file>
<briefdescription visible="yes"/>
<includes visible="$SHOW_INCLUDE_FILES"/>
<includegraph visible="$INCLUDE_GRAPH"/>
<includedbygraph visible="$INCLUDED_BY_GRAPH"/>
<sourcelink visible="yes"/>
<memberdecl>
<classes visible="yes" title=""/>
<namespaces visible="yes" title=""/>
<constantgroups visible="yes" title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
</memberdef>
<authorsection/>
</file>
<!-- Layout definition for a group page -->
<group>
<briefdescription visible="yes"/>
<groupgraph visible="$GROUP_GRAPHS"/>
<memberdecl>
<nestedgroups visible="yes" title=""/>
<dirs visible="yes" title=""/>
<files visible="yes" title=""/>
<namespaces visible="yes" title=""/>
<classes visible="yes" title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<enumvalues title=""/>
<functions title=""/>
<variables title=""/>
<signals title=""/>
<publicslots title=""/>
<protectedslots title=""/>
<privateslots title=""/>
<events title=""/>
<properties title=""/>
<friends title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<pagedocs/>
<inlineclasses title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<enumvalues title=""/>
<functions title=""/>
<variables title=""/>
<signals title=""/>
<publicslots title=""/>
<protectedslots title=""/>
<privateslots title=""/>
<events title=""/>
<properties title=""/>
<friends title=""/>
</memberdef>
<authorsection visible="yes"/>
</group>
<!-- Layout definition for a directory page -->
<directory>
<briefdescription visible="yes"/>
<directorygraph visible="yes"/>
<memberdecl>
<dirs visible="yes"/>
<files visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
</directory>
</doxygenlayout>

View File

@@ -0,0 +1,260 @@
body code {
margin: 0;
border: 1px solid #ddd;
background-color: #f8f8f8;
border-radius: 3px;
padding: 0;
}
a {
color: #4183c4;
}
a.el {
font-weight: normal;
}
body, table, div, p, dl {
color: #333333;
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 15px;
font-style: normal;
font-variant: normal;
font-weight: normal;
line-height: 25.5px;
}
body {
background-color: #eee;
}
div.header {
background-image: none;
background-color: white;
margin: 0px;
border: 0px;
}
div.headertitle {
width: 858px;
margin: 30px;
padding: 0px;
}
div.toc {
background-color: #f8f8f8;
border-color: #ddd;
margin-right: 10px;
margin-left: 20px;
}
div.toc h3 {
color: #333333;
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 18px;
font-style: normal;
font-variant: normal;
font-weight: normal;
}
div.toc li {
color: #333333;
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 12px;
font-style: normal;
font-variant: normal;
font-weight: normal;
}
.title {
font-size: 2.5em;
line-height: 63.75px;
border-bottom: 1px solid #ddd;
margin-bottom: 15px;
margin-left: 0px;
margin-right: 0px;
margin-top: 0px;
}
body h1 {
font-size: 2em;
line-height: 1.7;
border-bottom: 1px solid #eee;
margin: 1em 0 15px;
padding: 0;
overflow: hidden;
}
body h2 {
font-size: 1.5em;
line-height: 1.7;
margin: 1em 0 15px;
padding: 0;
}
pre.fragment {
font-family: Consolas, 'Liberation Mono', Menlo, Courier, monospace;
font-size: 13px;
font-style: normal;
font-variant: normal;
font-weight: normal;
line-height: 19px;
}
table.doxtable th {
background-color: #f8f8f8;
color: #333333;
font-size: 15px;
}
table.doxtable td, table.doxtable th {
border: 1px solid #ddd;
}
#doc-content {
background-color: #fff;
width: 918px;
height: auto !important;
}
div.contents {
width: 858px;
margin: 30px;
}
div.line {
font-family: Consolas, 'Liberation Mono', Menlo, Courier, monospace;
font-size: 13px;
font-style: normal;
font-variant: normal;
font-weight: normal;
line-height: 19px;
}
tt, code, pre {
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
font-size: 12px;
}
div.fragment {
background-color: #f8f8f8;
border: 1px solid #ddd;
font-size: 13px;
line-height: 19px;
overflow: auto;
padding: 6px 10px;
border-radius: 3px;
}
#topbanner {
position: fixed;
margin: 15px;
z-index: 101;
}
#projectname
{
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 38px;
font-weight: bold;
line-height: 63.75px;
margin: 0px;
padding: 2px 0px;
}
#projectbrief
{
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 16px;
line-height: 22.4px;
margin: 0px 0px 13px 0px;
padding: 2px;
}
/* side bar and search */
#side-nav
{
padding: 10px 0px 20px 20px;
border-top: 60px solid #2980b9;
background-color: #343131;
width: 250px !important;
position: fixed
}
#nav-tree
{
background-color: transparent;
background-image: none;
height: 100% !important;
}
#nav-tree .label
{
font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol';
line-height: 25.5px;
font-size: 15px;
}
#nav-tree
{
color: #b3b3b3;
}
#nav-tree .selected {
background-image: none;
}
#nav-tree a
{
color: #b3b3b3;
}
#github
{
position: fixed;
left: auto;
right: auto;
width: 250px;
}
#MSearchBox
{
margin: 20px;
left: 40px;
right: auto;
position: fixed;
width: 180px;
}
#MSearchField
{
width: 121px;
}
#MSearchResultsWindow
{
left: 45px !important;
}
#nav-sync
{
display: none;
}
.ui-resizable .ui-resizable-handle
{
width: 0px;
}
#nav-path
{
display: none;
}
/* external link icon */
div.contents a[href ^= "http"]:after {
content: " " url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAVklEQVR4Xn3PgQkAMQhDUXfqTu7kTtkpd5RA8AInfArtQ2iRXFWT2QedAfttj2FsPIOE1eCOlEuoWWjgzYaB/IkeGOrxXhqB+uA9Bfcm0lAZuh+YIeAD+cAqSz4kCMUAAAAASUVORK5CYII=);
}
.githublogo {
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyRpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuNS1jMDIxIDc5LjE1NDkxMSwgMjAxMy8xMC8yOS0xMTo0NzoxNiAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC8iIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6RERCMUIwOUY4NkNFMTFFM0FBNTJFRTMzNTJEMUJDNDYiIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6RERCMUIwOUU4NkNFMTFFM0FBNTJFRTMzNTJEMUJDNDYiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoTWFjaW50b3NoKSI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkU1MTc4QTJBOTlBMDExRTI5QTE1QkMxMDQ2QTg5MDREIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOkU1MTc4QTJCOTlBMDExRTI5QTE1QkMxMDQ2QTg5MDREIi8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+jUqS1wAAApVJREFUeNq0l89rE1EQx3e3gVJoSPzZeNEWPKgHoa0HBak0iHiy/4C3WvDmoZ56qJ7txVsPQu8qlqqHIhRKJZceesmhioQEfxTEtsoSpdJg1u/ABJ7Pmc1m8zLwgWTmzcw3L+/te+tHUeQltONgCkyCi2AEDHLsJ6iBMlgHL8FeoqokoA2j4CloRMmtwTmj7erHBXPgCWhG6a3JNXKdCiDl1cidVbXZkJoXQRi5t5BrxwoY71FzU8S4JuAIqFkJ2+BFSlEh525b/hr3+k/AklDkNsf6wTT4yv46KIMNpsy+iMdMc47HNWxbsgVcUn7FmLAzzoFAWDsBx+wVP6bUpp5ewI+DOeUx0Wd9D8F70BTGNjkWtqnhmT1JQAHcUgZd8Lo3rQb1LAT8eJVUfgGvHQigGp+V2Z0iAUUl8QH47kAA1XioxIo+bRN8OG8F/oBjwv+Z1nJgX5jpdzQDw0LCjsPmrcW7I/iHScCAEDj03FtD8A0EyuChHgg4KTlJQF3wZ7WELppnBX+dBFSVpJsOBWi1qiRgSwnOgoyD5hmuJdkWCVhTgnTvW3AgYIFrSbZGh0UW/Io5Vp+DQoK7o80pztWMemZbgxeNwCNwDbw1fIfgGZjhU6xPaJgBV8BdsMw5cbZoHsenwYFxkZzl83xTSKTiviCAfCsJLysH3POfC8m8NegyGAGfLP/VmGmfSChgXroR0RSWjEFv2J/nG84cuKFMf4sTCZqXuJd4KaXFVjEG3+tw4eXbNK/YC9oXXs3O8NY8y99L4BXY5cvLY/Bb2VZ58EOJVcB18DHJq9lRsKr8inyKGVjlmh29mtHs3AHfuhCwy1vXT/Nu2GKQt+UHsGdctyX6eQyNvc+5sfX9Dl7Pe2J/BRgAl2CpwmrsHR0AAAAASUVORK5CYII=);
}

View File

@@ -0,0 +1,9 @@
<!-- HTML footer for doxygen 1.8.7-->
<!-- start footer part -->
<!--BEGIN GENERATE_TREEVIEW-->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
$navpath
</ul>
</div>
<!--END GENERATE_TREEVIEW-->

View File

@@ -0,0 +1,24 @@
<!-- HTML header for doxygen 1.8.7-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen $doxygenversion"/>
<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME-->
<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="$relpath^jquery.js"></script>
<script type="text/javascript" src="$relpath^dynsections.js"></script>
$treeview
$search
$mathjax
<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" />
$extrastylesheet
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="topbanner"><a href="https://github.com/miloyip/rapidjson" title="RapidJSON GitHub"><i class="githublogo"></i></a></div>
$searchbox
<!--END TITLEAREA-->
<!-- end header part -->

View File

@@ -0,0 +1,30 @@
# Performance
The old performance article for RapidJSON 0.1 is provided [here](https://code.google.com/p/rapidjson/wiki/Performance).
The (third-party) performance tests have been removed from this repository
and are now part of a dedicated [native JSON benchmark collection] [1].
This file will be updated with a summary of benchmarking results based on
the above benchmark collection in the future.
[1]: https://github.com/miloyip/nativejson-benchmark
Additionally, you may refer to the following third-party benchmarks.
## Third-party benchmarks
* [Basic benchmarks for miscellaneous C++ JSON parsers and generators](https://github.com/mloskot/json_benchmark) by Mateusz Loskot (Jun 2013)
* [casablanca](https://casablanca.codeplex.com/)
* [json_spirit](https://github.com/cierelabs/json_spirit)
* [jsoncpp](http://jsoncpp.sourceforge.net/)
* [libjson](http://sourceforge.net/projects/libjson/)
* [rapidjson](https://github.com/miloyip/rapidjson/)
* [QJsonDocument](http://qt-project.org/doc/qt-5.0/qtcore/qjsondocument.html)
* [JSON Parser Benchmarking](http://chadaustin.me/2013/01/json-parser-benchmarking/) by Chad Austin (Jan 2013)
* [sajson](https://github.com/chadaustin/sajson)
* [rapidjson](https://github.com/miloyip/rapidjson/)
* [vjson](https://code.google.com/p/vjson/)
* [YAJL](http://lloyd.github.com/yajl/)
* [Jansson](http://www.digip.org/jansson/)

View File

@@ -0,0 +1,475 @@
# SAX
The term "SAX" originated from [Simple API for XML](http://en.wikipedia.org/wiki/Simple_API_for_XML). We borrowed this term for JSON parsing and generation.
In RapidJSON, `Reader` (typedef of `GenericReader<...>`) is the SAX-style parser for JSON, and `Writer` (typedef of `GenericWriter<...>`) is the SAX-style generator for JSON.
[TOC]
# Reader {#Reader}
`Reader` parses a JSON from a stream. While it reads characters from the stream, it analyze the characters according to the syntax of JSON, and publish events to a handler.
For example, here is a JSON.
~~~~~~~~~~js
{
"hello": "world",
"t": true ,
"f": false,
"n": null,
"i": 123,
"pi": 3.1416,
"a": [1, 2, 3, 4]
}
~~~~~~~~~~
While a `Reader` parses this JSON, it publishes the following events to the handler sequentially:
~~~~~~~~~~
StartObject()
Key("hello", 5, true)
String("world", 5, true)
Key("t", 1, true)
Bool(true)
Key("f", 1, true)
Bool(false)
Key("n", 1, true)
Null()
Key("i")
UInt(123)
Key("pi")
Double(3.1416)
Key("a")
StartArray()
Uint(1)
Uint(2)
Uint(3)
Uint(4)
EndArray(4)
EndObject(7)
~~~~~~~~~~
These events can be easily matched with the JSON, except some event parameters need further explanation. Let's see the `simplereader` example which produces exactly the same output as above:
~~~~~~~~~~cpp
#include "rapidjson/reader.h"
#include <iostream>
using namespace rapidjson;
using namespace std;
struct MyHandler {
bool Null() { cout << "Null()" << endl; return true; }
bool Bool(bool b) { cout << "Bool(" << boolalpha << b << ")" << endl; return true; }
bool Int(int i) { cout << "Int(" << i << ")" << endl; return true; }
bool Uint(unsigned u) { cout << "Uint(" << u << ")" << endl; return true; }
bool Int64(int64_t i) { cout << "Int64(" << i << ")" << endl; return true; }
bool Uint64(uint64_t u) { cout << "Uint64(" << u << ")" << endl; return true; }
bool Double(double d) { cout << "Double(" << d << ")" << endl; return true; }
bool String(const char* str, SizeType length, bool copy) {
cout << "String(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl;
return true;
}
bool StartObject() { cout << "StartObject()" << endl; return true; }
bool Key(const char* str, SizeType length, bool copy) {
cout << "Key(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl;
return true;
}
bool EndObject(SizeType memberCount) { cout << "EndObject(" << memberCount << ")" << endl; return true; }
bool StartArray() { cout << "StartArray()" << endl; return true; }
bool EndArray(SizeType elementCount) { cout << "EndArray(" << elementCount << ")" << endl; return true; }
};
void main() {
const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";
MyHandler handler;
Reader reader;
StringStream ss(json);
reader.Parse(ss, handler);
}
~~~~~~~~~~
Note that, RapidJSON uses template to statically bind the `Reader` type and the handler type, instead of using class with virtual functions. This paradigm can improve the performance by inlining functions.
## Handler {#Handler}
As the previous example showed, user needs to implement a handler, which consumes the events (function calls) from `Reader`. The handler must contain the following member functions.
~~~~~~~~~~cpp
class Handler {
bool Null();
bool Bool(bool b);
bool Int(int i);
bool Uint(unsigned i);
bool Int64(int64_t i);
bool Uint64(uint64_t i);
bool Double(double d);
bool String(const Ch* str, SizeType length, bool copy);
bool StartObject();
bool Key(const Ch* str, SizeType length, bool copy);
bool EndObject(SizeType memberCount);
bool StartArray();
bool EndArray(SizeType elementCount);
};
~~~~~~~~~~
`Null()` is called when the `Reader` encounters a JSON null value.
`Bool(bool)` is called when the `Reader` encounters a JSON true or false value.
When the `Reader` encounters a JSON number, it chooses a suitable C++ type mapping. And then it calls *one* function out of `Int(int)`, `Uint(unsigned)`, `Int64(int64_t)`, `Uint64(uint64_t)` and `Double(double)`.
`String(const char* str, SizeType length, bool copy)` is called when the `Reader` encounters a string. The first parameter is pointer to the string. The second parameter is the length of the string (excluding the null terminator). Note that RapidJSON supports null character `'\0'` inside a string. If such situation happens, `strlen(str) < length`. The last `copy` indicates whether the handler needs to make a copy of the string. For normal parsing, `copy = true`. Only when *insitu* parsing is used, `copy = false`. And beware that, the character type depends on the target encoding, which will be explained later.
When the `Reader` encounters the beginning of an object, it calls `StartObject()`. An object in JSON is a set of name-value pairs. If the object contains members it first calls `Key()` for the name of member, and then calls functions depending on the type of the value. These calls of name-value pairs repeats until calling `EndObject(SizeType memberCount)`. Note that the `memberCount` parameter is just an aid for the handler, user may not need this parameter.
Array is similar to object but simpler. At the beginning of an array, the `Reader` calls `BeginArary()`. If there is elements, it calls functions according to the types of element. Similarly, in the last call `EndArray(SizeType elementCount)`, the parameter `elementCount` is just an aid for the handler.
Every handler functions returns a `bool`. Normally it should returns `true`. If the handler encounters an error, it can return `false` to notify event publisher to stop further processing.
For example, when we parse a JSON with `Reader` and the handler detected that the JSON does not conform to the required schema, then the handler can return `false` and let the `Reader` stop further parsing. And the `Reader` will be in error state with error code `kParseErrorTermination`.
## GenericReader {#GenericReader}
As mentioned before, `Reader` is a typedef of a template class `GenericReader`:
~~~~~~~~~~cpp
namespace rapidjson {
template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> >
class GenericReader {
// ...
};
typedef GenericReader<UTF8<>, UTF8<> > Reader;
} // namespace rapidjson
~~~~~~~~~~
The `Reader` uses UTF-8 as both source and target encoding. The source encoding means the encoding in the JSON stream. The target encoding means the encoding of the `str` parameter in `String()` calls. For example, to parse a UTF-8 stream and outputs UTF-16 string events, you can define a reader by:
~~~~~~~~~~cpp
GenericReader<UTF8<>, UTF16<> > reader;
~~~~~~~~~~
Note that, the default character type of `UTF16` is `wchar_t`. So this `reader`needs to call `String(const wchar_t*, SizeType, bool)` of the handler.
The third template parameter `Allocator` is the allocator type for internal data structure (actually a stack).
## Parsing {#Parsing}
The one and only one function of `Reader` is to parse JSON.
~~~~~~~~~~cpp
template <unsigned parseFlags, typename InputStream, typename Handler>
bool Parse(InputStream& is, Handler& handler);
// with parseFlags = kDefaultParseFlags
template <typename InputStream, typename Handler>
bool Parse(InputStream& is, Handler& handler);
~~~~~~~~~~
If an error occurs during parsing, it will return `false`. User can also calls `bool HasParseEror()`, `ParseErrorCode GetParseErrorCode()` and `size_t GetErrorOffset()` to obtain the error states. Actually `Document` uses these `Reader` functions to obtain parse errors. Please refer to [DOM](doc/dom.md) for details about parse error.
# Writer {#Writer}
`Reader` converts (parses) JSON into events. `Writer` does exactly the opposite. It converts events into JSON.
`Writer` is very easy to use. If your application only need to converts some data into JSON, it may be a good choice to use `Writer` directly, instead of building a `Document` and then stringifying it with a `Writer`.
In `simplewriter` example, we do exactly the reverse of `simplereader`.
~~~~~~~~~~cpp
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
#include <iostream>
using namespace rapidjson;
using namespace std;
void main() {
StringBuffer s;
Writer<StringBuffer> writer(s);
writer.StartObject();
writer.Key("hello");
writer.String("world");
writer.Key("t");
writer.Bool(true);
writer.Key("f");
writer.Bool(false);
writer.Key("n");
writer.Null();
writer.Key("i");
writer.Uint(123);
writer.Key("pi");
writer.Double(3.1416);
writer.Key("a");
writer.StartArray();
for (unsigned i = 0; i < 4; i++)
writer.Uint(i);
writer.EndArray();
writer.EndObject();
cout << s.GetString() << endl;
}
~~~~~~~~~~
~~~~~~~~~~
{"hello":"world","t":true,"f":false,"n":null,"i":123,"pi":3.1416,"a":[0,1,2,3]}
~~~~~~~~~~
There are two `String()` and `Key()` overloads. One is the same as defined in handler concept with 3 parameters. It can handle string with null characters. Another one is the simpler version used in the above example.
Note that, the example code does not pass any parameters in `EndArray()` and `EndObject()`. An `SizeType` can be passed but it will be simply ignored by `Writer`.
You may doubt that, why not just using `sprintf()` or `std::stringstream` to build a JSON?
There are various reasons:
1. `Writer` must output a well-formed JSON. If there is incorrect event sequence (e.g. `Int()` just after `StartObject()`), it generates assertion fail in debug mode.
2. `Writer::String()` can handle string escaping (e.g. converting code point `U+000A` to `\n`) and Unicode transcoding.
3. `Writer` handles number output consistently.
4. `Writer` implements the event handler concept. It can be used to handle events from `Reader`, `Document` or other event publisher.
5. `Writer` can be optimized for different platforms.
Anyway, using `Writer` API is even simpler than generating a JSON by ad hoc methods.
## Template {#WriterTemplate}
`Writer` has a minor design difference to `Reader`. `Writer` is a template class, not a typedef. There is no `GenericWriter`. The following is the declaration.
~~~~~~~~~~cpp
namespace rapidjson {
template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename Allocator = CrtAllocator<> >
class Writer {
public:
Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth)
// ...
};
} // namespace rapidjson
~~~~~~~~~~
The `OutputStream` template parameter is the type of output stream. It cannot be deduced and must be specified by user.
The `SourceEncoding` template parameter specifies the encoding to be used in `String(const Ch*, ...)`.
The `TargetEncoding` template parameter specifies the encoding in the output stream.
The last one, `Allocator` is the type of allocator, which is used for allocating internal data structure (a stack).
Besides, the constructor of `Writer` has a `levelDepth` parameter. This parameter affects the initial memory allocated for storing information per hierarchy level.
## PrettyWriter {#PrettyWriter}
While the output of `Writer` is the most condensed JSON without white-spaces, suitable for network transfer or storage, it is not easily readable by human.
Therefore, RapidJSON provides a `PrettyWriter`, which adds indentation and line feeds in the output.
The usage of `PrettyWriter` is exactly the same as `Writer`, expect that `PrettyWriter` provides a `SetIndent(Ch indentChar, unsigned indentCharCount)` function. The default is 4 spaces.
## Completeness and Reset {#CompletenessReset}
A `Writer` can only output a single JSON, which can be any JSON type at the root. Once the singular event for root (e.g. `String()`), or the last matching `EndObject()` or `EndArray()` event, is handled, the output JSON is well-formed and complete. User can detect this state by calling `Writer::IsComplete()`.
When a JSON is complete, the `Writer` cannot accept any new events. Otherwise the output will be invalid (i.e. having more than one root). To reuse the `Writer` object, user can call `Writer::Reset(OutputStream& os)` to reset all internal states of the `Writer` with a new output stream.
# Techniques {#Techniques}
## Parsing JSON to Custom Data Structure {#CustomDataStructure}
`Document`'s parsing capability is completely based on `Reader`. Actually `Document` is a handler which receives events from a reader to build a DOM during parsing.
User may uses `Reader` to build other data structures directly. This eliminates building of DOM, thus reducing memory and improving performance.
In the following `messagereader` example, `ParseMessages()` parses a JSON which should be an object with key-string pairs.
~~~~~~~~~~cpp
#include "rapidjson/reader.h"
#include "rapidjson/error/en.h"
#include <iostream>
#include <string>
#include <map>
using namespace std;
using namespace rapidjson;
typedef map<string, string> MessageMap;
struct MessageHandler
: public BaseReaderHandler<UTF8<>, MessageHandler> {
MessageHandler() : state_(kExpectObjectStart) {
}
bool StartObject() {
switch (state_) {
case kExpectObjectStart:
state_ = kExpectNameOrObjectEnd;
return true;
default:
return false;
}
}
bool String(const char* str, SizeType length, bool) {
switch (state_) {
case kExpectNameOrObjectEnd:
name_ = string(str, length);
state_ = kExpectValue;
return true;
case kExpectValue:
messages_.insert(MessageMap::value_type(name_, string(str, length)));
state_ = kExpectNameOrObjectEnd;
return true;
default:
return false;
}
}
bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; }
bool Default() { return false; } // All other events are invalid.
MessageMap messages_;
enum State {
kExpectObjectStart,
kExpectNameOrObjectEnd,
kExpectValue,
}state_;
std::string name_;
};
void ParseMessages(const char* json, MessageMap& messages) {
Reader reader;
MessageHandler handler;
StringStream ss(json);
if (reader.Parse(ss, handler))
messages.swap(handler.messages_); // Only change it if success.
else {
ParseErrorCode e = reader.GetParseErrorCode();
size_t o = reader.GetErrorOffset();
cout << "Error: " << GetParseError_En(e) << endl;;
cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl;
}
}
int main() {
MessageMap messages;
const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }";
cout << json1 << endl;
ParseMessages(json1, messages);
for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr)
cout << itr->first << ": " << itr->second << endl;
cout << endl << "Parse a JSON with invalid schema." << endl;
const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }";
cout << json2 << endl;
ParseMessages(json2, messages);
return 0;
}
~~~~~~~~~~
~~~~~~~~~~
{ "greeting" : "Hello!", "farewell" : "bye-bye!" }
farewell: bye-bye!
greeting: Hello!
Parse a JSON with invalid schema.
{ "greeting" : "Hello!", "farewell" : "bye-bye!", "foo" : {} }
Error: Terminate parsing due to Handler error.
at offset 59 near '} }...'
~~~~~~~~~~
The first JSON (`json1`) was successfully parsed into `MessageMap`. Since `MessageMap` is a `std::map`, the printing order are sorted by the key. This order is different from the JSON's order.
In the second JSON (`json2`), `foo`'s value is an empty object. As it is an object, `MessageHandler::StartObject()` will be called. However, at that moment `state_ = kExpectValue`, so that function returns `false` and cause the parsing process be terminated. The error code is `kParseErrorTermination`.
## Filtering of JSON {#Filtering}
As mentioned earlier, `Writer` can handle the events published by `Reader`. `condense` example simply set a `Writer` as handler of a `Reader`, so it can remove all white-spaces in JSON. `pretty` example uses the same relationship, but replacing `Writer` by `PrettyWriter`. So `pretty` can be used to reformat a JSON with indentation and line feed.
Actually, we can add intermediate layer(s) to filter the contents of JSON via these SAX-style API. For example, `capitalize` example capitalize all strings in a JSON.
~~~~~~~~~~cpp
#include "rapidjson/reader.h"
#include "rapidjson/writer.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/error/en.h"
#include <vector>
#include <cctype>
using namespace rapidjson;
template<typename OutputHandler>
struct CapitalizeFilter {
CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() {
}
bool Null() { return out_.Null(); }
bool Bool(bool b) { return out_.Bool(b); }
bool Int(int i) { return out_.Int(i); }
bool Uint(unsigned u) { return out_.Uint(u); }
bool Int64(int64_t i) { return out_.Int64(i); }
bool Uint64(uint64_t u) { return out_.Uint64(u); }
bool Double(double d) { return out_.Double(d); }
bool String(const char* str, SizeType length, bool) {
buffer_.clear();
for (SizeType i = 0; i < length; i++)
buffer_.push_back(std::toupper(str[i]));
return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string
}
bool StartObject() { return out_.StartObject(); }
bool Key(const char* str, SizeType length, bool copy) { return String(str, length, copy); }
bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); }
bool StartArray() { return out_.StartArray(); }
bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); }
OutputHandler& out_;
std::vector<char> buffer_;
};
int main(int, char*[]) {
// Prepare JSON reader and input stream.
Reader reader;
char readBuffer[65536];
FileReadStream is(stdin, readBuffer, sizeof(readBuffer));
// Prepare JSON writer and output stream.
char writeBuffer[65536];
FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer));
Writer<FileWriteStream> writer(os);
// JSON reader parse from the input stream and let writer generate the output.
CapitalizeFilter<Writer<FileWriteStream> > filter(writer);
if (!reader.Parse(is, filter)) {
fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode()));
return 1;
}
return 0;
}
~~~~~~~~~~
Note that, it is incorrect to simply capitalize the JSON as a string. For example:
~~~~~~~~~~
["Hello\nWorld"]
~~~~~~~~~~
Simply capitalizing the whole JSON would contain incorrect escape character:
~~~~~~~~~~
["HELLO\NWORLD"]
~~~~~~~~~~
The correct result by `capitalize`:
~~~~~~~~~~
["HELLO\nWORLD"]
~~~~~~~~~~
More complicated filters can be developed. However, since SAX-style API can only provide information about a single event at a time, user may need to book-keeping the contextual information (e.g. the path from root value, storage of other related values). Some processing may be easier to be implemented in DOM than SAX.

View File

@@ -0,0 +1,374 @@
# Stream
In RapidJSON, `rapidjson::Stream` is a concept for reading/writing JSON. Here we first show how to use streams provided. And then see how to create a custom stream.
[TOC]
# Memory Streams {#MemoryStreams}
Memory streams store JSON in memory.
## StringStream (Input) {#StringStream}
`StringStream` is the most basic input stream. It represents a complete, read-only JSON stored in memory. It is defined in `rapidjson/rapidjson.h`.
~~~~~~~~~~cpp
#include "rapidjson/document.h" // will include "rapidjson/rapidjson.h"
using namespace rapidjson;
// ...
const char json[] = "[1, 2, 3, 4]";
StringStream s(json);
Document d;
d.ParseStream(s);
~~~~~~~~~~
Since this is very common usage, `Document::Parse(const char*)` is provided to do exactly the same as above:
~~~~~~~~~~cpp
// ...
const char json[] = "[1, 2, 3, 4]";
Document d;
d.Parse(json);
~~~~~~~~~~
Note that, `StringStream` is a typedef of `GenericStringStream<UTF8<> >`, user may use another encodings to represent the character set of the stream.
## StringBuffer (Output) {#StringBuffer}
`StringBuffer` is a simple output stream. It allocates a memory buffer for writing the whole JSON. Use `GetString()` to obtain the buffer.
~~~~~~~~~~cpp
#include "rapidjson/stringbuffer.h"
StringBuffer buffer;
Writer<StringBuffer> writer(buffer);
d.Accept(writer);
const char* output = buffer.GetString();
~~~~~~~~~~
When the buffer is full, it will increases the capacity automatically. The default capacity is 256 characters (256 bytes for UTF8, 512 bytes for UTF16, etc.). User can provide an allocator and a initial capacity.
~~~~~~~~~~cpp
StringBuffer buffer1(0, 1024); // Use its allocator, initial size = 1024
StringBuffer buffer2(allocator, 1024);
~~~~~~~~~~
By default, `StringBuffer` will instantiate an internal allocator.
Similarly, `StringBuffer` is a typedef of `GenericStringBuffer<UTF8<> >`.
# File Streams {#FileStreams}
When parsing a JSON from file, you may read the whole JSON into memory and use ``StringStream`` above.
However, if the JSON is big, or memory is limited, you can use `FileReadStream`. It only read a part of JSON from file into buffer, and then let the part be parsed. If it runs out of characters in the buffer, it will read the next part from file.
## FileReadStream (Input) {#FileReadStream}
`FileReadStream` reads the file via a `FILE` pointer. And user need to provide a buffer.
~~~~~~~~~~cpp
#include "rapidjson/filereadstream.h"
#include <cstdio>
using namespace rapidjson;
FILE* fp = fopen("big.json", "rb"); // non-Windows use "r"
char readBuffer[65536];
FileReadStream is(fp, readBuffer, sizeof(readBuffer));
Document d;
d.ParseStream(is);
fclose(fp);
~~~~~~~~~~
Different from string streams, `FileReadStream` is byte stream. It does not handle encodings. If the file is not UTF-8, the byte stream can be wrapped in a `EncodedInputStream`. It will be discussed very soon.
Apart from reading file, user can also use `FileReadStream` to read `stdin`.
## FileWriteStream (Output) {#FileWriteStream}
`FileWriteStream` is buffered output stream. Its usage is very similar to `FileReadStream`.
~~~~~~~~~~cpp
#include "rapidjson/filewritestream.h"
#include <cstdio>
using namespace rapidjson;
Document d;
d.Parse(json);
// ...
FILE* fp = fopen("output.json", "wb"); // non-Windows use "w"
char writeBuffer[65536];
FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer));
Writer<FileWriteStream> writer(os);
d.Accept(writer);
fclose(fp);
~~~~~~~~~~
It can also directs the output to `stdout`.
# Encoded Streams {#EncodedStreams}
Encoded streams do not contain JSON itself, but they wrap byte streams to provide basic encoding/decoding function.
As mentioned above, UTF-8 byte streams can be read directly. However, UTF-16 and UTF-32 have endian issue. To handle endian correctly, it needs to convert bytes into characters (e.g. `wchar_t` for UTF-16) while reading, and characters into bytes while writing.
Besides, it also need to handle [byte order mark (BOM)](http://en.wikipedia.org/wiki/Byte_order_mark). When reading from a byte stream, it is needed to detect or just consume the BOM if exists. When writing to a byte stream, it can optionally write BOM.
If the encoding of stream is known in compile-time, you may use `EncodedInputStream` and `EncodedOutputStream`. If the stream can be UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE JSON, and it is only known in runtime, you may use `AutoUTFInputStream` and `AutoUTFOutputStream`. These streams are defined in `rapidjson/encodedstream.h`.
Note that, these encoded streams can be applied to streams other than file. For example, you may have a file in memory, or a custom byte stream, be wrapped in encoded streams.
## EncodedInputStream {#EncodedInputStream}
`EncodedInputStream` has two template parameters. The first one is a `Encoding` class, such as `UTF8`, `UTF16LE`, defined in `rapidjson/encodings.h`. The second one is the class of stream to be wrapped.
~~~~~~~~~~cpp
#include "rapidjson/document.h"
#include "rapidjson/filereadstream.h" // FileReadStream
#include "rapidjson/encodedstream.h" // EncodedInputStream
#include <cstdio>
using namespace rapidjson;
FILE* fp = fopen("utf16le.json", "rb"); // non-Windows use "r"
char readBuffer[256];
FileReadStream bis(fp, readBuffer, sizeof(readBuffer));
EncodedInputStream<UTF16LE<>, FileReadStream> eis(bis); // wraps bis into eis
Document d; // Document is GenericDocument<UTF8<> >
d.ParseStream<0, UTF16LE<> >(eis); // Parses UTF-16LE file into UTF-8 in memory
fclose(fp);
~~~~~~~~~~
## EncodedOutputStream {#EncodedOutputStream}
`EncodedOutputStream` is similar but it has a `bool putBOM` parameter in the constructor, controlling whether to write BOM into output byte stream.
~~~~~~~~~~cpp
#include "rapidjson/filewritestream.h" // FileWriteStream
#include "rapidjson/encodedstream.h" // EncodedOutputStream
#include <cstdio>
Document d; // Document is GenericDocument<UTF8<> >
// ...
FILE* fp = fopen("output_utf32le.json", "wb"); // non-Windows use "w"
char writeBuffer[256];
FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer));
typedef EncodedOutputStream<UTF32LE<>, FileWriteStream> OutputStream;
OutputStream eos(bos, true); // Write BOM
Writer<OutputStream, UTF32LE<>, UTF8<>> writer(eos);
d.Accept(writer); // This generates UTF32-LE file from UTF-8 in memory
fclose(fp);
~~~~~~~~~~
## AutoUTFInputStream {#AutoUTFInputStream}
Sometimes an application may want to handle all supported JSON encoding. `AutoUTFInputStream` will detection encoding by BOM first. If BOM is unavailable, it will use characteristics of valid JSON to make detection. If neither method success, it falls back to the UTF type provided in constructor.
Since the characters (code units) may be 8-bit, 16-bit or 32-bit. `AutoUTFInputStream` requires a character type which can hold at least 32-bit. We may use `unsigned`, as in the template parameter:
~~~~~~~~~~cpp
#include "rapidjson/document.h"
#include "rapidjson/filereadstream.h" // FileReadStream
#include "rapidjson/encodedstream.h" // AutoUTFInputStream
#include <cstdio>
using namespace rapidjson;
FILE* fp = fopen("any.json", "rb"); // non-Windows use "r"
char readBuffer[256];
FileReadStream bis(fp, readBuffer, sizeof(readBuffer));
AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // wraps bis into eis
Document d; // Document is GenericDocument<UTF8<> >
d.ParseStream<0, AutoUTF<unsigned> >(eis); // This parses any UTF file into UTF-8 in memory
fclose(fp);
~~~~~~~~~~
When specifying the encoding of stream, uses `AutoUTF<CharType>` as in `ParseStream()` above.
You can obtain the type of UTF via `UTFType GetType()`. And check whether a BOM is found by `HasBOM()`
## AutoUTFOutputStream {#AutoUTFOutputStream}
Similarly, to choose encoding for output during runtime, we can use `AutoUTFOutputStream`. This class is not automatic *per se*. You need to specify the UTF type and whether to write BOM in runtime.
~~~~~~~~~~cpp
using namespace rapidjson;
void WriteJSONFile(FILE* fp, UTFType type, bool putBOM, const Document& d) {
char writeBuffer[256];
FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer));
typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream;
OutputStream eos(bos, type, putBOM);
Writer<OutputStream, UTF8<>, AutoUTF<> > writer;
d.Accept(writer);
}
~~~~~~~~~~
`AutoUTFInputStream` and `AutoUTFOutputStream` is more convenient than `EncodedInputStream` and `EncodedOutputStream`. They just incur a little bit runtime overheads.
# Custom Stream {#CustomStream}
In addition to memory/file streams, user can create their own stream classes which fits RapidJSON's API. For example, you may create network stream, stream from compressed file, etc.
RapidJSON combines different types using templates. A class containing all required interface can be a stream. The Stream interface is defined in comments of `rapidjson/rapidjson.h`:
~~~~~~~~~~cpp
concept Stream {
typename Ch; //!< Character type of the stream.
//! Read the current character from stream without moving the read cursor.
Ch Peek() const;
//! Read the current character from stream and moving the read cursor to next character.
Ch Take();
//! Get the current read cursor.
//! \return Number of characters read from start.
size_t Tell();
//! Begin writing operation at the current read pointer.
//! \return The begin writer pointer.
Ch* PutBegin();
//! Write a character.
void Put(Ch c);
//! Flush the buffer.
void Flush();
//! End the writing operation.
//! \param begin The begin write pointer returned by PutBegin().
//! \return Number of characters written.
size_t PutEnd(Ch* begin);
}
~~~~~~~~~~
For input stream, they must implement `Peek()`, `Take()` and `Tell()`.
For output stream, they must implement `Put()` and `Flush()`.
There are two special interface, `PutBegin()` and `PutEnd()`, which are only for *in situ* parsing. Normal streams do not implement them. However, if the interface is not needed for a particular stream, it is still need to a dummy implementation, otherwise will generate compilation error.
## Example: istream wrapper {#ExampleIStreamWrapper}
The following example is a wrapper of `std::istream`, which only implements 3 functions.
~~~~~~~~~~cpp
class IStreamWrapper {
public:
typedef char Ch;
IStreamWrapper(std::istream& is) : is_(is) {
}
Ch Peek() const { // 1
int c = is_.peek();
return c == std::char_traits<char>::eof() ? '\0' : (Ch)c;
}
Ch Take() { // 2
int c = is_.get();
return c == std::char_traits<char>::eof() ? '\0' : (Ch)c;
}
size_t Tell() const { return (size_t)is_.tellg(); } // 3
Ch* PutBegin() { assert(false); return 0; }
void Put(Ch) { assert(false); }
void Flush() { assert(false); }
size_t PutEnd(Ch*) { assert(false); return 0; }
private:
IStreamWrapper(const IStreamWrapper&);
IStreamWrapper& operator=(const IStreamWrapper&);
std::istream& is_;
};
~~~~~~~~~~
User can use it to wrap instances of `std::stringstream`, `std::ifstream`.
~~~~~~~~~~cpp
const char* json = "[1,2,3,4]";
std::stringstream ss(json);
IStreamWrapper is(ss);
Document d;
d.Parse(is);
~~~~~~~~~~
Note that, this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library.
## Example: ostream wrapper {#ExampleOStreamWrapper}
The following example is a wrapper of `std::istream`, which only implements 2 functions.
~~~~~~~~~~cpp
class OStreamWrapper {
public:
typedef char Ch;
OStreamWrapper(std::ostream& os) : os_(os) {
}
Ch Peek() const { assert(false); return '\0'; }
Ch Take() { assert(false); return '\0'; }
size_t Tell() const { }
Ch* PutBegin() { assert(false); return 0; }
void Put(Ch c) { os_.put(c); } // 1
void Flush() { os_.flush(); } // 2
size_t PutEnd(Ch*) { assert(false); return 0; }
private:
OStreamWrapper(const OStreamWrapper&);
OStreamWrapper& operator=(const OStreamWrapper&);
std::ostream& os_;
};
~~~~~~~~~~
User can use it to wrap instances of `std::stringstream`, `std::ofstream`.
~~~~~~~~~~cpp
Document d;
// ...
std::stringstream ss;
OSStreamWrapper os(ss);
Writer<OStreamWrapper> writer(os);
d.Accept(writer);
~~~~~~~~~~
Note that, this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library.
# Summary {#Summary}
This section describes stream classes available in RapidJSON. Memory streams are simple. File stream can reduce the memory required during JSON parsing and generation, if the JSON is stored in file system. Encoded streams converts between byte streams and character streams. Finally, user may create custom streams using a simple interface.

View File

@@ -0,0 +1,515 @@
# Tutorial
This tutorial introduces the basics of the Document Object Model(DOM) API.
As shown in [Usage at a glance](@ref index), a JSON can be parsed into DOM, and then the DOM can be queried and modified easily, and finally be converted back to JSON.
[TOC]
# Value & Document {#ValueDocument}
Each JSON value is stored in a type called `Value`. A `Document`, representing the DOM, contains the root `Value` of the DOM tree. All public types and functions of RapidJSON are defined in the `rapidjson` namespace.
# Query Value {#QueryValue}
In this section, we will use excerpt of `example/tutorial/tutorial.cpp`.
Assumes we have a JSON stored in a C string (`const char* json`):
~~~~~~~~~~js
{
"hello": "world",
"t": true ,
"f": false,
"n": null,
"i": 123,
"pi": 3.1416,
"a": [1, 2, 3, 4]
}
~~~~~~~~~~
Parse it into a `Document`:
~~~~~~~~~~cpp
#include "rapidjson/document.h"
using namespace rapidjson;
// ...
Document document;
document.Parse(json);
~~~~~~~~~~
The JSON is now parsed into `document` as a *DOM tree*:
![DOM in the tutorial](diagram/tutorial.png)
Since the update to RFC 7159, the root of a conforming JSON document can be any JSON value. In earlier RFC 4627, only objects or arrays were allowed as root values. In this case, the root is an object.
~~~~~~~~~~cpp
assert(document.IsObject());
~~~~~~~~~~
Let's query whether a `"hello"` member exists in the root object. Since a `Value` can contain different types of value, we may need to verify its type and use suitable API to obtain the value. In this example, `"hello"` member associates with a JSON string.
~~~~~~~~~~cpp
assert(document.HasMember("hello"));
assert(document["hello"].IsString());
printf("hello = %s\n", document["hello"].GetString());
~~~~~~~~~~
~~~~~~~~~~
world
~~~~~~~~~~
JSON true/false values are represented as `bool`.
~~~~~~~~~~cpp
assert(document["t"].IsBool());
printf("t = %s\n", document["t"].GetBool() ? "true" : "false");
~~~~~~~~~~
~~~~~~~~~~
true
~~~~~~~~~~
JSON null can be queryed by `IsNull()`.
~~~~~~~~~~cpp
printf("n = %s\n", document["n"].IsNull() ? "null" : "?");
~~~~~~~~~~
~~~~~~~~~~
null
~~~~~~~~~~
JSON number type represents all numeric values. However, C++ needs more specific type for manipulation.
~~~~~~~~~~cpp
assert(document["i"].IsNumber());
// In this case, IsUint()/IsInt64()/IsUInt64() also return true.
assert(document["i"].IsInt());
printf("i = %d\n", document["i"].GetInt());
// Alternative (int)document["i"]
assert(document["pi"].IsNumber());
assert(document["pi"].IsDouble());
printf("pi = %g\n", document["pi"].GetDouble());
~~~~~~~~~~
~~~~~~~~~~
i = 123
pi = 3.1416
~~~~~~~~~~
JSON array contains a number of elements.
~~~~~~~~~~cpp
// Using a reference for consecutive access is handy and faster.
const Value& a = document["a"];
assert(a.IsArray());
for (SizeType i = 0; i < a.Size(); i++) // Uses SizeType instead of size_t
printf("a[%d] = %d\n", i, a[i].GetInt());
~~~~~~~~~~
~~~~~~~~~~
a[0] = 1
a[1] = 2
a[2] = 3
a[3] = 4
~~~~~~~~~~
Note that, RapidJSON does not automatically convert values between JSON types. If a value is a string, it is invalid to call `GetInt()`, for example. In debug mode it will fail an assertion. In release mode, the behavior is undefined.
In the following, details about querying individual types are discussed.
## Query Array {#QueryArray}
By default, `SizeType` is typedef of `unsigned`. In most systems, array is limited to store up to 2^32-1 elements.
You may access the elements in array by integer literal, for example, `a[0]`, `a[1]`, `a[2]`.
Array is similar to `std::vector`, instead of using indices, you may also use iterator to access all the elements.
~~~~~~~~~~cpp
for (Value::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
printf("%d ", itr->GetInt());
~~~~~~~~~~
And other familiar query functions:
* `SizeType Capacity() const`
* `bool Empty() const`
## Query Object {#QueryObject}
Similar to array, we can access all object members by iterator:
~~~~~~~~~~cpp
static const char* kTypeNames[] =
{ "Null", "False", "True", "Object", "Array", "String", "Number" };
for (Value::ConstMemberIterator itr = document.MemberBegin();
itr != document.MemberEnd(); ++itr)
{
printf("Type of member %s is %s\n",
itr->name.GetString(), kTypeNames[itr->value.GetType()]);
}
~~~~~~~~~~
~~~~~~~~~~
Type of member hello is String
Type of member t is True
Type of member f is False
Type of member n is Null
Type of member i is Number
Type of member pi is Number
Type of member a is Array
~~~~~~~~~~
Note that, when `operator[](const char*)` cannot find the member, it will fail an assertion.
If we are unsure whether a member exists, we need to call `HasMember()` before calling `operator[](const char*)`. However, this incurs two lookup. A better way is to call `FindMember()`, which can check the existence of member and obtain its value at once:
~~~~~~~~~~cpp
Value::ConstMemberIterator itr = document.FindMember("hello");
if (itr != document.MemberEnd())
printf("%s %s\n", itr->value.GetString());
~~~~~~~~~~
## Querying Number {#QueryNumber}
JSON provide a single numerical type called Number. Number can be integer or real numbers. RFC 4627 says the range of Number is specified by parser.
As C++ provides several integer and floating point number types, the DOM tries to handle these with widest possible range and good performance.
When a Number is parsed, it is stored in the DOM as either one of the following type:
Type | Description
-----------|---------------------------------------
`unsigned` | 32-bit unsigned integer
`int` | 32-bit signed integer
`uint64_t` | 64-bit unsigned integer
`int64_t` | 64-bit signed integer
`double` | 64-bit double precision floating point
When querying a number, you can check whether the number can be obtained as target type:
Checking | Obtaining
------------------|---------------------
`bool IsNumber()` | N/A
`bool IsUint()` | `unsigned GetUint()`
`bool IsInt()` | `int GetInt()`
`bool IsUint64()` | `uint64_t GetUint()`
`bool IsInt64()` | `int64_t GetInt64()`
`bool IsDouble()` | `double GetDouble()`
Note that, an integer value may be obtained in various ways without conversion. For example, A value `x` containing 123 will make `x.IsInt() == x.IsUint() == x.IsInt64() == x.IsUint64() == true`. But a value `y` containing -3000000000 will only makes `x.IsInt64() == true`.
When obtaining the numeric values, `GetDouble()` will convert internal integer representation to a `double`. Note that, `int` and `unsigned` can be safely convert to `double`, but `int64_t` and `uint64_t` may lose precision (since mantissa of `double` is only 52-bits).
## Query String {#QueryString}
In addition to `GetString()`, the `Value` class also contains `GetStringLength()`. Here explains why.
According to RFC 4627, JSON strings can contain Unicode character `U+0000`, which must be escaped as `"\u0000"`. The problem is that, C/C++ often uses null-terminated string, which treats ``\0'` as the terminator symbol.
To conform RFC 4627, RapidJSON supports string containing `U+0000`. If you need to handle this, you can use `GetStringLength()` API to obtain the correct length of string.
For example, after parsing a the following JSON to `Document d`:
~~~~~~~~~~js
{ "s" : "a\u0000b" }
~~~~~~~~~~
The correct length of the value `"a\u0000b"` is 3. But `strlen()` returns 1.
`GetStringLength()` can also improve performance, as user may often need to call `strlen()` for allocating buffer.
Besides, `std::string` also support a constructor:
~~~~~~~~~~cpp
string(const char* s, size_t count);
~~~~~~~~~~
which accepts the length of string as parameter. This constructor supports storing null character within the string, and should also provide better performance.
## Comparing values
You can use `==` and `!=` to compare values. Two values are equal if and only if they are have same type and contents. You can also compare values with primitive types. Here is an example.
~~~~~~~~~~cpp
if (document["hello"] == document["n"]) /*...*/; // Compare values
if (document["hello"] == "world") /*...*/; // Compare value with literal string
if (document["i"] != 123) /*...*/; // Compare with integers
if (document["pi"] != 3.14) /*...*/; // Compare with double.
~~~~~~~~~~
Array/object compares their elements/members in order. They are equal if and only if their whole subtrees are equal.
Note that, currently if an object contains duplicated named member, comparing equality with any object is always `false`.
# Create/Modify Values {#CreateModifyValues}
There are several ways to create values. After a DOM tree is created and/or modified, it can be saved as JSON again using `Writer`.
## Change Value Type {#ChangeValueType}
When creating a Value or Document by default constructor, its type is Null. To change its type, call `SetXXX()` or assignment operator, for example:
~~~~~~~~~~cpp
Document d; // Null
d.SetObject();
Value v; // Null
v.SetInt(10);
v = 10; // Shortcut, same as above
~~~~~~~~~~
### Overloaded Constructors
There are also overloaded constructors for several types:
~~~~~~~~~~cpp
Value b(true); // calls Value(bool)
Value i(-123); // calls Value(int)
Value u(123u); // calls Value(unsigned)
Value d(1.5); // calls Value(double)
~~~~~~~~~~
To create empty object or array, you may use `SetObject()`/`SetArray()` after default constructor, or using the `Value(Type)` in one shot:
~~~~~~~~~~cpp
Value o(kObjectType);
Value a(kArrayType);
~~~~~~~~~~
## Move Semantics {#MoveSemantics}
A very special decision during design of RapidJSON is that, assignment of value does not copy the source value to destination value. Instead, the value from source is moved to the destination. For example,
~~~~~~~~~~cpp
Value a(123);
Value b(456);
b = a; // a becomes a Null value, b becomes number 123.
~~~~~~~~~~
![Assignment with move semantics.](diagram/move1.png)
Why? What is the advantage of this semantics?
The simple answer is performance. For fixed size JSON types (Number, True, False, Null), copying them is fast and easy. However, For variable size JSON types (String, Array, Object), copying them will incur a lot of overheads. And these overheads are often unnoticed. Especially when we need to create temporary object, copy it to another variable, and then destruct it.
For example, if normal *copy* semantics was used:
~~~~~~~~~~cpp
Value o(kObjectType);
{
Value contacts(kArrayType);
// adding elements to contacts array.
// ...
o.AddMember("contacts", contacts); // deep clone contacts (may be with lots of allocations)
// destruct contacts.
}
~~~~~~~~~~
![Copy semantics makes a lots of copy operations.](diagram/move2.png)
The object `o` needs to allocate a buffer of same size as contacts, makes a deep clone of it, and then finally contacts is destructed. This will incur a lot of unnecessary allocations/deallocations and memory copying.
There are solutions to prevent actual copying these data, such as reference counting and garbage collection(GC).
To make RapidJSON simple and fast, we chose to use *move* semantics for assignment. It is similar to `std::auto_ptr` which transfer ownership during assignment. Move is much faster and simpler, it just destructs the original value, `memcpy()` the source to destination, and finally sets the source as Null type.
So, with move semantics, the above example becomes:
~~~~~~~~~~cpp
Value o(kObjectType);
{
Value contacts(kArrayType);
// adding elements to contacts array.
o.AddMember("contacts", contacts); // just memcpy() of contacts itself to the value of new member (16 bytes)
// contacts became Null here. Its destruction is trivial.
}
~~~~~~~~~~
![Move semantics makes no copying.](diagram/move3.png)
This is called move assignment operator in C++11. As RapidJSON supports C++03, it adopts move semantics using assignment operator, and all other modifying function like `AddMember()`, `PushBack()`.
### Move semantics and temporary values {#TemporaryValues}
Sometimes, it is convenient to construct a Value in place, before passing it to one of the "moving" functions, like `PushBack()` or `AddMember()`. As temporary objects can't be converted to proper Value references, the convenience function `Move()` is available:
~~~~~~~~~~cpp
Value a(kArrayType);
Document::AllocatorType& allocator = document.GetAllocator();
// a.PushBack(Value(42), allocator); // will not compile
a.PushBack(Value().SetInt(42), allocator); // fluent API
a.PushBack(Value(42).Move(), allocator); // same as above
~~~~~~~~~~
## Create String {#CreateString}
RapidJSON provide two strategies for storing string.
1. copy-string: allocates a buffer, and then copy the source data into it.
2. const-string: simply store a pointer of string.
Copy-string is always safe because it owns a copy of the data. Const-string can be used for storing string literal, and in-situ parsing which we will mentioned in Document section.
To make memory allocation customizable, RapidJSON requires user to pass an instance of allocator, whenever an operation may require allocation. This design is needed to prevent storing a allocator (or Document) pointer per Value.
Therefore, when we assign a copy-string, we call this overloaded `SetString()` with allocator:
~~~~~~~~~~cpp
Document document;
Value author;
char buffer[10];
int len = sprintf(buffer, "%s %s", "Milo", "Yip"); // dynamically created string.
author.SetString(buffer, len, document.GetAllocator());
memset(buffer, 0, sizeof(buffer));
// author.GetString() still contains "Milo Yip" after buffer is destroyed
~~~~~~~~~~
In this example, we get the allocator from a `Document` instance. This is a common idiom when using RapidJSON. But you may use other instances of allocator.
Besides, the above `SetString()` requires length. This can handle null characters within a string. There is another `SetString()` overloaded function without the length parameter. And it assumes the input is null-terminated and calls a `strlen()`-like function to obtain the length.
Finally, for string literal or string with safe life-cycle can use const-string version of `SetString()`, which lacks allocator parameter. For string literals (or constant character arrays), simply passing the literal as parameter is safe and efficient:
~~~~~~~~~~cpp
Value s;
s.SetString("rapidjson"); // can contain null character, length derived at compile time
s = "rapidjson"; // shortcut, same as above
~~~~~~~~~~
For character pointer, the RapidJSON requires to mark it as safe before using it without copying. This can be achieved by using the `StringRef` function:
~~~~~~~~~cpp
const char * cstr = getenv("USER");
size_t cstr_len = ...; // in case length is available
Value s;
// s.SetString(cstr); // will not compile
s.SetString(StringRef(cstr)); // ok, assume safe lifetime, null-terminated
s = StringRef(cstr); // shortcut, same as above
s.SetString(StringRef(cstr,cstr_len)); // faster, can contain null character
s = StringRef(cstr,cstr_len); // shortcut, same as above
~~~~~~~~~
## Modify Array {#ModifyArray}
Value with array type provides similar APIs as `std::vector`.
* `Clear()`
* `Reserve(SizeType, Allocator&)`
* `Value& PushBack(Value&, Allocator&)`
* `template <typename T> GenericValue& PushBack(T, Allocator&)`
* `Value& PopBack()`
* `ValueIterator Erase(ConstValueIterator pos)`
* `ValueIterator Erase(ConstValueIterator first, ConstValueIterator last)`
Note that, `Reserve(...)` and `PushBack(...)` may allocate memory for the array elements, therefore require an allocator.
Here is an example of `PushBack()`:
~~~~~~~~~~cpp
Value a(kArrayType);
Document::AllocatorType& allocator = document.GetAllocator();
for (int i = 5; i <= 10; i++)
a.PushBack(i, allocator); // allocator is needed for potential realloc().
// Fluent interface
a.PushBack("Lua", allocator).PushBack("Mio", allocator);
~~~~~~~~~~
Differs from STL, `PushBack()`/`PopBack()` returns the array reference itself. This is called _fluent interface_.
If you want to add a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)) to the array, you need to create a string Value by using the copy-string API. To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place:
~~~~~~~~~~cpp
// in-place Value parameter
contact.PushBack(Value("copy", document.GetAllocator()).Move(), // copy string
document.GetAllocator());
// explicit parameters
Value val("key", document.GetAllocator()); // copy string
contact.PushBack(val, document.GetAllocator());
~~~~~~~~~~
## Modify Object {#ModifyObject}
Object is a collection of key-value pairs (members). Each key must be a string value. To modify an object, either add or remove members. THe following APIs are for adding members:
* `Value& AddMember(Value&, Value&, Allocator& allocator)`
* `Value& AddMember(StringRefType, Value&, Allocator&)`
* `template <typename T> Value& AddMember(StringRefType, T value, Allocator&)`
Here is an example.
~~~~~~~~~~cpp
Value contact(kObject);
contact.AddMember("name", "Milo", document.GetAllocator());
contact.AddMember("married", true, document.GetAllocator());
~~~~~~~~~~
The name parameter with `StringRefType` is similar to the interface of `SetString` function for string values. These overloads are used to avoid the need for copying the `name` string, as constant key names are very common in JSON objects.
If you need to create a name from a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)), you need to create a string Value by using the copy-string API. To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place:
~~~~~~~~~~cpp
// in-place Value parameter
contact.AddMember(Value("copy", document.GetAllocator()).Move(), // copy string
Value().Move(), // null value
document.GetAllocator());
// explicit parameters
Value key("key", document.GetAllocator()); // copy string name
Value val(42); // some value
contact.AddMember(key, val, document.GetAllocator());
~~~~~~~~~~
For removing members, there are several choices:
* `bool RemoveMember(const Ch* name)`: Remove a member by search its name (linear time complexity).
* `bool RemoveMember(const Value& name)`: same as above but `name` is a Value.
* `MemberIterator RemoveMember(MemberIterator)`: Remove a member by iterator (_constant_ time complexity).
* `MemberIterator EraseMember(MemberIterator)`: similar to the above but it preserves order of members (linear time complexity).
* `MemberIterator EraseMember(MemberIterator first, MemberIterator last)`: remove a range of members, preserves order (linear time complexity).
`MemberIterator RemoveMember(MemberIterator)` uses a "move-last" trick to achieve constant time complexity. Basically the member at iterator is destructed, and then the last element is moved to that position. So the order of the remaining members are changed.
## Deep Copy Value {#DeepCopyValue}
If we really need to copy a DOM tree, we can use two APIs for deep copy: constructor with allocator, and `CopyFrom()`.
~~~~~~~~~~cpp
Document d;
Document::AllocatorType& a = d.GetAllocator();
Value v1("foo");
// Value v2(v1); // not allowed
Value v2(v1, a); // make a copy
assert(v1.IsString()); // v1 untouched
d.SetArray().PushBack(v1, a).PushBack(v2, a);
assert(v1.IsNull() && v2.IsNull()); // both moved to d
v2.CopyFrom(d, a); // copy whole document to v2
assert(d.IsArray() && d.Size() == 2); // d untouched
v1.SetObject().AddMember("array", v2, a);
d.PushBack(v1, a);
~~~~~~~~~~
## Swap Values {#SwapValues}
`Swap()` is also provided.
~~~~~~~~~~cpp
Value a(123);
Value b("Hello");
a.Swap(b);
assert(a.IsString());
assert(b.IsInt());
~~~~~~~~~~
Swapping two DOM trees is fast (constant time), despite the complexity of the trees.
# What's next {#WhatsNext}
This tutorial shows the basics of DOM tree query and manipulation. There are several important concepts in RapidJSON:
1. [Streams](doc/stream.md) are channels for reading/writing JSON, which can be a in-memory string, or file stream, etc. User can also create their streams.
2. [Encoding](doc/encoding.md) defines which character encoding is used in streams and memory. RapidJSON also provide Unicode conversion/validation internally.
3. [DOM](doc/dom.md)'s basics are already covered in this tutorial. Uncover more advanced features such as *in situ* parsing, other parsing options and advanced usages.
4. [SAX](doc/sax.md) is the foundation of parsing/generating facility in RapidJSON. Learn how to use `Reader`/`Writer` to implement even faster applications. Also try `PrettyWriter` to format the JSON.
5. [Performance](doc/performance.md) shows some in-house and third-party benchmarks.
6. [Internals](doc/internals.md) describes some internal designs and techniques of RapidJSON.
You may also refer to the [FAQ](doc/faq.md), API documentation, examples and unit tests.

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2011 Milo Yip (miloyip@gmail.com)
# Copyright (c) 2013 Rafal Jeczalik (rjeczalik@gmail.com)
# Distributed under the MIT License (see license.txt file)
set(EXAMPLES
capitalize
condense
messagereader
pretty
prettyauto
serialize
simpledom
simplereader
simplewriter
tutorial)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Weffc++ -Wswitch-default")
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Weffc++ -Wswitch-default")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_definitions(-D_CRT_SECURE_NO_WARNINGS=1)
endif()
foreach (example ${EXAMPLES})
add_executable(${example} ${example}/${example}.cpp)
endforeach()
add_custom_target(examples ALL DEPENDS ${EXAMPLES})

View File

@@ -0,0 +1,66 @@
// JSON condenser exmaple
// This example parses JSON from stdin with validation,
// and re-output the JSON content to stdout with all string capitalized, and without whitespace.
#include "rapidjson/reader.h"
#include "rapidjson/writer.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/error/en.h"
#include <vector>
#include <cctype>
using namespace rapidjson;
template<typename OutputHandler>
struct CapitalizeFilter {
CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() {}
bool Null() { return out_.Null(); }
bool Bool(bool b) { return out_.Bool(b); }
bool Int(int i) { return out_.Int(i); }
bool Uint(unsigned u) { return out_.Uint(u); }
bool Int64(int64_t i) { return out_.Int64(i); }
bool Uint64(uint64_t u) { return out_.Uint64(u); }
bool Double(double d) { return out_.Double(d); }
bool String(const char* str, SizeType length, bool) {
buffer_.clear();
for (SizeType i = 0; i < length; i++)
buffer_.push_back(std::toupper(str[i]));
return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string
}
bool StartObject() { return out_.StartObject(); }
bool Key(const char* str, SizeType length, bool copy) { return String(str, length, copy); }
bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); }
bool StartArray() { return out_.StartArray(); }
bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); }
OutputHandler& out_;
std::vector<char> buffer_;
private:
CapitalizeFilter(const CapitalizeFilter&);
CapitalizeFilter& operator=(const CapitalizeFilter&);
};
int main(int, char*[]) {
// Prepare JSON reader and input stream.
Reader reader;
char readBuffer[65536];
FileReadStream is(stdin, readBuffer, sizeof(readBuffer));
// Prepare JSON writer and output stream.
char writeBuffer[65536];
FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer));
Writer<FileWriteStream> writer(os);
// JSON reader parse from the input stream and let writer generate the output.
CapitalizeFilter<Writer<FileWriteStream> > filter(writer);
if (!reader.Parse(is, filter)) {
fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode()));
return 1;
}
return 0;
}

View File

@@ -0,0 +1,32 @@
// JSON condenser exmaple
// This example parses JSON text from stdin with validation,
// and re-output the JSON content to stdout without whitespace.
#include "rapidjson/reader.h"
#include "rapidjson/writer.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/error/en.h"
using namespace rapidjson;
int main(int, char*[]) {
// Prepare JSON reader and input stream.
Reader reader;
char readBuffer[65536];
FileReadStream is(stdin, readBuffer, sizeof(readBuffer));
// Prepare JSON writer and output stream.
char writeBuffer[65536];
FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer));
Writer<FileWriteStream> writer(os);
// JSON reader parse from the input stream and let writer generate the output.
if (!reader.Parse(is, writer)) {
fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode()));
return 1;
}
return 0;
}

View File

@@ -0,0 +1,96 @@
// Reading a message JSON with Reader (SAX-style API).
// The JSON should be an object with key-string pairs.
#include "rapidjson/reader.h"
#include "rapidjson/error/en.h"
#include <iostream>
#include <string>
#include <map>
using namespace std;
using namespace rapidjson;
typedef map<string, string> MessageMap;
#if defined(__GNUC__)
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(effc++)
#endif
struct MessageHandler
: public BaseReaderHandler<UTF8<>, MessageHandler> {
MessageHandler() : messages_(), state_(kExpectObjectStart), name_() {}
bool StartObject() {
switch (state_) {
case kExpectObjectStart:
state_ = kExpectNameOrObjectEnd;
return true;
default:
return false;
}
}
bool String(const char* str, SizeType length, bool) {
switch (state_) {
case kExpectNameOrObjectEnd:
name_ = string(str, length);
state_ = kExpectValue;
return true;
case kExpectValue:
messages_.insert(MessageMap::value_type(name_, string(str, length)));
state_ = kExpectNameOrObjectEnd;
return true;
default:
return false;
}
}
bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; }
bool Default() { return false; } // All other events are invalid.
MessageMap messages_;
enum State {
kExpectObjectStart,
kExpectNameOrObjectEnd,
kExpectValue,
}state_;
std::string name_;
};
#if defined(__GNUC__)
RAPIDJSON_DIAG_POP
#endif
void ParseMessages(const char* json, MessageMap& messages) {
Reader reader;
MessageHandler handler;
StringStream ss(json);
if (reader.Parse(ss, handler))
messages.swap(handler.messages_); // Only change it if success.
else {
ParseErrorCode e = reader.GetParseErrorCode();
size_t o = reader.GetErrorOffset();
cout << "Error: " << GetParseError_En(e) << endl;;
cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl;
}
}
int main() {
MessageMap messages;
const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }";
cout << json1 << endl;
ParseMessages(json1, messages);
for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr)
cout << itr->first << ": " << itr->second << endl;
cout << endl << "Parse a JSON with invalid schema." << endl;
const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }";
cout << json2 << endl;
ParseMessages(json2, messages);
return 0;
}

View File

@@ -0,0 +1,30 @@
// JSON pretty formatting example
// This example can only handle UTF-8. For handling other encodings, see prettyauto example.
#include "rapidjson/reader.h"
#include "rapidjson/prettywriter.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/error/en.h"
using namespace rapidjson;
int main(int, char*[]) {
// Prepare reader and input stream.
Reader reader;
char readBuffer[65536];
FileReadStream is(stdin, readBuffer, sizeof(readBuffer));
// Prepare writer and output stream.
char writeBuffer[65536];
FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer));
PrettyWriter<FileWriteStream> writer(os);
// JSON reader parse from the input stream and let writer generate the output.
if (!reader.Parse<kParseValidateEncodingFlag>(is, writer)) {
fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode()));
return 1;
}
return 0;
}

Some files were not shown because too many files have changed in this diff Show More