From 540f95c3de3cce9ff13a9e03ff769454c15a2006 Mon Sep 17 00:00:00 2001 From: Qianqian Fang <fangqq@gmail.com> Date: Mon, 14 Feb 2022 22:35:08 -0500 Subject: [PATCH] add optional preceding whitespace, explain format --- loadbj.m | 8 +++- loadjson.m | 95 +++++++++++++++++++++++++++++++---------- savejson.m | 10 ++--- test/run_jsonlab_test.m | 6 +-- 4 files changed, 87 insertions(+), 32 deletions(-) diff --git a/loadbj.m b/loadbj.m index a94a515..589b787 100644 --- a/loadbj.m +++ b/loadbj.m @@ -12,7 +12,7 @@ function [data, mmap] = loadbj(fname,varargin) % including uint16(u), uint32(m), uint64(M) and half-precision float (h). % Starting from BJD Draft-2 (JSONLab 3.0 beta or later), all integer and % floating-point numbers are parsed in Little-Endian as opposed to -% Big-Endian form as in BJD Draft-1/UBJSON Draft-12 (JSONLab 2.1 or older) +% Big-Endian form as in BJD Draft-1/UBJSON Draft-12 (JSONLab 2.0 or older) % % authors:Qianqian Fang (q.fang <at> neu.edu) % initially created on 2013/08/01 @@ -71,6 +71,12 @@ function [data, mmap] = loadbj(fname,varargin) % output: % dat: a cell array, where {...} blocks are converted into cell arrays, % and [...] are converted to arrays +% mmap: (optional) a cell array in the form of +% {{jsonpath1,[start,length]}, {jsonpath2,[start,length]}, ...} +% where jsonpath_i is a string in the form of JSONPath, and +% start is an integer referring to the offset from the beginning +% of the stream, and length is the JSON object string length. +% For more details, please see the help section of loadjson.m % % examples: % obj=struct('string','value','array',[1 2 3]); diff --git a/loadjson.m b/loadjson.m index 60d34d8..1ee4c12 100644 --- a/loadjson.m +++ b/loadjson.m @@ -80,11 +80,60 @@ function [data, mmap] = loadjson(fname,varargin) % output: % dat: a cell array, where {...} blocks are converted into cell arrays, % and [...] 
are converted to arrays -% mmap: (optional) a cell array in the form of -% {{jsonpath1,[start,length]}, {jsonpath2,[start,length]}, ...} -% where jsonpath_i is a string in the form of JSONPath [1], and -% start is an integer referring to the offset from the begining -% of the stream, and length is the JSON object string length. +% mmap: (optional) a cell array as memory-mapping table in the form of +% {{jsonpath1,[start,length,<whitespace>]}, +% {jsonpath2,[start,length,<whitespace>]}, ...} +% where jsonpath_i is a string in the JSONPath [1,2] format, and +% "start" is an integer referring to the offset from the beginning +% of the stream, and "length" is the JSON object string length. +% An optional 3rd integer "whitespace" may appear to record the +% preceding whitespace length in case expansion of the data +% record is needed when using the mmap. +% +% Memory-mapping table (mmap) is useful for quickly reading/writing +% specific data records inside a large JSON file without needing +% to load/parse/overwrite the entire file. +% +% The JSONPath keys used in mmap are largely compatible with the +% upstream specification defined in [1], with a slight extension +% to handle concatenated JSON files. +% +% In the mmap jsonpath key, a '$' denotes the root object, a '.' +% denotes a child of the preceding element; '.key' points to the +% value segment of the child named "key" of the preceding +% object; '.[i]' denotes the (i+1)th member of the preceding +% element, which must be an array. For example, a key +% +% $.obj1.obj2.[0].obj3 +% +% defines the memory-map of the "value" section in the below +% hierarchy: +% { +% "obj1":{ +% "obj2":[ +% {"obj3":value}, +% ... +% ], +% ... +% } +% } +% Please note that "value" can be any valid JSON value, including +% an array, an object, a string or numerical value. 
+% +% To handle concatenated JSON objects (including ndjson, +% http://ndjson.org/), such as +% +% {"root1": {"obj1": ...}} +% ["root2", value1, value2, {"obj2": ...}] +% {"root3": ...} +% +% we use '$' or '$0' for the first root-object, and '$1' refers +% to the 2nd root object (["root2",...]) and '$2' refers to the +% 3rd root object, and so on. Please note that this syntax is an +% extension from the JSONPath documentation [1,2] +% +% [1] https://goessner.net/articles/JsonPath/ +% [2] http://jsonpath.herokuapp.com/ % % examples: % dat=loadjson('{"obj":{"string":"value","array":[1,2,3]}}') @@ -151,7 +200,7 @@ function [data, mmap] = loadjson(fname,varargin) opt.parsestringarray=jsonopt('ParseStringArray',0,opt); opt.usemap=jsonopt('UseMap',0,opt); opt.arraydepth_=1; - mmaponly=jsonopt('MmapOnly',0,opt); + opt.mmaponly=jsonopt('MmapOnly',0,opt); if(jsonopt('ShowProgress',0,opt)==1) opt.progressbar_=waitbar(0,'loading ...'); @@ -163,7 +212,7 @@ function [data, mmap] = loadjson(fname,varargin) maxobjid=inf; end opt.jsonpath_='$'; - if(nargout>1 || mmaponly) + if(nargout>1 || opt.mmaponly) mmap={}; end jsoncount=1; @@ -171,19 +220,19 @@ function [data, mmap] = loadjson(fname,varargin) [cc,pos,w1]=next_char(inputstr, pos); switch(cc) case '{' - if(nargout>1 || mmaponly) - mmap{end+1}={opt.jsonpath_,pos-w1}; + if(nargout>1 || opt.mmaponly) + mmap{end+1}={opt.jsonpath_,[pos, 0, w1]}; [data{jsoncount},pos,index_esc,newmmap] = parse_object(inputstr, pos, esc, index_esc,opt); - mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}]; + mmap{end}{2}(2)=pos-mmap{end}{2}(1); mmap=[mmap(:);newmmap(:)]; else [data{jsoncount},pos,index_esc] = parse_object(inputstr, pos, esc, index_esc,opt); end case '[' - if(nargout>1 || mmaponly) - mmap{end+1}={opt.jsonpath_,pos-w1}; + if(nargout>1 || opt.mmaponly) + mmap{end+1}={opt.jsonpath_,[pos,0,w1]}; [data{jsoncount},pos,index_esc,newmmap] = parse_array(inputstr, pos, esc, index_esc,opt); - mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}]; + 
mmap{end}{2}(2)=pos-mmap{end}{2}(1); mmap=[mmap(:);newmmap(:)]; else [data{jsoncount},pos,index_esc] = parse_array(inputstr, pos, esc, index_esc,opt); @@ -206,10 +255,11 @@ function [data, mmap] = loadjson(fname,varargin) if(jsoncount==1 && iscell(data)) data=data{1}; end - if(nargout>1 || mmaponly) + if(nargout>1 || opt.mmaponly) mmap=mmap'; mmap=filterjsonmmap(mmap, jsonopt('MMapExclude',{},opt), 0); mmap=filterjsonmmap(mmap, jsonopt('MMapInclude',{},opt), 1); + mmap=cellfun(@(x) {x{1},x{2}(1:(2+int8(length(x{2})>=3 && (x{2}(3)>0))))}, mmap, 'UniformOutput', false); end if(jsonopt('JDataDecode',1,varargin{:})==1) try @@ -220,7 +270,7 @@ function [data, mmap] = loadjson(fname,varargin) ME.identifier, ME.message, savejson('',ME.stack)); end end - if(mmaponly) + if(opt.mmaponly) data=mmap; end if(isfield(opt,'progressbar_')) @@ -307,13 +357,14 @@ function [object, pos,index_esc, mmap] = parse_array(inputstr, pos, esc, index_e catch end if(isempty(endpos) || pos~=endpos) + w2=0; while 1 varargin{1}.arraydepth_=arraydepth+1; if(nargout>3) varargin{1}.jsonpath_=[origpath '.' 
sprintf('[%d]',length(object))]; - mmap{end+1}={varargin{1}.jsonpath_, pos}; + mmap{end+1}={varargin{1}.jsonpath_, [pos, 0, w2]}; [val, pos, index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc,varargin{:}); - mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}]; + mmap{end}{2}(2)=pos-mmap{end}{2}(1); mmap=[mmap(:);newmmap(:)]; else [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc,varargin{:}); @@ -323,7 +374,7 @@ function [object, pos,index_esc, mmap] = parse_array(inputstr, pos, esc, index_e if cc == ']' break; end - pos=parse_char(inputstr, pos, ','); + [pos, w1, w2]=parse_char(inputstr, pos, ','); end end end @@ -528,14 +579,12 @@ function [object, pos, index_esc, mmap] = parse_object(inputstr, pos, esc, index if isempty(str) pos=error_pos('Name of value at position %d cannot be empty',inputstr,pos); end - pos=parse_char(inputstr, pos, ':'); + [pos, w1, w2]=parse_char(inputstr, pos, ':'); if(nargout>3) varargin{1}.jsonpath_=[origpath,'.',str]; - mmap{end+1}={varargin{1}.jsonpath_,pos}; - end - if(nargout>3) + mmap{end+1}={varargin{1}.jsonpath_,[pos,0,w2]}; [val, pos,index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc, varargin{:}); - mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}]; + mmap{end}{2}(2)=pos-mmap{end}{2}(1); mmap=[mmap(:);newmmap(:)]; else [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:}); diff --git a/savejson.m b/savejson.m index 4ab2cc1..2b6a0d5 100644 --- a/savejson.m +++ b/savejson.m @@ -70,7 +70,7 @@ function json=savejson(rootname,obj,varargin) % wrapped inside a function call as 'foo(...);' % UnpackHex [1|0]: conver the 0x[hex code] output by loadjson % back to the string form -% SaveBinary [0|1]: 1 - save the JSON file in binary mode; 0 - text mode. +% SaveBinary [1|0]: 1 - save the JSON file in binary mode; 0 - text mode. 
% Compact [0|1]: 1- out compact JSON format (remove all newlines and tabs) % Compression 'zlib', 'gzip', 'lzma', 'lzip', 'lz4' or 'lz4hc': specify array % compression method; currently only supports 6 methods. The @@ -254,11 +254,11 @@ if(~isempty(filename)) if(jsonopt('Append',0,opt)) mode='a'; end - if(jsonopt('SaveBinary',0,opt)==1) - if(~isempty(encoding)) - fid = fopen(filename, [mode 'b'],endian,encoding); - else + if(jsonopt('SaveBinary',1,opt)==1) + if(isempty(encoding)) fid = fopen(filename, [mode 'b'],endian); + else + fid = fopen(filename, [mode 'b'],endian,encoding); end fwrite(fid,json); else diff --git a/test/run_jsonlab_test.m b/test/run_jsonlab_test.m index 5907691..11a9a97 100644 --- a/test/run_jsonlab_test.m +++ b/test/run_jsonlab_test.m @@ -302,7 +302,7 @@ if(ismember('jmap',tests)) test_jsonlab('mmap of an object',@savejson,loadjson('{"a":1,"b":[2,3]}','mmaponly',1),... '[["$",[1,17]],["$.a",[6,1]],["$.b",[12,5]]]','compact',1); test_jsonlab('mmap of object with white-space',@savejson,loadjson('{"a":1 , "b" : [2,3]}','mmaponly',1),... - '[["$",[1,23]],["$.a",[6,1]],["$.b",[18,5]]]','compact',1); + '[["$",[1,23]],["$.a",[6,1]],["$.b",[18,5,2]]]','compact',1); test_jsonlab('mmapinclude option',@savejson,loadjson('[[1,2,3],{"a":[4,5]}]','mmaponly',1,'mmapinclude','.a'),... '[["$.[1].a",[15,5]]]','compact',1); test_jsonlab('mmapexclude option',@savejson,loadjson('[[1,2,3],{"a":[4,5]}]','mmaponly',1,'mmapexclude',{'[0]','[1]','[2]'}),... @@ -328,6 +328,6 @@ if(ismember('bmap',tests)) '[["$.[1].a",[15,8]]]','compact',1); test_jsonlab('mmapexclude option',@savejson,loadbj(savebj({[1,2,3],struct('a',[4,5])}),'mmaponly',1,'mmapexclude',{'[0]','[1]','[2]'}),... '[["$",[1,24]]]','compact',1); - test_jsonlab('json with indentation',@savejson,loadbj(savebj({[1,2,3],struct('a',[4,5])}),'mmaponly',1,'mmapinclude','.a'),... 
- '[["$.[1].a",[15,8]]]','compact',1); + test_jsonlab('test multiple root objects with N padding',@savejson,loadbj([savebj({[1,2,3],struct('a',[4,5])}) 'NNN' savebj(struct('b',[4,5]))],'mmaponly',1,'mmapinclude','.b'),... + '[["$1.b",[32,8]]]','compact',1); end \ No newline at end of file -- GitLab