From b0f0ebd60150c6ef3b876f374b5b76f55c4092ee Mon Sep 17 00:00:00 2001
From: Qianqian Fang <fangqq@gmail.com>
Date: Wed, 2 Feb 2022 00:10:42 -0500
Subject: [PATCH] return disk-map or memory-map table in loadjson

---
 loadjson.m | 102 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 26 deletions(-)

diff --git a/loadjson.m b/loadjson.m
index 42b42f5..a1c36de 100644
--- a/loadjson.m
+++ b/loadjson.m
@@ -1,10 +1,11 @@
-function data = loadjson(fname,varargin)
+function [data, mmap] = loadjson(fname,varargin)
 %
 % data=loadjson(fname,opt)
 %    or
-% data=loadjson(fname,'param1',value1,'param2',value2,...)
+% [data, mmap]=loadjson(fname,'param1',value1,'param2',value2,...)
 %
-% parse a JSON (JavaScript Object Notation) file or string
+% parse a JSON (JavaScript Object Notation) file or string and return a
+% matlab data structure with optional memory-map table
 %
 % authors:Qianqian Fang (q.fang <at> neu.edu)
 % created on 2011/09/09, including previous works from 
@@ -70,11 +71,16 @@ function data = loadjson(fname,varargin)
 % output:
 %      dat: a cell array, where {...} blocks are converted into cell arrays,
 %           and [...] are converted to arrays
+%      mmap: (optional) a cell array in the form of 
+%           {{jsonpath1,[start,length]}, {jsonpath2,[start,length]}, ...}
+%           where jsonpath_i is a string in the form of JSONPath [1], and
+%           start is an integer referring to the offset from the begining
+%           of the stream, and length is the JSON object string length.
 %
 % examples:
 %      dat=loadjson('{"obj":{"string":"value","array":[1,2,3]}}')
 %      dat=loadjson(['examples' filesep 'example1.json'])
-%      dat=loadjson(['examples' filesep 'example1.json'],'SimplifyCell',0)
+%      [dat, mmap]=loadjson(['examples' filesep 'example1.json'],'SimplifyCell',0)
 %
 % license:
 %     BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details 
@@ -106,7 +112,8 @@ function data = loadjson(fname,varargin)
     else
        error_pos('input file does not exist');
     end
-    
+
+    mmap={};
     if(jsonopt('BuiltinJSON',0,opt) && exist('jsondecode','builtin'))
         try
             newstring=regexprep(string,'[\r\n]','');
@@ -146,15 +153,25 @@ function data = loadjson(fname,varargin)
     if(maxobjid==0)
         maxobjid=inf;
     end
-
+    opt.jsonpath_='$';
     jsoncount=1;
     while pos <= inputlen
         [cc,pos]=next_char(inputstr, pos);
         switch(cc)
             case '{'
-                [data{jsoncount},pos,index_esc] = parse_object(inputstr, pos, esc, index_esc,opt);
+                if(nargout>1)
+                    [data{jsoncount},pos,index_esc,newmmap] = parse_object(inputstr, pos, esc, index_esc,opt);
+                    mmap=[mmap(:);newmmap(:)];
+                else
+                    [data{jsoncount},pos,index_esc] = parse_object(inputstr, pos, esc, index_esc,opt);
+                end
             case '['
-                [data{jsoncount},pos,index_esc] = parse_array(inputstr, pos, esc, index_esc,opt);
+                if(nargout>1)
+                    [data{jsoncount},pos,index_esc,newmmap] = parse_array(inputstr, pos, esc, index_esc,opt);
+                    mmap=[mmap(:);newmmap(:)];
+                else
+                    [data{jsoncount},pos,index_esc] = parse_array(inputstr, pos, esc, index_esc,opt);
+                end
             otherwise
                 pos=error_pos('Outer level structure must be an object or an array',inputstr,pos);
         end
@@ -186,7 +203,11 @@ end
 %% helper functions
 %%-------------------------------------------------------------------------
 
-function [object, pos,index_esc] = parse_array(inputstr, pos, esc, index_esc, varargin) % JSON array is written in row-major order
+function [object, pos,index_esc, mmap] = parse_array(inputstr, pos, esc, index_esc, varargin) % JSON array is written in row-major order
+    if(nargout>3)
+        mmap={{[varargin{1}.jsonpath_ '.[*]'],pos}};
+        origpath=varargin{1}.jsonpath_;
+    end
     pos=parse_char(inputstr, pos, '[');
     object = cell(0, 1);
     arraydepth=varargin{1}.arraydepth_;
@@ -202,6 +223,9 @@ function [object, pos,index_esc] = parse_array(inputstr, pos, esc, index_esc, va
         try
             if((varargin{1}.fastarrayparser)>=1 && arraydepth>=varargin{1}.fastarrayparser)
                 [endpos, maxlevel]=fast_match_bracket(varargin{1}.arraytoken_,varargin{1}.arraytokenidx_,pos);
+                if(nargout>3)
+                    mmap{1}{2}=[mmap{1}{2},endpos-mmap{1}{2}+1];
+                end
                 if(~isempty(endpos))
                     arraystr=['[' inputstr(pos:endpos)];
                     arraystr=sscanf_prep(arraystr);
@@ -281,7 +305,13 @@ function [object, pos,index_esc] = parse_array(inputstr, pos, esc, index_esc, va
         if(isempty(endpos) || pos~=endpos)
             while 1
                 varargin{1}.arraydepth_=arraydepth+1;
-                [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc,varargin{:});
+                if(nargout>3)
+                    varargin{1}.jsonpath_=[origpath '.' sprintf('[%d]',length(object))];
+                    [val, pos,index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc,varargin{:});
+                    mmap=[mmap(:);newmmap(:)];
+                else
+                    [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc,varargin{:});
+                end
                 object{end+1} = val;
                 [cc,pos]=next_char(inputstr,pos);
                 if cc == ']'
@@ -418,49 +448,56 @@ function [num, pos] = parse_number(inputstr, pos, varargin)
 end
 %%-------------------------------------------------------------------------
 
-function [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin)
+function varargout = parse_value(inputstr, pos, esc, index_esc, varargin)
     len=length(inputstr);
     if(isfield(varargin{1},'progressbar_'))
         waitbar(pos/len,varargin{1}.progressbar_,'loading ...');
     end
-
+    varargout{3}=index_esc;
+    if(nargout>3)
+            varargout{4}={};
+    end
     switch(inputstr(pos))
         case '"'
-            [val, pos,index_esc] = parseStr(inputstr, pos, esc, index_esc,varargin{:});
+            [varargout{1:3}] = parseStr(inputstr, pos, esc, index_esc,varargin{:});
+            varargout{3}=index_esc;
             return;
         case '['
-            [val, pos,index_esc] = parse_array(inputstr, pos, esc, index_esc, varargin{:});
+            [varargout{1:nargout}] = parse_array(inputstr, pos, esc, index_esc, varargin{:});
             return;
         case '{'
-            [val, pos,index_esc] = parse_object(inputstr, pos, esc, index_esc, varargin{:});
+            [varargout{1:nargout}] = parse_object(inputstr, pos, esc, index_esc, varargin{:});
             return;
         case {'-','0','1','2','3','4','5','6','7','8','9'}
-            [val, pos] = parse_number(inputstr, pos, varargin{:});
+            [varargout{1:2}] = parse_number(inputstr, pos, varargin{:});
             return;
         case 't'
             if pos+3 <= len && strcmpi(inputstr(pos:pos+3), 'true')
-                val = true;
-                pos = pos + 4;
+                varargout{1} = true;
+                varargout{2} = pos + 4;
                 return;
             end
         case 'f'
             if pos+4 <= len && strcmpi(inputstr(pos:pos+4), 'false')
-                val = false;
-                pos = pos + 5;
+                varargout{1} = false;
+                varargout{2} = pos + 5;
                 return;
             end
         case 'n'
             if pos+3 <= len && strcmpi(inputstr(pos:pos+3), 'null')
-                val = [];
-                pos = pos + 4;
+                varargout{1} = [];
+                varargout{2} = pos + 4;
                 return;
             end
     end
-    pos=error_pos('Value expected at position %d',inputstr,pos);
+    varargout{2}=error_pos('Value expected at position %d',inputstr,pos);
 end
 
 %%-------------------------------------------------------------------------
-function [object, pos, index_esc] = parse_object(inputstr, pos, esc, index_esc, varargin)
+function [object, pos, index_esc, mmap] = parse_object(inputstr, pos, esc, index_esc, varargin)
+    if(nargout>3)
+        mmap={{varargin{1}.jsonpath_,pos}};
+    end
     pos=parse_char(inputstr, pos, '{');
     usemap=varargin{1}.usemap;
     if(usemap)
@@ -475,8 +512,18 @@ function [object, pos, index_esc] = parse_object(inputstr, pos, esc, index_esc,
             if isempty(str)
                 pos=error_pos('Name of value at position %d cannot be empty',inputstr,pos);
             end
+            if(nargout>3)
+                varargin{1}.jsonpath_=[mmap{1}{1},'.',str];
+                mmap{end+1}={varargin{1}.jsonpath_,pos-length(str)-2};
+            end
             pos=parse_char(inputstr, pos, ':');
-            [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
+            if(nargout>3)
+                [val, pos,index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
+                mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}];
+                mmap=[mmap(:);newmmap(:)];
+            else
+                [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
+            end
             if(usemap)
 		object(str)=val;
 	    else
@@ -490,6 +537,9 @@ function [object, pos, index_esc] = parse_object(inputstr, pos, esc, index_esc,
         end
     end
     pos=parse_char(inputstr, pos, '}');
+    if(nargout>3)
+        mmap{1}={[mmap{1}{1} '.*'],[mmap{1}{2}, pos-mmap{1}{2}]};
+    end
 end
 
 %%-------------------------------------------------------------------------
@@ -526,7 +576,7 @@ function newstr=unescapejsonstring(str)
         return;
     end
     escapechars={'\\','\"','\/','\a','\b','\f','\n','\r','\t','\v'};
-    for i=1:length(escapechars);
+    for i=1:length(escapechars)
         newstr=regexprep(newstr,regexprep(escapechars{i},'\\','\\\\'), escapechars{i});
     end
     newstr=regexprep(newstr,'\\u([0-9A-Fa-f]{4})', '${char(base2dec($1,16))}');
-- 
GitLab