@@ -22,6 +22,7 @@ import {
2222 isArrayEqual ,
2323 makeArr ,
2424 objectSize ,
25+ OPS ,
2526 PageActionEventType ,
2627 RenderingIntentFlag ,
2728 shadow ,
@@ -37,6 +38,17 @@ import {
3738 PopupAnnotation ,
3839 WidgetAnnotation ,
3940} from "./annotation.js" ;
41+ import {
42+ Cmd ,
43+ Dict ,
44+ EOF ,
45+ isName ,
46+ isRefsEqual ,
47+ Name ,
48+ Ref ,
49+ RefSet ,
50+ RefSetCache ,
51+ } from "./primitives.js" ;
4052import {
4153 collectActions ,
4254 getInheritableProperty ,
@@ -51,27 +63,21 @@ import {
5163 XRefEntryException ,
5264 XRefParseException ,
5365} from "./core_utils.js" ;
54- import {
55- Dict ,
56- isName ,
57- isRefsEqual ,
58- Name ,
59- Ref ,
60- RefSet ,
61- RefSetCache ,
62- } from "./primitives.js" ;
66+ import { EvaluatorPreprocessor , PartialEvaluator } from "./evaluator.js" ;
6367import { getXfaFontDict , getXfaFontName } from "./xfa_fonts.js" ;
68+ import { Lexer , Linearization , Parser } from "./parser.js" ;
6469import { NullStream , Stream } from "./stream.js" ;
6570import { BaseStream } from "./base_stream.js" ;
6671import { calculateMD5 } from "./calculate_md5.js" ;
6772import { Catalog } from "./catalog.js" ;
6873import { clearGlobalCaches } from "./cleanup_helper.js" ;
6974import { DatasetReader } from "./dataset_reader.js" ;
7075import { Intersector } from "./intersector.js" ;
71- import { Linearization } from "./parser .js" ;
76+ import { LocalColorSpaceCache } from "./image_utils .js" ;
7277import { ObjectLoader } from "./object_loader.js" ;
7378import { OperatorList } from "./operator_list.js" ;
74- import { PartialEvaluator } from "./evaluator.js" ;
79+ import { PDFFunctionFactory } from "./function.js" ;
80+ import { PDFImage } from "./image.js" ;
7581import { StreamsSequenceStream } from "./decode_stream.js" ;
7682import { StructTreePage } from "./struct_tree.js" ;
7783import { XFAFactory } from "./xfa/factory.js" ;
@@ -2030,6 +2036,219 @@ class PDFDocument {
20302036 AnnotationFactory . createGlobals ( this . pdfManager )
20312037 ) ;
20322038 }
2039+
2040+ async toJSObject ( value , firstCall = true ) {
2041+ if ( typeof PDFJSDev !== "undefined" && PDFJSDev . test ( "MOZCENTRAL" ) ) {
2042+ throw new Error ( "Not implemented: toJSObject" ) ;
2043+ }
2044+
2045+ if ( value === null && firstCall ) {
2046+ return this . toJSObject ( this . xref . trailer , false ) ;
2047+ }
2048+ if ( value instanceof Dict ) {
2049+ const obj = Object . create ( null ) ;
2050+ const isPage = isName ( value . get ( "Type" ) , "Page" ) ;
2051+ for ( const [ key , val ] of value . getRawEntries ( ) ) {
2052+ obj [ key ] =
2053+ isPage && key === "Contents"
2054+ ? _getContentTokens ( val , this . xref )
2055+ : await this . toJSObject ( val , false ) ;
2056+ }
2057+ return obj ;
2058+ }
2059+ if ( Array . isArray ( value ) ) {
2060+ return Promise . all ( value . map ( v => this . toJSObject ( v , false ) ) ) ;
2061+ }
2062+ if ( value instanceof Ref ) {
2063+ if ( firstCall ) {
2064+ return this . toJSObject ( this . xref . fetch ( value ) , false ) ;
2065+ }
2066+ const result = Object . create ( null ) ;
2067+ result . num = value . num ;
2068+ result . gen = value . gen ;
2069+ return result ;
2070+ }
2071+ if ( value instanceof BaseStream ) {
2072+ const { dict } = value ;
2073+ const obj = Object . create ( null ) ;
2074+ obj . dict = await this . toJSObject ( dict , false ) ;
2075+
2076+ if (
2077+ isName ( dict . get ( "Type" ) , "XObject" ) &&
2078+ isName ( dict . get ( "Subtype" ) , "Image" )
2079+ ) {
2080+ try {
2081+ const pdfFunctionFactory = new PDFFunctionFactory ( {
2082+ xref : this . xref ,
2083+ isEvalSupported : this . pdfManager . evaluatorOptions . isEvalSupported ,
2084+ } ) ;
2085+ const imageObj = await PDFImage . buildImage ( {
2086+ xref : this . xref ,
2087+ res : Dict . empty ,
2088+ image : value ,
2089+ pdfFunctionFactory,
2090+ globalColorSpaceCache : this . catalog . globalColorSpaceCache ,
2091+ localColorSpaceCache : new LocalColorSpaceCache ( ) ,
2092+ } ) ;
2093+ const imgData = await imageObj . createImageData (
2094+ /* forceRGBA = */ true ,
2095+ /* isOffscreenCanvasSupported = */ false
2096+ ) ;
2097+ obj . imageData = {
2098+ width : imgData . width ,
2099+ height : imgData . height ,
2100+ kind : imgData . kind ,
2101+ data : imgData . data ,
2102+ } ;
2103+ return obj ;
2104+ } catch {
2105+ // Fall through to regular byte stream if image decoding fails.
2106+ }
2107+ }
2108+
2109+ if ( isName ( dict . get ( "Subtype" ) , "Form" ) ) {
2110+ obj . bytes = value . getString ( ) ;
2111+ value . reset ( ) ;
2112+ const { instructions, cmdNames } = _groupIntoInstructions (
2113+ _tokenizeStream ( value , this . xref )
2114+ ) ;
2115+ obj . contentStream = true ;
2116+ obj . instructions = instructions ;
2117+ obj . cmdNames = cmdNames ;
2118+ return obj ;
2119+ }
2120+
2121+ obj . bytes = value . getString ( ) ;
2122+ return obj ;
2123+ }
2124+ return value ;
2125+ }
2126+ }
2127+
/**
 * Parse a stream into a flat list of plain-JS tokens.
 *
 * Objects are read one at a time until the parser returns `EOF` or throws;
 * a parse error therefore ends tokenization and keeps what was read so far.
 * Objects that `_tokenToJSObject` maps to `null` are left out.
 *
 * @param {BaseStream} stream - The stream to tokenize.
 * @param {XRef} xref - Cross-reference table handed to the parser.
 * @returns {Array<Object>} The converted tokens, in stream order.
 */
function _tokenizeStream(stream, xref) {
  const lexer = new Lexer(stream);
  const parser = new Parser({ lexer, xref, allowStreams: false });

  // A parse failure is treated exactly like reaching the end of the data.
  const nextObject = () => {
    try {
      return parser.getObj();
    } catch {
      return EOF;
    }
  };

  const collected = [];
  for (let current = nextObject(); current !== EOF; current = nextObject()) {
    const converted = _tokenToJSObject(current);
    if (converted !== null) {
      collected.push(converted);
    }
  }
  return collected;
}
2152+
/**
 * Tokenize the content stream(s) of a page's raw `Contents` entry and group
 * the tokens into instructions.
 *
 * `contentsVal` may be a stream reference, an array of stream references,
 * or a reference to such an array. Entries that do not resolve to a stream
 * are skipped.
 *
 * @param {*} contentsVal - Raw (unresolved) value of the `Contents` entry.
 * @param {XRef} xref - Cross-reference table used to resolve references.
 * @returns {{contentStream: boolean, instructions: Array<Object>,
 *   cmdNames: Object, rawContents: Array<{num: number, gen: number}>}}
 *   `rawContents` lists the `{ num, gen }` of every entry that is a
 *   reference, whether or not it resolved to a stream.
 */
function _getContentTokens(contentsVal, xref) {
  // `Contents` can be an indirect reference to an array of streams, so
  // resolve the top-level value before deciding whether it is a list.
  // Without this the array would be fetched once, found not to be a stream,
  // and skipped, leaving the page with no instructions.
  const resolved = xref.fetchIfRef(contentsVal);
  const refs = Array.isArray(resolved) ? resolved : [contentsVal];
  const rawContents = [];
  const tokens = [];
  for (const rawRef of refs) {
    if (rawRef instanceof Ref) {
      rawContents.push({ num: rawRef.num, gen: rawRef.gen });
    }
    const stream = xref.fetchIfRef(rawRef);
    if (!(stream instanceof BaseStream)) {
      continue;
    }
    tokens.push(..._tokenizeStream(stream, xref));
  }
  const { instructions, cmdNames } = _groupIntoInstructions(tokens);
  return { contentStream: true, instructions, cmdNames, rawContents };
}
2170+
// Cache for the reverse of `OPS` (numeric id → property name); stays `null`
// until `_getOpsIdToName` is first called.
let _opsIdToName = null;

/**
 * Return the reverse lookup table of `OPS`, building it on first use.
 *
 * @returns {Object} Null-prototype object mapping each `OPS` value to the
 *   name of the property that holds it. The same object is returned on
 *   every call.
 */
function _getOpsIdToName() {
  if (_opsIdToName) {
    return _opsIdToName;
  }
  const reverse = Object.create(null);
  for (const name of Object.keys(OPS)) {
    reverse[OPS[name]] = name;
  }
  _opsIdToName = reverse;
  return reverse;
}
2183+
/**
 * Group a flat token list into `{ cmd, args }` instructions.
 *
 * Non-command tokens are buffered as operands until a command token
 * arrives. For a command found in `EvaluatorPreprocessor.opMap` with a fixed
 * operand count, only the last `numArgs` buffered operands are attached to
 * it; earlier surplus operands are each emitted as their own instruction
 * with `cmd: null`. Unknown commands and variable-argument commands take
 * every buffered operand. Operands left over at the end are emitted as
 * `cmd: null` instructions too.
 *
 * @param {Array<Object>} tokens - Tokens as produced by `_tokenToJSObject`.
 * @returns {{instructions: Array<{cmd: (string|null), args: Array<Object>}>,
 *   cmdNames: Object}} `cmdNames` maps each known command string that was
 *   seen to its `OPS` property name.
 */
function _groupIntoInstructions(tokens) {
  const { opMap } = EvaluatorPreprocessor;
  const idToName = _getOpsIdToName();
  const instructions = [];
  const cmdNames = Object.create(null);

  // Emit each operand as a stand-alone instruction with no command.
  const emitOrphans = operands => {
    for (const operand of operands) {
      instructions.push({ cmd: null, args: [operand] });
    }
  };

  let pending = [];
  for (const token of tokens) {
    if (token.type !== "cmd") {
      pending.push(token);
      continue;
    }
    const { value: cmd } = token;
    const op = opMap[cmd];
    if (op && !(cmd in cmdNames)) {
      cmdNames[cmd] = idToName[op.id];
    }
    if (op && !op.variableArgs) {
      // Fixed operand count: the oldest surplus operands become orphans.
      const surplus = pending.length - op.numArgs;
      if (surplus > 0) {
        emitOrphans(pending.splice(0, surplus));
      }
    }
    // Whatever is still buffered belongs to this command.
    instructions.push({ cmd, args: pending });
    pending = [];
  }
  emitOrphans(pending);
  return { instructions, cmdNames };
}
2218+
/**
 * Convert one parsed PDF object into a tagged plain-JS token.
 *
 * Every result carries a `type` of "cmd", "name", "ref", "array", "dict",
 * "number", "string", "boolean" or "null". Arrays and dictionaries are
 * converted recursively; dictionary values are stored on a null-prototype
 * object.
 *
 * @param {*} obj - A value returned by the parser.
 * @returns {Object|null} The token, or `null` when `obj` is none of the
 *   supported kinds.
 */
function _tokenToJSObject(obj) {
  // Primitives first: they can never match the `instanceof` checks below.
  switch (typeof obj) {
    case "number":
    case "string":
    case "boolean":
      return { type: typeof obj, value: obj };
  }
  if (obj === null) {
    return { type: "null" };
  }
  if (obj instanceof Cmd) {
    return { type: "cmd", value: obj.cmd };
  }
  if (obj instanceof Name) {
    return { type: "name", value: obj.name };
  }
  if (obj instanceof Ref) {
    const { num, gen } = obj;
    return { type: "ref", num, gen };
  }
  if (Array.isArray(obj)) {
    const items = obj.map(item => _tokenToJSObject(item));
    return { type: "array", value: items };
  }
  if (obj instanceof Dict) {
    const entries = Object.create(null);
    for (const [key, raw] of obj.getRawEntries()) {
      entries[key] = _tokenToJSObject(raw);
    }
    return { type: "dict", value: entries };
  }
  return null;
}
20342253
20352254export { Page , PDFDocument } ;
0 commit comments