1 files changed, 1028 insertions, 0 deletions
diff --git a/pdf/pdf_deref.c b/pdf/pdf_deref.c
new file mode 100644
index 00000000..fb3ce67b
--- /dev/null
+++ b/pdf/pdf_deref.c
@@ -0,0 +1,1028 @@
+/* Copyright (C) 2020-2021 Artifex Software, Inc.
+   All Rights Reserved.
+
+   This software is provided AS-IS with no warranty, either express or
+   implied.
+
+   This software is distributed under license and may not be copied,
+   modified or distributed except as expressly authorized under the terms
+   of the license contained in the file LICENSE in this distribution.
+
+   Refer to licensing information at http://www.artifex.com or contact
+   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
+   CA 94945, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* Functions to deal with dereferencing indirect objects
+ * for the PDF interpreter. In here we also keep the code
+ * for dealing with the object cache, because the dereferencing
+ * functions are currently the only place that deals with it.
+ */
+
+#include "pdf_int.h"
+#include "pdf_stack.h"
+#include "pdf_loop_detect.h"
+#include "strmio.h"
+#include "stream.h"
+#include "pdf_file.h"
+#include "pdf_misc.h"
+#include "pdf_dict.h"
+#include "pdf_array.h"
+#include "pdf_deref.h"
+#include "pdf_repair.h"
+
+/* Start with the object caching functions */
+
+/* Given an object, create a cache entry for it and make that entry the
+ * most-recently-used (MRU) one. If the cache already holds
+ * MAX_OBJECT_CACHE_SIZE entries the least-recently-used (LRU) entry is
+ * evicted first.
+ *
+ * The entries form a double-linked list running from ctx->cache_LRU to
+ * ctx->cache_MRU, and each one is also attached to the xref table: we detect
+ * an existing cache entry by seeing that the xref entry for the object number
+ * has a non-NULL 'cache' member. So the xref must be updated whenever a cache
+ * entry is added or deleted.
+ *
+ * The cache takes its own reference to 'o' (pdfi_countup) and drops the
+ * reference it held on an evicted object (pdfi_countdown).
+ *
+ * Returns 0 on success, gs_error_rangecheck if the object number does not
+ * index the xref table, gs_error_unknownerror if the object is already cached
+ * or the cache is marked full but has no LRU entry, and gs_error_VMerror if
+ * the new entry cannot be allocated.
+ */
+static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
+{
+    pdf_obj_cache_entry *entry;
+
+    /* Validate the object number *before* it is used to index the xref table.
+     * An object number equal to xref_size is already out of range; this is the
+     * same test pdfi_dereference() applies.
+     */
+    if (o->object_num >= ctx->xref_table->xref_size)
+        return_error(gs_error_rangecheck);
+
+    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
+#if DEBUG_CACHE
+        dmprintf1(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
+#endif
+        return_error(gs_error_unknownerror);
+    }
+
+    if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE)
+    {
+#if DEBUG_CACHE
+        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
+#endif
+        if (ctx->cache_LRU) {
+            /* Unlink the LRU entry from the head of the list */
+            entry = ctx->cache_LRU;
+            ctx->cache_LRU = entry->next;
+            if (entry->next)
+                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
+            else
+                /* The evicted entry was the only one, so it was also the MRU;
+                 * don't leave cache_MRU pointing at memory we are about to free.
+                 */
+                ctx->cache_MRU = NULL;
+            /* The xref must no longer claim that the evicted object is cached */
+            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
+            pdfi_countdown(entry->o);
+            ctx->cache_entries--;
+            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
+        } else
+            return_error(gs_error_unknownerror);
+    }
+    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
+    if (entry == NULL)
+        return_error(gs_error_VMerror);
+
+    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
+
+    /* The cache holds its own reference to the object */
+    entry->o = o;
+    pdfi_countup(o);
+
+    /* Append the new entry at the MRU end of the list */
+    if (ctx->cache_MRU) {
+        entry->previous = ctx->cache_MRU;
+        ctx->cache_MRU->next = entry;
+    }
+    ctx->cache_MRU = entry;
+    if (ctx->cache_LRU == NULL)
+        ctx->cache_LRU = entry;
+
+    ctx->cache_entries++;
+    ctx->xref_table->xref[o->object_num].cache = entry;
+    return 0;
+}
+
+/* Move an existing cache entry to the most-recently-used (MRU) end of the
+ * cache list. Does nothing if the list has no MRU entry or if the entry is
+ * already the MRU.
+ */
+static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
+{
+    pdf_obj_cache_entry *newer, *older;
+
+    if (ctx->cache_MRU == NULL || cache_entry == ctx->cache_MRU)
+        return;
+
+    newer = (pdf_obj_cache_entry *)cache_entry->next;
+    older = (pdf_obj_cache_entry *)cache_entry->previous;
+
+    /* Unlink the entry from its current position */
+    if (newer != NULL)
+        newer->previous = older;
+    if (older != NULL)
+        older->next = newer;
+    else
+        /* The entry was the least-recently-used one, so its successor
+         * becomes the new LRU.
+         */
+        ctx->cache_LRU = newer;
+
+    /* Re-link it after the current MRU and make it the new MRU */
+    cache_entry->next = NULL;
+    cache_entry->previous = ctx->cache_MRU;
+    ctx->cache_MRU->next = cache_entry;
+    ctx->cache_MRU = cache_entry;
+}
+
+/* This one is a bit of an oddity, it is used for fonts. When we build a PDF
+ * font object we want the object cache to reference *that* object, not the
+ * dictionary which was read out of the PDF file. This function replaces
+ * whatever is cached for o->object_num with 'o', so that later dereferences
+ * get the font object. If nothing is cached for that object number yet, 'o'
+ * is simply added to the cache.
+ *
+ * Error checking is deliberately limited: we assume that things like the
+ * validity of the object (eg not a free object) have already been handled.
+ *
+ * Returns 0 on success, or the result of pdfi_add_to_cache() when a new
+ * entry had to be created.
+ */
+int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
+{
+    pdf_obj_cache_entry *slot = ctx->xref_table->xref[o->object_num].cache;
+    pdf_obj *previous_obj;
+
+    if (slot == NULL)
+        return(pdfi_add_to_cache(ctx, o));
+
+    /* NOTE: Remember the old object without decrementing it yet, to avoid
+     * triggering a warning message for freeing an object that's in the cache.
+     */
+    previous_obj = slot->o;
+
+    /* Install the new object, with a reference owned by the cache */
+    slot->o = o;
+    pdfi_countup(o);
+    pdfi_promote_cache_entry(ctx, slot);
+
+    /* Now release the cache's reference to the old object, if any */
+    pdfi_countdown(previous_obj);
+    return 0;
+}
+
+/* Now the dereferencing functions */
+
+/*
+ * Technically we can accept a stream other than the main PDF file stream here. This is
+ * really for the case of compressed objects where we read tokens from the compressed
+ * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
+ * on top of the main file stream, which may be useful. Note that this cannot work with
+ * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
+ * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
+ * the stream we are using. See the comments below when keyword is PDF_STREAM.
+ */
+
+/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
+ * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
+ * Currently errors are impossible. This is only used by the decryption code
+ * to determine if a string is in a compressed object stream, if it is then
+ * it can't be used for decryption.
+ *
+ * 'gen' is currently unused. An object number which does not index the xref
+ * table is reported as not compressed (0).
+ */
+int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
+{
+    /* Can't possibly be a compressed object before we have finished reading
+     * the xref.
+     */
+    if (ctx->xref_table == NULL)
+        return 0;
+
+    /* Never index past the end of the xref table; an object we have no
+     * entry for cannot be known to live in an object stream.
+     */
+    if (obj >= ctx->xref_table->xref_size)
+        return 0;
+
+    return ctx->xref_table->xref[obj].compressed ? 1 : 0;
+}
+
+/* Finish reading a stream object, after the 'stream' keyword has been read
+ * and removed from the stack. On entry the stream dictionary is expected on
+ * top of the stack; it is replaced there by a stream object built from it.
+ *
+ * The function records the position of the stream data in the main file,
+ * then checks the /Length entry by seeking past that many bytes and looking
+ * for an 'endstream' keyword. If that check fails, the data is scanned from
+ * its start for 'endstream' (or 'endobj') to work out the real length.
+ * When /Length verified correctly, the trailing 'endobj' is consumed as well.
+ *
+ * 's' is the stream the object is being read from and 'stream_offset' is the
+ * offset of that stream from the start of the underlying main file.
+ *
+ * We should never read a 'stream' keyword from a compressed object stream
+ * so that case should never end up here.
+ *
+ * Returns 0 on success (including the tolerated errors which are only
+ * recorded with pdfi_set_error/pdfi_log_info), or a negative error code.
+ */
+static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
+                                   uint32_t objnum, uint32_t gen)
+{
+    int code = 0;
+    int64_t i;
+    pdf_keyword *keyword = NULL;
+    pdf_dict *dict = NULL;
+    gs_offset_t offset;
+    pdf_stream *stream_obj = NULL;
+
+    /* Strange code time....
+     * If we are using a stream which is *not* the PDF uncompressed main file stream
+     * then doing stell on it will only tell us how many bytes have been read from
+     * that stream, it won't tell us the underlying file position. So we add on the
+     * 'unread' bytes, *and* we add on the position of the start of the stream in
+     * the actual main file. This is all done so that we can check the /Length
+     * of the object. Note that this will *only* work for regular objects it can
+     * not be used for compressed object streams, but those don't need checking anyway
+     * they have a different mechanism altogether and should never get here.
+     */
+    offset = stell(s->s) - s->unread_size + stream_offset;
+    /* NOTE(review): the result of this seek is not examined (it is overwritten
+     * below); later seeks on the main stream are checked.
+     */
+    code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
+
+    if (pdfi_count_stack(ctx) < 1)
+        return_error(gs_error_stackunderflow);
+
+    dict = (pdf_dict *)ctx->stack_top[-1];
+
+    /* Make sure we really have a dictionary before we modify the object */
+    if (dict->type != PDF_DICT) {
+        pdfi_pop(ctx, 1);
+        return_error(gs_error_syntaxerror);
+    }
+
+    dict->indirect_num = dict->object_num = objnum;
+    dict->indirect_gen = dict->generation_num = gen;
+
+    /* Convert the dict into a stream */
+    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
+    if (code < 0) {
+        pdfi_pop(ctx, 1);
+        return code;
+    }
+    /* Pop off the dict and push the stream */
+    pdfi_pop(ctx, 1);
+    dict = NULL;
+    pdfi_push(ctx, (pdf_obj *)stream_obj);
+    pdfi_countdown(stream_obj); /* get rid of extra ref */
+
+    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
+    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
+    stream_obj->stream_offset = offset;
+
+    /* Until /Length has been verified, the stream has no known-good length.
+     * Set this on every path, so that the fallback scan below never depends
+     * on how the stream object happened to be initialised.
+     */
+    stream_obj->Length = 0;
+    stream_obj->length_valid = false;
+
+    /* This code may be a performance overhead, it simply skips over the stream contents
+     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
+     * 'go faster' flag for users who are certain their PDF files are well-formed. This
+     * could also allow us to skip all kinds of other checking.....
+     */
+
+    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
+    if (code < 0) {
+        char extra_info[gp_file_name_sizeof];
+
+        gs_sprintf(extra_info, "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
+        pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
+        return 0;
+    }
+
+    /* Compare against the space remaining after 'offset' rather than adding
+     * the untrusted /Length to the offset, which could overflow.
+     */
+    if (i < 0 || offset > ctx->main_stream_length || i > ctx->main_stream_length - offset) {
+        char extra_info[gp_file_name_sizeof];
+
+        gs_sprintf(extra_info, "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
+        pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
+    } else {
+        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
+        if (code < 0) {
+            pdfi_pop(ctx, 1);
+            return code;
+        }
+
+        code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen);
+        if (code < 0 || pdfi_count_stack(ctx) < 2) {
+            char extra_info[gp_file_name_sizeof];
+
+            gs_sprintf(extra_info, "Failed to find a valid object at end of stream object %u.\n", objnum);
+            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
+        }
+        else {
+            if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) {
+                char extra_info[gp_file_name_sizeof];
+
+                gs_sprintf(extra_info, "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
+                pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info);
+            } else {
+                keyword = ((pdf_keyword *)ctx->stack_top[-1]);
+                if (keyword->key != TOKEN_ENDSTREAM) {
+                    char extra_info[gp_file_name_sizeof];
+
+                    /* 'i' is a signed 64-bit value, so use the signed format */
+                    gs_sprintf(extra_info, "Stream object %u has an incorrect /Length of %"PRIi64"\n", objnum, i);
+                    pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
+                } else {
+                    /* Cache the Length in the stream object and mark it valid */
+                    stream_obj->Length = i;
+                    stream_obj->length_valid = true;
+                }
+            }
+            pdfi_pop(ctx, 1);
+        }
+    }
+
+    /* If we failed to find a valid object, or the object wasn't a keyword, or the
+     * keyword wasn't 'endstream' then the Length is wrong. We need to have the correct
+     * Length for streams if we have encrypted files, because we must install a
+     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
+     * Rather than mess about checking for encryption, we'll choose to just correctly
+     * calculate the Length of all streams. Although this takes time, it will only
+     * happen for files which are invalid.
+     */
+    if (stream_obj->length_valid != true) {
+        /* A sliding window over the stream data, exactly as wide as "endstream" */
+        char Buffer[9];
+        int bytes;
+        int64_t total = 0;
+
+        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
+        if (code < 0) {
+            pdfi_pop(ctx, 1);
+            return code;
+        }
+        memset(Buffer, 0x00, sizeof(Buffer));
+        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
+        if (bytes < 9)
+            return_error(gs_error_ioerror);
+
+        /* 'total' is the number of bytes consumed from the start of the stream
+         * data; the window always holds the last 9 of them, so the window
+         * starts (total - 9) bytes into the data.
+         */
+        total = bytes;
+        do {
+            /* Both keywords are matched at the *start* of the window, so in
+             * either case the stream data ends where the window begins.
+             */
+            if (memcmp(Buffer, "endstream", 9) == 0 || memcmp(Buffer, "endobj", 6) == 0) {
+                stream_obj->Length = total - 9;
+                stream_obj->length_valid = true;
+                break;
+            }
+            /* Slide the window along by one byte */
+            memmove(Buffer, Buffer + 1, 8);
+            bytes = pdfi_read_bytes(ctx, (byte *)&Buffer[8], 1, 1, ctx->main_stream);
+            if (bytes > 0)
+                total += bytes;
+        } while (bytes > 0);
+        /* Ran out of data (or hit a read error) without finding a terminator */
+        if (bytes <= 0)
+            return_error(gs_error_ioerror);
+        return 0;
+    }
+
+    /* /Length was correct and 'endstream' has been consumed, now look for 'endobj' */
+    code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen);
+    if (code < 0) {
+        if (ctx->args.pdfstoponerror)
+            return code;
+        else
+            /* Something went wrong looking for endobj, but we found endstream, so assume
+             * for now that will suffice.
+             */
+            pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
+        return 0;
+    }
+
+    if (pdfi_count_stack(ctx) < 2)
+        return_error(gs_error_stackunderflow);
+
+    if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) {
+        pdfi_pop(ctx, 1);
+        if (ctx->args.pdfstoponerror)
+            return_error(gs_error_typecheck);
+        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
+        /* Didn't find an endobj, but we have an endstream, so assume
+         * for now that will suffice
+         */
+        return 0;
+    }
+    keyword = ((pdf_keyword *)ctx->stack_top[-1]);
+    if (keyword->key != TOKEN_ENDOBJ) {
+        /* Some other keyword; discard both it and the stream object */
+        pdfi_pop(ctx, 2);
+        return_error(gs_error_typecheck);
+    }
+    pdfi_pop(ctx, 1);
+    return 0;
+}
+
+/* Read the body of an object, *after* the 'x y obj' keyword has been found.
+ * It is broken out separately for the benefit of the repair code when reading
+ * the dictionary following the 'trailer' keyword, which does not have an 'obj'
+ * keyword. Note that it also does not have an 'endobj'; we rely on the error
+ * handling to take care of that for us.
+ *
+ * Tokens are read from 's' until a keyword is on top of the stack. What
+ * happens next depends on the keyword:
+ *   endobj  - the object below the keyword is tagged with objnum/gen and
+ *             left on the stack.
+ *   stream  - handed on to pdfi_read_stream_object().
+ *   obj     - the 'endobj' was missing and we have read the start of the
+ *             next object; the 'x y obj' is discarded and the object below
+ *             it is used.
+ *   other   - treated as a missing 'endobj', unless pdfstoponerror is set in
+ *             which case it is a syntax error.
+ *
+ * Returns >= 0 on success or a negative error code.
+ */
+int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
+{
+    int code;
+    pdf_obj *found;
+    pdf_keyword *kw;
+    /* The last three values of pdfi_unread_tell(), each sampled just before
+     * a token was read; oldest first.
+     */
+    gs_offset_t pos_history[3] = {0, 0, 0};
+
+    code = pdfi_read_token(ctx, s, objnum, gen);
+    if (code < 0)
+        return code;
+
+    /* Keep reading tokens until the one on top of the stack is a keyword */
+    for (;;) {
+        pos_history[0] = pos_history[1];
+        pos_history[1] = pos_history[2];
+        pos_history[2] = pdfi_unread_tell(ctx);
+
+        code = pdfi_read_token(ctx, s, objnum, gen);
+        if (code < 0) {
+            pdfi_clearstack(ctx);
+            return code;
+        }
+        if (s->eof)
+            return_error(gs_error_syntaxerror);
+        if (ctx->stack_top[-1]->type == PDF_KEYWORD)
+            break;
+    }
+
+    kw = (pdf_keyword *)ctx->stack_top[-1];
+
+    if (kw->key == TOKEN_STREAM) {
+        pdfi_pop(ctx, 1);
+        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
+    }
+
+    if (kw->key == TOKEN_ENDOBJ) {
+        /* We need the object itself as well as the keyword */
+        if (pdfi_count_stack(ctx) < 2) {
+            pdfi_clearstack(ctx);
+            return_error(gs_error_stackunderflow);
+        }
+
+        found = ctx->stack_top[-2];
+        pdfi_pop(ctx, 1);
+
+        found->indirect_num = found->object_num = objnum;
+        found->indirect_gen = found->generation_num = gen;
+        return code;
+    }
+
+    if (kw->key == TOKEN_OBJ) {
+        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
+
+        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
+        if (pdfi_count_stack(ctx) < 4)
+            return_error(gs_error_stackunderflow);
+
+        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
+        found = ctx->stack_top[-4];
+        pdfi_pop(ctx, 3);
+
+        found->indirect_num = found->object_num = objnum;
+        found->indirect_gen = found->generation_num = gen;
+
+        /* Go back to the oldest recorded position, if we have one */
+        if (pos_history[0] > 0)
+            (void)pdfi_seek(ctx, s, pos_history[0], SEEK_SET);
+        return 0;
+    }
+
+    /* Any other keyword; when stopping on errors this is fatal */
+    if (ctx->args.pdfstoponerror) {
+        pdfi_pop(ctx, 2);
+        return_error(gs_error_syntaxerror);
+    }
+
+    /* Otherwise assume that it means a missing 'endobj' */
+    pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
+
+    if (pdfi_count_stack(ctx) < 2)
+        return_error(gs_error_stackunderflow);
+
+    found = ctx->stack_top[-2];
+    pdfi_pop(ctx, 1);
+
+    found->indirect_num = found->object_num = objnum;
+    found->indirect_gen = found->generation_num = gen;
+    return code;
+}
+
+/* Helper for pdfi_read_object(): read one token from 's' and check that it
+ * left an object of type 'wanted_type' on top of the stack.
+ *
+ * 'base_depth' is the stack depth before any token was read; if the stack is
+ * no deeper than that after reading, nothing was pushed and we return an
+ * ioerror. An object of the wrong type is popped and a typecheck error is
+ * returned. On success the object is left on the stack.
+ */
+static int pdfi_read_object_header_token(pdf_context *ctx, pdf_c_stream *s, int base_depth, int wanted_type)
+{
+    int code = pdfi_read_token(ctx, s, 0, 0);
+
+    if (code < 0)
+        return code;
+    if (pdfi_count_stack(ctx) <= base_depth)
+        return gs_note_error(gs_error_ioerror);
+    if (((pdf_obj *)ctx->stack_top[-1])->type != wanted_type) {
+        pdfi_pop(ctx, 1);
+        return_error(gs_error_typecheck);
+    }
+    return code;
+}
+
+/* Read a complete indirect object from 's'.
+ *
+ * An object consists of 'num gen obj' followed by a token, followed by an
+ * endobj. A stream dictionary might have a 'stream' instead of an 'endobj'.
+ * Here we read and validate the 'num gen obj' header, then hand over to
+ * pdfi_read_bare_object() for everything after it.
+ *
+ * 'stream_offset' is passed through unchanged to pdfi_read_bare_object().
+ *
+ * Returns >= 0 on success or a negative error code.
+ */
+static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
+{
+    int code, entry_depth = pdfi_count_stack(ctx);
+    uint64_t objnum, gen;
+
+    /* Object number */
+    code = pdfi_read_object_header_token(ctx, s, entry_depth, PDF_INT);
+    if (code < 0)
+        return code;
+    objnum = ((pdf_num *)ctx->stack_top[-1])->value.i;
+    pdfi_pop(ctx, 1);
+
+    /* Generation number */
+    code = pdfi_read_object_header_token(ctx, s, entry_depth, PDF_INT);
+    if (code < 0)
+        return code;
+    gen = ((pdf_num *)ctx->stack_top[-1])->value.i;
+    pdfi_pop(ctx, 1);
+
+    /* The 'obj' keyword; any other keyword is a syntax error */
+    code = pdfi_read_object_header_token(ctx, s, entry_depth, PDF_KEYWORD);
+    if (code < 0)
+        return code;
+    if (((pdf_keyword *)ctx->stack_top[-1])->key != TOKEN_OBJ) {
+        pdfi_pop(ctx, 1);
+        return_error(gs_error_syntaxerror);
+    }
+    pdfi_pop(ctx, 1);
+
+    return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
+}
+
+/* Dereference an object which lives in a compressed object stream (ObjStm).
+ *
+ * 'entry' is the xref entry for object 'obj'; it names the object stream
+ * holding the object and the index of the object within that stream.
+ * The object stream itself is fetched (from the object cache if possible,
+ * otherwise from the main file, in which case it is added to the cache),
+ * checked to be of /Type /ObjStm, and decoded. Its leading table of
+ * 'object number, offset' pairs is read to locate the object, which is then
+ * parsed, tagged with obj/gen, added to the object cache and returned in
+ * '*object'.
+ *
+ * On success '*object' holds a reference owned by the caller. On failure a
+ * negative error code is returned and '*object' does not hold a reference.
+ */
+static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
+                                 const xref_entry *entry)
+{
+    int code = 0;
+    xref_entry *compressed_entry = NULL;
+    pdf_c_stream *compressed_stream = NULL;
+    pdf_c_stream *SubFile_stream = NULL;
+    pdf_c_stream *Object_stream = NULL;
+    char Buffer[256];
+    int i = 0, object_length = 0;
+    int64_t num_entries, found_object;
+    int64_t Length;
+    gs_offset_t offset = 0;
+    pdf_stream *compressed_object = NULL;
+    pdf_dict *compressed_sdict = NULL; /* alias */
+    pdf_name *Type = NULL;
+    pdf_obj *temp_obj;
+
+    /* The object stream number comes from the file; make sure it actually
+     * indexes the xref table before we use it to do so.
+     */
+    if (entry->u.compressed.compressed_stream_num >= ctx->xref_table->xref_size)
+        return_error(gs_error_undefined);
+
+    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
+
+    if (ctx->args.pdfdebug) {
+        dmprintf1(ctx->memory, "%% Reading compressed object (%"PRIu64" 0 obj)", obj);
+        dmprintf1(ctx->memory, " from ObjStm with object number %"PRIu64"\n", compressed_entry->object_num);
+    }
+
+    if (compressed_entry->cache == NULL) {
+#if CACHE_STATISTICS
+        ctx->compressed_misses++;
+#endif
+        /* The object stream is not cached, read it from the main file */
+        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
+        if (code < 0)
+            goto exit;
+
+        code = pdfi_read_object(ctx, ctx->main_stream, 0);
+        if (code < 0)
+            goto exit;
+
+        if ((ctx->stack_top[-1])->type != PDF_STREAM) {
+            pdfi_pop(ctx, 1);
+            code = gs_note_error(gs_error_typecheck);
+            goto exit;
+        }
+        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
+            pdfi_pop(ctx, 1);
+            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
+            code = gs_note_error(gs_error_undefined);
+            goto exit;
+        }
+        /* Take our own reference before the stack gives up its one */
+        compressed_object = (pdf_stream *)ctx->stack_top[-1];
+        pdfi_countup(compressed_object);
+        pdfi_pop(ctx, 1);
+        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
+        if (code < 0)
+            goto exit;
+    } else {
+#if CACHE_STATISTICS
+        ctx->compressed_hits++;
+#endif
+        compressed_object = (pdf_stream *)compressed_entry->cache->o;
+        pdfi_countup(compressed_object);
+        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
+    }
+    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
+    if (code < 0)
+        /* Go via 'exit' so that our reference to compressed_object is released */
+        goto exit;
+
+    /* Check its an ObjStm ! */
+    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
+    if (code < 0)
+        goto exit;
+
+    if (!pdfi_name_is(Type, "ObjStm")){
+        code = gs_note_error(gs_error_syntaxerror);
+        goto exit;
+    }
+
+    /* Need to check the /N entry to see if the object is actually in this stream! */
+    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
+    if (code < 0)
+        goto exit;
+
+    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
+        code = gs_note_error(gs_error_rangecheck);
+        goto exit;
+    }
+
+    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
+    if (code < 0)
+        goto exit;
+
+    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
+    if (code < 0)
+        goto exit;
+
+    /* Limit reading to the declared length of the object stream, then apply
+     * the stream's own filters on top of that.
+     */
+    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
+    if (code < 0)
+        goto exit;
+
+    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
+    if (code < 0)
+        goto exit;
+
+    /* Read the table of 'object number, offset' pairs at the start of the
+     * stream, picking out the offset of the object we want and (if there is
+     * a following object) the distance to that one.
+     */
+    for (i=0;i < num_entries;i++)
+        {
+            code = pdfi_read_token(ctx, compressed_stream, obj, gen);
+            if (code < 0)
+                goto exit;
+            temp_obj = ctx->stack_top[-1];
+            if (temp_obj->type != PDF_INT) {
+                code = gs_note_error(gs_error_typecheck);
+                pdfi_pop(ctx, 1);
+                goto exit;
+            }
+            found_object = ((pdf_num *)temp_obj)->value.i;
+            pdfi_pop(ctx, 1);
+            code = pdfi_read_token(ctx, compressed_stream, obj, gen);
+            if (code < 0)
+                goto exit;
+            temp_obj = ctx->stack_top[-1];
+            if (temp_obj->type != PDF_INT) {
+                /* Must report an error here, otherwise we would return
+                 * 'success' without ever having produced an object.
+                 */
+                code = gs_note_error(gs_error_typecheck);
+                pdfi_pop(ctx, 1);
+                goto exit;
+            }
+            if (i == entry->u.compressed.object_index) {
+                /* The xref says the object is at this index, check that the stream agrees */
+                if (found_object != obj) {
+                    pdfi_pop(ctx, 1);
+                    code = gs_note_error(gs_error_undefined);
+                    goto exit;
+                }
+                offset = ((pdf_num *)temp_obj)->value.i;
+            }
+            if (i == entry->u.compressed.object_index + 1)
+                object_length = ((pdf_num *)temp_obj)->value.i - offset;
+            pdfi_pop(ctx, 1);
+        }
+
+    /* Skip to the offset of the object we want to read */
+    for (i=0;i < offset;i++)
+        {
+            code = pdfi_read_bytes(ctx, (byte *)&Buffer[0], 1, 1, compressed_stream);
+            if (code <= 0) {
+                code = gs_note_error(gs_error_ioerror);
+                goto exit;
+            }
+        }
+
+    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
+     * the number of bytes we read to the declared size of the object (difference between
+     * the offsets of the object we want to read, and the next object). If it is 0 then
+     * we're reading the last object in the stream, so we just rely on the SubFileDecode
+     * we set up when we created compressed_stream to limit the bytes to the length of
+     * that stream.
+     */
+    if (object_length > 0) {
+        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
+        if (code < 0)
+            goto exit;
+    } else {
+        Object_stream = compressed_stream;
+    }
+
+    code = pdfi_read_token(ctx, Object_stream, obj, gen);
+    if (code < 0)
+        goto exit;
+    if (ctx->stack_top[-1]->type == PDF_ARRAY_MARK || ctx->stack_top[-1]->type == PDF_DICT_MARK) {
+        int start_depth = pdfi_count_stack(ctx);
+
+        /* Need to read all the elements from COS objects */
+        do {
+            code = pdfi_read_token(ctx, Object_stream, obj, gen);
+            if (code < 0)
+                goto exit;
+            if (compressed_stream->eof == true) {
+                code = gs_note_error(gs_error_ioerror);
+                goto exit;
+            }
+        }while ((ctx->stack_top[-1]->type != PDF_ARRAY && ctx->stack_top[-1]->type != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
+    }
+
+    *object = ctx->stack_top[-1];
+    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
+     * the object number for uncompressed objects. So we need to do that here.
+     */
+    (*object)->indirect_num = (*object)->object_num = obj;
+    (*object)->indirect_gen = (*object)->generation_num = gen;
+    pdfi_countup(*object);
+    pdfi_pop(ctx, 1);
+
+    code = pdfi_add_to_cache(ctx, *object);
+    if (code < 0) {
+        /* Give up our reference, and don't hand the caller a pointer to an
+         * object which we no longer hold a reference to.
+         */
+        pdfi_countdown(*object);
+        *object = NULL;
+        goto exit;
+    }
+
+ exit:
+    /* Close the streams from the top of the filter chain downwards.
+     * Object_stream may simply be an alias of compressed_stream, in which
+     * case it must only be closed once.
+     */
+    if (Object_stream)
+        pdfi_close_file(ctx, Object_stream);
+    if (Object_stream != compressed_stream)
+        if (compressed_stream)
+            pdfi_close_file(ctx, compressed_stream);
+    if (SubFile_stream)
+        pdfi_close_file(ctx, SubFile_stream);
+    pdfi_countdown(compressed_object);
+    pdfi_countdown(Type);
+    return code;
+}
+
+/* pdf_dereference returns an object with a reference count of at least 1, this represents the
+ * reference being held by the caller (in **object) when we return from this function.
+ */
+int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
+{
+    xref_entry *entry;
+    int code, stack_depth = pdfi_count_stack(ctx);
+    gs_offset_t saved_stream_offset;
+    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
+
+    *object = NULL;
+
+    if (ctx->xref_table == NULL)
+        return_error(gs_error_typecheck);
+
+    if (obj >= ctx->xref_table->xref_size) {
+        char extra_info[gp_file_name_sizeof];
+
+        gs_sprintf(extra_info, "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
+        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info);
+
+        if(ctx->args.pdfstoponerror)
+            return_error(gs_error_rangecheck);
+
+        code = pdfi_object_alloc(ctx, PDF_NULL, 0, object);
+        if (code == 0)
+            pdfi_countup(*object);
+        return code;
+    }
+
+    entry = &ctx->xref_table->xref[obj];
+
+    if(entry->object_num == 0)
+        return_error(gs_error_undefined);
+
+    if (entry->free) {
+        char extra_info[gp_file_name_sizeof];
+
+        gs_sprintf(extra_info, "Attempt to dereference free object %"PRIu64", trying next object number as offset.\n", entry->object_num);
+        pdfi_set_error(ctx, 0, NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
+    }
+
+    if (ctx->loop_detection) {
+        if (pdfi_loop_detector_check_object(ctx, obj) == true)
+            return_error(gs_error_circular_reference);
+    }
+    if (entry->cache != NULL){
+        pdf_obj_cache_entry *cache_entry = entry->cache;
+
+#if CACHE_STATISTICS
+        ctx->hits++;
+#endif
+        *object = cache_entry->o;
+        pdfi_countup(*object);
+
+        pdfi_promote_cache_entry(ctx, cache_entry);
+    } else {
+        saved_stream_offset = pdfi_unread_tell(ctx);
+
+        if (entry->compressed) {
+            /* This is an object in a compressed object stream */
+            ctx->encryption.decrypt_strings = false;
+
+            code = pdfi_deref_compressed(ctx, obj, gen, object, entry);
+            if (code < 0 || *object == NULL)
+                goto error;
+        } else {
+            pdf_c_stream *SubFile_stream = NULL;
+            pdf_string *EODString;
+#if CACHE_STATISTICS
+            ctx->misses++;
+#endif
+            ctx->encryption.decrypt_strings = true;
+
+            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
+            if (code < 0)
+                goto error;
+
+            code = pdfi_name_alloc(ctx, (byte *)"trailer", 6, (pdf_obj **)&EODString);
+            if (code < 0)
+                goto error;
+            pdfi_countup(EODString);
+
+            code = pdfi_apply_SubFileDecode_filter(ctx, 0, EODString, ctx->main_stream, &SubFile_stream, false);
+            if (code < 0) {
+                pdfi_countdown(EODString);
+                goto error;
+            }
+
+            code = pdfi_read_object(ctx, SubFile_stream, entry->u.uncompressed.offset);
+
+            pdfi_countdown(EODString);
+            pdfi_close_file(ctx, SubFile_stream);
+            if (code < 0) {
+                int code1 = 0;
+                if (entry->free) {
+                    dmprintf2(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed (code = %d), returning NULL object.\n", entry->object_num, code);
+                    code = pdfi_object_alloc(ctx, PDF_NULL, 1, object);
+                    if (code >= 0) {
+                        pdfi_countup(*object);
+                        goto free_obj;
+                    }
+                }
+                ctx->encryption.decrypt_strings = saved_decrypt_strings;
+                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
+                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
+
+                code1 = pdfi_repair_file(ctx);
+                if (code1 == 0)
+                    return pdfi_dereference(ctx, obj, gen, object);
+                /* Repair failed, just give up and return an error */
+                return code;
+            }
+
+            if (pdfi_count_stack(ctx) > 0 && (ctx->stack_top[-1])->object_num == obj) {
+                *object = ctx->stack_top[-1];
+                pdfi_countup(*object);
+                pdfi_pop(ctx, 1);
+                code = pdfi_add_to_cache(ctx, *object);
+                if (code < 0) {
+                    pdfi_countdown(*object);
+                    goto error;
+                }
+            } else {
+                pdfi_pop(ctx, 1);
+                if (entry->free) {
+                    dmprintf1(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed, returning NULL object.\n", entry->object_num);
+                    code = pdfi_object_alloc(ctx, PDF_NULL, 1, object);
+                    if (code >= 0)
+                        pdfi_countup(*object);
+                    return code;
+                }
+                code = gs_note_error(gs_error_undefined);
+                goto error;
+            }
+        }
+free_obj:
+        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
+    }
+
+    if (ctx->loop_detection && (*object)->object_num != 0) {
+        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
+        if (code < 0) {
+            ctx->encryption.decrypt_strings = saved_decrypt_strings;
+            return code;
+        }
+    }
+    ctx->encryption.decrypt_strings = saved_decrypt_strings;
+    return 0;
+
+error:
+    ctx->encryption.decrypt_strings = saved_decrypt_strings;
+    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
+    /* Return the stack to the state at entry */
+    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
+    return code;
+}
+
+/* Dereference object 'obj' (generation 'gen') with loop detection.
+ *
+ * pdfi_loop_detector_mark() is called before pdfi_dereference() and
+ * pdfi_loop_detector_cleartomark() after it, whatever the dereference
+ * returned.
+ *
+ * Returns the result of pdfi_loop_detector_mark() if that is negative (in
+ * which case no dereference is attempted), otherwise the result of
+ * pdfi_dereference(). The result of the cleartomark call is discarded.
+ */
+int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
+{
+    int deref_code;
+    int mark_code = pdfi_loop_detector_mark(ctx);
+
+    if (mark_code >= 0) {
+        deref_code = pdfi_dereference(ctx, obj, gen, object);
+        (void)pdfi_loop_detector_cleartomark(ctx);
+        return deref_code;
+    }
+    return mark_code;
+}
+
+
+/* Replace the indirect references held directly in an array with the
+ * objects they refer to.
+ *
+ * Each element is fetched with pdfi_array_get_no_store_R() and, unless it is
+ * a stream, written back into the array with pdfi_array_put(). Streams are
+ * left as references. An element whose fetch returns
+ * gs_error_circular_reference is left untouched and is not treated as an
+ * error. When 'recurse' is true every successfully fetched element is then
+ * passed to pdfi_resolve_indirect().
+ *
+ * 'obj' is used as a pdf_array without a type check, so the caller must have
+ * checked the type already.
+ *
+ * Returns a negative error code from the first fetch, store or recursive
+ * resolve that fails; otherwise a non-negative value.
+ */
+static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
+{
+    int code = 0;
+    uint64_t index, arraysize;
+    pdf_obj *object = NULL;
+    pdf_array *array = (pdf_array *)obj;
+
+    arraysize = pdfi_array_size(array);
+    for (index = 0; index < arraysize; index++) {
+        code = pdfi_array_get_no_store_R(ctx, array, index, &object);
+        if (code == gs_error_circular_reference) {
+            /* Just leave as an indirect ref */
+            code = 0;
+        } else {
+            if (code < 0) goto exit;
+            /* don't store the object if it's a stream (leave as a ref) */
+            if (object->type != PDF_STREAM) {
+                code = pdfi_array_put(ctx, array, index, object);
+                /* Check the store result here: the recursive call below
+                 * reuses 'code' and would otherwise hide a failed store.
+                 */
+                if (code < 0) goto exit;
+            }
+            if (recurse)
+                code = pdfi_resolve_indirect(ctx, object, recurse);
+        }
+        if (code < 0) goto exit;
+
+        /* Done with this element; drop our reference before the next fetch */
+        pdfi_countdown(object);
+        object = NULL;
+    }
+
+ exit:
+    /* 'object' is NULL here unless we left the loop early */
+    pdfi_countdown(object);
+    return code;
+}
+
+/* Replace the indirect references held directly in a dictionary with the
+ * objects they refer to.
+ *
+ * Each value is fetched by key with pdfi_dict_get_no_store_R_key() and,
+ * unless it is a stream, stored back under the same key with
+ * pdfi_dict_put_obj() (whose result is not checked). Streams are left as
+ * references. A value whose fetch returns gs_error_circular_reference is
+ * left untouched and is not treated as an error. When 'recurse' is true
+ * every successfully fetched value is then passed to pdfi_resolve_indirect().
+ *
+ * 'obj' is used as a pdf_dict without a type check.
+ *
+ * Returns a negative error code from the first fetch or recursive resolve
+ * that fails; otherwise a non-negative value.
+ */
+static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
+{
+    pdf_dict *d = (pdf_dict *)obj;
+    pdf_obj *val = NULL;
+    pdf_name *name = NULL;
+    uint64_t i, count;
+    int code = 0;
+
+    count = pdfi_dict_entries(d);
+
+    /* The keys are walked by index, rather than with pdfi_dict_first/next,
+     * because circular references need to be handled here.
+     */
+    for (i = 0; i < count; i++) {
+        name = (pdf_name *)d->keys[i];
+        code = pdfi_dict_get_no_store_R_key(ctx, d, name, &val);
+
+        if (code == gs_error_circular_reference) {
+            /* Keep the entry as an indirect reference and carry on */
+            code = 0;
+        } else if (code >= 0) {
+            /* Streams stay as references; everything else is stored */
+            if (val->type != PDF_STREAM)
+                pdfi_dict_put_obj(ctx, d, (pdf_obj *)name, val);
+            if (recurse)
+                code = pdfi_resolve_indirect(ctx, val, recurse);
+        }
+        if (code < 0)
+            break;
+
+        pdfi_countdown(val);
+        val = NULL;
+    }
+
+    /* 'val' is NULL here unless the loop was left early */
+    pdfi_countdown(val);
+    return code;
+}
+
+/* Resolve the indirect references contained in 'value'.
+ *
+ * Arrays are handed to pdfi_resolve_indirect_array() and dictionaries to
+ * pdfi_resolve_indirect_dict(); 'recurse' is passed through unchanged. Any
+ * other object type is left alone and 0 is returned.
+ *
+ * Note: This can be recursive, because both helpers call back into this
+ * function when 'recurse' is true.
+ */
+int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
+{
+    if (value->type == PDF_ARRAY)
+        return pdfi_resolve_indirect_array(ctx, value, recurse);
+
+    if (value->type == PDF_DICT)
+        return pdfi_resolve_indirect_dict(ctx, value, recurse);
+
+    /* Nothing to resolve for any other type */
+    return 0;
+}
+
+/* Resolve the indirect references held directly in 'value', with loop
+ * detection on the parent (can be NULL) and the value.
+ *
+ * After pdfi_loop_detector_mark(), the object numbers of 'parent' and
+ * 'value' are added to the loop detector whenever they are non-zero, then
+ * pdfi_resolve_indirect() is called on 'value'. Once the mark has been set,
+ * pdfi_loop_detector_cleartomark() is always called before returning and
+ * its result is discarded.
+ *
+ * NOTE(review): 'recurse' is accepted but not forwarded;
+ * pdfi_resolve_indirect() is always called with recurse == false, so only
+ * one level is resolved. Confirm whether this is intentional before
+ * relying on the parameter.
+ *
+ * Returns a negative error code if setting the mark, adding an object to
+ * the loop detector, or the resolve fails; otherwise the result of
+ * pdfi_resolve_indirect().
+ */
+int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
+{
+    int code = 0;
+
+    code = pdfi_loop_detector_mark(ctx);
+    /* If the mark could not be set, return without clearing to a mark, as
+     * pdfi_deref_loop_detect() does: presumably there is no mark of ours to
+     * clear, and clearing would remove one that belongs to a caller.
+     */
+    if (code < 0)
+        return code;
+
+    if (parent && parent->object_num != 0) {
+        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
+        if (code < 0) goto exit;
+    }
+    if (value->object_num != 0) {
+        code = pdfi_loop_detector_add_object(ctx, value->object_num);
+        if (code < 0) goto exit;
+    }
+    code = pdfi_resolve_indirect(ctx, value, false);
+
+ exit:
+    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
+    return code;
+}