\n"); for (s = 0; s < split->count; s++) { if (split->count > 1) { if (total == 0) { extract_astring_catf(alloc, output, "

\n"); } else { extract_astring_catf(alloc, output, "

\n", 100.0*split->split[s]->weight/total); } } ret = split_to_html(alloc, split->split[s], ppsubpage, output); if (ret) break; if (split->count > 1) extract_astring_cat(alloc, output, "

\n"); } if (split->count > 1) extract_astring_cat(alloc, output, "

\n"); return ret; } else if (split->type == SPLIT_VERTICAL) { int ret = 0; for (s = 0; s < split->count; s++) { ret = split_to_html(alloc, split->split[s], ppsubpage, output); if (ret) break; } return ret; } /* We'll deal with the next subpage entry. Increment the pointer for the * next caller. */ subpage = **ppsubpage; *ppsubpage = (*ppsubpage)+1; /* Output paragraphs and tables in order of increasing coordinate. Unfortunately the paragraph ordering we do in page->paragraphs[] isn't quite right and results in bad ordering if ctm/trm matrices are inconsistent. So we create our own list of paragraphs sorted strictly by y coordinate of the first char of each paragraph. */ if (extract_malloc(alloc, ¶graphs, sizeof(*paragraphs) * subpage->paragraphs_num)) goto end; for (p = 0; p < subpage->paragraphs_num; ++p) { paragraphs[p] = subpage->paragraphs[p]; } qsort(paragraphs, subpage->paragraphs_num, sizeof(*paragraphs), compare_paragraph_y); if (0) { int p; outf0("paragraphs are:"); for (p=0; pparagraphs_num; ++p) { paragraph_t* paragraph = subpage->paragraphs[p]; line_t* line = paragraph->lines[0]; span_t* span = line->spans[0]; outf0(" p=%i: %s", p, extract_span_string(NULL, span)); } } p = 0; t = 0; for(;;) { double y_paragraph; double y_table; paragraph_t* paragraph = (p == subpage->paragraphs_num) ? NULL : paragraphs[p]; table_t* table = (t == subpage->tables_num) ? NULL : subpage->tables[t]; if (!paragraph && !table) break; y_paragraph = (paragraph) ? paragraph->lines[0]->spans[0]->chars[0].y : DBL_MAX; y_table = (table) ? table->pos.y : DBL_MAX; outf("p=%i y_paragraph=%f", p, y_paragraph); outf("t=%i y_table=%f", t, y_table); if (paragraph && y_paragraph < y_table) { //extract_astring_catf(alloc, output, "

@@@ paragraph %i y=%f @@@)

\n", p, y_paragraph); if (paragraph_to_html_content(alloc, &state, paragraph, 0 /*single_line*/, output)) goto end; if (content_state_reset(alloc, &state, output)) goto end; p += 1; } else if (table) { //extract_astring_catf(alloc, output, "

@@@ table %t y=%f @@@)

\n", p, y_table); if (append_table(alloc, &state, table, output)) goto end; t += 1; } } extract_free(alloc, ¶graphs); return 0; end: extract_free(alloc, ¶graphs); return -1; } int extract_document_to_html_content( extract_alloc_t* alloc, document_t* document, int rotation, int images, extract_astring_t* content ) { int ret = -1; int n; paragraph_t** paragraphs = NULL; (void) rotation; (void) images; extract_astring_cat(alloc, content, "\n"); extract_astring_cat(alloc, content, "\n"); /* Write paragraphs into . */ for (n=0; npages_num; ++n) { extract_page_t* page = document->pages[n]; subpage_t **psubpage = page->subpages; /* Every page gets its own div. */ extract_astring_cat(alloc, content, "

\n"); ret = split_to_html(alloc, page->split, &psubpage, content); if (ret) goto end; extract_astring_cat(alloc, content, "

\n"); } extract_astring_cat(alloc, content, "\n"); extract_astring_cat(alloc, content, "\n"); ret = 0; end: extract_free(alloc, ¶graphs); return ret; }