summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam James <sam@gentoo.org>2022-03-29 10:27:10 +0100
committerSam James <sam@gentoo.org>2022-04-17 12:53:05 +0100
commit085bde903b9e684c3c1160e4df912bea9a660997 (patch)
treec4f5e6e9f2422e869ca5bc0b944520d451001282 /extract/src/odt.c
parentImport Ghostscript 9.55 (diff)
downloadghostscript-gpl-patches-085bde903b9e684c3c1160e4df912bea9a660997.tar.gz
ghostscript-gpl-patches-085bde903b9e684c3c1160e4df912bea9a660997.tar.bz2
ghostscript-gpl-patches-085bde903b9e684c3c1160e4df912bea9a660997.zip
Import Ghostscript 9.56.0ghostscript-9.56
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'extract/src/odt.c')
-rw-r--r--extract/src/odt.c627
1 files changed, 368 insertions, 259 deletions
diff --git a/extract/src/odt.c b/extract/src/odt.c
index bacb362d..9e369078 100644
--- a/extract/src/odt.c
+++ b/extract/src/odt.c
@@ -21,6 +21,7 @@ odt_paragraph_finish(). */
#include <assert.h>
#include <errno.h>
+#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
@@ -29,17 +30,16 @@ odt_paragraph_finish(). */
#include <sys/stat.h>
-static int extract_odt_paragraph_start(extract_alloc_t* alloc, extract_astring_t* content)
+static int s_odt_paragraph_start(extract_alloc_t* alloc, extract_astring_t* content)
{
return extract_astring_cat(alloc, content, "\n\n<text:p>");
}
-static int extract_odt_paragraph_finish(extract_alloc_t* alloc, extract_astring_t* content)
+static int s_odt_paragraph_finish(extract_alloc_t* alloc, extract_astring_t* content)
{
return extract_astring_cat(alloc, content, "</text:p>");
}
-
/* ODT doesn't seem to support ad-hoc inline font specifications; instead
we have to define a style at the start of the content.xml file. So when
writing content we insert a style name and add the required styles to a
@@ -48,10 +48,7 @@ extract_odt_styles_t struct. */
struct extract_odt_style_t
{
int id; /* A unique id for this style. */
- char* font_name;
- double font_size;
- int font_bold;
- int font_italic;
+ font_t font;
};
struct extract_odt_styles_t
@@ -61,41 +58,47 @@ struct extract_odt_styles_t
int styles_num;
};
-static int extract_odt_style_compare(extract_odt_style_t* a, extract_odt_style_t*b)
+static int s_odt_style_compare(extract_odt_style_t* a, extract_odt_style_t*b)
{
int d;
double dd;
- if ((d = strcmp(a->font_name, b->font_name))) return d;
- if ((dd = a->font_size - b->font_size) != 0.0) return (dd > 0.0) ? 1 : -1;
- if ((d = a->font_bold - b->font_bold)) return d;
- if ((d = a->font_italic - b->font_italic)) return d;
+ if ((d = strcmp(a->font.name, b->font.name))) return d;
+ if ((dd = a->font.size - b->font.size) != 0.0) return (dd > 0.0) ? 1 : -1;
+ if ((d = a->font.bold - b->font.bold)) return d;
+ if ((d = a->font.italic - b->font.italic)) return d;
return 0;
}
-static int extract_odt_style_append_definition(extract_alloc_t* alloc, extract_odt_style_t* style, extract_astring_t* text)
+static int s_odt_style_append_definition(extract_alloc_t* alloc, extract_odt_style_t* style, extract_astring_t* text)
{
- const char* font_name = style->font_name;
+ const char* font_name = style->font.name;
/* This improves output e.g. for zlib.3.pdf, but clearly a hack. */
if (0 && strstr(font_name, "Helvetica"))
{
font_name = "Liberation Sans";
}
- outf("style->font_name=%s font_name=%s", style->font_name, font_name);
+ outf("style->font_name=%s font_name=%s", style->font.name, font_name);
if (extract_astring_catf(alloc, text, "<style:style style:name=\"T%i\" style:family=\"text\">", style->id)) return -1;
if (extract_astring_catf(alloc, text, "<style:text-properties style:font-name=\"%s\"", font_name)) return -1;
- if (extract_astring_catf(alloc, text, " fo:font-size=\"%.2fpt\"", style->font_size)) return -1;
- if (extract_astring_catf(alloc, text, " fo:font-weight=\"%s\"", style->font_bold ? "bold" : "normal")) return -1;
- if (extract_astring_catf(alloc, text, " fo:font-style=\"%s\"", style->font_italic ? "italic" : "normal")) return -1;
+ if (extract_astring_catf(alloc, text, " fo:font-size=\"%.2fpt\"", style->font.size)) return -1;
+ if (extract_astring_catf(alloc, text, " fo:font-weight=\"%s\"", style->font.bold ? "bold" : "normal")) return -1;
+ if (extract_astring_catf(alloc, text, " fo:font-style=\"%s\"", style->font.italic ? "italic" : "normal")) return -1;
if (extract_astring_cat(alloc, text, " /></style:style>")) return -1;
return 0;
}
void extract_odt_styles_free(extract_alloc_t* alloc, extract_odt_styles_t* styles)
{
+ int i;
+ for (i=0; i<styles->styles_num; ++i)
+ {
+ extract_odt_style_t* style = &styles->styles[i];
+ extract_free(alloc, &style->font.name);
+ }
extract_free(alloc, &styles->styles);
}
-static int extract_odt_styles_definitions(
+static int s_odt_styles_definitions(
extract_alloc_t* alloc,
extract_odt_styles_t* styles,
extract_astring_t* out
@@ -105,7 +108,7 @@ static int extract_odt_styles_definitions(
if (extract_astring_cat(alloc, out, "<office:automatic-styles>")) return -1;
for (i=0; i<styles->styles_num; ++i)
{
- if (extract_odt_style_append_definition(alloc, &styles->styles[i], out)) return -1;
+ if (s_odt_style_append_definition(alloc, &styles->styles[i], out)) return -1;
}
extract_astring_cat(alloc, out, "<style:style style:name=\"gr1\" style:family=\"graphic\">\n");
extract_astring_cat(alloc, out, "<style:graphic-properties"
@@ -159,25 +162,22 @@ static int extract_odt_styles_definitions(
return 0;
}
-static int styles_add(
+static int s_odt_styles_add(
extract_alloc_t* alloc,
extract_odt_styles_t* styles,
- const char* font_name,
- double font_size,
- int font_bold,
- int font_italic,
+ font_t* font,
extract_odt_style_t** o_style
)
/* Adds specified style to <styles> if not already present. Sets *o_style to
point to the style_t within <styles>. */
{
- extract_odt_style_t style = {0 /*id*/, (char*) font_name, font_size, font_bold, font_italic};
+ extract_odt_style_t style = {0 /*id*/, *font};
int i;
/* We keep styles->styles[] sorted; todo: use bsearch or similar when
searching. */
for (i=0; i<styles->styles_num; ++i)
{
- int d = extract_odt_style_compare(&style, &styles->styles[i]);
+ int d = s_odt_style_compare(&style, &styles->styles[i]);
if (d == 0)
{
*o_style = &styles->styles[i];
@@ -190,92 +190,79 @@ point to the style_t within <styles>. */
memmove(&styles->styles[i+1], &styles->styles[i], sizeof(styles->styles[0]) * (styles->styles_num - i));
styles->styles_num += 1;
styles->styles[i].id = styles->styles_num + 10; /* Leave space for template's built-in styles. */
- if (extract_strdup(alloc, font_name, &styles->styles[i].font_name)) return -1;
- styles->styles[i].font_size = font_size;
- styles->styles[i].font_bold = font_bold;
- styles->styles[i].font_italic = font_italic;
+ if (extract_strdup(alloc, font->name, &styles->styles[i].font.name)) return -1;
+ styles->styles[i].font.size = font->size;
+ styles->styles[i].font.bold = font->bold;
+ styles->styles[i].font.italic = font->italic;
*o_style = &styles->styles[i];
return 0;
}
static int extract_odt_run_start(
- extract_alloc_t* alloc,
- extract_astring_t* content,
- extract_odt_styles_t* styles,
- const char* font_name,
- double font_size,
- int bold,
- int italic
+ extract_alloc_t* alloc,
+ extract_astring_t* content,
+ extract_odt_styles_t* styles,
+ content_state_t* content_state
)
-/* Starts a new run. Caller must ensure that extract_odt_run_finish() was
+/* Starts a new run. Caller must ensure that s_odt_run_finish() was
called to terminate any previous run. */
{
extract_odt_style_t* style;
- if (styles_add(alloc, styles, font_name, font_size, bold, italic, &style)) return -1;
+ if (s_odt_styles_add(
+ alloc,
+ styles,
+ &content_state->font,
+ &style
+ )) return -1;
if (extract_astring_catf(alloc, content, "<text:span text:style-name=\"T%i\">", style->id)) return -1;
return 0;
}
-static int extract_odt_run_finish(extract_alloc_t* alloc, extract_astring_t* content)
+static int s_odt_run_finish(extract_alloc_t* alloc, content_state_t* content_state, extract_astring_t* content)
{
+ if (content_state) content_state->font.name = NULL;
return extract_astring_cat(alloc, content, "</text:span>");
}
-static int extract_odt_paragraph_empty(extract_alloc_t* alloc, extract_astring_t* content, extract_odt_styles_t* styles)
+static int s_odt_append_empty_paragraph(extract_alloc_t* alloc, extract_astring_t* content, extract_odt_styles_t* styles)
/* Append an empty paragraph to *content. */
{
int e = -1;
- if (extract_odt_paragraph_start(alloc, content)) goto end;
+ static char fontname[] = "OpenSans";
+ content_state_t content_state = {0};
+ if (s_odt_paragraph_start(alloc, content)) goto end;
/* [This comment is from docx, haven't checked odt.] It seems like our
- choice of font size here doesn't make any difference to the ammount of
+ choice of font size here doesn't make any difference to the amount of
vertical space, unless we include a non-space character. Presumably
something to do with the styles in the template document. */
- if (extract_odt_run_start(
- alloc,
- content,
- styles,
- "OpenSans",
- 10 /*font_size*/,
- 0 /*font_bold*/,
- 0 /*font_italic*/
- )) goto end;
+ content_state.font.name = fontname;
+ content_state.font.size = 10;
+ content_state.font.bold = 0;
+ content_state.font.italic = 0;
+ if (extract_odt_run_start(alloc, content, styles, &content_state)) goto end;
//docx_char_append_string(content, "&#160;"); /* &#160; is non-break space. */
- if (extract_odt_run_finish(alloc, content)) goto end;
- if (extract_odt_paragraph_finish(alloc, content)) goto end;
+ if (s_odt_run_finish(alloc, NULL /*content_state*/, content)) goto end;
+ if (s_odt_paragraph_finish(alloc, content)) goto end;
e = 0;
end:
return e;
}
-typedef struct
-{
- const char* font_name;
- double font_size;
- int font_bold;
- int font_italic;
- matrix_t* ctm_prev;
- /* todo: add extract_odt_styles_t member? */
-} content_state_t;
-/* Used to keep track of font information when writing paragraphs of odt
-content, e.g. so we know whether a font has changed so need to start a new odt
-span. */
-
-
-static int extract_document_to_odt_content_paragraph(
+static int s_document_to_odt_content_paragraph(
extract_alloc_t* alloc,
- content_state_t* state,
+ content_state_t* content_state,
paragraph_t* paragraph,
extract_astring_t* content,
extract_odt_styles_t* styles
)
-/* Append odt xml for <paragraph> to <content>. Updates *state if we change
-font. */
+/* Append odt xml for <paragraph> to <content>. Updates *content_state if we
+change font. */
{
int e = -1;
int l;
- if (extract_odt_paragraph_start(alloc, content)) goto end;
+ if (s_odt_paragraph_start(alloc, content)) goto end;
for (l=0; l<paragraph->lines_num; ++l)
{
@@ -286,50 +273,41 @@ font. */
int si;
span_t* span = line->spans[s];
double font_size_new;
- state->ctm_prev = &span->ctm;
+ content_state->ctm_prev = &span->ctm;
font_size_new = extract_matrices_to_font_size(&span->ctm, &span->trm);
- if (!state->font_name
- || strcmp(span->font_name, state->font_name)
- || span->flags.font_bold != state->font_bold
- || span->flags.font_italic != state->font_italic
- || font_size_new != state->font_size
+ if (!content_state->font.name
+ || strcmp(span->font_name, content_state->font.name)
+ || span->flags.font_bold != content_state->font.bold
+ || span->flags.font_italic != content_state->font.italic
+ || font_size_new != content_state->font.size
)
{
- if (state->font_name)
+ if (content_state->font.name)
{
- if (extract_odt_run_finish(alloc, content)) goto end;
+ if (s_odt_run_finish(alloc, content_state, content)) goto end;
}
- state->font_name = span->font_name;
- state->font_bold = span->flags.font_bold;
- state->font_italic = span->flags.font_italic;
- state->font_size = font_size_new;
- if (extract_odt_run_start(
- alloc,
- content,
- styles,
- state->font_name,
- state->font_size,
- state->font_bold,
- state->font_italic
- )) goto end;
+ content_state->font.name = span->font_name;
+ content_state->font.bold = span->flags.font_bold;
+ content_state->font.italic = span->flags.font_italic;
+ content_state->font.size = font_size_new;
+ if (extract_odt_run_start( alloc, content, styles, content_state)) goto end;
}
for (si=0; si<span->chars_num; ++si)
{
char_t* char_ = &span->chars[si];
int c = char_->ucs;
- if (extract_astring_cat_xmlc(alloc, content, c)) goto end;
+ if (extract_astring_catc_unicode_xml(alloc, content, c)) goto end;
}
/* Remove any trailing '-' at end of line. */
- if (astring_char_truncate_if(content, '-')) goto end;
+ if (extract_astring_char_truncate_if(content, '-')) goto end;
}
}
- if (state->font_name)
+ if (content_state->font.name)
{
- if (extract_odt_run_finish(alloc, content)) goto end;
- state->font_name = NULL;
+ if (s_odt_run_finish(alloc, content_state, content)) goto end;
}
- if (extract_odt_paragraph_finish(alloc, content)) goto end;
+ if (s_odt_paragraph_finish(alloc, content)) goto end;
e = 0;
@@ -337,7 +315,7 @@ font. */
return e;
}
-static int extract_document_append_image(
+static int s_odt_append_image(
extract_alloc_t* alloc,
extract_astring_t* content,
image_t* image
@@ -362,7 +340,7 @@ static int extract_document_append_image(
}
-static int extract_document_output_rotated_paragraphs(
+static int s_odt_output_rotated_paragraphs(
extract_alloc_t* alloc,
extract_page_t* page,
int paragraph_begin,
@@ -375,14 +353,14 @@ static int extract_document_output_rotated_paragraphs(
int text_box_id,
extract_astring_t* content,
extract_odt_styles_t* styles,
- content_state_t* state
+ content_state_t* content_state
)
/* Writes paragraph to content inside rotated text box. */
{
int e = 0;
int p;
double pt_to_inch = 1/72.0;
- outf("rotated paragraphs: rotation_rad=%f (x y)=(%i %i) (w h)=(%i %i)", rotation_rad, x_pt, y_pt, w_pt, h_pt);
+ outf("rotated paragraphs: rotation_rad=%f (x y)=(%f %f) (w h)=(%f %f)", rotation_rad, x_pt, y_pt, w_pt, h_pt);
// https://docs.oasis-open.org/office/OpenDocument/v1.3/cs02/part3-schema/OpenDocument-v1.3-cs02-part3-schema.html#attribute-draw_transform
// says rotation is in degrees, but we seem to require -radians.
@@ -414,7 +392,7 @@ static int extract_document_output_rotated_paragraphs(
for (p=paragraph_begin; p<paragraph_end; ++p)
{
paragraph_t* paragraph = page->paragraphs[p];
- if (!e) e = extract_document_to_odt_content_paragraph(alloc, state, paragraph, content, styles);
+ if (!e) e = s_document_to_odt_content_paragraph(alloc, content_state, paragraph, content, styles);
}
if (!e) e = extract_astring_cat(alloc, content, "\n");
@@ -427,6 +405,219 @@ static int extract_document_output_rotated_paragraphs(
}
+static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_astring_t* content, extract_odt_styles_t* styles)
+{
+ int e = -1;
+ int y;
+
+ {
+ int x;
+ static int table_number = 0;
+ table_number += 1;
+ if (extract_astring_catf(alloc, content,
+ "\n"
+ " <table:table text:style-name=\"extract.table\" table:name=\"extract.table.%i\">\n"
+ " <table:table-columns>\n"
+ ,
+ table_number
+ )) goto end;
+
+ for (x=0; x<table->cells_num_x; ++x)
+ {
+ if (extract_astring_cat(alloc, content,
+ " <table:table-column table:style-name=\"extract.table.column\"/>\n"
+ )) goto end;
+ }
+ if (extract_astring_cat(alloc, content,
+ " </table:table-columns>\n"
+ )) goto end;
+ }
+ for (y=0; y<table->cells_num_y; ++y)
+ {
+ int x;
+ if (extract_astring_cat(alloc, content,
+ " <table:table-row>\n"
+ )) goto end;
+
+ for (x=0; x<table->cells_num_x; ++x)
+ {
+ cell_t* cell = table->cells[y*table->cells_num_x + x];
+ if (!cell->above || !cell->left)
+ {
+ if (extract_astring_cat(alloc, content, " <table:covered-table-cell/>\n")) goto end;
+ continue;
+ }
+
+ if (extract_astring_cat(alloc, content, " <table:table-cell")) goto end;
+ if (cell->extend_right > 1)
+ {
+ if (extract_astring_catf(alloc, content, " table:number-columns-spanned=\"%i\"", cell->extend_right)) goto end;
+ }
+ if (cell->extend_down > 1)
+ {
+ if (extract_astring_catf(alloc, content, " table:number-rows-spanned=\"%i\"", cell->extend_down)) goto end;
+ }
+ if (extract_astring_catf(alloc, content, ">\n")) goto end;
+
+ /* Write contents of this cell. */
+ {
+ int p;
+ content_state_t content_state;
+ content_state.font.name = NULL;
+ content_state.ctm_prev = NULL;
+ for (p=0; p<cell->paragraphs_num; ++p)
+ {
+ paragraph_t* paragraph = cell->paragraphs[p];
+ if (s_document_to_odt_content_paragraph(alloc, &content_state, paragraph, content, styles)) goto end;
+ }
+ if (content_state.font.name)
+ {
+ if (s_odt_run_finish(alloc, &content_state, content)) goto end;
+ }
+ if (extract_astring_cat(alloc, content, "\n")) goto end;
+ }
+ if (extract_astring_cat(alloc, content, " </table:table-cell>\n")) goto end;
+ }
+ if (extract_astring_cat(alloc, content, " </table:table-row>\n")) goto end;
+ }
+ if (extract_astring_cat(alloc, content, " </table:table>\n")) goto end;
+ e = 0;
+
+ end:
+ return e;
+}
+
+
+static int s_odt_append_rotated_paragraphs(
+ extract_alloc_t* alloc,
+ extract_page_t* page,
+ content_state_t* content_state,
+ int* p,
+ int* text_box_id,
+ const matrix_t* ctm,
+ double rotate,
+ extract_astring_t* content,
+ extract_odt_styles_t* styles
+ )
+/* Appends paragraphs with same rotation, starting with page->paragraphs[*p]
+and updates *p. */
+{
+ /* Find extent of paragraphs with this same rotation. extent
+ will contain max width and max height of paragraphs, in units
+ before application of ctm, i.e. before rotation. */
+ int e = -1;
+ point_t extent = {0, 0};
+ int p0 = *p;
+ int p1;
+ paragraph_t* paragraph = page->paragraphs[*p];
+
+ outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)",
+ rotate, rotate * 180 / pi,
+ ctm->e,
+ ctm->f,
+ ctm->a,
+ ctm->b,
+ ctm->c,
+ ctm->d
+ );
+
+ {
+ /* We assume that first span is at origin of text
+ block. This assumes left-to-right text. */
+ double rotate0 = rotate;
+ const matrix_t* ctm0 = ctm;
+ point_t origin =
+ {
+ paragraph->lines[0]->spans[0]->chars[0].x,
+ paragraph->lines[0]->spans[0]->chars[0].y
+ };
+ matrix_t ctm_inverse = {1, 0, 0, 1, 0, 0};
+ double ctm_det = ctm->a*ctm->d - ctm->b*ctm->c;
+ if (ctm_det != 0)
+ {
+ ctm_inverse.a = +ctm->d / ctm_det;
+ ctm_inverse.b = -ctm->b / ctm_det;
+ ctm_inverse.c = -ctm->c / ctm_det;
+ ctm_inverse.d = +ctm->a / ctm_det;
+ }
+ else
+ {
+ outf("cannot invert ctm=(%f %f %f %f)",
+ ctm->a, ctm->b, ctm->c, ctm->d);
+ }
+
+ for (*p=p0; *p<page->paragraphs_num; ++*p)
+ {
+ paragraph = page->paragraphs[*p];
+ ctm = &paragraph->lines[0]->spans[0]->ctm;
+ rotate = atan2(ctm->b, ctm->a);
+ if (rotate != rotate0)
+ {
+ break;
+ }
+
+ /* Update <extent>. */
+ {
+ int l;
+ for (l=0; l<paragraph->lines_num; ++l)
+ {
+ line_t* line = paragraph->lines[l];
+ span_t* span = extract_line_span_last(line);
+ char_t* char_ = extract_span_char_last(span);
+ double adv = char_->adv * extract_matrix_expansion(span->trm);
+ double x = char_->x + adv * cos(rotate);
+ double y = char_->y + adv * sin(rotate);
+
+ double dx = x - origin.x;
+ double dy = y - origin.y;
+
+ /* Position relative to origin and before box rotation. */
+ double xx = ctm_inverse.a * dx + ctm_inverse.b * dy;
+ double yy = ctm_inverse.c * dx + ctm_inverse.d * dy;
+ yy = -yy;
+ if (xx > extent.x) extent.x = xx;
+ if (yy > extent.y) extent.y = yy;
+ if (0) outf("rotate=%f *p=%i: origin=(%f %f) xy=(%f %f) dxy=(%f %f) xxyy=(%f %f) span: %s",
+ rotate, *p, origin.x, origin.y, x, y, dx, dy, xx, yy, extract_span_string(alloc, span));
+ }
+ }
+ }
+ p1 = *p;
+ rotate = rotate0;
+ ctm = ctm0;
+ outf("rotate=%f p0=%i p1=%i. extent is: (%f %f)",
+ rotate, p0, p1, extent.x, extent.y);
+ }
+
+ /* Paragraphs p0..p1-1 have same rotation. We output them into
+ a single rotated text box. */
+
+ /* We need unique id for text box. */
+ *text_box_id += 1;
+
+ if (s_odt_output_rotated_paragraphs(
+ alloc,
+ page,
+ p0,
+ p1,
+ rotate,
+ ctm->e,
+ ctm->f,
+ extent.x,
+ extent.y,
+ *text_box_id,
+ content,
+ styles,
+ content_state
+ )) goto end;
+ *p = p1 - 1;
+ e = 0;
+
+ end:
+ return e;
+}
+
+
int extract_document_to_odt_content(
extract_alloc_t* alloc,
document_t* document,
@@ -445,156 +636,66 @@ int extract_document_to_odt_content(
for (p=0; p<document->pages_num; ++p)
{
extract_page_t* page = document->pages[p];
- int p;
- content_state_t state;
- state.font_name = NULL;
- state.font_size = 0;
- state.font_bold = 0;
- state.font_italic = 0;
- state.ctm_prev = NULL;
+ int p = 0;
+ int t = 0;
+ content_state_t content_state;
+ content_state.font.name = NULL;
+ content_state.font.size = 0;
+ content_state.font.bold = 0;
+ content_state.font.italic = 0;
+ content_state.ctm_prev = NULL;
- for (p=0; p<page->paragraphs_num; ++p)
+ for(;;)
{
- paragraph_t* paragraph = page->paragraphs[p];
- const matrix_t* ctm = &paragraph->lines[0]->spans[0]->ctm;
- double rotate = atan2(ctm->b, ctm->a);
+ paragraph_t* paragraph = (p == page->paragraphs_num) ? NULL : page->paragraphs[p];
+ table_t* table = (t == page->tables_num) ? NULL : page->tables[t];
+ double y_paragraph;
+ double y_table;
+ if (!paragraph && !table) break;
+ y_paragraph = (paragraph) ? paragraph->lines[0]->spans[0]->chars[0].y : DBL_MAX;
+ y_table = (table) ? table->pos.y : DBL_MAX;
- if (spacing
- && state.ctm_prev
- && paragraph->lines_num
- && paragraph->lines[0]->spans_num
- && matrix_cmp4(
- state.ctm_prev,
- &paragraph->lines[0]->spans[0]->ctm
- )
- )
+ if (paragraph && y_paragraph < y_table)
{
- /* Extra vertical space between paragraphs that were at
- different angles in the original document. */
- if (extract_odt_paragraph_empty(alloc, content, styles)) goto end;
- }
+ const matrix_t* ctm = &paragraph->lines[0]->spans[0]->ctm;
+ double rotate = atan2(ctm->b, ctm->a);
+
+ if (spacing
+ && content_state.ctm_prev
+ && paragraph->lines_num
+ && paragraph->lines[0]->spans_num
+ && extract_matrix_cmp4(
+ content_state.ctm_prev,
+ &paragraph->lines[0]->spans[0]->ctm
+ )
+ )
+ {
+ /* Extra vertical space between paragraphs that were at
+ different angles in the original document. */
+ if (s_odt_append_empty_paragraph(alloc, content, styles)) goto end;
+ }
- if (spacing)
- {
- /* Extra vertical space between paragraphs. */
- if (extract_odt_paragraph_empty(alloc, content, styles)) goto end;
- }
-
- if (rotation && rotate != 0)
- {
- /* Find extent of paragraphs with this same rotation. extent
- will contain max width and max height of paragraphs, in units
- before application of ctm, i.e. before rotation. */
- point_t extent = {0, 0};
- int p0 = p;
- int p1;
-
- outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)",
- rotate, rotate * 180 / pi,
- ctm->e,
- ctm->f,
- ctm->a,
- ctm->b,
- ctm->c,
- ctm->d
- );
-
+ if (spacing)
{
- /* We assume that first span is at origin of text
- block. This assumes left-to-right text. */
- double rotate0 = rotate;
- const matrix_t* ctm0 = ctm;
- point_t origin =
- {
- paragraph->lines[0]->spans[0]->chars[0].x,
- paragraph->lines[0]->spans[0]->chars[0].y
- };
- matrix_t ctm_inverse = {1, 0, 0, 1, 0, 0};
- double ctm_det = ctm->a*ctm->d - ctm->b*ctm->c;
- if (ctm_det != 0)
- {
- ctm_inverse.a = +ctm->d / ctm_det;
- ctm_inverse.b = -ctm->b / ctm_det;
- ctm_inverse.c = -ctm->c / ctm_det;
- ctm_inverse.d = +ctm->a / ctm_det;
- }
- else
- {
- outf("cannot invert ctm=(%f %f %f %f)",
- ctm->a, ctm->b, ctm->c, ctm->d);
- }
-
- for (p=p0; p<page->paragraphs_num; ++p)
- {
- paragraph = page->paragraphs[p];
- ctm = &paragraph->lines[0]->spans[0]->ctm;
- rotate = atan2(ctm->b, ctm->a);
- if (rotate != rotate0)
- {
- break;
- }
-
- /* Update <extent>. */
- {
- int l;
- for (l=0; l<paragraph->lines_num; ++l)
- {
- line_t* line = paragraph->lines[l];
- span_t* span = line_span_last(line);
- char_t* char_ = span_char_last(span);
- double adv = char_->adv * matrix_expansion(span->trm);
- double x = char_->x + adv * cos(rotate);
- double y = char_->y + adv * sin(rotate);
-
- double dx = x - origin.x;
- double dy = y - origin.y;
-
- /* Position relative to origin and before box rotation. */
- double xx = ctm_inverse.a * dx + ctm_inverse.b * dy;
- double yy = ctm_inverse.c * dx + ctm_inverse.d * dy;
- yy = -yy;
- if (xx > extent.x) extent.x = xx;
- if (yy > extent.y) extent.y = yy;
- if (0) outf("rotate=%f p=%i: origin=(%f %f) xy=(%f %f) dxy=(%f %f) xxyy=(%f %f) span: %s",
- rotate, p, origin.x, origin.y, x, y, dx, dy, xx, yy, span_string(alloc, span));
- }
- }
- }
- p1 = p;
- rotate = rotate0;
- ctm = ctm0;
- outf("rotate=%f p0=%i p1=%i. extent is: (%f %f)",
- rotate, p0, p1, extent.x, extent.y);
+ /* Extra vertical space between paragraphs. */
+ if (s_odt_append_empty_paragraph(alloc, content, styles)) goto end;
}
-
- /* Paragraphs p0..p1-1 have same rotation. We output them into
- a single rotated text box. */
-
- /* We need unique id for text box. */
- text_box_id += 1;
-
- if (extract_document_output_rotated_paragraphs(
- alloc,
- page,
- p0,
- p1,
- rotate,
- ctm->e,
- ctm->f,
- extent.x,
- extent.y,
- text_box_id,
- content,
- styles,
- &state
- )) goto end;
- p = p1 - 1;
+
+ if (rotation && rotate != 0)
+ {
+ if (s_odt_append_rotated_paragraphs(alloc, page, &content_state, &p, &text_box_id, ctm, rotate, content, styles)) goto end;
+ }
+ else
+ {
+ if (s_document_to_odt_content_paragraph(alloc, &content_state, paragraph, content, styles)) goto end;
+ }
+ p += 1;
}
- else
+ else if (table)
{
- if (extract_document_to_odt_content_paragraph(alloc, &state, paragraph, content, styles)) goto end;
+ if (s_odt_append_table(alloc, table, content, styles)) goto end;
+ t += 1;
}
-
}
outf("images=%i", images);
@@ -604,7 +705,7 @@ int extract_document_to_odt_content(
outf("page->images_num=%i", page->images_num);
for (i=0; i<page->images_num; ++i)
{
- extract_document_append_image(alloc, content, &page->images[i]);
+ s_odt_append_image(alloc, content, &page->images[i]);
}
}
}
@@ -658,26 +759,39 @@ int extract_odt_content_item(
char* text_intermediate = NULL;
extract_astring_t styles_definitions = {0};
+ /* Insert content before '</office:text>'. */
if (extract_content_insert(
alloc,
text,
NULL /*single*/,
- NULL,
- "</office:text>",
+ NULL /*mid_begin_name*/,
+ "</office:text>" /*mid_end_name*/,
contentss,
contentss_num,
&text_intermediate
)) goto end;
outf("text_intermediate: %s", text_intermediate);
- if (extract_odt_styles_definitions(alloc, styles, &styles_definitions)) goto end;
+ /* Convert <styles> to text. */
+ if (s_odt_styles_definitions(alloc, styles, &styles_definitions)) goto end;
+ /* To make tables work, we seem to need to specify table and column
+ styles, and these can be empty. todo: maybe specify exact sizes based
+ on the pdf table and cell dimensions. */
+ if (extract_astring_cat(alloc, &styles_definitions,
+ "\n"
+ "<style:style style:name=\"extract.table\" style:family=\"table\"/>\n"
+ "<style:style style:name=\"extract.table.column\" style:family=\"table-column\"/>\n"
+ )) goto end;
+
+ /* Replace '<office:automatic-styles/>' with text from
+ <styles_definitions>. */
e = extract_content_insert(
alloc,
text_intermediate,
"<office:automatic-styles/>" /*single*/,
- NULL,
- NULL, //"</office:automatic-styles>",
+ NULL /*mid_begin_name*/,
+ NULL /*mid_end_name*/,
&styles_definitions,
1,
text2
@@ -719,14 +833,14 @@ int extract_odt_content_item(
}
e = 0;
end:
- outf("e=%i errno=%i text2=%s", e, errno, text2);
+ outf("e=%i errno=%i text2=%s", e, errno, text2 ? *text2 : "");
if (e)
{
/* We might have set <text2> to new content. */
extract_free(alloc, text2);
/* We might have used <temp> as a temporary buffer. */
- extract_astring_free(alloc, &temp);
}
+ extract_astring_free(alloc, &temp);
extract_astring_init(&temp);
return e;
}
@@ -747,7 +861,6 @@ int extract_odt_write_template(
int e = -1;
int i;
char* path_tempdir = NULL;
- FILE* f = NULL;
char* path = NULL;
char* text = NULL;
char* text2 = NULL;
@@ -827,7 +940,6 @@ int extract_odt_write_template(
}
/* Copy images into <path_tempdir>/Pictures/. */
- outf("");
extract_free(alloc, &path);
if (extract_asprintf(alloc, &path, "%s/Pictures", path_tempdir) < 0) goto end;
if (extract_mkdir(path, 0777))
@@ -835,7 +947,6 @@ int extract_odt_write_template(
outf("Failed to mkdir %s", path);
goto end;
}
- outf("");
for (i=0; i<images->images_num; ++i)
{
image_t* image = &images->images[i];
@@ -869,8 +980,6 @@ int extract_odt_write_template(
extract_free(alloc, &path);
extract_free(alloc, &text);
extract_free(alloc, &text2);
- //extract_odt_styles_free(alloc, &styles);
- if (f) fclose(f);
if (e)
{