summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'leptonica/src/bmf.c')
-rw-r--r--leptonica/src/bmf.c876
1 files changed, 876 insertions, 0 deletions
diff --git a/leptonica/src/bmf.c b/leptonica/src/bmf.c
new file mode 100644
index 00000000..c597e480
--- /dev/null
+++ b/leptonica/src/bmf.c
@@ -0,0 +1,876 @@
+/*====================================================================*
+ - Copyright (C) 2001 Leptonica. All rights reserved.
+ -
+ - Redistribution and use in source and binary forms, with or without
+ - modification, are permitted provided that the following conditions
+ - are met:
+ - 1. Redistributions of source code must retain the above copyright
+ - notice, this list of conditions and the following disclaimer.
+ - 2. Redistributions in binary form must reproduce the above
+ - copyright notice, this list of conditions and the following
+ - disclaimer in the documentation and/or other materials
+ - provided with the distribution.
+ -
+ - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
+ - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file bmf.c
+ * <pre>
+ *
+ * Acquisition and generation of bitmap fonts.
+ *
+ * L_BMF *bmfCreate()
+ * void bmfDestroy()
+ *
+ * PIX *bmfGetPix()
+ * l_int32 bmfGetWidth()
+ * l_int32 bmfGetBaseline()
+ *
+ * PIXA *pixaGetFont()
+ * l_int32 pixaSaveFont()
+ * static PIXA *pixaGenerateFontFromFile()
+ * static PIXA *pixaGenerateFontFromString()
+ * static PIXA *pixaGenerateFont()
+ * static l_int32 pixGetTextBaseline()
+ * static l_int32 bmfMakeAsciiTables()
+ *
+ * This is not a very general utility, because it only uses bitmap
+ * representations of a single font, Palatino-Roman, with the
+ * normal style. It uses bitmaps generated for nine sizes, from
+ * 4 to 20 pts, rendered at 300 ppi. Generalization to different
+ * fonts, styles and sizes is straightforward.
+ *
+ * I chose Palatino-Roman because I like it.
+ * The input font images were generated from a set of small
+ * PostScript files, such as chars-12.ps, which were rendered
+ * into the inputfont[] bitmap files using GhostScript. See, for
+ * example, the bash script prog/ps2tiff, which will "rip" a
+ * PostScript file into a set of ccitt-g4 compressed tiff files.
+ *
+ * The set of ascii characters from 32 through 126 are the 95
+ * printable ascii chars. Palatino-Roman is missing char 92, '\'.
+ * I have substituted an LR flip of '/', char 47, for 92, so that
+ * there are no missing printable chars in this set. The space is
+ * char 32, and I have given it a width equal to twice the width of '!'.
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include "allheaders.h"
+#include "bmfdata.h"
+
+    /* Extra vertical separation between text lines, as a fraction of
+     * the line height; used in bmfMakeAsciiTables() to set vertlinesep. */
+static const l_float32 VertFractSep = 0.3f;
+
+    /* Compile-time debug switches, defined only when console I/O is
+     * available.  Each is tested with #if in pixaSaveFont() or
+     * pixaGenerateFont():
+     *   DEBUG_BASELINE  writes each row, with its baseline drawn,
+     *                   to /tmp/baseline/
+     *   DEBUG_CHARS     displays the tiled set of character bitmaps
+     *   DEBUG_FONT_GEN  logs row counts, char counts and baselines */
+#ifndef NO_CONSOLE_IO
+#define DEBUG_BASELINE 0
+#define DEBUG_CHARS 0
+#define DEBUG_FONT_GEN 0
+#endif /* ~NO_CONSOLE_IO */
+
+    /* Static helpers; each is defined and documented later in this file */
+static PIXA *pixaGenerateFontFromFile(const char *dir, l_int32 fontsize,
+ l_int32 *pbl0, l_int32 *pbl1,
+ l_int32 *pbl2);
+static PIXA *pixaGenerateFontFromString(l_int32 fontsize, l_int32 *pbl0,
+ l_int32 *pbl1, l_int32 *pbl2);
+static PIXA *pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0,
+ l_int32 *pbl1, l_int32 *pbl2);
+static l_int32 pixGetTextBaseline(PIX *pixs, l_int32 *tab8, l_int32 *py);
+static l_int32 bmfMakeAsciiTables(L_BMF *bmf);
+
+/*---------------------------------------------------------------------*/
+/* Bmf create/destroy */
+/*---------------------------------------------------------------------*/
+/*!
+ * \brief   bmfCreate()
+ *
+ * \param[in]    dir        [optional] directory holding pixa of character set
+ * \param[in]    fontsize   4, 6, 8, ... , 20
+ * \return  bmf holding the bitmap font and associated information,
+ *              or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If %dir == null, this generates the font bitmaps from a
+ *          compiled string.
+ *      (2) Otherwise, this tries to read a pre-computed pixa file with the
+ *          95 ascii chars in it.  If the file is not found, it then
+ *          attempts to generate the pixa and associated baseline
+ *          data from a tiff image containing all the characters.  If
+ *          that fails, it uses the compiled string.
+ *      (3) The returned bmf owns the pixa, a copy of %dir, and the
+ *          ascii lookup tables; release it with bmfDestroy().
+ *      (4) NULL is returned if the bmf struct cannot be allocated,
+ *          if no pixa can be made, or if the ascii tables cannot
+ *          be built.
+ * </pre>
+ */
+L_BMF *
+bmfCreate(const char  *dir,
+          l_int32      fontsize)
+{
+L_BMF  *bmf;
+PIXA   *pixa;
+
+    PROCNAME("bmfCreate");
+
+    if (fontsize < 4 || fontsize > 20 || (fontsize % 2))
+        return (L_BMF *)ERROR_PTR("fontsize must be in {4, 6, ..., 20}",
+                                  procName, NULL);
+
+        /* The baseline fields of bmf are written through below,
+         * so the allocation must be verified first. */
+    if ((bmf = (L_BMF *)LEPT_CALLOC(1, sizeof(L_BMF))) == NULL)
+        return (L_BMF *)ERROR_PTR("bmf not made", procName, NULL);
+
+    if (!dir) {  /* Generate from a string */
+        pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1,
+                                          &bmf->baseline2, &bmf->baseline3);
+    } else {  /* Look for the pixa in a directory */
+        pixa = pixaGetFont(dir, fontsize, &bmf->baseline1, &bmf->baseline2,
+                           &bmf->baseline3);
+        if (!pixa) {  /* Not found; make it from a file */
+            L_INFO("Generating pixa of bitmap fonts from file\n", procName);
+            pixa = pixaGenerateFontFromFile(dir, fontsize, &bmf->baseline1,
+                                            &bmf->baseline2, &bmf->baseline3);
+            if (!pixa) {  /* Not made; make it from a string after all */
+                L_ERROR("Failed to make font; use string\n", procName);
+                pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1,
+                                          &bmf->baseline2, &bmf->baseline3);
+            }
+        }
+    }
+
+    if (!pixa) {
+        bmfDestroy(&bmf);
+        return (L_BMF *)ERROR_PTR("font pixa not made", procName, NULL);
+    }
+
+    bmf->pixa = pixa;
+    bmf->size = fontsize;
+    if (dir) bmf->directory = stringNew(dir);
+
+        /* Do not hand back a bmf whose lookup tables were not built;
+         * bmfDestroy() releases the pixa and anything already stored. */
+    if (bmfMakeAsciiTables(bmf)) {
+        bmfDestroy(&bmf);
+        return (L_BMF *)ERROR_PTR("ascii tables not made", procName, NULL);
+    }
+    return bmf;
+}
+
+
+/*!
+ * \brief   bmfDestroy()
+ *
+ * \param[in,out]   pbmf    address of the bmf handle; the handle is
+ *                          null on return
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) Releases the pixa, the directory string and the three
+ *          ascii tables, then the struct itself.
+ *      (2) A null %pbmf only emits a warning; a null *pbmf is a no-op.
+ * </pre>
+ */
+void
+bmfDestroy(L_BMF  **pbmf)
+{
+L_BMF  *font;
+
+    PROCNAME("bmfDestroy");
+
+    if (!pbmf) {
+        L_WARNING("ptr address is null!\n", procName);
+        return;
+    }
+
+    font = *pbmf;
+    if (font != NULL) {
+        pixaDestroy(&font->pixa);
+        LEPT_FREE(font->directory);
+        LEPT_FREE(font->fonttab);
+        LEPT_FREE(font->baselinetab);
+        LEPT_FREE(font->widthtab);
+        LEPT_FREE(font);
+        *pbmf = NULL;
+    }
+}
+
+
+/*---------------------------------------------------------------------*/
+/* Bmf accessors */
+/*---------------------------------------------------------------------*/
+/*!
+ * \brief   bmfGetPix()
+ *
+ * \param[in]    bmf
+ * \param[in]    chr    should be one of the 95 supported printable bitmaps
+ * \return  pix   clone of pix in bmf, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A newline (ascii 10) has no bitmap; NULL is returned for it
+ *          without an error message.
+ *      (2) %chr indexes the 128-entry fonttab.  Plain char may be
+ *          signed or unsigned, so any value outside [0 ... 127] is
+ *          rejected before the lookup rather than read out of bounds.
+ * </pre>
+ */
+PIX *
+bmfGetPix(L_BMF  *bmf,
+          char    chr)
+{
+l_int32  i, index;
+PIXA    *pixa;
+
+    PROCNAME("bmfGetPix");
+
+    if ((index = (l_int32)chr) == 10)  /* NL */
+        return NULL;
+    if (!bmf)
+        return (PIX *)ERROR_PTR("bmf not defined", procName, NULL);
+    if (!bmf->fonttab)
+        return (PIX *)ERROR_PTR("fonttab not found", procName, NULL);
+
+        /* Guard the table lookup against non-ascii input */
+    if (index < 0 || index > 127) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return NULL;
+    }
+
+    i = bmf->fonttab[index];
+    if (i == UNDEF) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return NULL;
+    }
+
+    if ((pixa = bmf->pixa) == NULL)
+        return (PIX *)ERROR_PTR("pixa not found", procName, NULL);
+
+    return pixaGetPix(pixa, i, L_CLONE);
+}
+
+
+/*!
+ * \brief   bmfGetWidth()
+ *
+ * \param[in]    bmf
+ * \param[in]    chr    should be one of the 95 supported bitmaps
+ * \param[out]   pw     character width; -1 if not printable
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A newline (ascii 10) is not an error: 0 is returned with
+ *          *pw == -1.
+ *      (2) %chr indexes the 128-entry fonttab.  Plain char may be
+ *          signed or unsigned, so any value outside [0 ... 127] is
+ *          rejected before the lookup rather than read out of bounds.
+ * </pre>
+ */
+l_ok
+bmfGetWidth(L_BMF    *bmf,
+            char      chr,
+            l_int32  *pw)
+{
+l_int32  i, index;
+PIXA    *pixa;
+
+    PROCNAME("bmfGetWidth");
+
+    if (!pw)
+        return ERROR_INT("&w not defined", procName, 1);
+    *pw = -1;
+    if (!bmf)
+        return ERROR_INT("bmf not defined", procName, 1);
+    if ((index = (l_int32)chr) == 10)  /* NL */
+        return 0;
+    if (!bmf->fonttab)
+        return ERROR_INT("fonttab not found", procName, 1);
+
+        /* Guard the table lookup against non-ascii input */
+    if (index < 0 || index > 127) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return 1;
+    }
+
+    i = bmf->fonttab[index];
+    if (i == UNDEF) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return 1;
+    }
+
+    if ((pixa = bmf->pixa) == NULL)
+        return ERROR_INT("pixa not found", procName, 1);
+
+    return pixaGetPixDimensions(pixa, i, pw, NULL, NULL);
+}
+
+
+/*!
+ * \brief   bmfGetBaseline()
+ *
+ * \param[in]    bmf
+ * \param[in]    chr         should be one of the 95 supported bitmaps
+ * \param[out]   pbaseline   distance below UL corner of bitmap char
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A newline (ascii 10) is not an error: 0 is returned with
+ *          *pbaseline == 0.
+ *      (2) %chr indexes the 128-entry baselinetab.  Plain char may be
+ *          signed or unsigned, so any value outside [0 ... 127] is
+ *          rejected before the lookup rather than read out of bounds.
+ * </pre>
+ */
+l_ok
+bmfGetBaseline(L_BMF    *bmf,
+               char      chr,
+               l_int32  *pbaseline)
+{
+l_int32  bl, index;
+
+    PROCNAME("bmfGetBaseline");
+
+    if (!pbaseline)
+        return ERROR_INT("&baseline not defined", procName, 1);
+    *pbaseline = 0;
+    if (!bmf)
+        return ERROR_INT("bmf not defined", procName, 1);
+    if ((index = (l_int32)chr) == 10)  /* NL */
+        return 0;
+    if (!bmf->baselinetab)
+        return ERROR_INT("baselinetab not found", procName, 1);
+
+        /* Guard the table lookup against non-ascii input */
+    if (index < 0 || index > 127) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return 1;
+    }
+
+    bl = bmf->baselinetab[index];
+    if (bl == UNDEF) {
+        L_ERROR("no bitmap representation for %d\n", procName, index);
+        return 1;
+    }
+
+    *pbaseline = bl;
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*/
+/* Font bitmap acquisition and generation */
+/*---------------------------------------------------------------------*/
+/*!
+ * \brief   pixaGetFont()
+ *
+ * \param[in]    dir        directory holding the serialized pixa of the font
+ * \param[in]    fontsize   4, 6, 8, ... , 20
+ * \param[out]   pbl0       baseline of row 1
+ * \param[out]   pbl1       baseline of row 2
+ * \param[out]   pbl2       baseline of row 3
+ * \return  pixa of font bitmaps for 95 characters, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This reads a pre-computed pixa file with the 95 ascii chars.
+ *      (2) The three baselines come from the compiled baselines[] table
+ *          for this font size, and are set before the read is attempted.
+ * </pre>
+ */
+PIXA *
+pixaGetFont(const char  *dir,
+            l_int32      fontsize,
+            l_int32     *pbl0,
+            l_int32     *pbl1,
+            l_int32     *pbl2)
+{
+char    *path;
+l_int32  idx;
+PIXA    *fontpixa;
+
+    PROCNAME("pixaGetFont");
+
+        /* Sizes 4, 6, ..., 20 map onto table slots 0, 1, ..., 8 */
+    idx = (fontsize / 2) - 2;
+    if (idx < 0 || idx >= NUM_FONTS)
+        return (PIXA *)ERROR_PTR("font size invalid", procName, NULL);
+    if (pbl0 == NULL || pbl1 == NULL || pbl2 == NULL)
+        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
+
+    *pbl0 = baselines[idx][0];
+    *pbl1 = baselines[idx][1];
+    *pbl2 = baselines[idx][2];
+
+    path = pathJoin(dir, outputfonts[idx]);
+    fontpixa = pixaRead(path);
+    LEPT_FREE(path);
+    if (fontpixa == NULL)
+        L_WARNING("pixa of char bitmaps not found\n", procName);
+
+    return fontpixa;
+}
+
+
+/*!
+ * \brief   pixaSaveFont()
+ *
+ * \param[in]    indir      [optional] directory holding image of character set
+ * \param[in]    outdir     directory into which the output pixa file
+ *                          will be written
+ * \param[in]    fontsize   in pts, at 300 ppi
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This saves a font of a particular size.
+ *      (2) If %indir == null, this generates the font bitmaps from a
+ *          compiled string.
+ *      (3) prog/genfonts calls this function for each of the
+ *          nine font sizes, to generate all the font pixa files.
+ *      (4) A failure to build the output path or to write the pixa
+ *          is reported as an error.
+ * </pre>
+ */
+l_ok
+pixaSaveFont(const char  *indir,
+             const char  *outdir,
+             l_int32      fontsize)
+{
+char    *pathname;
+l_int32  bl1, bl2, bl3, ret;
+PIXA    *pixa;
+
+    PROCNAME("pixaSaveFont");
+
+    if (fontsize < 4 || fontsize > 20 || (fontsize % 2))
+        return ERROR_INT("fontsize must be in {4, 6, ..., 20}", procName, 1);
+
+    if (!indir)  /* Generate from a string */
+        pixa = pixaGenerateFontFromString(fontsize, &bl1, &bl2, &bl3);
+    else  /* Generate from an image file */
+        pixa = pixaGenerateFontFromFile(indir, fontsize, &bl1, &bl2, &bl3);
+    if (!pixa)
+        return ERROR_INT("pixa not made", procName, 1);
+
+    pathname = pathJoin(outdir, outputfonts[(fontsize - 4) / 2]);
+    if (!pathname) {
+        pixaDestroy(&pixa);
+        return ERROR_INT("pathname not made", procName, 1);
+    }
+    ret = pixaWrite(pathname, pixa);
+
+#if DEBUG_FONT_GEN
+    L_INFO("Found %d chars in font size %d\n", procName, pixaGetCount(pixa),
+           fontsize);
+    L_INFO("Baselines are at: %d, %d, %d\n", procName, bl1, bl2, bl3);
+#endif  /* DEBUG_FONT_GEN */
+
+    LEPT_FREE(pathname);
+    pixaDestroy(&pixa);
+
+        /* Report the write failure only after everything is released */
+    if (ret)
+        return ERROR_INT("pixa not written", procName, 1);
+    return 0;
+}
+
+
+/*!
+ * \brief   pixaGenerateFontFromFile()
+ *
+ * \param[in]    dir        directory holding image of character set
+ * \param[in]    fontsize   4, 6, 8, ... , 20, in pts at 300 ppi
+ * \param[out]   pbl0       baseline of row 1
+ * \param[out]   pbl1       baseline of row 2
+ * \param[out]   pbl2       baseline of row 3
+ * \return  pixa of font bitmaps for 95 characters, or NULL on error
+ *
+ *  The font generation functions work from 9 input images, one per
+ *  font size, each holding bitmaps of 94 ascii characters in the
+ *  Palatino-Roman font.  Every input image has 3 rows of characters,
+ *  covering these ascii ranges:
+ *    row 0:  32-57     32 is a space
+ *    row 1:  58-91     92, '\', is not represented in this font
+ *    row 2:  93-126
+ *  The missing '\' bitmap is made by a LR flip of the '/' bitmap,
+ *  which gives representations of all 95 printable chars.
+ *
+ *  Typically, use pixaGetFont() to get the character bitmaps in
+ *  memory for a bmf.  That simply reads the serialized pixa that
+ *  was produced in prog/genfonts.c using this function.
+ */
+static PIXA *
+pixaGenerateFontFromFile(const char  *dir,
+                         l_int32      fontsize,
+                         l_int32     *pbl0,
+                         l_int32     *pbl1,
+                         l_int32     *pbl2)
+{
+char    *path;
+l_int32  idx;
+PIX     *pixfont;
+PIXA    *pixachars;
+
+    PROCNAME("pixaGenerateFontFromFile");
+
+    if (pbl0 == NULL || pbl1 == NULL || pbl2 == NULL)
+        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
+    *pbl0 = 0;
+    *pbl1 = 0;
+    *pbl2 = 0;
+    if (dir == NULL)
+        return (PIXA *)ERROR_PTR("dir not defined", procName, NULL);
+
+        /* Sizes 4, 6, ..., 20 map onto table slots 0, 1, ..., 8 */
+    idx = (fontsize / 2) - 2;
+    if (idx < 0 || idx >= NUM_FONTS)
+        return (PIXA *)ERROR_PTR("font size invalid", procName, NULL);
+
+        /* Read the image holding the 3 rows of characters */
+    path = pathJoin(dir, inputfonts[idx]);
+    pixfont = pixRead(path);
+    LEPT_FREE(path);
+    if (pixfont == NULL) {
+        L_ERROR("pix not found for font size %d\n", procName, fontsize);
+        return NULL;
+    }
+
+    pixachars = pixaGenerateFont(pixfont, fontsize, pbl0, pbl1, pbl2);
+    pixDestroy(&pixfont);
+    return pixachars;
+}
+
+
+/*!
+ * \brief   pixaGenerateFontFromString()
+ *
+ * \param[in]    fontsize   4, 6, 8, ... , 20, in pts at 300 ppi
+ * \param[out]   pbl0       baseline of row 1
+ * \param[out]   pbl1       baseline of row 2
+ * \param[out]   pbl2       baseline of row 3
+ * \return  pixa of font bitmaps for 95 characters, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) See pixaGenerateFontFromFile() for details.
+ *      (2) The image of the character set is decoded from the
+ *          base64 string compiled in for the requested size.
+ * </pre>
+ */
+static PIXA *
+pixaGenerateFontFromString(l_int32   fontsize,
+                           l_int32  *pbl0,
+                           l_int32  *pbl1,
+                           l_int32  *pbl2)
+{
+const char  *encoded;
+l_uint8     *bytes;
+l_int32      idx, nbytes;
+PIX         *pixfont;
+PIXA        *pixachars;
+
+    PROCNAME("pixaGenerateFontFromString");
+
+    if (pbl0 == NULL || pbl1 == NULL || pbl2 == NULL)
+        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
+    *pbl0 = 0;
+    *pbl1 = 0;
+    *pbl2 = 0;
+    idx = (fontsize / 2) - 2;
+    if (idx < 0 || idx >= NUM_FONTS)
+        return (PIXA *)ERROR_PTR("invalid font size", procName, NULL);
+
+        /* Pick the compiled string for this size.  Any size that
+         * passed the range check but is not listed falls through
+         * to the size 20 data. */
+    switch (fontsize) {
+    case 4:
+        encoded = fontdata_4;
+        break;
+    case 6:
+        encoded = fontdata_6;
+        break;
+    case 8:
+        encoded = fontdata_8;
+        break;
+    case 10:
+        encoded = fontdata_10;
+        break;
+    case 12:
+        encoded = fontdata_12;
+        break;
+    case 14:
+        encoded = fontdata_14;
+        break;
+    case 16:
+        encoded = fontdata_16;
+        break;
+    case 18:
+        encoded = fontdata_18;
+        break;
+    default:  /* fontsize == 20 */
+        encoded = fontdata_20;
+        break;
+    }
+
+    bytes = decodeBase64(encoded, strlen(encoded), &nbytes);
+    if (bytes == NULL)
+        return (PIXA *)ERROR_PTR("data not made", procName, NULL);
+
+    pixfont = pixReadMem(bytes, nbytes);
+    LEPT_FREE(bytes);
+    if (pixfont == NULL)
+        return (PIXA *)ERROR_PTR("pix not made", procName, NULL);
+
+    pixachars = pixaGenerateFont(pixfont, fontsize, pbl0, pbl1, pbl2);
+    pixDestroy(&pixfont);
+    return pixachars;
+}
+
+
+/*!
+ * \brief   pixaGenerateFont()
+ *
+ * \param[in]    pixs       image of the character set, in 3 rows
+ * \param[in]    fontsize   4, 6, 8, ... , 20, in pts at 300 ppi
+ * \param[out]   pbl0       baseline of row 1
+ * \param[out]   pbl1       baseline of row 2
+ * \param[out]   pbl2       baseline of row 3
+ * \return  pixa of font bitmaps for 95 characters, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This does all the work.  See pixaGenerateFontFromFile()
+ *          for an overview.
+ *      (2) The pix is for one of the 9 fonts.  %fontsize is only
+ *          used here for debugging.
+ *      (3) Rows are located from the per-raster-line pixel counts;
+ *          characters within a row are located as connected components
+ *          after a vertical closing, sorted left to right.
+ *      (4) Placeholder bitmaps are inserted for the space (index 0)
+ *          and for '\' (index 60), and are replaced at the end.
+ *      (5) On every error return, all intermediate data made here
+ *          is released and the baselines are left at 0.
+ * </pre>
+ */
+static PIXA *
+pixaGenerateFont(PIX      *pixs,
+                 l_int32   fontsize,
+                 l_int32  *pbl0,
+                 l_int32  *pbl1,
+                 l_int32  *pbl2)
+{
+l_int32   i, j, nrows, nrowchars, nchars, h, yval;
+l_int32   width, height;
+l_int32   baseline[3];
+l_int32  *tab = NULL;
+BOX      *box, *box1, *box2;
+BOXA     *boxar, *boxac, *boxacs;
+PIX      *pix1, *pix2, *pixr, *pixrc, *pixc;
+PIXA     *pixa;
+l_int32   n, w, inrow, top;
+l_int32  *ia;
+NUMA     *na;
+
+    PROCNAME("pixaGenerateFont");
+
+    if (!pbl0 || !pbl1 || !pbl2)
+        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
+    *pbl0 = *pbl1 = *pbl2 = 0;
+    if (!pixs)
+        return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);
+
+        /* Locate the 3 rows of characters */
+    w = pixGetWidth(pixs);
+    na = pixCountPixelsByRow(pixs, NULL);
+    boxar = boxaCreate(0);
+    n = numaGetCount(na);
+    ia = numaGetIArray(na);
+    inrow = 0;
+    top = 0;
+    for (i = 0; i < n; i++) {
+        if (!inrow && ia[i] > 0) {
+            inrow = 1;
+            top = i;
+        } else if (inrow && ia[i] == 0) {
+            inrow = 0;
+            box = boxCreate(0, top, w, i - top);
+            boxaAddBox(boxar, box, L_INSERT);
+        }
+    }
+    LEPT_FREE(ia);
+    numaDestroy(&na);
+    nrows = boxaGetCount(boxar);
+#if DEBUG_FONT_GEN
+    L_INFO("For fontsize %d, have %d rows\n", procName, fontsize, nrows);
+#endif  /* DEBUG_FONT_GEN */
+    if (nrows != 3) {
+        L_INFO("nrows = %d; skipping fontsize %d\n", procName, nrows, fontsize);
+        boxaDestroy(&boxar);
+        return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL);
+    }
+
+        /* Grab the character images and baseline data */
+#if DEBUG_BASELINE
+    lept_rmdir("baseline");
+    lept_mkdir("baseline");
+#endif  /* DEBUG_BASELINE */
+    tab = makePixelSumTab8();
+    pixa = pixaCreate(95);
+    for (i = 0; i < nrows; i++) {
+        box = boxaGetBox(boxar, i, L_CLONE);
+        pixr = pixClipRectangle(pixs, box, NULL);  /* row of chars */
+        yval = 0;  /* stays defined if the baseline cannot be found */
+        pixGetTextBaseline(pixr, tab, &yval);
+        baseline[i] = yval;
+
+#if DEBUG_BASELINE
+        L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName,
+               i, yval, pixGetHeight(pixr));
+        pix1 = pixCopy(NULL, pixr);
+        pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1,
+                      L_FLIP_PIXELS);
+        if (i == 0 )
+            pixWriteDebug("/tmp/baseline/row0.png", pix1, IFF_PNG);
+        else if (i == 1)
+            pixWriteDebug("/tmp/baseline/row1.png", pix1, IFF_PNG);
+        else
+            pixWriteDebug("/tmp/baseline/row2.png", pix1, IFF_PNG);
+        pixDestroy(&pix1);
+#endif  /* DEBUG_BASELINE */
+
+        boxDestroy(&box);
+        pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35);
+        boxac = pixConnComp(pixrc, NULL, 8);
+        boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL);
+        if (i == 0) {  /* consolidate the two components of '"' */
+            box1 = boxaGetBox(boxacs, 1, L_CLONE);
+            box2 = boxaGetBox(boxacs, 2, L_CLONE);
+                /* With fewer than 3 components in the row there is
+                 * nothing to merge; the count check below then fails. */
+            if (box1 && box2) {
+                box1->w = box2->x + box2->w - box1->x;  /* increase width */
+                boxaRemoveBox(boxacs, 2);
+            }
+            boxDestroy(&box1);
+            boxDestroy(&box2);
+        }
+        h = pixGetHeight(pixr);
+        nrowchars = boxaGetCount(boxacs);
+        for (j = 0; j < nrowchars; j++) {
+            box = boxaGetBox(boxacs, j, L_COPY);
+            if (box->w <= 2 && box->h == 1) {  /* skip 1x1, 2x1 components */
+                boxDestroy(&box);
+                continue;
+            }
+            box->y = 0;
+            box->h = h - 1;
+            pixc = pixClipRectangle(pixr, box, NULL);
+            boxDestroy(&box);
+            if (i == 0 && j == 0)  /* add a pix for the space; change later */
+                pixaAddPix(pixa, pixc, L_COPY);
+            if (i == 2 && j == 0)  /* add a pix for the '\'; change later */
+                pixaAddPix(pixa, pixc, L_COPY);
+            pixaAddPix(pixa, pixc, L_INSERT);
+        }
+        pixDestroy(&pixr);
+        pixDestroy(&pixrc);
+        boxaDestroy(&boxac);
+        boxaDestroy(&boxacs);
+    }
+    LEPT_FREE(tab);
+    boxaDestroy(&boxar);  /* not needed past this point */
+
+    nchars = pixaGetCount(pixa);
+    if (nchars != 95) {
+        pixaDestroy(&pixa);
+        return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL);
+    }
+
+    *pbl0 = baseline[0];
+    *pbl1 = baseline[1];
+    *pbl2 = baseline[2];
+
+        /* Fix the space character up; it should have no ON pixels,
+         * and be about twice as wide as the '!' character.    */
+    pix1 = pixaGetPix(pixa, 0, L_CLONE);
+    width = 2 * pixGetWidth(pix1);
+    height = pixGetHeight(pix1);
+    pixDestroy(&pix1);
+    pix1 = pixCreate(width, height, 1);
+    pixaReplacePix(pixa, 0, pix1, NULL);
+
+        /* Fix up the '\' character; use a LR flip of the '/' char */
+    pix1 = pixaGetPix(pixa, 15, L_CLONE);
+    pix2 = pixFlipLR(NULL, pix1);
+    pixDestroy(&pix1);
+    pixaReplacePix(pixa, 60, pix2, NULL);
+
+#if DEBUG_CHARS
+    pix1 = pixaDisplayTiled(pixa, 1500, 0, 10);
+    pixDisplay(pix1, 100 * i, 200);
+    pixDestroy(&pix1);
+#endif  /* DEBUG_CHARS */
+
+    return pixa;
+}
+
+
+/*!
+ * \brief   pixGetTextBaseline()
+ *
+ * \param[in]    pixs   1 bpp, one textline character set
+ * \param[in]    tab8   [optional] pixel sum table
+ * \param[out]   py     baseline value
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Method: find the largest difference in pixel sums from one
+ *          raster line to the next one below it.  The baseline is the
+ *          upper raster line for the pair of raster lines that
+ *          maximizes this function.
+ *      (2) If %tab8 is not supplied, a table is made here and freed
+ *          before returning.
+ *      (3) *py is 0 on any error, including failure to compute the
+ *          pixel sums for the rows.
+ * </pre>
+ */
+static l_int32
+pixGetTextBaseline(PIX      *pixs,
+                   l_int32  *tab8,
+                   l_int32  *py)
+{
+l_int32   i, h, val1, val2, diff, diffmax, ymax;
+l_int32  *tab;
+NUMA     *na;
+
+    PROCNAME("pixGetTextBaseline");
+
+    if (!pixs)
+        return ERROR_INT("pixs not defined", procName, 1);
+    if (!py)
+        return ERROR_INT("&y not defined", procName, 1);
+    *py = 0;
+    if (!tab8)
+        tab = makePixelSumTab8();
+    else
+        tab = tab8;
+
+    na = pixCountPixelsByRow(pixs, tab);
+    if (!tab8)  /* only free a table that was made here */
+        LEPT_FREE(tab);
+    if (!na)
+        return ERROR_INT("na not made", procName, 1);
+
+        /* Only decreases in the pixel sum, going down, are considered */
+    h = numaGetCount(na);
+    diffmax = 0;
+    ymax = 0;
+    for (i = 1; i < h; i++) {
+        numaGetIValue(na, i - 1, &val1);
+        numaGetIValue(na, i, &val2);
+        diff = L_MAX(0, val1 - val2);
+        if (diff > diffmax) {
+            diffmax = diff;
+            ymax = i - 1;  /* upper raster line */
+        }
+    }
+    *py = ymax;
+
+    numaDestroy(&na);
+    return 0;
+}
+
+
+/*!
+ * \brief   bmfMakeAsciiTables
+ *
+ * \param[in]    bmf
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This makes three tables, each of size 128, as follows:
+ *          ~ fonttab is a table containing the index of the Pix
+ *            that corresponds to each input ascii character;
+ *            it maps (ascii-index) --> Pixa index
+ *          ~ baselinetab is a table containing the baseline offset
+ *            for the Pix that corresponds to each input ascii character;
+ *            it maps (ascii-index) --> baseline offset
+ *          ~ widthtab is a table containing the character width in
+ *            pixels for the Pix that corresponds to that character;
+ *            it maps (ascii-index) --> bitmap width
+ *      (2) This also computes
+ *          ~ lineheight (sum of maximum character extensions above and
+ *            below the baseline)
+ *          ~ kernwidth (spacing between characters within a word)
+ *          ~ spacewidth (space between words)
+ *          ~ vertlinesep (extra vertical spacing between textlines)
+ *      (3) The baselines apply as follows:
+ *          baseline1   (ascii 32 - 57), ascii 92
+ *          baseline2   (ascii 58 - 91)
+ *          baseline3   (ascii 93 - 126)
+ *      (4) The only array in bmf that is not ascii-based is the
+ *          array of bitmaps in the pixa, which starts at ascii 32.
+ *      (5) Entries for non-printable ascii values are set to UNDEF.
+ *      (6) Each table is stored in %bmf as soon as it is allocated,
+ *          so on an allocation failure the tables already made
+ *          remain owned by %bmf and are freed by bmfDestroy().
+ * </pre>
+ */
+static l_int32
+bmfMakeAsciiTables(L_BMF  *bmf)
+{
+l_int32   i, maxh, height, charwidth, xwidth, kernwidth;
+l_int32  *fonttab, *baselinetab, *widthtab;
+PIX      *pix;
+
+    PROCNAME("bmfMakeAsciiTables");
+
+    if (!bmf)
+        return ERROR_INT("bmf not defined", procName, 1);
+
+        /* First get the fonttab; we use this later for the char widths */
+    fonttab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
+    if (!fonttab)
+        return ERROR_INT("fonttab not made", procName, 1);
+    bmf->fonttab = fonttab;
+    for (i = 0; i < 128; i++)
+        fonttab[i] = UNDEF;
+    for (i = 32; i < 127; i++)
+        fonttab[i] = i - 32;
+
+    baselinetab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
+    if (!baselinetab)
+        return ERROR_INT("baselinetab not made", procName, 1);
+    bmf->baselinetab = baselinetab;
+    for (i = 0; i < 128; i++)
+        baselinetab[i] = UNDEF;
+    for (i = 32; i <= 57; i++)
+        baselinetab[i] = bmf->baseline1;
+    for (i = 58; i <= 91; i++)
+        baselinetab[i] = bmf->baseline2;
+    baselinetab[92] = bmf->baseline1;  /* the '\' char */
+    for (i = 93; i < 127; i++)
+        baselinetab[i] = bmf->baseline3;
+
+        /* Generate array of character widths; req's fonttab to exist */
+    widthtab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
+    if (!widthtab)
+        return ERROR_INT("widthtab not made", procName, 1);
+    bmf->widthtab = widthtab;
+    for (i = 0; i < 128; i++)
+        widthtab[i] = UNDEF;
+    for (i = 32; i < 127; i++) {
+        bmfGetWidth(bmf, i, &charwidth);
+        widthtab[i] = charwidth;
+    }
+
+        /* Get the line height of text characters, from the highest
+         * ascender to the lowest descender; req's fonttab to exist.
+         * One character is sampled from each of the three rows. */
+    pix =  bmfGetPix(bmf, 32);
+    maxh =  pixGetHeight(pix);
+    pixDestroy(&pix);
+    pix =  bmfGetPix(bmf, 58);
+    height =  pixGetHeight(pix);
+    pixDestroy(&pix);
+    maxh = L_MAX(maxh, height);
+    pix =  bmfGetPix(bmf, 93);
+    height =  pixGetHeight(pix);
+    pixDestroy(&pix);
+    maxh = L_MAX(maxh, height);
+    bmf->lineheight = maxh;
+
+        /* Get the kern width (distance between characters).
+         * We let it be the same for all characters in a given
+         * font size, and scale it linearly with the size;
+         * req's fonttab to be built first.  Ascii 120 is 'x'. */
+    bmfGetWidth(bmf, 120, &xwidth);
+    kernwidth = (l_int32)(0.08 * (l_float32)xwidth + 0.5);
+    bmf->kernwidth = L_MAX(1, kernwidth);
+
+        /* Save the space width (between words) */
+    bmfGetWidth(bmf, 32, &charwidth);
+    bmf->spacewidth = charwidth;
+
+        /* Save the extra vertical space between lines */
+    bmf->vertlinesep = (l_int32)(VertFractSep * bmf->lineheight + 0.5);
+
+    return 0;
+}