nixpkgs/pkgs/applications/misc/k2pdfopt/mupdf.patch

From d8927c969e3387ca2669a616c0ba53bce918a031 Mon Sep 17 00:00:00 2001
From: Daniel Fullmer <danielrf12@gmail.com>
Date: Fri, 13 Sep 2019 15:11:45 -0400
Subject: [PATCH] Willus mod for k2pdfopt

---
 source/fitz/filter-basic.c |   3 +
 source/fitz/font-win32.c   | 866 +++++++++++++++++++++++++++++++++++++
 source/fitz/font.c         |   3 +
 source/fitz/stext-device.c |   5 +
 source/fitz/string.c       |   5 +
 source/pdf/pdf-annot.c     |  14 +-
 source/pdf/pdf-link.c      |   3 +
 source/pdf/pdf-parse.c     |   5 +
 source/pdf/pdf-xref.c      |   9 +
 9 files changed, 912 insertions(+), 1 deletion(-)
 create mode 100644 source/fitz/font-win32.c

diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
index 0713a62e7..b8ef4d292 100644
--- a/source/fitz/filter-basic.c
+++ b/source/fitz/filter-basic.c
@@ -259,7 +259,10 @@ look_for_endstream:
 	if (!state->warned)
 	{
 		state->warned = 1;
+/* willus mod -- no warning */
+/*
 		fz_warn(ctx, "PDF stream Length incorrect");
+*/
 	}
 	return *stm->rp++;
 }
diff --git a/source/fitz/font-win32.c b/source/fitz/font-win32.c
new file mode 100644
index 000000000..45de8cfd3
--- /dev/null
+++ b/source/fitz/font-win32.c
@@ -0,0 +1,866 @@
+/*
+** Routines to access MS Windows system fonts.
+** From sumatra PDF distro.
+** Modified for MuPDF v1.9a by willus.com
+*/
+#include "mupdf/pdf.h"
+
+/*
+	Which fonts are embedded is based on a few preprocessor definitions.
+
+	The base 14 fonts are always embedded.
+	For CJK font substitution we embed DroidSansFallback.
+
+	Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps)
+	Set NOCJKFONT to skip the embedded CJK font.
+	Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support.
+*/
+
+#ifdef NOCJK
+#define NOCJKFONT
+#endif
+
+/* SumatraPDF: also load fonts included with Windows */
+#ifdef _WIN32
+
+#ifndef UNICODE
+#define UNICODE
+#endif
+#ifndef _UNICODE
+#define _UNICODE
+#endif
+
+#include <windows.h>
+
+// TODO: Use more of FreeType for TTF parsing (for performance reasons,
+//       the fonts can't be parsed completely, though)
+#include <ft2build.h>
+#include FT_TRUETYPE_IDS_H
+#include FT_TRUETYPE_TAGS_H
+
+#define TTC_VERSION1	0x00010000
+#define TTC_VERSION2	0x00020000
+
+#define MAX_FACENAME	128
+
+// One font-list entry. Note: the font face must be the first field so that the
+//       structure can be treated like a simple string when sorting and searching
+typedef struct pdf_fontmapMS_s
+{
+	char fontface[MAX_FACENAME];	// face name, used as the lookup key
+	char fontpath[MAX_PATH];	// UTF-8 path of the font file
+	int index;	// face index within the file (0 for a plain TTF/OTF, i for the i-th face of a TTC)
+} pdf_fontmapMS;
+
+typedef struct pdf_fontlistMS_s	// growable array of font mappings
+{
+	pdf_fontmapMS *fontmap;	// entries; (re)allocated with realloc()
+	int len;	// number of entries in use
+	int cap;	// number of entries allocated
+} pdf_fontlistMS;
+
+typedef struct _tagTT_OFFSET_TABLE	// header at the start of each font; read raw from disk, so fields are big-endian
+{
+	ULONG	uVersion;	// checked against TTC_VERSION1 / TTAG_OTTO
+	USHORT	uNumOfTables;	// number of TT_TABLE_DIRECTORY entries that follow
+	USHORT	uSearchRange;	// not read by this file
+	USHORT	uEntrySelector;	// not read by this file
+	USHORT	uRangeShift;	// not read by this file
+} TT_OFFSET_TABLE;
+
+typedef struct _tagTT_TABLE_DIRECTORY	// one table directory entry; read raw from disk, so fields are big-endian
+{
+	ULONG	uTag;				// table name (compared against TTAG_name)
+	ULONG	uCheckSum;			// check sum (not verified by this file)
+	ULONG	uOffset;			// offset from beginning of file
+	ULONG	uLength;			// length of the table in bytes
+} TT_TABLE_DIRECTORY;
+
+typedef struct _tagTT_NAME_TABLE_HEADER	// start of the 'name' table; read raw from disk, so fields are big-endian
+{
+	USHORT	uFSelector;			// format selector. Always 0
+	USHORT	uNRCount;			// number of TT_NAME_RECORD entries following this header
+	USHORT	uStorageOffset;		// offset of the string storage, from start of the table
+} TT_NAME_TABLE_HEADER;
+
+typedef struct _tagTT_NAME_RECORD	// one 'name' table record; read raw from disk, so fields are big-endian
+{
+	USHORT	uPlatformID;	// together with uEncodingID selects how the string is decoded
+	USHORT	uEncodingID;
+	USHORT	uLanguageID;	// used to keep only English names (and Chinese family names)
+	USHORT	uNameID;	// tells family, subfamily and PostScript names apart
+	USHORT	uStringLength;	// length of the string in bytes
+	USHORT	uStringOffset;	// from start of storage area
+} TT_NAME_RECORD;
+
+typedef struct _tagFONT_COLLECTION	// TTC file header; read raw from disk, so fields are big-endian
+{
+	ULONG	Tag;	// must be TTAG_ttcf
+	ULONG	Version;	// TTC_VERSION1 or TTC_VERSION2
+	ULONG	NumFonts;	// number of ULONG font offsets stored right after this header
+} FONT_COLLECTION;
+
+static const struct {	/* read-only table: base-14 name -> Windows face tried in its place */
+	const char *name;	/* base-14 PostScript font name as it appears in a PDF */
+	const char *pattern;	/* face name looked up in the system font list instead */
+} baseSubstitutes[] = {
+	{ "Courier", "CourierNewPSMT" },
+	{ "Courier-Bold", "CourierNewPS-BoldMT" },
+	{ "Courier-Oblique", "CourierNewPS-ItalicMT" },
+	{ "Courier-BoldOblique", "CourierNewPS-BoldItalicMT" },
+	{ "Helvetica", "ArialMT" },
+	{ "Helvetica-Bold", "Arial-BoldMT" },
+	{ "Helvetica-Oblique", "Arial-ItalicMT" },
+	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT" },
+	{ "Times-Roman", "TimesNewRomanPSMT" },
+	{ "Times-Bold", "TimesNewRomanPS-BoldMT" },
+	{ "Times-Italic", "TimesNewRomanPS-ItalicMT" },
+	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT" },
+	{ "Symbol", "SymbolMT" },
+};
+static const char *base_font_names[][10] =	/* per row: canonical base-14 name first, then its aliases, NULL-terminated */
+{
+	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
+	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
+		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
+	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
+		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
+	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
+		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
+	{ "Helvetica", "ArialMT", "Arial", NULL },
+	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
+		"Helvetica,Bold", NULL },
+	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
+		"Helvetica,Italic", "Helvetica-Italic", NULL },
+	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
+		"Arial,BoldItalic", "Arial-BoldItalic",
+		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
+	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
+		"TimesNewRomanPS", NULL },
+	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
+		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
+	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
+		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
+	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
+		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
+		"TimesNewRoman-BoldItalic", NULL },
+	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
+		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
+	{ "ZapfDingbats", NULL }
+};
+
+static pdf_fontlistMS fontlistMS =	/* the single, file-wide list of system fonts found so far */
+{
+	NULL,	/* fontmap */
+	0,	/* len */
+	0,	/* cap */
+};
+static int strcmp_ignore_space(const char *a, const char *b);
+static const char *clean_font_name(const char *fontname);
+static const char *pdf_clean_base14_name(const char *fontname);
+
+static inline USHORT BEtoHs(USHORT x)	/* big-endian (file order) 16-bit value -> host order */
+{
+	const BYTE *bytes = (const BYTE *)&x;
+	return (USHORT)((bytes[0] << 8) + bytes[1]);
+}
+
+static inline ULONG BEtoHl(ULONG x)	/* big-endian (file order) 32-bit value -> host order */
+{
+	const BYTE *data = (const BYTE *)&x;	/* bytes are widened first: (int)data[0] << 24 is undefined once bit 7 is set */
+	return ((ULONG)data[0] << 24) | ((ULONG)data[1] << 16) | ((ULONG)data[2] << 8) | (ULONG)data[3];
+}
+
+/* Compare two strings while skipping every ' ' in either of them.
+   Returns 0 when they match and 1 otherwise (no ordering is implied). */
+static int strcmp_ignore_space(const char *a, const char *b)
+{
+	for (;;)
+	{
+		for (; *a == ' '; a++)
+			;
+		for (; *b == ' '; b++)
+			;
+		if (*a != *b)
+			return 1;
+		/* both characters are equal here, so one terminator check suffices */
+		if (*a == '\0')
+			return 0;
+		a++, b++;
+	}
+}
+
+/* Case-insensitive comparator for font face names (used with bsearch and directly).
+   When the names differ in length and the longer one continues with a ',' or with
+   exactly "-roman" past the common length, only their common prefix is compared,
+   so that e.g. "EurostileExtended" matches "EurostileExtended-Roman" or
+   "Tahoma-Bold,Bold" matches "Tahoma-Bold". */
+static int
+lookup_compare(const void *elem1, const void *elem2)
+{
+	const char *lhs = elem1;
+	const char *rhs = elem2;
+	int lhs_len = strlen(lhs);
+	int rhs_len = strlen(rhs);
+	int common = fz_mini(lhs_len, rhs_len);
+	/* what remains of the longer name past the common length */
+	const char *tail = lhs_len > rhs_len ? lhs + common : rhs + common;
+	if (lhs_len != rhs_len && (*tail == ',' || _stricmp(tail, "-roman") == 0))
+		return _strnicmp(lhs, rhs, common);
+	return _stricmp(lhs, rhs);
+}
+
+/* Delete every ' ' from the NUL-terminated string srcDest, in place. */
+static void remove_spaces(char *srcDest)
+{
+	char *out = srcDest;
+	const char *in;
+	for (in = srcDest; *in != '\0'; in++)
+		if (*in != ' ')
+			*out++ = *in;
+	*out = '\0';
+}
+
+/* Return 1 when str ends with the (case-sensitive) suffix end, 0 otherwise. */
+static int str_ends_with(const char *str, const char *end)
+{
+	size_t str_len = strlen(str), end_len = strlen(end);
+	if (end_len > str_len)
+		return 0;
+	return strcmp(str + (str_len - end_len), end) == 0;
+}
+
+/* Binary-search the global font list for fontname (matching via lookup_compare); NULL when nothing matches. */
+static pdf_fontmapMS *pdf_find_windows_font_path(const char *fontname)
+{
+	pdf_fontlistMS *list = &fontlistMS;
+	return bsearch(fontname, list->fontmap, list->len, sizeof(*list->fontmap), lookup_compare);
+}
+
+/* Decode sourcelen bytes of big-endian UTF-16 into NUL-terminated UTF-8 in dest
+   (at most destlen bytes); throws on an odd length or when the conversion fails.
+   source and dest can be same */
+static void
+decode_unicode_BE(fz_context *ctx, char *source, int sourcelen, char *dest, int destlen)
+{
+	WCHAR *tmp;
+	int converted, i;
+	if (sourcelen % 2 != 0)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
+	tmp = fz_malloc_array(ctx, sourcelen / 2 + 1, sizeof(WCHAR));
+	/* assemble each code unit from its two bytes: source is a plain char buffer and need not be WCHAR-aligned */
+	for (i = 0; i < sourcelen / 2; i++)
+		tmp[i] = (WCHAR)(((unsigned char)source[2 * i] << 8) | (unsigned char)source[2 * i + 1]);
+	tmp[sourcelen / 2] = '\0';
+	converted = WideCharToMultiByte(CP_UTF8, 0, tmp, -1, dest, destlen, NULL, NULL);
+	fz_free(ctx, tmp);
+	if (!converted)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
+}
+
+/* Convert a 'name' table string to NUL-terminated text in dest (destlen bytes).
+   Big-endian UTF-16 decoding is used for Apple Unicode (default, 2.0) and
+   Microsoft (symbol, Unicode, UCS-4) encodings; Macintosh Roman bytes are
+   copied unchanged.  Every other platform/encoding pair, an overlong Roman
+   name and a failed conversion all end in fz_throw. */
+static void
+decode_platform_string(fz_context *ctx, int platform, int enctype, char *source, int sourcelen, char *dest, int destlen)
+{
+	int is_utf16be = 0;
+	int is_mac_roman = 0;
+
+	/* classify the platform/encoding pair first */
+	if (platform == TT_PLATFORM_APPLE_UNICODE)
+		is_utf16be = (enctype == TT_APPLE_ID_DEFAULT || enctype == TT_APPLE_ID_UNICODE_2_0);
+	else if (platform == TT_PLATFORM_MICROSOFT)
+		is_utf16be = (enctype == TT_MS_ID_SYMBOL_CS || enctype == TT_MS_ID_UNICODE_CS ||
+			enctype == TT_MS_ID_UCS_4);
+	else if (platform == TT_PLATFORM_MACINTOSH)
+		is_mac_roman = (enctype == TT_MAC_ID_ROMAN);
+
+	if (is_utf16be)
+	{
+		decode_unicode_BE(ctx, source, sourcelen, dest, destlen);
+		return;
+	}
+
+	/* single-byte names are copied verbatim */
+	if (is_mac_roman)
+	{
+		if (sourcelen + 1 > destlen)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : overlong fontname: %s", source);
+		// TODO: Convert to UTF-8 from what encoding?
+		memcpy(dest, source, sourcelen);
+		dest[sourcelen] = 0;
+		return;
+	}
+
+	/* anything that was not recognised above */
+	fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
+}
+
+/* Enlarge fl's entry array: 1024 entries initially, doubling afterwards.
+   The added entries are zero-filled; fl->len is left untouched.
+   Throws (leaving fl unchanged) when realloc fails. */
+static void
+grow_system_font_list(fz_context *ctx, pdf_fontlistMS *fl)
+{
+	int oldcap = fl->cap;
+	int newcap = oldcap == 0 ? 1024 : oldcap * 2;
+	pdf_fontmapMS *grown;
+
+	// use realloc/free for the fontmap, since the list can
+	// remain in memory even with all fz_contexts destroyed
+	grown = realloc(fl->fontmap, newcap * sizeof(pdf_fontmapMS));
+	if (grown == NULL)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "OOM in grow_system_font_list");
+
+	// clear only the freshly added tail
+	memset(grown + oldcap, 0, (newcap - oldcap) * sizeof(pdf_fontmapMS));
+	fl->fontmap = grown;
+	fl->cap = newcap;
+}
+
+/* Append one (facename, path, index) entry to fl, growing the array when it is full.
+   Both strings are copied into the entry's fixed-size buffers with fz_strlcpy. */
+static void
+append_mapping(fz_context *ctx, pdf_fontlistMS *fl, const char *facename, const char *path, int index)
+{
+	pdf_fontmapMS *slot;
+	if (fl->len == fl->cap)
+		grow_system_font_list(ctx, fl);
+	if (fl->len >= fl->cap)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : fontlist overflow");
+	slot = &fl->fontmap[fl->len++];
+	fz_strlcpy(slot->fontface, facename, sizeof(slot->fontface));
+	fz_strlcpy(slot->fontpath, path, sizeof(slot->fontpath));
+	slot->index = index;
+}
+
+/* Seek file to offset (whence 0) and read exactly size bytes into buf; throws on a short read. */
+static void safe_read(fz_context *ctx, fz_stream *file, int offset, char *buf, int size)
+{
+	int got;
+	fz_seek(ctx, file, offset, 0);
+	got = fz_read(ctx, file, (unsigned char *)buf, size);
+	if (got != size)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "safe_read: read %d, expected %d", got, size);
+}
+
+/* Read the string described by ttRecordBE (stored at offset + its uStringOffset) and
+   decode it into buf (size bytes); empty and overlong strings leave buf untouched. */
+static void
+read_ttf_string(fz_context *ctx, fz_stream *file, int offset, TT_NAME_RECORD *ttRecordBE, char *buf, int size)
+{
+	/* zero-filled so the raw bytes stay NUL-terminated for the "%s" in decode_platform_string's error */
+	char szTemp[MAX_FACENAME * 2] = { 0 };
+	int stringLength = BEtoHs(ttRecordBE->uStringLength);
+	if (stringLength == 0 || stringLength >= (int)sizeof(szTemp))
+		return;
+	safe_read(ctx, file, offset + BEtoHs(ttRecordBE->uStringOffset), szTemp, stringLength);
+	decode_platform_string(ctx, BEtoHs(ttRecordBE->uPlatformID), BEtoHs(ttRecordBE->uEncodingID), szTemp, stringLength, buf, size);
+}
+
+/* Append "-<style>" to szName unless the style is empty or "Regular" (any case), then strip all spaces. */
+static void makeFakePSName(char szName[MAX_FACENAME], const char *szStyle)
+{
+	int has_style = szStyle[0] != '\0' && _stricmp(szStyle, "Regular") != 0;
+	if (has_style)
+	{
+		fz_strlcat(szName, "-", MAX_FACENAME);
+		fz_strlcat(szName, szStyle, MAX_FACENAME);
+	}
+	remove_spaces(szName);
+}
+/* Parse the font starting at byte offset of file and add its PostScript, family-derived and Chinese family names (all mapped to path/index) to fontlistMS. */
+static void
+parseTTF(fz_context *ctx, fz_stream *file, int offset, int index, const char *path)
+{
+	TT_OFFSET_TABLE ttOffsetTableBE;
+	TT_TABLE_DIRECTORY tblDirBE;
+	TT_NAME_TABLE_HEADER ttNTHeaderBE;
+	TT_NAME_RECORD ttRecordBE;
+
+	char szPSName[MAX_FACENAME] = { 0 };	// PostScript name
+	char szTTName[MAX_FACENAME] = { 0 };	// family name (English or language 0)
+	char szStyle[MAX_FACENAME] = { 0 };	// subfamily name, e.g. "Bold"
+	char szCJKName[MAX_FACENAME] = { 0 };	// family name from a Chinese-language record
+	int i, count, tblOffset;
+
+	safe_read(ctx, file, offset, (char *)&ttOffsetTableBE, sizeof(TT_OFFSET_TABLE));
+
+	// check if this is a TrueType font of version 1.0 or an OpenType font
+	if (BEtoHl(ttOffsetTableBE.uVersion) != TTC_VERSION1 &&
+		BEtoHl(ttOffsetTableBE.uVersion) != TTAG_OTTO)
+	{
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid font version %x", (unsigned int)BEtoHl(ttOffsetTableBE.uVersion));
+	}
+
+	// determine the name table's offset by iterating through the offset table
+	count = BEtoHs(ttOffsetTableBE.uNumOfTables);
+	for (i = 0; i < count; i++)
+	{
+		int entryOffset = offset + sizeof(TT_OFFSET_TABLE) + i * sizeof(TT_TABLE_DIRECTORY);
+		safe_read(ctx, file, entryOffset, (char *)&tblDirBE, sizeof(TT_TABLE_DIRECTORY));
+		if (!BEtoHl(tblDirBE.uTag) || BEtoHl(tblDirBE.uTag) == TTAG_name)
+			break;	// stop at the 'name' entry, or at a zero tag (rejected just below)
+	}
+	if (count == i || !BEtoHl(tblDirBE.uTag))
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : nameless font");
+	tblOffset = BEtoHl(tblDirBE.uOffset);
+
+	// read the 'name' table for record count and offsets
+	safe_read(ctx, file, tblOffset, (char *)&ttNTHeaderBE, sizeof(TT_NAME_TABLE_HEADER));
+	offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER);	// from here on: position of the first name record
+	tblOffset += BEtoHs(ttNTHeaderBE.uStorageOffset);	// from here on: position of the string storage
+
+	// read through the strings for PostScript name and font family
+	count = BEtoHs(ttNTHeaderBE.uNRCount);
+	for (i = 0; i < count; i++)
+	{
+		short langId, nameId;
+		BOOL isCJKName;
+
+		safe_read(ctx, file, offset + i * sizeof(TT_NAME_RECORD), (char *)&ttRecordBE, sizeof(TT_NAME_RECORD));
+
+		langId = BEtoHs(ttRecordBE.uLanguageID);
+		nameId = BEtoHs(ttRecordBE.uNameID);
+		isCJKName = TT_NAME_ID_FONT_FAMILY == nameId && LANG_CHINESE == PRIMARYLANGID(langId);
+
+		// ignore non-English strings (except for Chinese font names); language ID 0 is accepted as well
+		if (langId && langId != TT_MS_LANGID_ENGLISH_UNITED_STATES && !isCJKName)
+			continue;
+		// ignore names other than font (sub)family and PostScript name; a decoding failure only produces a warning
+		fz_try(ctx)
+		{
+			if (isCJKName)
+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szCJKName, sizeof(szCJKName));
+			else if (TT_NAME_ID_FONT_FAMILY == nameId)
+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szTTName, sizeof(szTTName));
+			else if (TT_NAME_ID_FONT_SUBFAMILY == nameId)
+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szStyle, sizeof(szStyle));
+			else if (TT_NAME_ID_PS_NAME == nameId)
+				read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szPSName, sizeof(szPSName));
+		}
+		fz_catch(ctx)
+		{
+			fz_warn(ctx, "ignoring face name decoding fonterror");
+		}
+	}
+
+	// try to prevent non-Arial fonts from accidentally substituting Arial
+	if (!strcmp(szPSName, "ArialMT"))
+	{
+		// cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2471
+		if (strcmp(szTTName, "Arial") != 0)
+			szPSName[0] = '\0';	// drop the misleading PostScript name, keep the family-derived one
+		// TODO: is there a better way to distinguish Arial Caps from Arial proper?
+		// cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290
+		else if (strstr(path, "caps") || strstr(path, "Caps"))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "ignore %s, as it can't be distinguished from Arial,Regular", path);
+	}
+
+	if (szPSName[0])
+		append_mapping(ctx, &fontlistMS, szPSName, path, index);
+	if (szTTName[0])
+	{
+		// derive a PostScript-like name and add it, if it's different from the font's
+		// included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376
+		makeFakePSName(szTTName, szStyle);
+		// compare the two names before adding this one (lookup_compare is non-zero when they differ)
+		if (lookup_compare(szTTName, szPSName))
+			append_mapping(ctx, &fontlistMS, szTTName, path, index);
+	}
+	if (szCJKName[0])
+	{
+		makeFakePSName(szCJKName, szStyle);
+		if (lookup_compare(szCJKName, szPSName) && lookup_compare(szCJKName, szTTName))
+			append_mapping(ctx, &fontlistMS, szCJKName, path, index);
+	}
+}
+/* Open path as a single-font file and parse it as face 0; the stream is dropped whether or not parsing throws, and any error is rethrown. */
+static void
+parseTTFs(fz_context *ctx, const char *path)
+{
+	fz_stream *file = fz_open_file(ctx, path);
+	/* NOTE(review): the next line looks like the message of a removed "file not found" check; presumably fz_open_file reports that itself -- confirm */
+	/* "fonterror : %s not found", path */
+	fz_try(ctx)
+	{
+		parseTTF(ctx, file, 0, 0, path);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_stream(ctx,file);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+/* Parse every face of the TrueType collection at path; the offset table and the stream are released whether or not parsing throws, and any error is rethrown. */
+static void
+parseTTCs(fz_context *ctx, const char *path)
+{
+	FONT_COLLECTION fontcollectionBE;
+	ULONG i, numFonts, *offsettableBE = NULL;
+
+	fz_stream *file = fz_open_file(ctx, path);
+	/* "fonterror : %s not found", path -- NOTE(review): looks like the message of a removed check; presumably fz_open_file reports this itself -- confirm */
+
+	fz_var(offsettableBE);
+
+	fz_try(ctx)
+	{
+		safe_read(ctx, file, 0, (char *)&fontcollectionBE, sizeof(FONT_COLLECTION));
+		if (BEtoHl(fontcollectionBE.Tag) != TTAG_ttcf)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : wrong format %x", (unsigned int)BEtoHl(fontcollectionBE.Tag));
+		if (BEtoHl(fontcollectionBE.Version) != TTC_VERSION1 &&
+			BEtoHl(fontcollectionBE.Version) != TTC_VERSION2)
+		{
+			fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid version %x", (unsigned int)BEtoHl(fontcollectionBE.Version));
+		}
+
+		numFonts = BEtoHl(fontcollectionBE.NumFonts);
+		offsettableBE = fz_malloc_array(ctx, numFonts, sizeof(ULONG));
+
+		safe_read(ctx, file, sizeof(FONT_COLLECTION), (char *)offsettableBE, numFonts * sizeof(ULONG));	// the per-face offsets follow the header directly
+		for (i = 0; i < numFonts; i++)
+			parseTTF(ctx, file, BEtoHl(offsettableBE[i]), i, path);	// a throw here also skips the remaining faces
+	}
+	fz_always(ctx)
+	{
+		fz_free(ctx, offsettableBE);
+		fz_drop_stream(ctx,file);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+/* Add every .ttc/.ttf/.otf file matching the search pattern path to fontlistMS; a missing file or directory is silently ignored, as are per-file parse errors. */
+static void
+extend_system_font_list(fz_context *ctx, const WCHAR *path)
+{
+	WCHAR szPath[MAX_PATH], *lpFileName;
+	WIN32_FIND_DATA FileData;
+	HANDLE hList;
+
+	GetFullPathName(path, nelem(szPath), szPath, &lpFileName);
+
+	hList = FindFirstFile(szPath, &FileData);
+	if (hList == INVALID_HANDLE_VALUE)
+	{
+		// Don't complain about missing files or directories (the latter is reported as ERROR_PATH_NOT_FOUND)
+		if (GetLastError() == ERROR_FILE_NOT_FOUND || GetLastError() == ERROR_PATH_NOT_FOUND)
+			return;
+		fz_throw(ctx, FZ_ERROR_GENERIC, "extend_system_font_list: unknown error %d", (int)GetLastError());
+	}
+	do
+	{
+		if (!(FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
+		{
+			char szPathUtf8[MAX_PATH], *fileExt;
+			int res;
+			lstrcpyn(lpFileName, FileData.cFileName, szPath + MAX_PATH - lpFileName);	// overwrite the file part of szPath with the found name
+			res = WideCharToMultiByte(CP_UTF8, 0, szPath, -1, szPathUtf8, sizeof(szPathUtf8), NULL, NULL);
+			if (!res)
+			{
+				fz_warn(ctx, "WideCharToMultiByte failed for %S", szPath);
+				continue;
+			}
+			fileExt = szPathUtf8 + strlen(szPathUtf8) - 4;	// the last four characters, e.g. ".ttf"
+			fz_try(ctx)
+			{
+				if (!_stricmp(fileExt, ".ttc"))
+					parseTTCs(ctx, szPathUtf8);
+				else if (!_stricmp(fileExt, ".ttf") || !_stricmp(fileExt, ".otf"))
+					parseTTFs(ctx, szPathUtf8);
+			}
+			fz_catch(ctx)
+			{
+				// ignore errors occurring while parsing a given font file
+			}
+		}
+	} while (FindNextFile(hList, &FileData));
+	FindClose(hList);
+}
+
+static void destroy_system_font_list(void)	/* registered with atexit(): free the list and leave it empty */
+{
+	free(fontlistMS.fontmap);
+	fontlistMS.fontmap = NULL;
+	fontlistMS.len = fontlistMS.cap = 0;
+}
+/* Fill fontlistMS from <Windows directory>\Fonts (plus optional extra locations), sort it for bsearch, and register its cleanup with atexit(). */
+static void
+create_system_font_list(fz_context *ctx)
+{
+	WCHAR szFontDir[MAX_PATH];
+	UINT cch;
+
+	cch = GetWindowsDirectory(szFontDir, nelem(szFontDir) - 12);	// keep room for the 12-character pattern appended below
+	if (0 < cch && cch < nelem(szFontDir) - 12)
+	{
+        /* willus.com edit--Win XP default MSVCRT.DLL doesn't have wcscat_s */
+#ifdef _WIN64
+		wcscat_s(szFontDir, MAX_PATH, L"\\Fonts\\*.?t?");
+#else
+		wcscat(szFontDir,L"\\Fonts\\*.?t?");
+#endif
+		extend_system_font_list(ctx, szFontDir);
+	}
+
+	if (fontlistMS.len == 0)
+		fz_warn(ctx, "couldn't find any usable system fonts");
+
+#ifdef NOCJKFONT
+	{
+		// If no CJK fallback font is builtin but one has been shipped separately (in the same
+		// directory as the main executable), add it to the list of loadable system fonts
+		WCHAR szFile[MAX_PATH], *lpFileName;
+		GetModuleFileName(0, szFontDir, MAX_PATH);
+		GetFullPathName(szFontDir, MAX_PATH, szFile, &lpFileName);
+		lstrcpyn(lpFileName, L"DroidSansFallback.ttf", szFile + MAX_PATH - lpFileName);	// swap the executable's file name for the font's
+		extend_system_font_list(ctx, szFile);
+	}
+#endif
+
+	// sort the font list, so that it can be searched binarily (entries compare as strings because fontface is their first field)
+	qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp);	// NOTE(review): _stricmp is not declared as int (*)(const void *, const void *) -- confirm the compilers in use accept this
+
+#ifdef DEBUG
+	// allow to overwrite system fonts for debugging purposes
+	// (either pass a full path or a search pattern such as "fonts\*.ttf")
+	cch = GetEnvironmentVariable(L"MUPDF_FONTS_PATTERN", szFontDir, nelem(szFontDir));
+	if (0 < cch && cch < nelem(szFontDir))
+	{
+		int i, prev_len = fontlistMS.len;
+		extend_system_font_list(ctx, szFontDir);
+		for (i = prev_len; i < fontlistMS.len; i++)
+		{
+			pdf_fontmapMS *entry = bsearch(fontlistMS.fontmap[i].fontface, fontlistMS.fontmap, prev_len, sizeof(pdf_fontmapMS), lookup_compare);
+			if (entry)
+				*entry = fontlistMS.fontmap[i];	// the override replaces the system font with a matching name
+		}
+		qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp);
+	}
+#endif
+
+	// make sure to clean up after ourselves (this runs on every call, i.e. whenever the list was found empty)
+	atexit(destroy_system_font_list);
+}
+/* Find orig_name among the system fonts (building the global list first if it is empty) and load it; throws when no fonts are available or none matches. */
+static fz_font *
+pdf_load_windows_font_by_name(fz_context *ctx, const char *orig_name)
+{
+	pdf_fontmapMS *found = NULL;
+	char *comma, *fontname;
+	fz_font *font;
+
+    /* WILLUS MOD--not multi-threaded for k2pdfopt */
+	/* fz_synchronize_begin(); */
+	if (fontlistMS.len == 0)
+	{
+		fz_try(ctx)
+		{
+			create_system_font_list(ctx);
+		}
+		fz_catch(ctx) { }	/* errors are swallowed here; an empty list is reported just below */
+	}
+    /* WILLUS MOD--not multi-threaded for k2pdfopt */
+	/* fz_synchronize_end(); */
+	if (fontlistMS.len == 0)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: couldn't find any fonts");
+
+	// work on a normalized copy of the font name
+	fontname = fz_strdup(ctx, orig_name);
+	remove_spaces(fontname);
+
+	// first, try to find the exact font name (including appended style information)
+	comma = strchr(fontname, ',');
+	if (comma)
+	{
+		*comma = '-';	// look up "Name-Style" for a "Name,Style" request
+		found = pdf_find_windows_font_path(fontname);
+		*comma = ',';
+	}
+	// second, substitute the font name with a known PostScript name
+	else
+	{
+		int i;
+		for (i = 0; i < nelem(baseSubstitutes) && !found; i++)
+			if (!strcmp(fontname, baseSubstitutes[i].name))
+				found = pdf_find_windows_font_path(baseSubstitutes[i].pattern);
+	}
+	// third, search for the font name without additional style information
+	if (!found)
+		found = pdf_find_windows_font_path(fontname);
+	// fourth, try to separate style from basename for prestyled fonts (e.g. "ArialBold")
+	if (!found && !comma && (str_ends_with(fontname, "Bold") || str_ends_with(fontname, "Italic")))
+	{
+		int styleLen = str_ends_with(fontname, "Bold") ? 4 : str_ends_with(fontname, "BoldItalic") ? 10 : 6;
+		fontname = fz_resize_array(ctx, fontname, strlen(fontname) + 2, sizeof(char));	// one extra byte for the separator inserted below
+		comma = fontname + strlen(fontname) - styleLen;
+		memmove(comma + 1, comma, styleLen + 1);	// shift the style suffix (and its NUL) right by one
+		*comma = '-';
+		found = pdf_find_windows_font_path(fontname);
+		*comma = ',';
+		if (!found)
+			found = pdf_find_windows_font_path(fontname);
+	}
+	// fifth, try to convert the font name from the common Chinese codepage 936
+	if (!found && fontname[0] < 0)	// NOTE(review): this test can only be true where plain char is signed -- confirm
+	{
+		WCHAR cjkNameW[MAX_FACENAME];
+		char cjkName[MAX_FACENAME];
+		if (MultiByteToWideChar(936, MB_ERR_INVALID_CHARS, fontname, -1, cjkNameW, nelem(cjkNameW)) &&
+			WideCharToMultiByte(CP_UTF8, 0, cjkNameW, -1, cjkName, nelem(cjkName), NULL, NULL))
+		{
+			comma = strchr(cjkName, ',');
+			if (comma)
+			{
+				*comma = '-';
+				found = pdf_find_windows_font_path(cjkName);
+				*comma = ',';
+			}
+			if (!found)
+				found = pdf_find_windows_font_path(cjkName);
+		}
+	}
+
+	fz_free(ctx, fontname);
+	if (!found)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "couldn't find system font '%s'", orig_name);
+
+    /*
+	fz_warn(ctx, "loading non-embedded font '%s' from '%s'", orig_name, found->fontpath);
+    */
+
+	font = fz_new_font_from_file(ctx, orig_name, found->fontpath, found->index,
+		strcmp(found->fontface, "DroidSansFallback") != 0);	/* last argument is 0 only for the DroidSansFallback face */
+    /* willus mod for MuPDF v1.10, 10-21-2016 */
+    {
+    fz_font_flags_t *flags;
+    flags=fz_font_flags(font);
+    if (flags!=NULL)
+    	flags->ft_substitute = 1;
+    }
+	return font;
+}
+
+/* Resolve fontname among the Windows system fonts.
+   With needs_exact_metrics set, NULL is returned instead when a builtin font
+   exists for the request or when fontname is an alias of a base-14 "Times-*"
+   name; bold and italic are only used for the builtin lookup. */
+static fz_font *
+pdf_load_windows_font(fz_context *ctx, const char *fontname, int bold, int italic, int needs_exact_metrics)
+{
+	const char *clean_name;
+	int len;
+
+	if (needs_exact_metrics)
+	{
+		/* TODO: the metrics for Times-Roman and Courier don't match
+		   those of Windows' Times New Roman and Courier New; for
+		   some reason, Poppler doesn't seem to have this problem */
+		if (fz_lookup_builtin_font(ctx, fontname, bold, italic, &len))
+			return NULL;
+		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2173 */
+		clean_name = pdf_clean_base14_name(fontname);
+		if (clean_name != fontname && strncmp(clean_name, "Times-", 6) == 0)
+			return NULL;
+	}
+	// TODO: unset font->ft_substitute for base14/needs_exact_metrics?
+	return pdf_load_windows_font_by_name(ctx, fontname);
+}
+/* Return the canonical base-14 name when fontname equals (ignoring spaces) one of its known spellings, else fontname itself. */
+static const char *clean_font_name(const char *fontname)
+{
+	int row, col;
+	for (row = 0; row < nelem(base_font_names); row++)
+		for (col = 0; base_font_names[row][col] != NULL; col++)
+			if (strcmp_ignore_space(base_font_names[row][col], fontname) == 0)
+				return base_font_names[row][0];
+	return fontname;
+}
+
+
+/* SumatraPDF: expose clean_font_name under the name pdf_load_windows_font calls (the result is passed through unchanged) */
+static const char * pdf_clean_base14_name(const char *fontname)
+{
+	return clean_font_name(fontname);
+}
+/* Load fontname from the system fonts; if that fails, load a fixed fallback face chosen by ros (character collection) and serif. */
+static fz_font *
+pdf_load_windows_cjk_font(fz_context *ctx, const char *fontname, int ros, int serif)
+{
+	fz_font *font;
+
+    font=NULL; /* WILLUS: Avoid compiler warning */
+	/* try to find a matching system font before falling back to an approximate one */
+	fz_try(ctx)
+	{
+		font = pdf_load_windows_font_by_name(ctx, fontname);
+	}
+	fz_catch(ctx)
+	{
+		font = NULL;	/* not found by name: fall through to the fallbacks below */
+	}
+	if (font)
+		return font;
+
+	/* try to fall back to a reasonable system font */
+	fz_try(ctx)
+	{
+		if (serif)
+		{
+			switch (ros)
+			{
+			case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "MingLiU"); break;
+			case FZ_ADOBE_GB: font = pdf_load_windows_font_by_name(ctx, "SimSun"); break;
+			case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Mincho"); break;
+			case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Batang"); break;
+			default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid serif ros");
+			}
+		}
+		else
+		{
+			switch (ros)
+			{
+			case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "DFKaiShu-SB-Estd-BF"); break;
+			case FZ_ADOBE_GB:
+				fz_try(ctx)
+				{
+					font = pdf_load_windows_font_by_name(ctx, "KaiTi");
+				}
+				fz_catch(ctx)
+				{
+					font = pdf_load_windows_font_by_name(ctx, "KaiTi_GB2312");	/* second choice when "KaiTi" cannot be loaded */
+				}
+				break;
+			case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Gothic"); break;
+			case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Gulim"); break;
+			default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid sans-serif ros");
+			}
+		}
+	}
+	fz_catch(ctx)
+	{
+#ifdef NOCJKFONT
+		/* If no CJK fallback font is builtin, maybe one has been shipped separately */
+		font = pdf_load_windows_font_by_name(ctx, "DroidSansFallback");
+#else
+		fz_rethrow(ctx);
+#endif
+	}
+
+	return font;
+}
+
+#endif
+/* Install the Windows font loaders defined in this file into ctx; does nothing when _WIN32 is not defined. */
+void pdf_install_load_system_font_funcs(fz_context *ctx)
+{
+#ifdef _WIN32
+	fz_install_load_system_font_funcs(ctx, pdf_load_windows_font, pdf_load_windows_cjk_font, NULL);
+#endif
+}
diff --git a/source/fitz/font.c b/source/fitz/font.c
index 00c6e8f99..1448b4a56 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -4,8 +4,11 @@
 #include "draw-imp.h"

 #include <ft2build.h>
+/* willus mod -- remove hb includes */
+/*
 #include "hb.h"
 #include "hb-ft.h"
+*/

 #include <assert.h>

diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 2df90305e..b1f99e056 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -825,6 +825,11 @@ fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options
 	dev->lastchar = ' ';
 	dev->curdir = 1;
 	dev->lasttext = NULL;
+    /* willus mod -- seems like this should be here, but not sure. */
+    if (opts)
+        dev->flags = opts->flags;
+    else
+        dev->flags = 0;

 	return (fz_device*)dev;
 }
diff --git a/source/fitz/string.c b/source/fitz/string.c
index f8eedb682..7a767983d 100644
--- a/source/fitz/string.c
+++ b/source/fitz/string.c
@@ -560,6 +560,10 @@ fz_utflen(const char *s)
 */
 float fz_atof(const char *s)
 {
+/* willus mod: atof(s), #if-#else-#endif */
+#if (!defined(__SSE__))
+    return(atof(s));
+#else
 	float result;

 	if (s == NULL)
@@ -572,6 +576,7 @@ float fz_atof(const char *s)
 		return 1;
 	result = fz_clamp(result, -FLT_MAX, FLT_MAX);
 	return result;
+#endif
 }

 /*
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
index 4dfdf36fe..acff7d12a 100644
--- a/source/pdf/pdf-annot.c
+++ b/source/pdf/pdf-annot.c
@@ -5,8 +5,20 @@
 #include <string.h>
 #include <time.h>

+/* willus mod--don't use _mkgmtime--not available in Win XP */
 #ifdef _WIN32
-#define timegm _mkgmtime
+/* Convert broken-down UTC time to time_t: mktime() result minus the local zone's offset. */
+static time_t timegm(struct tm *date)
+    {
+    time_t t,z;
+    struct tm gmz;
+    /* Measure the offset one day past the epoch: east of Greenwich the epoch itself is a
+       pre-1970 local time, for which the Windows mktime() returns -1 instead of the offset. */
+    z=(time_t)86400;
+    gmz=(*gmtime(&z));
+    t=mktime(date)-(mktime(&gmz)-z);
+    return(t);
+    }
 #endif

 #define isdigit(c) (c >= '0' && c <= '9')
diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c
index 37444b471..613cc05b9 100644
--- a/source/pdf/pdf-link.c
+++ b/source/pdf/pdf-link.c
@@ -345,6 +345,9 @@ pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp,
 		}
 		return page;
 	}
+/* willus mod -- be quiet */
+/*
 	fz_warn(ctx, "unknown link uri '%s'", uri);
+*/
 	return -1;
 }
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 04a772204..9dd0cd898 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -663,9 +663,14 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
 			if (c == '\r')
 			{
 				c = fz_peek_byte(ctx, file);
+/* willus mod -- no warning */
+/*
 				if (c != '\n')
 					fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
 				else
+*/
+if (c=='\n')
+/* willus mod -- end */
 					fz_read_byte(ctx, file);
 			}
 			stm_ofs = fz_tell(ctx, file);
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 8f888059b..08de7bfba 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -710,8 +710,11 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
 		if (!s)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing");
 		len = fz_atoi(fz_strsep(&s, " "));
+/* willus mod -- no error on a negative xref subsection length */
+/*
 		if (len < 0)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive");
+*/

 		/* broken pdfs where the section is not on a separate line */
 		if (s && *s != '\0')
@@ -1378,7 +1381,10 @@ pdf_init_document(fz_context *ctx, pdf_document *doc)
 	{
 		pdf_drop_xref_sections(ctx, doc);
 		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+/* willus mod -- be quiet */
+/*
 		fz_warn(ctx, "trying to repair broken xref");
+*/
 		repaired = 1;
 	}

@@ -1506,7 +1512,10 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
 		/* Swallow error, but continue dropping */
 	}

+/* willus mod -- no pdf_drop_js */
+/*
 	pdf_drop_js(ctx, doc->js);
+*/

 	pdf_drop_xref_sections(ctx, doc);
 	fz_free(ctx, doc->xref_index);
--
2.22.0