1
0
mirror of https://github.com/VCMP-SqMod/SqMod.git synced 2025-02-24 05:37:14 +01:00
SqMod/vendor/POCO/PDF/src/hpdf_encoder_utf.c
Sandu Liviu Catalin 4a6bfc086c Major plugin refactor and cleanup.
Switched to POCO library for unified platform/library interface.
Deprecated the external module API. It was creating more problems than solving.
Removed most built-in libraries in favor of system libraries for easier maintenance.
Cleaned and secured code with help from static analyzers.
2021-01-30 08:51:39 +02:00

277 lines
7.9 KiB
C

/*
* << Haru Free PDF Library >> -- hpdf_encoder_utf.c
*
* URL: http://libharu.org
*
* Copyright (c) 1999-2006 Takeshi Kanno <takeshi_kanno@est.hi-ho.ne.jp>
* Copyright (c) 2007-2008 Antony Dovgal <tony@daylessday.org>
* Copyright (c) 2010 Sergey Konovalov <webmaster@crynet.ru>
* Copyright (c) 2011 Koen Deforche <koen@emweb.be>
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation.
* It is provided "as is" without express or implied warranty.
*
*/
#include "hpdf_conf.h"
#include "hpdf_utils.h"
#include "hpdf_encoder.h"
#include "hpdf.h"
typedef struct _UTF8_EncoderAttr_Rec *UTF8_EncoderAttr;
typedef struct _UTF8_EncoderAttr_Rec {
HPDF_BYTE current_byte;
HPDF_BYTE end_byte;
HPDF_BYTE utf8_bytes[8];
} UTF8_EncoderAttr_Rec;
static const HPDF_CidRange_Rec UTF8_NOTDEF_RANGE = {0x0000, 0x001F, 1};
static const HPDF_CidRange_Rec UTF8_SPACE_RANGE = {0x0000, 0xFFFF, 0};
static const HPDF_CidRange_Rec UTF8_CID_RANGE[] = {
{ 0x0000, 0xFFFF, 0x0 },
{ 0xFFFF, 0xFFFF, 0x0 }
};
static HPDF_ByteType
UTF8_Encoder_ByteType_Func (HPDF_Encoder encoder,
HPDF_ParseText_Rec *state);
static HPDF_UNICODE
UTF8_Encoder_ToUnicode_Func (HPDF_Encoder encoder,
HPDF_UINT16 code);
static char *
UTF8_Encoder_EncodeText_Func (HPDF_Encoder encoder,
const char *text,
HPDF_UINT len,
HPDF_UINT *length);
static HPDF_STATUS
UTF8_Init (HPDF_Encoder encoder);
/*--------------------------------------------------------------------------*/
/*
* This function is taken from hpdf_encoder_utf8.c, originally submitted
* to libharu by 'Mirco'
*/
static HPDF_ByteType
UTF8_Encoder_ByteType_Func (HPDF_Encoder encoder,
HPDF_ParseText_Rec *state)
{
// This function is supposed to increment state->index
// Not logical ! (look at function HPDF_String_Write in hpdf_string.c)
// When HPDF_BYTE_TYPE_SINGLE is returned, the current byte is the
// CODE argument in call ToUnicode_Func
// When HPDF_BYTE_TYPE_LEAD is returned, the current byte (msb) and the
// next byte (lsb) is the CODE arguement in call ToUnicodeFunc
// When HPDF_BYTE_TYPE_TRIAL is returned, the current byte is ignored
HPDF_CMapEncoderAttr encoder_attr;
HPDF_BYTE byte;
UTF8_EncoderAttr utf8_attr;
encoder_attr = (HPDF_CMapEncoderAttr) encoder->attr;
utf8_attr = (UTF8_EncoderAttr) ((void *)encoder_attr->cid_map[0]);
if (state->index == 0) {
//First byte, initialize.
HPDF_PTRACE ((" UTF8_Encoder_ByteType_Func - Initialize: (%u) %s\n",
state->len, state->text));
utf8_attr->current_byte = 0;
}
byte = state->text[state->index];
state->index++;
HPDF_PTRACE ((" UTF8_Encoder_ByteType_Func - Byte: %hx\n", byte));
if (utf8_attr->current_byte == 0) {
utf8_attr->utf8_bytes[0] = byte;
utf8_attr->current_byte = 1;
if (!(byte & 0x80)) {
utf8_attr->current_byte = 0;
utf8_attr->end_byte = 0;
return HPDF_BYTE_TYPE_SINGLE;
}
if ((byte & 0xf8) == 0xf0)
utf8_attr->end_byte = 3;
else if ((byte & 0xf0) == 0xe0)
utf8_attr->end_byte = 2;
else if ((byte & 0xe0) == 0xc0)
utf8_attr->end_byte = 1;
else
utf8_attr->current_byte = 0; //ERROR, skip this byte
} else {
utf8_attr->utf8_bytes[utf8_attr->current_byte] = byte;
if (utf8_attr->current_byte == utf8_attr->end_byte) {
utf8_attr->current_byte = 0;
return HPDF_BYTE_TYPE_SINGLE;
}
utf8_attr->current_byte++;
}
return HPDF_BYTE_TYPE_TRIAL;
}
/*
* This function is taken from hpdf_encoder_utf8.c, originally submitted
* to libharu by 'Mirco'
*/
static HPDF_UNICODE
UTF8_Encoder_ToUnicode_Func (HPDF_Encoder encoder,
HPDF_UINT16 code)
{
// Supposed to convert CODE to unicode.
// This function is allways called after ByteType_Func.
// ByteType_Func recognizes the utf-8 bytes belonging to one character.
HPDF_CMapEncoderAttr encoder_attr;
UTF8_EncoderAttr utf8_attr;
unsigned int val;
encoder_attr = (HPDF_CMapEncoderAttr) encoder->attr;
utf8_attr = (UTF8_EncoderAttr) ((void *)encoder_attr->cid_map[0]);
switch (utf8_attr->end_byte) {
case 3:
val = (unsigned int) ((utf8_attr->utf8_bytes[0] & 0x7) << 18) +
(unsigned int) ((utf8_attr->utf8_bytes[1]) << 12) +
(unsigned int) ((utf8_attr->utf8_bytes[2] & 0x3f) << 6) +
(unsigned int) ((utf8_attr->utf8_bytes[3] & 0x3f));
break;
case 2:
val = (unsigned int) ((utf8_attr->utf8_bytes[0] & 0xf) << 12) +
(unsigned int) ((utf8_attr->utf8_bytes[1] & 0x3f) << 6) +
(unsigned int) ((utf8_attr->utf8_bytes[2] & 0x3f));
break;
case 1:
val = (unsigned int) ((utf8_attr->utf8_bytes[0] & 0x1f) << 6) +
(unsigned int) ((utf8_attr->utf8_bytes[1] & 0x3f));
break;
case 0:
val = (unsigned int) utf8_attr->utf8_bytes[0];
break;
default:
val = 32; // Unknown character
}
if (val > 65535) //Convert everything outside UCS-2 to space
val = 32;
return val;
}
static char *
UTF8_Encoder_EncodeText_Func (HPDF_Encoder encoder,
const char *text,
HPDF_UINT len,
HPDF_UINT *length)
{
char *result = malloc(len * 2);
char *c = result;
HPDF_ParseText_Rec parse_state;
HPDF_UINT i;
HPDF_Encoder_SetParseText (encoder, &parse_state,
(const HPDF_BYTE *)text, len);
for (i = 0; i < len; i++) {
HPDF_UNICODE tmp_unicode;
HPDF_ByteType btype = HPDF_Encoder_ByteType (encoder, &parse_state);
if (btype != HPDF_BYTE_TYPE_TRIAL) {
tmp_unicode = HPDF_Encoder_ToUnicode (encoder, 0);
HPDF_UInt16Swap (&tmp_unicode);
HPDF_MemCpy ((HPDF_BYTE *)c, (const HPDF_BYTE*)&tmp_unicode, 2);
c += 2;
}
}
*length = c - result;
return result;
}
static HPDF_STATUS
UTF8_Init (HPDF_Encoder encoder)
{
HPDF_CMapEncoderAttr attr;
HPDF_STATUS ret;
if ((ret = HPDF_CMapEncoder_InitAttr (encoder)) != HPDF_OK)
return ret;
/*
* We override these two
*/
encoder->byte_type_fn = UTF8_Encoder_ByteType_Func;
encoder->to_unicode_fn = UTF8_Encoder_ToUnicode_Func;
encoder->encode_text_fn = UTF8_Encoder_EncodeText_Func;
attr = (HPDF_CMapEncoderAttr)encoder->attr;
if (HPDF_CMapEncoder_AddCMap (encoder, UTF8_CID_RANGE) != HPDF_OK)
return encoder->error->error_no;
if (HPDF_CMapEncoder_AddCodeSpaceRange (encoder, UTF8_SPACE_RANGE)
!= HPDF_OK)
return encoder->error->error_no;
if (HPDF_CMapEncoder_AddNotDefRange (encoder, UTF8_NOTDEF_RANGE)
!= HPDF_OK)
return encoder->error->error_no;
attr->is_lead_byte_fn = NULL;
attr->is_trial_byte_fn = NULL;
HPDF_StrCpy (attr->registry, "Adobe", attr->registry +
HPDF_LIMIT_MAX_NAME_LEN);
HPDF_StrCpy (attr->ordering, "Identity-H", attr->ordering +
HPDF_LIMIT_MAX_NAME_LEN);
attr->suppliment = 0;
attr->writing_mode = HPDF_WMODE_HORIZONTAL;
/* Not sure about this
attr->uid_offset = 0;
attr->xuid[0] = 0;
attr->xuid[1] = 0;
attr->xuid[2] = 0;
*/
encoder->type = HPDF_ENCODER_TYPE_DOUBLE_BYTE;
return HPDF_OK;
}
/*--------------------------------------------------------------------------*/
HPDF_EXPORT(HPDF_STATUS)
HPDF_UseUTFEncodings (HPDF_Doc pdf)
{
HPDF_Encoder encoder;
HPDF_STATUS ret;
if (!HPDF_HasDoc (pdf))
return HPDF_INVALID_DOCUMENT;
encoder = HPDF_CMapEncoder_New (pdf->mmgr, "UTF-8",
UTF8_Init);
if ((ret = HPDF_Doc_RegisterEncoder (pdf, encoder)) != HPDF_OK)
return ret;
return HPDF_OK;
}