File:  [ELWIX - Embedded LightWeight unIX -] / libaitwww / src / mime.c
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Thu May 30 09:25:35 2013 UTC (11 years, 5 months ago) by misho
Branches: MAIN
CVS tags: www3_4, www3_3, www3_2, www3_1, WWW3_3, WWW3_2, WWW3_1, WWW3_0, HEAD
version 3.0

/*************************************************************************
* (C) 2012 AITNET ltd - Sofia/Bulgaria - <misho@aitnet.org>
*  by Michael Pounov <misho@elwix.org>
*
* $Author: misho $
* $Id: mime.c,v 1.5 2013/05/30 09:25:35 misho Exp $
*
**************************************************************************
The ELWIX and AITNET software is distributed under the following
terms:

All of the documentation and software included in the ELWIX and AITNET
Releases is copyrighted by ELWIX - Sofia/Bulgaria <info@elwix.org>

Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
	by Michael Pounov <misho@elwix.org>.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
   must display the following acknowledgement:
This product includes software developed by Michael Pounov <misho@elwix.org>
ELWIX - Embedded LightWeight unIX and its contributors.
4. Neither the name of AITNET nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY AITNET AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "global.h"
#include "mime.h"


/* Decoders for the transfer encodings listed in encode[]; defined further below */
static int decode_quoted(char *, int, char *);
static int decode_base64(char *, int, char *);

/* Transfer encodings for which mime_calcRawSize() reports the body length unchanged */
static const char *n_encode[] = { "7bit", "8bit", "binary" };
/* Transfer encodings whose body has to pass through a decoder */
static struct _tagEncode {
	char *name;	/* encoding name compared with the Content-Transfer-Encoding value */
	float mul;	/* factor mime_calcRawSize() applies to the body length */

	int (*decode)(char *, int, char *);	/* called as decode(in, in length, out), returns bytes written */
} encode[] = {
	{ "quoted-printable", 1, decode_quoted },
	{ "base64", (float) 3 / 4, decode_base64 }
};


/*
 * bd_begin() - Build the delimiter which opens a MIME part
 *
 * @str = Boundary tag
 * return: NULL error or !=NULL allocated string CRLF "--" tag CRLF
 */
static inline char *
bd_begin(const char *str)
{
	/* tag plus 6 fixed characters: CRLF, "--" and CRLF */
	int sz = strlen(str) + 6;
	char *delim = e_malloc(sz + 1);

	if (delim) {
		snprintf(delim, sz + 1, "\r\n--%s\r\n", str);
		delim[sz] = 0;
		return delim;
	}

	www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
	return NULL;
}

/*
 * bd_end() - Build the delimiter which closes a multi part message
 *
 * @str = Boundary tag
 * return: NULL error or !=NULL allocated string CRLF "--" tag "--" CRLF
 */
static inline char *
bd_end(const char *str)
{
	/* tag plus 8 fixed characters: CRLF, "--", "--" and CRLF */
	int sz = strlen(str) + 8;
	char *delim = e_malloc(sz + 1);

	if (delim) {
		snprintf(delim, sz + 1, "\r\n--%s--\r\n", str);
		delim[sz] = 0;
		return delim;
	}

	www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
	return NULL;
}

/*
 * powmod() - Modular exponentiation by squaring
 *
 * @x = Base
 * @y = Exponent
 * @q = Modulus, must not be 0
 * return: (x ^ y) mod q
 *
 * All intermediate products are kept in 64 bit, so a modulus which needs
 * the full 32 bit range is not truncated.
 */
static unsigned int
powmod(unsigned int x, size_t y, unsigned int q)
{
	unsigned long long base = x % q;
	unsigned long long ret = 1 % q;

	while (y) {
		if (y & 1)
			ret = ret * base % q;
		base = base * base % q;
		y >>= 1;
	}

	return (unsigned int) ret;
}

/*
 * findtextpos() - Find binary pattern into text (Rabin-Karp search)
 *
 * @T = Text
 * @tlen = Text length
 * @P = Pattern
 * @plen = Pattern length
 * return: NULL not found or !=NULL position of first match into text
 *
 * An empty pattern matches at the start of the text. A pattern longer
 * than the text never matches and the text is not touched.
 */
static const char *
findtextpos(const char *T, size_t tlen, const char *P, size_t plen)
{
	const unsigned int q = 4294967291u;	/* largest prime below 2^32 */
	const unsigned int d = 256;		/* alphabet size */
	unsigned long long hash, p = 0, t = 0;
	size_t i, last;

	if (!T || !P || plen > tlen)
		return NULL;
	if (!plen)
		return T;

	/* weight of the leading byte of a window: d ^ (plen - 1) mod q */
	hash = powmod(d, plen - 1, q);

	/* calculate initial hash tags; bytes are hashed as unsigned values */
	for (i = 0; i < plen; i++) {
		p = (d * p + (unsigned char) P[i]) % q;
		t = (d * t + (unsigned char) T[i]) % q;
	}

	last = tlen - plen;
	for (i = 0; i <= last; i++) {
		/* equal hashes are only a hint, confirm with real compare */
		if (p == t && !memcmp(P, T + i, plen))
			return T + i;

		/* rehashing: drop T[i], append T[i + plen] */
		if (i < last) {
			t = (t + q - (unsigned char) T[i] * hash % q) % q;
			t = (d * t + (unsigned char) T[i + plen]) % q;
		}
	}

	return NULL;
}

/*
 * freeHeader() - Release all header pairs attached to MIME part
 *
 * @m = Mime part
 * return: none
 */
static inline void
freeHeader(struct tagMIME * __restrict m)
{
	struct tagCGI *hdr;

	/* keep taking the head until the list is drained */
	for (hdr = SLIST_FIRST(&m->mime_header); hdr;
			hdr = SLIST_FIRST(&m->mime_header)) {
		ait_freeVar(&hdr->cgi_name);
		ait_freeVar(&hdr->cgi_value);

		SLIST_REMOVE_HEAD(&m->mime_header, cgi_node);
		e_free(hdr);
	}
}

/*
 * hdrValue() - Collect header value, joining folded (multi line) values
 *
 * @str = String, starts right after the colon of header
 * @len = String length
 * @end = Set on success to the first character after the CRLF which
 *	terminates the value
 * return: NULL error or !=NULL allocated string variable
 *
 * A line is a continuation when the CRLF is followed by space or tab.
 * The CRLF and that single white space character are dropped when lines
 * are joined.
 */
static ait_val_t *
hdrValue(const char *str, size_t len, const char **end)
{
	const size_t eol = strlen(CRLF);
	const char *e, *crlf = NULL, *nxt;
	char *tmp, *s = NULL;
	size_t off = 0, n;
	ait_val_t *ret = NULL;

	e = str + len;
	while (str < e) {
		if (!(crlf = findtextpos(str, e - str, CRLF, eol))) {
			www_SetErr(EFAULT, "Bad header format of MIME part");
			/* do not leak the lines collected so far */
			if (s)
				e_free(s);
			return NULL;
		}
		n = crlf - str;

		tmp = e_realloc(s, off + n + 1);
		if (!tmp) {
			www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
			if (s)
				e_free(s);
			return NULL;
		} else
			s = tmp;

		memcpy(s + off, str, n);
		off += n;
		s[off] = 0;

		/* if is multi part header value; never peek behind the end of input */
		nxt = crlf + eol;
		if (nxt < e && (*nxt == ' ' || *nxt == '\t'))
			str = nxt + 1;
		else
			break;
	}

	/* empty input, there is no line terminator to report */
	if (!crlf) {
		www_SetErr(EFAULT, "Bad header format of MIME part");
		return NULL;
	}

	*end = crlf + eol;
	ret = ait_makeVar(string, s);
	if (!ret)
		www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
	e_free(s);

	return ret;
}

/*
 * hexdigit() - Convert hexadecimal digit to its value
 *
 * @a = Character
 * return: -1 not a hexadecimal digit or 0..15 value
 */
static inline int
hexdigit(char a)
{
	if (a >= '0' && a <= '9')
		return a - '0';
	if (a >= 'a' && a <= 'f')
		return a - 'a' + 10;
	if (a >= 'A' && a <= 'F')
		return a - 'A' + 10;
	/* error!!! */
	return -1;
}

/*
 * decode_quoted() - Decode quoted-printable data
 *
 * @in = Input data
 * @len = Input data length
 * @out = Output buffer, never receives more than @len bytes
 * return: number of bytes stored into output buffer
 *
 * "=XY" with two hexadecimal digits gives one byte, "=" followed by line
 * break (CRLF or bare LF) is soft line break and gives nothing. Any other
 * "=" is malformed and is copied like ordinary character.
 */
static int
decode_quoted(char *in, int len, char *out)
{
	int i = 0, cx = 0;
	int hi, lo;

	while (i < len) {
		if (in[i] != '=') {
			/* ordinary character */
			out[cx++] = in[i++];
			continue;
		}

		/* special handling; never look behind the end of input */
		if (i + 2 < len && (hi = hexdigit(in[i + 1])) >= 0 && 
				(lo = hexdigit(in[i + 2])) >= 0) {
			/* encoding a special char */
			out[cx++] = (char) (hi << 4 | lo);
			i += 3;
		} else if (i + 2 < len && in[i + 1] == '\r' && in[i + 2] == '\n')
			i += 3;		/* soft line break */
		else if (i + 1 < len && in[i + 1] == '\n')
			i += 2;		/* soft line break with bare LF */
		else
			out[cx++] = in[i++];	/* malformed sequence */
	}

	return cx;
}

/*
 * b64_value() - Convert base64 alphabet symbol to its 6 bit value
 *
 * @c = Character
 * return: -1 not into base64 alphabet or 0..63 value
 */
static inline int
b64_value(char c)
{
	if (c >= 'A' && c <= 'Z')
		return c - 'A';
	if (c >= 'a' && c <= 'z')
		return c - 'a' + 26;
	if (c >= '0' && c <= '9')
		return c - '0' + 52;
	if (c == '+')
		return 62;
	if (c == '/')
		return 63;

	return -1;
}

/*
 * decode_base64() - Decode base64 data
 *
 * @in = Input data
 * @len = Input data length
 * @out = Output buffer, gets 3 bytes for every 4 symbols of input
 * return: number of bytes stored into output buffer
 *
 * Characters outside of the base64 alphabet (line breaks and etc) are
 * skipped. Decoding stops after the first group which holds padding. Input
 * which ends inside a group is handled like the missing symbols are
 * padding, so only the bytes which are completely present are stored.
 */
static int
decode_base64(char *in, int len, char *out)
{
	int i = 0, cx = 0, eqc = 0;
	int bits, j, v;

	while (i < len && !eqc) {
		/* gather up to 4 symbols into 24 bits */
		for (bits = j = 0; i < len && j < 4; i++) {
			if (in[i] == '=') {
				bits <<= 6;
				j++;
				eqc++;
			} else if ((v = b64_value(in[i])) >= 0) {
				bits = (bits << 6) | v;
				j++;
			}
		}

		/* only ignored characters were left */
		if (!j)
			break;

		/* truncated group, align bits and count absent symbols as padding */
		if (j < 4) {
			bits <<= 6 * (4 - j);
			eqc += 4 - j;
		}

		switch (eqc) {
			case 0:
				*out++ = (bits >> 16) & 0xff;
				*out++ = (bits >> 8) & 0xff;
				*out++ = bits & 0xff;
				cx += 3;
				break;
			case 1:
				*out++ = (bits >> 16) & 0xff;
				*out++ = (bits >> 8) & 0xff;
				cx += 2;
				break;
			case 2:
				*out++ = (bits >> 16) & 0xff;
				cx += 1;
				break;
			default:
				/* less than 8 bits of data, nothing to store */
				break;
		}
	}

	return cx;
}

/* ------------------------------------------------------------------ */

/*
 * mime_parseMultiPart() - Parse multi part MIME message
 *
 * @str = String, must start with the opening delimiter "--" tag CRLF
 * @len = String length
 * @bdtag = Boundary tag
 * @end = End of parsed part, optional. After a closing delimiter it points
 *	right behind that delimiter, otherwise to the place where parsing stopped
 * return: NULL error or !=NULL allocated MIME session
 */
mime_t *
mime_parseMultiPart(const char *str, size_t len, const char *bdtag, const char **end)
{
	mime_t *mime = NULL;
	struct iovec bd[2];
	struct tagMIME *m, *old = NULL;
	const char *first, *next, *term;
	size_t flen;
	int closed = 0;

	if (!str || !bdtag) {
		www_SetErr(EINVAL, "String or boundary tag is NULL");
		return NULL;
	}
	memset(bd, 0, sizeof bd);

	/* init MIME */
	mime = e_malloc(sizeof(mime_t));
	if (!mime) {
		www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
		return NULL;
	} else {
		memset(mime, 0, sizeof(mime_t));
		SLIST_INIT(mime);
	}

	/* prepare boundary format; bd[0] opens a part, bd[1] closes the message */
	bd[0].iov_base = bd_begin(bdtag);
	if (!bd[0].iov_base)
		goto err;
	bd[0].iov_len = strlen(bd[0].iov_base);
	bd[1].iov_base = bd_end(bdtag);
	if (!bd[1].iov_base)
		goto err;
	bd[1].iov_len = strlen(bd[1].iov_base);

	/* check boundary tag; the very first delimiter comes without leading CRLF */
	first = strstr(bd[0].iov_base, "--");
	flen = strlen(first);
	if (len < flen || memcmp(str, first, flen)) {
		www_SetErr(EFAULT, "Bad content data, not found boundary tag");
		goto err;
	}
	str += flen;
	len -= flen;

	while (len > 0) {
		m = e_malloc(sizeof(struct tagMIME));
		if (!m) {
			www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
			goto err;
		} else {
			memset(m, 0, sizeof(struct tagMIME));
			SLIST_INIT(&m->mime_header);
		}

		/* add to mime session at once, so error path releases it too */
		if (!old)
			SLIST_INSERT_HEAD(mime, m, mime_node);
		else
			SLIST_INSERT_AFTER(old, m, mime_node);
		old = m;

		/* the part ends at whichever delimiter comes first */
		next = findtextpos(str, len, bd[0].iov_base, bd[0].iov_len);
		term = findtextpos(str, len, bd[1].iov_base, bd[1].iov_len);
		if (!next || (term && term < next))
			next = term;
		if (!next) {
			www_SetErr(EFAULT, "Bad content data, not found boundary tag");
			goto err;
		}

		/* parse message between tags */
		if (mime_readPart(m, str, next - str))
			goto err;

		/* consume the part; length first, while str is still the start of part */
		len -= next - str;
		str = next;

		/* match part termination tag */
		if (next == term) {
			closed = 1;
			break;
		}

		str += bd[0].iov_len;
		len -= bd[0].iov_len;
	}

	/* step over the closing delimiter, it was matched completely */
	if (closed)
		str += bd[1].iov_len;

	e_free(bd[1].iov_base);
	e_free(bd[0].iov_base);

	if (end)
		*end = str;
	return mime;
err:
	mime_close(&mime);
	if (bd[1].iov_base)
		e_free(bd[1].iov_base);
	if (bd[0].iov_base)
		e_free(bd[0].iov_base);
	return NULL;
}

/*
 * freeMIME() - Release everything owned by MIME part
 *
 * @m = Mime part
 * return: none
 *
 * The part structure itself is not released.
 */
static inline void
freeMIME(struct tagMIME * __restrict m)
{
	void *bufs[3];
	register int i;

	/* body, prolog and epilog buffers, released only when they are set */
	bufs[0] = m->mime_body.iov_base;
	bufs[1] = m->mime_prolog.iov_base;
	bufs[2] = m->mime_epilog.iov_base;
	for (i = 0; i < 3; i++)
		if (bufs[i])
			e_free(bufs[i]);

	freeHeader(m);
	/* nested multi part session */
	mime_close(&m->mime_attach);
}

/*
 * mime_close() - Close MIME session and free all resources
 *
 * @mime = Inited mime session, set to NULL when function is done
 * return: none
 */
void
mime_close(mime_t ** __restrict mime)
{
	struct tagMIME *part;

	/* nothing to do without session */
	if (mime && *mime) {
		/* unlink every part before its resources are released */
		for (part = SLIST_FIRST(*mime); part; part = SLIST_FIRST(*mime)) {
			SLIST_REMOVE_HEAD(*mime, mime_node);
			freeMIME(part);
			e_free(part);
		}

		e_free(*mime);
		*mime = NULL;
	}
}

/*
 * mime_parseHeader() - Parse MIME header pairs
 *
 * @m = Mime part
 * @str = String
 * @len = String length
 * @end = End of parsed part, optional. Points after the empty line which
 *	closes the headers or where parsing stopped
 * return: -1 error or 0 ok
 *
 * On error all headers collected into mime part are released.
 */
int
mime_parseHeader(struct tagMIME * __restrict m, const char *str, size_t len, const char **end)
{
	const size_t eol = strlen(CRLF);
	const char *e, *colon, *eoh;
	struct tagCGI *c, *old = NULL;

	if (!m || !str) {
		www_SetErr(EINVAL, "Mime part or string is NULL");
		return -1;
	} else
		e = str + len;

	while (str < e) {
		/* empty line ends the headers; compare only when enough input is left */
		if ((size_t) (e - str) >= eol && !memcmp(str, CRLF, eol)) {
			str += eol;
			break;
		}

		colon = memchr(str, ':', e - str);
		eoh = findtextpos(str, e - str, CRLF, eol);
		/* the colon must belong to this header line */
		if (!colon || !eoh || colon > eoh) {
			www_SetErr(EFAULT, "Bad MIME format message");
			freeHeader(m);
			return -1;
		}

		c = e_malloc(sizeof(struct tagCGI));
		if (!c) {
			www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
			freeHeader(m);
			return -1;
		}
		/* get name */
		c->cgi_name = ait_allocVar();
		if (!c->cgi_name) {
			www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
			e_free(c);
			freeHeader(m);
			return -1;
		} else
			AIT_SET_STRLCPY(c->cgi_name, str, colon - str + 1);
		/* get value; moves str behind the value on success */
		c->cgi_value = hdrValue(colon + 1, e - colon - 1, &str);
		if (!c->cgi_value) {
			/* release with the same routines freeHeader() uses */
			ait_freeVar(&c->cgi_name);
			e_free(c);
			freeHeader(m);
			return -1;
		}

		/* keep headers in order of appearance */
		if (!old)
			SLIST_INSERT_HEAD(&m->mime_header, c, cgi_node);
		else
			SLIST_INSERT_AFTER(old, c, cgi_node);
		old = c;
	}

	if (end)
		*end = str;
	return 0;
}

/*
 * mime_getValue() - Get value from MIME header
 *
 * @m = Mime part
 * @name = Header name, compared without regard to letter case
 * return: NULL not found or !=NULL value of the first matching header
 */
const char *
mime_getValue(struct tagMIME * __restrict m, const char *name)
{
	struct tagCGI *hdr;
	const char *found = NULL;

	SLIST_FOREACH(hdr, &m->mime_header, cgi_node) {
		if (strcasecmp(AIT_GET_STR(hdr->cgi_name), name) == 0) {
			found = AIT_GET_STR(hdr->cgi_value);
			break;
		}
	}

	return found;
}

/*
 * mime_readPart() Read and parse MIME part
 *
 * @m = Mime part
 * @str = String
 * @len = String length
 * return: -1 error or 0 ok
 */
int
mime_readPart(struct tagMIME * __restrict m, const char *str, size_t len)
{
	const char *eoh, *ct, *eb;
	cgi_t *attr;
	struct iovec bd;
	ait_val_t *v;

	if (!m || !str || (ssize_t) len < 0) {
		www_SetErr(EINVAL, "Mime part, string is NULL or length is less 0");
		return -1;
	}

	if (mime_parseHeader(m, str, len, &eoh))
		return -1;

	ct = mime_getValue(m, "content-type");
	if (!ct || www_cmptype(ct, "multipart")) {
		/* not multi part, assign like body element */
		m->mime_body.iov_base = e_malloc(len - (eoh - str) + 1);
		if (!m->mime_body.iov_base) {
			www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
			freeHeader(m);
			return -1;
		}
		memcpy(m->mime_body.iov_base, eoh, len - (eoh - str));
		((char*) m->mime_body.iov_base)[len - (eoh - str)] = 0;
		m->mime_body.iov_len = len - (eoh - str) + 1;
	} else {
		/* multi part */
		attr = www_parseAttributes(&ct);
		if (!attr) {
			freeHeader(m);
			return -1;
		}
		v = www_getAttribute(attr, "boundary");
		bd.iov_base = bd_begin(AIT_GET_STR(v));
		bd.iov_len = strlen(bd.iov_base);
		eb = findtextpos(eoh, len - (eoh - str), bd.iov_base, bd.iov_len);
		e_free(bd.iov_base);

		/* set prolog if exists */
		if (eb != eoh) {
			m->mime_prolog.iov_base = e_malloc(eb - eoh + 1);
			if (!m->mime_prolog.iov_base) {
				www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
				www_freeAttributes(&attr);
				freeHeader(m);
				return -1;
			}
			memcpy(m->mime_prolog.iov_base, eoh, eb - eoh);
			((char*) m->mime_prolog.iov_base)[eb - eoh] = 0;
			m->mime_prolog.iov_len = eb - eoh + 1;
		}

		v = www_getAttribute(attr, "boundary");
		m->mime_attach = mime_parseMultiPart(eb + 1, len - (eb + 1 - str), 
				AIT_GET_STR(v), &eoh);

		/* set epilog if exists */
		if (eoh - str < len) {
			m->mime_epilog.iov_base = e_malloc(len - (eoh - str) + 1);
			if (!m->mime_epilog.iov_base) {
				www_SetErr(elwix_GetErrno(), "%s", elwix_GetError());
				www_freeAttributes(&attr);
				freeHeader(m);
				return -1;
			}
			memcpy(m->mime_epilog.iov_base, str, len - (eoh - str));
			((char*) m->mime_epilog.iov_base)[len - (eoh - str)] = 0;
			m->mime_epilog.iov_len = len - (eoh - str) + 1;

		}

		www_freeAttributes(&attr);
	}

	return 0;
}

/*
 * mime_calcRawSize() - Calculate estimated memory for data from parsed MIME part
 *
 * @m = Mime part
 * return: -1 error (unknown transfer encoding too) or >-1 data size in mime part
 *
 * Encoding name is taken from Content-Transfer-Encoding header up to the
 * first ';', with surrounding white spaces removed. Missing header means
 * the body is not encoded.
 */
int
mime_calcRawSize(struct tagMIME * __restrict m)
{
	const char *s, *t;
	size_t len;
	register size_t i;

	if (!m) {
		www_SetErr(EINVAL, "Mime part is NULL");
		return -1;
	}

	/* no body */
	if (m->mime_body.iov_len < 1)
		return 0;

	s = mime_getValue(m, "content-transfer-encoding");
	if (!s)
		return m->mime_body.iov_len;
	/* strip whitespaces */
	while (isspace((unsigned char) *s))
		s++;
	/* name ends at ';' when parameters follow, else at the end of value */
	t = strchr(s, ';');
	len = t ? (size_t) (t - s) : strlen(s);
	while (len > 0 && isspace((unsigned char) s[len - 1]))
		len--;

	/* find proper encoding */
	for (i = 0; i < sizeof n_encode / sizeof *n_encode; i++)
		if (len == strlen(n_encode[i]) && !strncasecmp(s, n_encode[i], len))
			return m->mime_body.iov_len;

	for (i = 0; i < sizeof encode / sizeof *encode; i++)
		if (len == strlen(encode[i].name) && !strncasecmp(s, encode[i].name, len))
			return m->mime_body.iov_len * encode[i].mul;

	/* fail */
	return -1;
}

/*
 * mime_getRawData() - Get ready parsed data from MIME part body
 *
 * @m = Mime part
 * @str = output data buffer
 * @len = output data buffer length
 * return: -1 error or >-1 data length in output buffer
 */
int
mime_getRawData(struct tagMIME * __restrict m, char * __restrict str, int slen)
{
	const char *s;
	char *t;
	int len;
	register int i;

	if (!m || !str) {
		www_SetErr(EINVAL, "Mime part or string is NULL");
		return -1;
	}

	/* no body */
	if (m->mime_body.iov_len < 1)
		return 0;

	s = mime_getValue(m, "content-transfer-encoding");
	if (!s) {
		memcpy(str, m->mime_body.iov_base, m->mime_body.iov_len > (slen - 1) ? 
				slen - 1 : m->mime_body.iov_len);
		return m->mime_body.iov_len;
	}

	/* strip whitespaces */
	while (isspace((int) *s))
		s++;
	t = strchr(s, ';');
	len = t ? strlen(s) : t - s;

	/* decoding body */
	for (i = 0; i < sizeof encode / sizeof *encode; i++)
		if (len == strlen(encode[i].name) && !strncasecmp(s, encode[i].name, len))
			return encode[i].decode(m->mime_body.iov_base, 
					m->mime_body.iov_len, str);

	/* fail */
	return -1;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>