/*
 * KTCPVS       An implementation of the TCP Virtual Server daemon inside
 *              kernel for the LINUX operating system. KTCPVS can be used
 *              to build a moderately scalable and highly available server
 *              based on a cluster of servers, with more flexibility.
 *
 * tcp_vs_phttp.c: KTCPVS content-based scheduling module for HTTP service
 *		   with persistent connection support
 *
 * Version:     $Id: tcp_vs_phttp.c,v 1.1 2003/02/14 09:21:32 wensong Exp $
 *
 * Authors:     Wensong Zhang, <wensong@linuxvirtualserver.org>
 *              Hai Long, <david_lung@yahoo.com>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/ctype.h>

#include <linux/net.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <asm/uaccess.h>

#include "tcp_vs.h"

/* ASCII codes of the characters used to delimit HTTP lines and fields */
#define CR 13
#define LF 10
#define SP ' '

/*
 * Idle limit in seconds; the scheduler multiplies it by HZ and compares
 * it with the jiffies elapsed since the last client read.
 */
#define KEEP_ALIVE_TIMES	60

/* result codes of the parse_http_*_line() and http_mime_parse() routines */
#define PARSE_OK		0
#define PARSE_ERROR		1

/* NOTE: function-like macro, each argument may be evaluated twice */
#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

/*
 * Request methods recognised by the parser.  The enumerator values are
 * used as indices into http_methods[]; HTTP_M_MAX is that table's size
 * and HTTP_M_UNKNOWN (0) marks an unrecognised method.
 */
typedef enum http_method_s {
	HTTP_M_UNKNOWN,
	HTTP_M_OPTIONS,
	HTTP_M_GET,
	HTTP_M_HEAD,
	HTTP_M_POST,
	HTTP_M_PUT,
	HTTP_M_DELETE,
	HTTP_M_TRACE,
	HTTP_M_MAX
} http_method_t;

/* One row of the method lookup table (see http_methods[]) */
typedef struct methods {
	const short number;		/* http_method_t value of this row */
	const char * const name;	/* method token as it appears on the wire */
	int len;			/* length of name, without the NUL */
} methods_t;


/*
 * NULL-terminated list of known HTTP-Version strings.  The parsers map
 * the entry at index i to the http_version_t value (i + HTTP_V_0_9), so
 * the order here must follow the order of that enum.
 */
const char * http_protocol_strings[] = {
	"HTTP/0.9", /* The position is crucial & magic! do not insert, only add */
	"HTTP/1.0",
	"HTTP/1.1",
	NULL
};

/*
 * Method lookup table, indexed by http_method_t.  The request-line
 * parser scans it from index 1, so the "UNKNOWN" row is never matched
 * against input.
 */
const methods_t http_methods [HTTP_M_MAX] = {
	{HTTP_M_UNKNOWN,	"UNKNOWN",	7},
	{HTTP_M_OPTIONS,	"OPTIONS",	7},
	{HTTP_M_GET,		"GET",		3},
	{HTTP_M_HEAD,		"HEAD",		4},
	{HTTP_M_POST,		"POST",		4},
	{HTTP_M_PUT,		"PUT",		3},
	{HTTP_M_DELETE,		"DELETE",	6},
	{HTTP_M_TRACE,		"TRACE",	5}
};

/*
 * HTTP protocol versions.  Apart from HTTP_V_UNKNOWN the enumerators
 * follow the order of http_protocol_strings[], which the parsers rely on.
 */
typedef enum http_versions {
	HTTP_V_UNKNOWN,
	HTTP_V_0_9,
	HTTP_V_1_0,
	HTTP_V_1_1
} http_version_t;

typedef struct http_mime_headers {
	int	content_length;
	int	transfer_encoding;
	int	connection_close;
	char	*sep;	/* THIS_STRING_SEPARATES */
} http_mime_header_t;

/*
 * Parsed view of an HTTP request line.  The string members point into
 * the caller's receive buffer; nothing is copied.
 */
typedef struct http_request_s
{
	const char *message;		/* start of the request line */
	unsigned int message_len;	/* its length, without CRLF */
	unsigned int parsed_len;	/* not written by the code visible here */

	/* request method */
	http_method_t method;
	const char *method_str;
	unsigned int method_len;

	/* request URI (not NUL-terminated, use uri_len) */
	const char *uri_str;
	unsigned int uri_len;

	/* http version */
	http_version_t version;
	const char *version_str;
	unsigned int version_len;

	/* cookies (not written by the code visible here) */
	unsigned int cookies;
	const char *cookies_str;
	unsigned int cookies_len;

	/* MIME header */
	http_mime_header_t mime;
} http_request_t;

/* Parsed view of an HTTP status line plus the headers that follow it */
typedef struct http_response_s
{
	/* http version (an http_version_t value) */
	int version;

	/* response status code */
	int status_code;

	/* MIME header */
	http_mime_header_t mime;
} http_response_t;


/*
 * Buffered-read state for one socket.  Unconsumed data occupies
 * buffer[offset .. offset + remaining - 1].
 */
typedef struct http_read_ctl_block_s
{
	struct socket	*sock;		/* socket that message read from */
	char*		buffer;		/* buffer to store read message */
	int		len;		/* size of buffer */
	char*		info;		/* start of the line found by the last http_read_line() */
	int		offset;		/* offset of the first unconsumed byte */
	int		remaining;	/* number of unconsumed bytes */
	int		flag;		/* flags passed to tcp_vs_recvbuffer() */
} http_read_ctl_block_t;

/* One destination server (and its socket) kept on a per-client list */
typedef struct http_dest_server_s {
	struct list_head	d_list;	/* server list for a client connection */
	struct tcp_vs_dest	*dest;	/* dest server entry */
	struct socket		*sock;	/* destination socket */
} http_dest_server_t;


/*
 * Service initialisation entry of the phttp scheduler.  The scheduler
 * keeps no per-service state, so there is nothing to set up and the
 * call always succeeds (returns 0).
 */
static int
tcp_vs_phttp_init_svc(struct tcp_vs_service *svc)
{
	return 0;
}


/*
 * Service tear-down entry of the phttp scheduler.  Nothing was set up
 * per service, so nothing is released; always returns 0.
 */
static int
tcp_vs_phttp_done_svc(struct tcp_vs_service *svc)
{
	return 0;
}


/*
 * Service update entry of the phttp scheduler.  There is no cached
 * per-service state to refresh; always returns 0.
 */
static int
tcp_vs_phttp_update_svc(struct tcp_vs_service *svc)
{
	return 0;
}


static inline struct tcp_vs_dest *
__tcp_vs_phttp_wlc_schedule(struct list_head *destinations)
{
	register struct list_head *e;
	struct tcp_vs_dest *dest, *least;

	list_for_each (e, destinations) {
		least = list_entry(e, struct tcp_vs_dest, r_list);
		if (least->weight > 0) {
			goto nextstage;
		}
	}
	return NULL;

	/*
	 *	Find the destination with the least load.
	 */
  nextstage:
	for (e=e->next; e!=destinations; e=e->next) {
		dest = list_entry(e, struct tcp_vs_dest, r_list);
		if (atomic_read(&least->conns) * dest->weight >
		    atomic_read(&dest->conns) * least->weight) {
			least = dest;
		}
	}

	return least;
}

/****************************************************************************
*	skip_lws - step over leading linear white space
*
*	Returns a pointer to the first character of buffer that is not a
*	space, tab, LF or CR (possibly the terminating NUL).
*/
static inline char * skip_lws (const char* buffer)
{
	const char *p;

	for (p = buffer; ; p++) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			continue;	/* white space: advance to the next char */
		}
		break;
	}
	return (char *) p;
}

/****************************************************************************
*	strtol - convert the leading part of a string to a long
*
*	string:	text to convert; leading white space and one optional sign
*		are accepted
*	endptr:	if not NULL, receives the address of the first character
*		that was not converted
*	radix:	2..36, or 0 to auto-detect ("0x"/"0X" -> 16, "0" -> 8,
*		otherwise 10)
*
*	A "0x" prefix is only recognised when radix is 0, not when it is 16.
*	For an unsupported radix 0 is returned and *endptr is set to string.
*	If the value does not fit in a long, the largest (or, for negative
*	input, the smallest) long is returned and *endptr is set to string.
*	The range check is done before multiplying, so no signed overflow
*	takes place.
*/
#ifndef strtol
static long strtol( char *string, char **endptr, int radix)
{
	const long max = (long)(~0UL >> 1);	/* largest long value */
	char *s = string;
	long value = 0;
	int negative = 0;
	int digit;

	if ((radix == 1) || (radix > 36) || (radix < 0)) {
		goto done;
	}

	/* skip whitespace */
	while ((*s == ' ') || (*s == '\t') || (*s == '\n') || (*s == '\r')) {
		s++;
	}

	if (*s == '-') {
		negative = 1;
		s++;
	}
	else if (*s == '+') {
		s++;
	}

	if (radix == 0) {
		if (*s == '0') {
			s++;
			if ((*s == 'x') || (*s == 'X')) {
				s++;
				radix = 16;
			}
			else
				radix = 8;
		}
		else
			radix = 10;
	}

	/* read number */
	for (;; s++) {
		if ((*s >= '0') && (*s <= '9'))
			digit = *s - '0';
		else if ((*s >= 'a') && (*s <= 'z'))
			digit = *s - 'a' + 10;
		else if ((*s >= 'A') && (*s <= 'Z'))
			digit = *s - 'A' + 10;
		else
			break;

		if (digit >= radix)
			break;

		/* would value * radix + digit exceed the largest long? */
		if (value > (max - digit) / radix) {
			if (endptr)
				*endptr = string;
			return negative ? (-max - 1) : max;
		}

		value = value * radix + digit;
	}

  done:
	if (endptr)
		*endptr = s;

	return negative ? -value : value;
}
#endif


/****************************************************************************
*  get_chunk_size - parse the hexadecimal chunk-size that starts a chunk line
*
*  Leading white space and leading zeros are skipped; parsing stops at the
*  first character that is not a hex digit (e.g. ';' of a chunk extension).
*
*  There are two error cases:
*  1) If the conversion would require too many bits, a -1 is returned.
*  2) If the conversion used the correct number of bits, but the top bit
*     of the result is set, then the (negative) converted value is
*     returned.
*  In general, any negative number can be considered an overflow error.
*
*  The value is accumulated in an unsigned long so that no shift ever
*  moves a bit into the sign position of a signed type.
*/
static long get_chunk_size(char *b)
{
	unsigned long chunksize = 0;
	size_t chunkbits = sizeof(long) * 8;

	/* skip whitespace */
	while ((*b == ' ') || (*b == '\t') || (*b == '\n') || (*b == '\r')) {
		b++;
	}

	/* Skip leading zeros */
	while (*b == '0') {
		++b;
	}

	while (isxdigit((unsigned char) *b) && (chunkbits > 0)) {
		unsigned int xvalue;

		if (*b >= '0' && *b <= '9') {
			xvalue = *b - '0';
		}
		else if (*b >= 'A' && *b <= 'F') {
			xvalue = *b - 'A' + 0xa;
		}
		else {
			xvalue = *b - 'a' + 0xa;
		}

		chunksize = (chunksize << 4) | xvalue;
		chunkbits -= 4;
		++b;
	}

	/* a further hex digit means the number needs more bits than a long */
	if (isxdigit((unsigned char) *b)) {
		return -1;
	}

	return (long) chunksize;
}

/****************************************************************************
*   parse_http_request_line - parse an HTTP request line
*
*   buffer: start of the line; buffer[len] must be writable because it is
*           temporarily replaced by NUL while parsing and restored on exit
*   len:    length of the line, without the terminating CRLF
*   req:    receives message/message_len, method, uri_str/uri_len,
*           version_str and version; the pointers refer into buffer
*
*   Returns PARSE_OK, or PARSE_ERROR if the method or the version is not
*   known, or if the URI is not followed by a space.
*
*   RFC 2616, 19.3
*   Clients SHOULD be tolerant in parsing the Status-Line and servers
*   tolerant when parsing the Request-Line. In particular, they SHOULD
*   accept any amount of SP or HT characters between fields, even though
*   only a single SP is required.
*
*/
static int parse_http_request_line (char *buffer, size_t len, http_request_t *req)
{
	char	*pos, saved;
	int	ret = PARSE_ERROR;
	int	i;

	EnterFunction(5);

	/* terminate string */
	saved = buffer[len];
	buffer[len] = 0;

	TCP_VS_DBG(5, "parsing request:\n");
	TCP_VS_DBG(5, "--------------------\n");
	TCP_VS_DBG(5, "%s\n", buffer);
	TCP_VS_DBG(5, "--------------------\n");

	req->message		= buffer;
	req->message_len	= len;

	/*
	 * RFC 2616, 5.1:
	 *	Request-Line = Method SP Request-URI SP HTTP-Version CRLF
	 */

	/* try to get method */
	pos = skip_lws (buffer);
	req->method = HTTP_M_UNKNOWN;	/* Default :) */
	for (i = 1; i < HTTP_M_MAX; i++) {
		if (strnicmp (pos, http_methods[i].name, http_methods[i].len) == 0) {
			req->method = i;
			break;
		}
	}
	if (req->method == HTTP_M_UNKNOWN) {
		goto exit;
	}
	TCP_VS_DBG(6, "HTTP METHOD: %s\n", http_methods[i].name);

	pos += http_methods[i].len;

	/*
	 * The method token must be followed by white space.  Checking it
	 * here rejects tokens that merely start with a known method
	 * ("GETX") and keeps the scan from stepping over the terminating
	 * NUL when the line ends right after the method.
	 */
	if ((*pos != SP) && (*pos != '\t')) {
		req->method = HTTP_M_UNKNOWN;
		goto exit;
	}

	/* get URI string */
	req->uri_str = skip_lws (pos);
	TCP_VS_DBG(6, "URI: %s\n", req->uri_str);

	if ((pos = strchr ((char*)req->uri_str, SP)) == NULL) {
		goto exit;
	}

	req->uri_len = pos - req->uri_str;

	/* get http version (pos points at the SP, so pos + 1 is in range) */
	req->version_str	= skip_lws (pos + 1);
	req->version	= HTTP_V_UNKNOWN;
	for (i = 0; http_protocol_strings[i] != NULL; i++) {
		if (strnicmp (req->version_str, http_protocol_strings[i],
			      strlen (http_protocol_strings[i])) == 0) {
			req->version = i + HTTP_V_0_9;
			break;
		}
	}

	if (req->version == HTTP_V_UNKNOWN) {
		goto exit;
	}

	TCP_VS_DBG(6, "HTTP VERSION: %s\n", http_protocol_strings[i]);
	ret = PARSE_OK;
  exit:
	buffer[len] = saved;	/* restore string */
	LeaveFunction(5);
	return ret;
}


/****************************************************************************
* parse_http_status_line - parse the http status line.
*
*   buffer: start of the line; buffer[len] must be writable because it is
*           temporarily replaced by NUL while parsing and restored on exit
*   len:    length of the line, without the terminating CRLF
*   resp:   receives version and status_code
*
*   Returns PARSE_OK if the line starts with a known HTTP-Version,
*   PARSE_ERROR otherwise.
*
*   RFC 2616, 19.3
*   Clients SHOULD be tolerant in parsing the Status-Line and servers
*   tolerant when parsing the Request-Line. In particular, they SHOULD
*   accept any amount of SP or HT characters between fields, even though
*   only a single SP is required.
*
*/
static int parse_http_status_line (char *buffer, size_t len, http_response_t *resp)
{
	char *pos, saved;
	int i, ret = PARSE_ERROR;

	EnterFunction(5);

	assert(buffer != NULL);

	/* terminate string */
	saved = buffer[len];
	buffer[len] = '\0';

	TCP_VS_DBG(5, "parsing response:\n");
	TCP_VS_DBG(5, "--------------------\n");
	TCP_VS_DBG(5, "%s\n", buffer);
	TCP_VS_DBG(5, "--------------------\n");

	/*
	 * RFC 2616, 6.1:
	 *	Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF
	 */

	pos = skip_lws (buffer);

	resp->version = HTTP_V_UNKNOWN;
	for (i = 0; http_protocol_strings[i] != NULL; i++) {
		if (strnicmp (pos, http_protocol_strings[i],
			strlen (http_protocol_strings[i])) == 0) {
			resp->version = i + HTTP_V_0_9;
			break;
		}
	}

	if (resp->version != HTTP_V_UNKNOWN) {
		/*
		 * Get the status code.  Only the version string itself is
		 * stepped over: strtol() skips the separating white space,
		 * and a line that ends right after the version is not read
		 * past its terminating NUL.
		 */
		pos += strlen (http_protocol_strings[i]);
		resp->status_code = strtol (pos, NULL, 10);
		assert(resp->status_code >= 100);
		TCP_VS_DBG(6, "Status Code: %d\n", resp->status_code);
		ret = PARSE_OK;
	}

	buffer[len] = saved;	/* restore string */
	LeaveFunction(5);
	return ret;
}


/*
 * Pick a destination for a request by matching its URI against the
 * service's rules.
 *
 * A NUL-terminated copy of the URI is made because the request only
 * carries a pointer/length pair.  The rule list is walked under the
 * service read lock; the first rule whose regular expression matches
 * decides, and its destinations are handed to the wlc scheduler.
 *
 * Returns the chosen destination, or NULL if the copy could not be
 * allocated, no rule matched, or the matching rule had no usable
 * destination.
 */
static struct tcp_vs_dest *
tcp_vs_phttp_matchrule(struct tcp_vs_service *svc, http_request_t *req)
{
	struct tcp_vs_dest *chosen = NULL;
	struct tcp_vs_rule *rule;
	struct list_head *pos;
	char *uri_copy;

	uri_copy = kmalloc(req->uri_len+1, GFP_KERNEL);
	if (uri_copy == NULL) {
		TCP_VS_ERR("No memory!\n");
		return NULL;
	}
	memcpy(uri_copy, req->uri_str, req->uri_len);
	uri_copy[req->uri_len] = '\0';
	TCP_VS_DBG(5, "matching request URI: %s\n", uri_copy);

	read_lock(&svc->lock);
	list_for_each(pos, &svc->rule_list) {
		rule = list_entry(pos, struct tcp_vs_rule, list);
		if (regexec(&rule->rx, uri_copy, 0, NULL, 0) != 0)
			continue;	/* no match, try the next rule */

		/* HIT */
		chosen = __tcp_vs_phttp_wlc_schedule(&rule->destinations);
		break;
	}
	read_unlock(&svc->lock);

	kfree(uri_copy);
	return chosen;
}


/****************************************************************************
*
* http_read_line - read a line from socket.
*
* First look for a complete line in the bytes that are already buffered;
* if there is none, keep receiving from the socket until a line delimiter
* ("CRLF") shows up or about max_line_size bytes have been collected.
*
* Returns the length of the line (not including CRLF), or -1 on failure
* (connection closed, receive error, or no CRLF within the budget).
*
* On success ctl_blk->info points at the line and offset/remaining are
* moved past the line and its CRLF.  On failure info is NULL and the
* buffered data is not consumed.
*
*   Note: 1, http_read_line does not terminate the line with '\0', it still
*   ends with CRLF.
*	2, the actual search length may be bigger than max_line_size,
*	because every receive may fill the rest of the buffer.
*/
static int http_read_line (http_read_ctl_block_t *ctl_blk, int max_line_size)
{
	char* buf;
	int nbytes, i, offset, reads;
	int len = -1;

	DECLARE_WAIT_QUEUE_HEAD(WQ);

	EnterFunction(5);

	assert(max_line_size < ctl_blk->len);
	assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset));

	ctl_blk->info = NULL;
	if (ctl_blk->remaining == 0) {
		ctl_blk->offset = 0;
	}

	offset = ctl_blk->offset;
	buf = ctl_blk->buffer + offset;

	/* try to get a line from the remaining bytes */
	for (i = 0; i < ctl_blk->remaining -1; i++) {
		if ((buf[i] == CR) && (buf[i + 1] == LF)) {
			len = i;
			goto done;
		}
	}

	/* not enough room behind offset: move the pending bytes to the front */
	if ((offset + max_line_size) > ctl_blk->len) {
		memmove (ctl_blk->buffer, buf, ctl_blk->remaining);
		ctl_blk->offset = 0;
		buf = ctl_blk->buffer;
	}

	nbytes = max_line_size - ctl_blk->remaining;

	/* try to read a line from the socket */
	while (nbytes > 0) {
		/* go out if the connection is closed */
		if (ctl_blk->sock->sk->state != TCP_ESTABLISHED
		    && ctl_blk->sock->sk->state != TCP_CLOSE_WAIT) {
			goto exit;
		}

		assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset));
		reads = tcp_vs_recvbuffer(ctl_blk->sock,
					  ctl_blk->buffer + ctl_blk->offset + ctl_blk->remaining,
					  ctl_blk->len - ctl_blk->offset - ctl_blk->remaining,
					  ctl_blk->flag);

		if (reads == 0) {
			interruptible_sleep_on_timeout(&WQ, HZ);
			TCP_VS_DBG(5, "Read 0 bytes while reading a line\n");
			continue;
		}

		if (reads < 0) {
			TCP_VS_ERR("Error in reading a line\n");
			goto exit;
		}

		ctl_blk->remaining += reads;

		/* continue the search where the previous scan stopped */
		for (; i < ctl_blk->remaining -1; i++) {
			if ((buf[i] == CR) && (buf[i + 1] == LF)) {
				len = i;
				goto done;
			}
		}
		nbytes -= reads;
	}

	/*
	 * Budget used up without finding CRLF.  Leave through the error
	 * path: running the bookkeeping below with len == -1 would shift
	 * offset/remaining by one byte.
	 */
	goto exit;

  done:
	ctl_blk->info		= buf;
	ctl_blk->offset		+= len + 2;
	ctl_blk->remaining	-= len + 2;

	assert(ctl_blk->remaining >= 0);
	assert(ctl_blk->offset  < ctl_blk->len);

  exit:
	LeaveFunction(5);
	return len;
}


/****************************************************************************
* http_line_unescape - convert escaped characters in buffer to ASCII
*
* This routine can be used to convert an "escaped" form of a URL or
* parameter (appended to a URL) to standard ASCII format.
* The escaping is done by the browser on the client side, for
* transferring characters not allowed by the HTTP protocol.
* For example, a whitespace character is not allowed in an URL.
* It must be substituted by an escape sequence to be transferred.
*
* ESCAPING
* When you want to include any character, not part of the standard
* set allowed in URLs, you can do this by specifying its hex value
* in the format %xx, where xx is the hex representation.
* In addition, every '+' character will be substituted by a space.
*
* The conversion is done in place on the first len bytes of string; the
* string does not have to be NUL-terminated.  "%00" is left untouched.
* Every decoded escape makes the text two bytes shorter.  Because the
* new length cannot be reported to the caller, the bytes freed at the
* end of the original range are set to NUL, so a caller that treats the
* range as a C string sees exactly the decoded text.
*
*/
static void http_line_unescape
	(
	char	*string,	/* escaped string to unescape */
	int	len	/* length of the string */
	)
{
	int	i = 0;
	int	orig_len = len;
	char	buffer[3];
	char	c;

	EnterFunction(5);

	assert(string != NULL);

	while (i < len) {
		if (string[i] == '+') {
			string[i] = ' ';	/* replace '+' by spaces */
		}
		if ((string[i] == '%') && (i < len-2)) {
			if (isxdigit (string[i+1]) && isxdigit (string[i+2])) {
				buffer[0] = string[i+1];
				buffer[1] = string[i+2];
				buffer[2] = 0;
				c = (char) strtol (buffer, NULL, 16);
				if (c != 0) {
					/*
					 * Pull everything behind the '%'
					 * two places forward, then put the
					 * decoded character where the '%'
					 * was.
					 */
					memmove (&(string[i]), &(string[i+2]),
						 len - i - 2);
					string[i] = c;
					len -= 2;
				}
			}
		}
		i++;
	}

	/* blank out the tail that no longer belongs to the text */
	if (len < orig_len) {
		memset (string + len, 0, orig_len - len);
	}

	LeaveFunction(5);
	return;
}


/******************************************************************************
* http_mime_parse - parse MIME line in a buffer
*
* This routine parses one header line and records what matters for
* relaying the body in *mime:
*   Transfer-Encoding (anything but "identity")  -> transfer_encoding = 1
*   Content-Length                               -> content_length
*   Connection: close                            -> connection_close = 1
*   Content-type: multipart/byteranges; boundary=...  (only if partial)
*                                                -> sep (kmalloc'ed copy of
*                                                   the boundary, without
*                                                   surrounding quotes)
* Header names are compared case-insensitively and must match in full.
*
* buffer[len] must be writable: it is replaced by NUL while parsing and
* restored before returning.
*
* Returns PARSE_OK, or PARSE_ERROR if the line has no ':', if the
* boundary parameter is missing, or if no memory is available.
*
* NOTE: Some MIME headers (host, Referer) need be considered again, tbd.
*
*/

/* does the header name (name_len bytes, not terminated) equal want? */
static inline int mime_name_is (const char *name, int name_len, const char *want)
{
	return (name_len == (int) strlen (want))
		&& (strnicmp (name, want, name_len) == 0);
}

static int http_mime_parse
	(
	char			*buffer,
	int			len,
	http_mime_header_t	*mime,
	int			partial
	)
{
	char	*line, *pos, *buf, c;
	char	quote = 0;
	int	l, ret = PARSE_ERROR;

	EnterFunction(5);

	assert(buffer != NULL);

	/* terminate string */
	c = buffer[len];
	buffer[len] = 0;

	/*
	 * Work on a separate pointer: buffer itself must stay untouched
	 * so that the terminator is restored at the right place below.
	 */
	line = skip_lws (buffer);
	if ((pos = strchr (line, ':')) == NULL) {
		goto exit;
	}

	l = pos - line;		/* length of the header name */

	TCP_VS_DBG(5, "MIME Header: %s\n", line);

	/*
	 * Messages MUST NOT include both a Content-Length header field and
	 * a non-identity transfer-coding. If the message does include a non-
	 * identity transfer-coding, the Content-Length MUST be ignored.
	 */
	if (mime_name_is (line, l, "Transfer-Encoding")) {
		pos = skip_lws (pos + 1);
		if (strnicmp (pos, "identity", strlen ("identity")) != 0) {
			mime->transfer_encoding = 1;
			TCP_VS_DBG(5, "Transfer-Encoding: chunked\n");
		}
	}
	else if (mime_name_is (line, l, "Content-Length")) {
		mime->content_length = strtol (pos + 1, NULL, 10);
		TCP_VS_DBG(5, "Content-Length: %d\n", mime->content_length);
	}
	else if (mime_name_is (line, l, "Connection")) {
		pos = skip_lws (pos + 1);
		if (strnicmp (pos, "close", strlen ("close")) == 0) {
			mime->connection_close = 1;
			TCP_VS_DBG(5, "Connection: close\n");
		}
	}
	else if (partial && mime_name_is (line, l, "Content-type")) {
		l = strlen ("multipart/byteranges");
		pos = skip_lws (pos + 1);
		if (strnicmp (pos, "multipart/byteranges", l)== 0){
			TCP_VS_DBG(5, "multipart/byteranges\n");

			/* step over the media type, then over "; " if present */
			pos = skip_lws (pos + l);
			if (*pos == ';') {
				pos = skip_lws (pos + 1);
			}

			l = strlen ("boundary=");
			if (strnicmp (pos, "boundary=", l) != 0) {
				goto exit;
			}
			buf = pos + l;

			/* RFC 2046 [40] permits the boundary string to be quoted */
			if (buf[0] == '"' || buf[0] == '\'') {
				quote = buf[0];
				buf++;
			}

			/* the rest of this line is THIS_STRING_SEPARATES */
			l = strlen (buf);
			if (quote && (l > 0) && (buf[l - 1] == quote)) {
				l--;	/* drop the closing quote */
			}

			if ((mime->sep = kmalloc (l + 1, GFP_KERNEL)) == NULL) {
				goto exit;
			}
			memcpy (mime->sep, buf, l);
			mime->sep[l] = 0;
			TCP_VS_DBG(5, "THIS_STRING_SEPARATES : %s\n", mime->sep);
		}
	}

	ret = PARSE_OK;
  exit:
	buffer[len] = c;	/* restore the string */
	LeaveFunction(5);
	return ret;
}

/****************************************************************************
*	relay_http_data - copy exactly len bytes from source to destination
*
*	Bytes still held in the read buffer are sent first; whatever is
*	missing is then received from ctl_blk->sock into the start of the
*	buffer and forwarded.  On success offset/remaining describe the
*	bytes of the last receive that were not part of the relayed data.
*
*	Returns 0 on success, -1 on a transmit or receive error.
*/
static int relay_http_data
	(
	struct socket* dsock,		/* destination socket */
	http_read_ctl_block_t *ctl_blk,	/* read control block with source socket */
	int len				/* relay data length */
	)
{
	int todo, got, sent = 0;
	int ret = -1;

	DECLARE_WAIT_QUEUE_HEAD(WQ);

	EnterFunction(5);

	assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset));
	assert(len > 0);

	todo = len - ctl_blk->remaining;

	/* the read buffer already holds everything that was asked for */
	if (todo <= 0) {
		if (tcp_vs_xmit (dsock, ctl_blk->buffer + ctl_blk->offset,
			len, MSG_MORE) < 0) {
			TCP_VS_ERR("Error in xmitting message body\n");
			goto exit;
		}
		ctl_blk->offset		+= len;
		ctl_blk->remaining	-= len;
		ret = 0;
		goto exit;
	}

	/* flush what is buffered, the rest has to come from the socket */
	if (ctl_blk->remaining > 0
	    && tcp_vs_xmit (dsock, ctl_blk->buffer + ctl_blk->offset,
			    ctl_blk->remaining, MSG_MORE) < 0) {
		TCP_VS_ERR("Error in xmitting remaining bytes\n");
		goto exit;
	}

	for (;;) {
		got = tcp_vs_recvbuffer (ctl_blk->sock, ctl_blk->buffer, ctl_blk->len, ctl_blk->flag);
		if (got == 0) {
			/* nothing there yet: wait a little and retry */
			interruptible_sleep_on_timeout(&WQ, HZ);
			TCP_VS_DBG (5, "Reads 0 bytes while relay\n");
			continue;
		}

		if (got < 0) {
			TCP_VS_ERR("Error in reading while relaying\n");
			goto exit;
		}

		sent = MIN(todo, got);

		if (tcp_vs_xmit (dsock, ctl_blk->buffer, sent, MSG_MORE) < 0) {
			TCP_VS_ERR("Error in relaying bytes\n");
			goto exit;
		}

		todo -= sent;
		if (todo <= 0)
			break;
	}

	/* bytes of the last receive beyond the relayed data stay buffered */
	ctl_blk->offset		= sent;
	ctl_blk->remaining	= got - sent;

	assert(ctl_blk->remaining >= 0);
	assert(ctl_blk->offset  < ctl_blk->len);

	ret = 0;
exit:
	LeaveFunction(5);
	return ret;
}

/****************************************************************************
*	search_sep - find the separator string in a memory area
*
*	s need not be NUL-terminated; only its first len bytes are looked
*	at.  Returns a pointer to the first occurrence of sep, s itself if
*	sep is empty, or NULL if sep does not occur.
*/
static char* search_sep (const char * s, int len, const char * sep)
{
	int sep_len = strlen(sep);
	int avail;

	if (sep_len == 0)
		return (char *) s;

	/* avail is the number of bytes left from the current position */
	for (avail = len; avail >= sep_len; avail--, s++) {
		if (memcmp(s, sep, sep_len) == 0)
			return (char *) s;
	}

	return NULL;
}


/****************************************************************************
* relay_multiparts: relay multipart/byteranges body
*
*  Relay all data until "CRLF--THIS_STRING_SEPARATES--CRLF" is found.
*  Everything received is forwarded as it arrives; the tail of each block
*  is kept at the start of the buffer so that a terminator split across
*  two receives is still recognised.
*
*  Returns 0 once the terminator has been seen, -1 on allocation,
*  receive or transmit failure.
*
*  Note: there may be an endless loop if the separator string is never
*  found, tbd.
*
*/
static int relay_multiparts
	(
	struct socket* dsock,
	http_read_ctl_block_t *ctl_blk,
	http_mime_header_t *mime
	)
{
	int have, term_len, keep, got;
	int ret = -1;
	char *data;
	char *term = NULL;

	DECLARE_WAIT_QUEUE_HEAD(WQ);

	EnterFunction(5);

	/* terminator = CRLF "--" boundary "--" CRLF, i.e. boundary + 8 bytes */
	term_len = strlen (mime->sep) + 8;
	term = kmalloc (term_len + 1, GFP_KERNEL);
	if (term == NULL) {
		goto exit;
	}

	snprintf (term, term_len + 1, "\r\n--%s--\r\n" , mime->sep);

	/* deal with the remaining bytes */
	data = ctl_blk->buffer + ctl_blk->offset;
	have = ctl_blk->remaining;

	if ((have > 0) && (tcp_vs_xmit (dsock, data, have, MSG_MORE) < 0)) {
		TCP_VS_ERR("Error in xmitting multiparts (remaining)\n");
		goto exit;
	}

	/* search for CRLF--THIS_STRING_SEPARATES--CRLF */
	while (search_sep (data, have, term) == NULL) {
		/* carry the tail over to the front of the buffer */
		keep = MIN(have, term_len);
		memmove (ctl_blk->buffer, data + have - keep, keep);

		got = tcp_vs_recvbuffer (ctl_blk->sock, ctl_blk->buffer + keep, ctl_blk->len - keep, 0);
		if (got == 0) {
			interruptible_sleep_on_timeout(&WQ, HZ);
			TCP_VS_DBG (5, "Reads 0 bytes while relaying multiparts\n");
		}

		if (got < 0) {
			TCP_VS_ERR("Error in receiving multiparts\n");
			goto exit;
		}

		if (tcp_vs_xmit (dsock, ctl_blk->buffer + keep, got, MSG_MORE) < 0) {
			TCP_VS_ERR("Error in xmitting multiparts\n");
			goto exit;
		}

		data = ctl_blk->buffer;
		have = keep + got;
	}

	ret = 0;
exit:
	if (term) {
		kfree (term);
	}

	LeaveFunction(5);
	return ret;
}

/****************************************************************************
* transfer http message body.
*
* When a message-body is included with a message, the transfer-length of that
* body is determined by one of the following (in order of precedence):
*  1. Any response message which "MUST NOT" include a message-body (such as
* the 1xx, 204, and 304 responses and any response to a HEAD request) is always
* terminated by the first empty line after the header fields, regardless of the
* entity-header fields present in the message.
*  2. If a Transfer-Encoding header field (section 14.41) is present and has
* any value other than "identity", then the transfer-length is defined by use of
* the "chunked" transfer-coding (section 3.6), unless the message is terminated
* by closing the connection.
*  3. If a Content-Length header field (section 14.13) is present, its decimal
* value in OCTETs represents both the entity-length and the transfer-length. The
* Content-Length header field MUST NOT be sent if these two lengths are different
* (i.e., if a Transfer-Encoding header field is present). If a message is received
* with both a Transfer-Encoding header field and a Content-Length header field,
* the latter MUST be ignored.
*  4. If the message uses the media type "multipart/byteranges", and the transfer-
* length is not otherwise specified, then this self-delimiting media type defines
* the transfer-length. This media type MUST NOT be used unless the sender knows
* that the recipient can parse it; the presence in a request of a Range header with
* multiple byte-range specifiers from a 1.1 client implies that the client can parse
* multipart/byteranges responses.
*      A range header might be forwarded by a 1.0 proxy that does not
	understand multipart/byteranges; in this case the server MUST
	delimit the message using methods defined in items 1,3 or 5 of this
	section.
*  5. By the server closing the connection. (Closing the connection cannot be
* used to indicate the end of a request body, since that would leave no possibility
* for the server to send back a response.)
*
*/
/*
 * relay_http_message_body - forward a message body to dsock
 *
 * The way the body is delimited is taken from *mime, in this order:
 * chunked transfer-coding, Content-Length, multipart/byteranges
 * boundary.  If none applies there is nothing to relay.
 *
 * Returns 0 on success and -1 on a read/transmit error, an invalid
 * chunk size or a negative Content-Length.
 */
static int relay_http_message_body
	(
	struct socket* dsock,		/* destination socket */
	http_read_ctl_block_t *ctl_blk,	/* read control block */
	http_mime_header_t *mime
	)
{
	int ret = -1;

	EnterFunction(5);

	if (mime->transfer_encoding) {
		/*
		 * 19.4.6 Introduction of Transfer-Encoding
		 *           HTTP/1.1 introduces the Transfer-Encoding header field (section
		 *           14.41). Proxies/gateways MUST remove any transfer-coding prior to
		 *        forwarding a message via a MIME-compliant protocol.
		 *           A process for decoding the "chunked" transfer-coding (section 3.6) can be
		 *        represented in pseudo-code as:
		 *               length := 0
		 *               read chunk-size, chunk-extension (if any) and CRLF
		 *               while (chunk-size > 0) {
		 *                      read chunk-data and CRLF
		 *                      append chunk-data to entity-body
		 *                      length := length + chunk-size
		 *                      read chunk-size and CRLF
		 *               }
		 *               read entity-header
		 *               while (entity-header not empty) {
		 *                      append entity-header to existing header fields
		 *                      read entity-header
		 *               }
		 *               Content-Length := length
		 *               Remove "chunked" from Transfer-Encoding
		 */

		/* largest chunk that still fits an int once CRLF is added */
		const long max_chunk = (long)(~0U >> 1) - 2;
		long chunk_size;
		int len;

		do {
			len = http_read_line (ctl_blk, HTTP_MAX_MIMLINE_SIZE);
			if (len < 0) {
				TCP_VS_ERR("Error in reading chunk size from client\n");
				goto exit;
			}

			if (tcp_vs_xmit (dsock, ctl_blk->info, len + 2, MSG_MORE) < 0) {
				TCP_VS_ERR("Error in xmitting chunk size & extension\n");
				goto exit;
			}

			/* the line is already sent, so its CR may be overwritten */
			ctl_blk->info[len] = 0;
			chunk_size = get_chunk_size (ctl_blk->info);

			TCP_VS_DBG(5, "Chunked line: %s\n", ctl_blk->info);

			/*
			 * get_chunk_size() reports overflow as a negative
			 * value; that is an error, not the last chunk.
			 */
			if ((chunk_size < 0) || (chunk_size > max_chunk)) {
				TCP_VS_ERR("Invalid chunk size\n");
				goto exit;
			}

			if (chunk_size > 0) {
				/* chunk data is followed by CRLF: 2 more bytes */
				if (relay_http_data (dsock, ctl_blk, (int) chunk_size + 2) < 0) {
					TCP_VS_ERR("Error in xmitting chunk data\n");
					goto exit;
				}
			}
		} while ( chunk_size > 0);

		/* relay the trailer, up to and including the empty line */
		do {
			len = http_read_line (ctl_blk, HTTP_MAX_MIMLINE_SIZE);
			if (len < 0) {
				TCP_VS_ERR("Error in reading trailer.\n");
				goto exit;
			}
			if (tcp_vs_xmit (dsock, ctl_blk->info, len + 2, MSG_MORE) < 0) {
				TCP_VS_ERR("Error in xmitting trailer\n");
				goto exit;
			}
		} while (len != 0);
		ret = 0;
	} else if (mime->content_length > 0) {
		ret = relay_http_data (dsock, ctl_blk, mime->content_length);
	} else if (mime->content_length < 0) {
		/* a negative length cannot be relayed */
		TCP_VS_ERR("Invalid Content-Length\n");
	} else if (mime->sep) {
		ret = relay_multiparts (dsock, ctl_blk, mime);
	} else {
		ret = 0;		/* ? */
	}

  exit:
	LeaveFunction(5);
	return ret;
}


/****************************************************************************
*	http_get_response - relay one response from a server to the client
*
*	csock:	client socket the response is forwarded to
*	dsock:	server socket the response is read from
*	req:	the request being answered (its method decides whether a
*		body is expected)
*	buffer, buflen:	work buffer used for reading from the server
*	close:	set to 1 if the response carried "Connection: close",
*		otherwise 0
*
*	The status line and the headers are forwarded line by line while
*	being parsed; the body, if any, is relayed afterwards.
*
*	Returns 0 on success, -1 on error.
*/
static int
http_get_response (struct socket *csock, struct socket* dsock,
	http_request_t *req, char* buffer, int buflen, int *close)
{
	http_read_ctl_block_t	read_ctl_blk;
	http_response_t		resp;
	int			len, ret = -1;

	EnterFunction(5);

	memset (&read_ctl_blk, 0, sizeof (read_ctl_blk));
	read_ctl_blk.buffer	= buffer;
	read_ctl_blk.len	= buflen;
	read_ctl_blk.sock	= dsock;

	/* cleared up front so that the exit path may look at resp.mime.sep */
	memset (&resp, 0, sizeof(resp));

	*close = 0;

	/* Do we have data ? */
	while (skb_queue_empty(&(dsock->sk->receive_queue))) {
		interruptible_sleep_on_timeout(&dsock->wait, HZ);
	}

	/* read status line from server */
	len = http_read_line (&read_ctl_blk, HTTP_MAX_STALINE_SIZE);
	if (len < 0) {
		TCP_VS_ERR("Error in reading status line from server\n");
		goto exit;
	}

	/* xmit status line to client (2 more bytes for CRLF) */
	if (tcp_vs_xmit (csock, read_ctl_blk.info, len + 2,
			 MSG_MORE) < 0) {
		TCP_VS_ERR("Error in sending status line\n");
		goto exit;
	}

	/* parse status line */
	if (parse_http_status_line (read_ctl_blk.info, len, &resp) == PARSE_ERROR) {
		goto exit;
	}

	/* parse MIME header */
	do {
		if ((len = http_read_line (&read_ctl_blk, HTTP_MAX_MIMLINE_SIZE)) < 0) {
			goto exit;
		}

		/* xmit MIME header (2 more bytes for CRLF) */
		if (tcp_vs_xmit (csock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) {
			TCP_VS_ERR("Error in sending MIME header\n");
			goto exit;
		}

		/* the line is already forwarded, so it may be modified now */
		http_line_unescape (read_ctl_blk.info, len);
		http_mime_parse (read_ctl_blk.info, len, &resp.mime, resp.status_code == 206);
	} while (len != 0); /* http header end with CRLF,CRLF */

	*close = resp.mime.connection_close;

	/*
	 * Any response message which "MUST NOT" include a message-body (such
	 * as the 1xx, 204, and 304 responses and any response to a HEAD
	 * request) is always terminated by the first empty line after the
	 * header fields, regardless of the entity-header fields present in
	 * the message.
	 */
	if ((req->method == HTTP_M_HEAD)
	    || (resp.status_code < 200)
	    || (resp.status_code == 204 || resp.status_code == 304)) {
		ret = 0;	/* complete: there is no body to relay */
		goto exit;
	}

	ret = relay_http_message_body (csock, &read_ctl_blk, &resp.mime);

  exit:
	/* the boundary copy made by http_mime_parse() is released on every path */
	if (resp.mime.sep) {
		kfree (resp.mime.sep);
	}
	LeaveFunction(5);
	return ret;
}


/****************************************************************************
*	Is there any data in socket?
*
*	The socket is only examined when the read buffer is empty
*	(ctl_blk->remaining == 0); a few bytes are then peeked without
*	being consumed.
*
* return:
*	-1,	Socket error
*	 0,	No data can read from socket
*	 1,	Data available
*
*	NOTE(review): 0 is also returned while the read buffer still holds
*	unconsumed bytes; whether the caller relies on that is not visible
*	from this function -- confirm.
*/
static inline int data_available (http_read_ctl_block_t *ctl_blk)
{
	char buf[10];
	int ret = 0;

	EnterFunction(6);

	if (ctl_blk->remaining == 0) {
		/* Do we have data ? */
		if (skb_queue_empty(&(ctl_blk->sock->sk->receive_queue)))
			goto out;

		ret = tcp_vs_recvbuffer(ctl_blk->sock, buf, sizeof(buf), MSG_PEEK);
		if (ret > 0)
			ret = 1;
		else if (ret < 0)
			ret = -1;	/* fold every error code into the documented -1 */
	}

  out:
	LeaveFunction(6);
	return ret;

}

/****************************************************************************
*	HTTP content-based scheduling:
*	1, For http 1.0 request, parse the http request, select a server
*	according to the request, and create a socket the server finally.
*	2, For http 1.1 request, do all the work by itself. Parse every http
*	message header and direct each message to the right server according
*	to the scheduling rule. For a worker thread to get response by order
*	from the server.
*	returns:
*		0,	success, schedule just chose a dest server
*		1,	success, schedule has done all the jobs
*		-1,	redirect to the local server
*		-2,	error
*/
static int tcp_vs_phttp_schedule(struct tcp_vs_conn *conn, struct tcp_vs_service *svc)
{
	http_request_t		req;
	http_read_ctl_block_t	read_ctl_blk;
	char*			buffer = NULL;	/* store data from server */
	int			ret = 1;	/* scheduler has done all the jobs */
	int			len;
	unsigned long		last_read;
	int			close_server = 0;
	http_dest_server_t	*server = NULL;
	struct list_head	dest_list;	/* destination server list */
	struct list_head	*l;
	struct tcp_vs_dest	*dest;
	struct socket		*dsock;

	DECLARE_WAIT_QUEUE_HEAD(WQ);

	EnterFunction(5);

	memset (&read_ctl_blk, 0, sizeof (read_ctl_blk));
	read_ctl_blk.buffer	= conn->buffer;
	read_ctl_blk.len	= conn->buflen;
	read_ctl_blk.flag	= MSG_PEEK;
	read_ctl_blk.sock	= conn->csock;

	conn->dest	= NULL;
	conn->dsock	= NULL;

	INIT_LIST_HEAD(&dest_list);

	if (read_ctl_blk.len < HTTP_MAX_REQLINE_SIZE) {
		TCP_VS_ERR("Error: buffer is too small!\n");
		goto exit;
	}

	/* Do we have data ? */
	while (skb_queue_empty(&(conn->csock->sk->receive_queue))) {
		interruptible_sleep_on_timeout(&conn->csock->wait, HZ);
	}

	/* allocate buffer to store data that get from servers */
	buffer = (char*) get_free_page(GFP_KERNEL);
	if (buffer == NULL) {
		goto exit;
	}

	last_read = jiffies;
	do {
		switch (data_available(&read_ctl_blk)) {
		case -1:
			TCP_VS_DBG(5, "Socket error before reading request line.\n");
			ret = -2;
			goto done;

		case 0:
			/* check if the service is stopped or system is
			   unloaded */
			if (svc->stop != 0 || sysctl_ktcpvs_unload != 0) {
				TCP_VS_DBG(5, "phttp scheduling exit (pid=%d)\n",
					   current->pid);
				goto done;
			}

			interruptible_sleep_on_timeout(&WQ, HZ);
			if ((jiffies - last_read) >  KEEP_ALIVE_TIMES*HZ) {
				TCP_VS_DBG(5, "Timeout, disconnect.\n");
				goto done;
			}
			continue;

		case 1:
			last_read = jiffies;
			break;
		}

		/* read request line from client socket */
		len = http_read_line(&read_ctl_blk, HTTP_MAX_REQLINE_SIZE);
		if (len < 0) {
			TCP_VS_ERR("Error reading request line from client\n");
			ret = -2;
			goto done;
		}

		/* parse the http request line */
		memset(&req, 0, sizeof (req));
		if (parse_http_request_line(read_ctl_blk.info, len, &req)
		    != PARSE_OK) {
			TCP_VS_ERR("Cannot parse http request\n");
			ret = -2;
			goto done;
		}

		/* select a server */
		dest = tcp_vs_phttp_matchrule(svc, &req);
		if (!dest) {
			TCP_VS_DBG(5, "Can't find a right server\n");
			if (read_ctl_blk.flag == MSG_PEEK) {
				ret = -1;
				goto exit;	/* redirect to a local port */
			} else {
				ret = -2;
				goto done;
			}
		}

		if (req.version <= HTTP_V_1_0) {
			TCP_VS_DBG(5, "HTTP: server %d.%d.%d.%d:%d "
				   "conns %d refcnt %d weight %d\n",
				   NIPQUAD(dest->addr), ntohs(dest->port),
				   atomic_read(&dest->conns),
				   atomic_read (&dest->refcnt), dest->weight);

			conn->dsock = tcp_vs_connect2dest(dest);
			if (conn->dsock == NULL) {
				TCP_VS_ERR("The destination is not available!\n");
				goto exit;
			}

			atomic_inc(&dest->conns);
			conn->dest = dest;
			ret = 0;
			goto exit;
		}

		/*
		 * For http 1.1 client, continue processing for
		 * persistent connections
		 */

		/* find the dest server from server list, delete it if it's a dead one */
		dsock = NULL;
		list_for_each (l, &dest_list) {
			server = list_entry (l, http_dest_server_t, d_list);
			if (server->dest == dest) {
				assert(server->sock != NULL);
				if (server->sock->sk->state != TCP_ESTABLISHED
				    && server->sock->sk->state != TCP_CLOSE_WAIT) {
					sock_release(server->sock);
					list_del(&server->d_list);
					atomic_dec(&server->dest->conns);
					kfree(server);
				} else {
					dsock = server->sock;
				}
				break;
			}
		}

		if (dsock == NULL) {
			/* open socket for a new server */
			dsock = tcp_vs_connect2dest(dest);
			if (dsock == NULL) {
				TCP_VS_ERR("The destination is not available\n");
				if (read_ctl_blk.flag == MSG_PEEK) {
					ret = -1;
					goto exit;	/* redirect to a local port */
				} else {
					ret = -2;
					goto done;
				}
			}

			if ((server = kmalloc(sizeof(http_dest_server_t),
					      GFP_ATOMIC)) == NULL) {
				ret = -2;
				goto done;
			}
			server->dest = dest;
			server->sock = dsock;
			atomic_inc(&dest->conns);
			list_add_tail(&server->d_list, &dest_list);
		}

		/* re-read the peeked data for the first http request of a connection */
		if (read_ctl_blk.flag == MSG_PEEK) {
			read_ctl_blk.flag		= 0;
			read_ctl_blk.offset		= 0;
			read_ctl_blk.remaining	= 0;
			if (tcp_vs_recvbuffer(conn->csock,
					      read_ctl_blk.buffer,
					      len + 2, 0) != (len + 2)) {
				TCP_VS_ERR("Error in re-reading http request line\n");
				goto exit;
			}
		}

		/* xmit request line (2 more bytes for CRLF) */
		if (tcp_vs_xmit(dsock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) {
			TCP_VS_ERR("Error in sending request line\n");
			goto done;
		}

		/* Process MIME header */
		do {
			len = http_read_line (&read_ctl_blk, HTTP_MAX_MIMLINE_SIZE);
			if (len < 0) {
				goto done;
			}

			/* xmit MIME header (2 more bytes for CRLF) */
			if (tcp_vs_xmit (dsock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) {
				TCP_VS_ERR("Error in sending MIME header\n");
				goto done;
			}

			http_line_unescape (read_ctl_blk.info, len);
			http_mime_parse (read_ctl_blk.info, len, &req.mime, 0);
		} while (len != 0); /* http header end with CRLF,CRLF */

		if (relay_http_message_body(dsock, &read_ctl_blk, &req.mime) != 0) {
			TCP_VS_ERR("Error in sending http message body\n");
			goto done;
		}

		if (http_get_response(conn->csock, dsock, &req, buffer,
				      PAGE_SIZE, &close_server) < 0) {
			goto done;
		}

		if (close_server) {
			sock_release(dsock);
			list_del (&server->d_list);
			atomic_dec (&server->dest->conns);
			kfree(server);
			TCP_VS_DBG(5, "Close server connection.\n");
			break;	/* close the connection? tbd */
		}
	} while (req.mime.connection_close != 1);

  done:
	for (l = &dest_list; l->next != l; ) {
		server = list_entry(l->next, http_dest_server_t, d_list);
		if (server->sock) {
			sock_release(server->sock);
		}
		list_del(&server->d_list);
		if (server->dest)
			atomic_dec(&server->dest->conns);
		kfree(server);
	}

  exit:
	if (buffer) {
		free_page ((unsigned long)buffer);
	}
	LeaveFunction(5);
	return ret;
}


/*
 * Scheduler descriptor of the "phttp" module. It is the object passed to
 * register_tcp_vs_scheduler() / unregister_tcp_vs_scheduler() by the module
 * init and cleanup functions.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct tcp_vs_scheduler (declared outside this file).
 */
static struct tcp_vs_scheduler tcp_vs_phttp_scheduler =
{
	{0},			/* n_list */
	"phttp",		/* name */
	THIS_MODULE,		/* this module */
	tcp_vs_phttp_init_svc,	/* initializer */
	tcp_vs_phttp_done_svc,	/* done */
	tcp_vs_phttp_update_svc,/* update */
	tcp_vs_phttp_schedule,	/* select a server by http request */
};


/*
 * Module entry point: prepare the list linkage of the phttp scheduler
 * descriptor and register the descriptor. The result of the registration
 * is returned unchanged.
 */
static int __init tcp_vs_phttp_init(void)
{
	int status;

	INIT_LIST_HEAD(&tcp_vs_phttp_scheduler.n_list);
	status = register_tcp_vs_scheduler(&tcp_vs_phttp_scheduler);

	return status;
}

/*
 * Module exit point: unregister the phttp scheduler descriptor that
 * tcp_vs_phttp_init() registered.
 */
static void __exit tcp_vs_phttp_cleanup(void)
{
	unregister_tcp_vs_scheduler(&tcp_vs_phttp_scheduler);
}

/* Hook the init/cleanup functions into module load/unload and declare the
   module license. */
module_init(tcp_vs_phttp_init);
module_exit(tcp_vs_phttp_cleanup);
MODULE_LICENSE("GPL");
