File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / lighttpd / src / stat_cache.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Nov 2 10:35:00 2016 UTC (7 years, 7 months ago) by misho
Branches: lighttpd, MAIN
CVS tags: v1_4_41p8, HEAD
lighttpd 1.4.41

#include "first.h"

#include "log.h"
#include "stat_cache.h"
#include "fdevent.h"
#include "etag.h"

#include <sys/types.h>
#include <sys/stat.h>

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <assert.h>

#ifdef HAVE_ATTR_ATTRIBUTES_H
# include <attr/attributes.h>
#endif

#ifdef HAVE_SYS_EXTATTR_H
# include <sys/extattr.h>
#endif

#ifdef HAVE_FAM_H
# include <fam.h>
#endif

#include "sys-mmap.h"

/* NetBSD 1.3.x needs it */
#ifndef MAP_FAILED
# define MAP_FAILED -1
#endif

#ifndef O_LARGEFILE
# define O_LARGEFILE 0
#endif

#ifndef HAVE_LSTAT
# define lstat stat
#endif

#if 0
/* enables debug code for testing if all nodes in the stat-cache as accessable */
#define DEBUG_STAT_CACHE
#endif

/*
 * stat-cache
 *
 * we cache the stat() calls in our own storage
 * the directories are cached in FAM
 *
 * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
 *
 * if the stat()-cache is queried we check if the version id for the directory is the
 * same and return immediatly.
 *
 *
 * What we need:
 *
 * - for each stat-cache entry we need a fast indirect lookup on the directory name
 * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
 *
 * stat <<-> directory <-> FAMRequest
 *
 * if file is deleted, directory is dirty, file is rechecked ...
 * if directory is deleted, directory mapping is removed
 *
 * */

#ifdef HAVE_FAM_H
typedef struct {
	FAMRequest *req;

	buffer *name;

	int version;
} fam_dir_entry;
#endif

/* the directory name is too long to always compare on it
 * - we need a hash
 * - the hash-key is used as sorting criteria for a tree
 * - a splay-tree is used as we can use the caching effect of it
 */

/* we want to cleanup the stat-cache every few seconds, let's say 10
 *
 * - remove entries which are outdated since 30s
 * - remove entries which are fresh but havn't been used since 60s
 * - if we don't have a stat-cache entry for a directory, release it from the monitor
 */

#ifdef DEBUG_STAT_CACHE
typedef struct {
	int *ptr;

	size_t used;
	size_t size;
} fake_keys;

static fake_keys ctrl;
#endif

stat_cache *stat_cache_init(void) {
	stat_cache *sc = NULL;

	sc = calloc(1, sizeof(*sc));
	force_assert(NULL != sc);

	sc->dir_name = buffer_init();
	sc->hash_key = buffer_init();

#ifdef HAVE_FAM_H
	sc->fam_fcce_ndx = -1;
#endif

#ifdef DEBUG_STAT_CACHE
	ctrl.size = 0;
#endif

	return sc;
}

static stat_cache_entry * stat_cache_entry_init(void) {
	stat_cache_entry *sce = NULL;

	sce = calloc(1, sizeof(*sce));
	force_assert(NULL != sce);

	sce->name = buffer_init();
	sce->etag = buffer_init();
	sce->content_type = buffer_init();

	return sce;
}

static void stat_cache_entry_free(void *data) {
	stat_cache_entry *sce = data;
	if (!sce) return;

	buffer_free(sce->etag);
	buffer_free(sce->name);
	buffer_free(sce->content_type);

	free(sce);
}

#ifdef HAVE_FAM_H
static fam_dir_entry * fam_dir_entry_init(void) {
	fam_dir_entry *fam_dir = NULL;

	fam_dir = calloc(1, sizeof(*fam_dir));
	force_assert(NULL != fam_dir);

	fam_dir->name = buffer_init();

	return fam_dir;
}

static void fam_dir_entry_free(FAMConnection *fc, void *data) {
	fam_dir_entry *fam_dir = data;

	if (!fam_dir) return;

	FAMCancelMonitor(fc, fam_dir->req);

	buffer_free(fam_dir->name);
	free(fam_dir->req);

	free(fam_dir);
}
#endif

void stat_cache_free(stat_cache *sc) {
	while (sc->files) {
		int osize;
		splay_tree *node = sc->files;

		osize = sc->files->size;

		stat_cache_entry_free(node->data);
		sc->files = splaytree_delete(sc->files, node->key);

		force_assert(osize - 1 == splaytree_size(sc->files));
	}

	buffer_free(sc->dir_name);
	buffer_free(sc->hash_key);

#ifdef HAVE_FAM_H
	while (sc->dirs) {
		int osize;
		splay_tree *node = sc->dirs;

		osize = sc->dirs->size;

		fam_dir_entry_free(&sc->fam, node->data);
		sc->dirs = splaytree_delete(sc->dirs, node->key);

		if (osize == 1) {
			force_assert(NULL == sc->dirs);
		} else {
			force_assert(osize == (sc->dirs->size + 1));
		}
	}

	if (-1 != sc->fam_fcce_ndx) {
		/* fd events already gone */
		sc->fam_fcce_ndx = -1;

		FAMClose(&sc->fam);
	}
#endif
	free(sc);
}

#if defined(HAVE_XATTR)
static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
	int attrlen;
	int ret;

	buffer_string_prepare_copy(buf, 1023);
	attrlen = buf->size - 1;
	if(0 == (ret = attr_get(name, xattrname, buf->ptr, &attrlen, 0))) {
		buffer_commit(buf, attrlen);
	}
	return ret;
}
#elif defined(HAVE_EXTATTR)
static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
	ssize_t attrlen;

	buffer_string_prepare_copy(buf, 1023);

	if (-1 != (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, xattrname, buf->ptr, buf->size - 1))) {
		buf->used = attrlen + 1;
		buf->ptr[attrlen] = '\0';
		return 0;
	}
	return -1;
}
#endif

/* the famous DJB hash function for strings */
static uint32_t hashme(buffer *str) {
	uint32_t hash = 5381;
	const char *s;
	for (s = str->ptr; *s; s++) {
		hash = ((hash << 5) + hash) + *s;
	}

	hash &= ~(((uint32_t)1) << 31); /* strip the highest bit */

	return hash;
}

#ifdef HAVE_FAM_H
handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent) {
	size_t i;
	stat_cache *sc = srv->stat_cache;
	size_t events;

	UNUSED(_fce);
	/* */

	if (revent & FDEVENT_IN) {
		events = FAMPending(&sc->fam);

		for (i = 0; i < events; i++) {
			FAMEvent fe;
			fam_dir_entry *fam_dir;
			splay_tree *node;
			int ndx, j;

			FAMNextEvent(&sc->fam, &fe);

			/* handle event */

			switch(fe.code) {
			case FAMChanged:
			case FAMDeleted:
			case FAMMoved:
				/* if the filename is a directory remove the entry */

				fam_dir = fe.userdata;
				fam_dir->version++;

				/* file/dir is still here */
				if (fe.code == FAMChanged) break;

				/* we have 2 versions, follow and no-follow-symlink */

				for (j = 0; j < 2; j++) {
					buffer_copy_string(sc->hash_key, fe.filename);
					buffer_append_int(sc->hash_key, j);

					ndx = hashme(sc->hash_key);

					sc->dirs = splaytree_splay(sc->dirs, ndx);
					node = sc->dirs;

					if (node && (node->key == ndx)) {
						int osize = splaytree_size(sc->dirs);

						fam_dir_entry_free(&sc->fam, node->data);
						sc->dirs = splaytree_delete(sc->dirs, ndx);

						force_assert(osize - 1 == splaytree_size(sc->dirs));
					}
				}
				break;
			default:
				break;
			}
		}
	}

	if (revent & FDEVENT_HUP) {
		/* fam closed the connection */
		fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(&sc->fam));
		fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(&sc->fam));

		FAMClose(&sc->fam);
	}

	return HANDLER_GO_ON;
}

static int buffer_copy_dirname(buffer *dst, buffer *file) {
	size_t i;

	if (buffer_string_is_empty(file)) return -1;

	for (i = buffer_string_length(file); i > 0; i--) {
		if (file->ptr[i] == '/') {
			buffer_copy_string_len(dst, file->ptr, i);
			return 0;
		}
	}

	return -1;
}
#endif

#ifdef HAVE_LSTAT
static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
	if (lstat(dname->ptr, lst) == 0) {
		return S_ISLNK(lst->st_mode) ? 0 : 1;
	}
	else {
		log_error_write(srv, __FILE__, __LINE__, "sbs",
				"lstat failed for:",
				dname, strerror(errno));
	};
	return -1;
}
#endif

/***
 *
 *
 *
 * returns:
 *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
 *  - HANDLER_ERROR on stat() failed -> see errno for problem
 */

handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
#ifdef HAVE_FAM_H
	fam_dir_entry *fam_dir = NULL;
	int dir_ndx = -1;
#endif
	stat_cache_entry *sce = NULL;
	stat_cache *sc;
	struct stat st;
	size_t k;
	int fd;
	struct stat lst;
#ifdef DEBUG_STAT_CACHE
	size_t i;
#endif

	int file_ndx;

	*ret_sce = NULL;

	/*
	 * check if the directory for this file has changed
	 */

	sc = srv->stat_cache;

	buffer_copy_buffer(sc->hash_key, name);
	buffer_append_int(sc->hash_key, con->conf.follow_symlink);

	file_ndx = hashme(sc->hash_key);
	sc->files = splaytree_splay(sc->files, file_ndx);

#ifdef DEBUG_STAT_CACHE
	for (i = 0; i < ctrl.used; i++) {
		if (ctrl.ptr[i] == file_ndx) break;
	}
#endif

	if (sc->files && (sc->files->key == file_ndx)) {
#ifdef DEBUG_STAT_CACHE
		/* it was in the cache */
		force_assert(i < ctrl.used);
#endif

		/* we have seen this file already and
		 * don't stat() it again in the same second */

		sce = sc->files->data;

		/* check if the name is the same, we might have a collision */

		if (buffer_is_equal(name, sce->name)) {
			if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
				if (sce->stat_ts == srv->cur_ts && con->conf.follow_symlink) {
					*ret_sce = sce;
					return HANDLER_GO_ON;
				}
			}
		} else {
			/* collision, forget about the entry */
			sce = NULL;
		}
	} else {
#ifdef DEBUG_STAT_CACHE
		if (i != ctrl.used) {
			log_error_write(srv, __FILE__, __LINE__, "xSB",
				file_ndx, "was already inserted but not found in cache, ", name);
		}
		force_assert(i == ctrl.used);
#endif
	}

#ifdef HAVE_FAM_H
	/* dir-check */
	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
		if (0 != buffer_copy_dirname(sc->dir_name, name)) {
			log_error_write(srv, __FILE__, __LINE__, "sb",
				"no '/' found in filename:", name);
			return HANDLER_ERROR;
		}

		buffer_copy_buffer(sc->hash_key, sc->dir_name);
		buffer_append_int(sc->hash_key, con->conf.follow_symlink);

		dir_ndx = hashme(sc->hash_key);

		sc->dirs = splaytree_splay(sc->dirs, dir_ndx);

		if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) {
			fam_dir = sc->dirs->data;

			/* check whether we got a collision */
			if (buffer_is_equal(sc->dir_name, fam_dir->name)) {
				/* test whether a found file cache entry is still ok */
				if ((NULL != sce) && (fam_dir->version == sce->dir_version)) {
					/* the stat()-cache entry is still ok */

					*ret_sce = sce;
					return HANDLER_GO_ON;
				}
			} else {
				/* hash collision, forget about the entry */
				fam_dir = NULL;
			}
		}
	}
#endif

	/*
	 * *lol*
	 * - open() + fstat() on a named-pipe results in a (intended) hang.
	 * - stat() if regular file + open() to see if we can read from it is better
	 *
	 * */
	if (-1 == stat(name->ptr, &st)) {
		return HANDLER_ERROR;
	}


	if (S_ISREG(st.st_mode)) {
		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
		if (name->ptr[buffer_string_length(name) - 1] == '/') {
			errno = ENOTDIR;
			return HANDLER_ERROR;
		}

		/* try to open the file to check if we can read it */
		if (-1 == (fd = open(name->ptr, O_RDONLY))) {
			return HANDLER_ERROR;
		}
		close(fd);
	}

	if (NULL == sce) {

		sce = stat_cache_entry_init();
		buffer_copy_buffer(sce->name, name);

		/* already splayed file_ndx */
		if ((NULL != sc->files) && (sc->files->key == file_ndx)) {
			/* hash collision: replace old entry */
			stat_cache_entry_free(sc->files->data);
			sc->files->data = sce;
		} else {
			int osize = splaytree_size(sc->files);

			sc->files = splaytree_insert(sc->files, file_ndx, sce);
			force_assert(osize + 1 == splaytree_size(sc->files));

#ifdef DEBUG_STAT_CACHE
			if (ctrl.size == 0) {
				ctrl.size = 16;
				ctrl.used = 0;
				ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
				force_assert(NULL != ctrl.ptr);
			} else if (ctrl.size == ctrl.used) {
				ctrl.size += 16;
				ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr));
				force_assert(NULL != ctrl.ptr);
			}

			ctrl.ptr[ctrl.used++] = file_ndx;
#endif
		}
		force_assert(sc->files);
		force_assert(sc->files->data == sce);
	}

	sce->st = st;
	sce->stat_ts = srv->cur_ts;

	/* catch the obvious symlinks
	 *
	 * this is not a secure check as we still have a race-condition between
	 * the stat() and the open. We can only solve this by
	 * 1. open() the file
	 * 2. fstat() the fd
	 *
	 * and keeping the file open for the rest of the time. But this can
	 * only be done at network level.
	 *
	 * per default it is not a symlink
	 * */
#ifdef HAVE_LSTAT
	sce->is_symlink = 0;

	/* we want to only check for symlinks if we should block symlinks.
	 */
	if (!con->conf.follow_symlink) {
		if (stat_cache_lstat(srv, name, &lst)  == 0) {
#ifdef DEBUG_STAT_CACHE
				log_error_write(srv, __FILE__, __LINE__, "sb",
						"found symlink", name);
#endif
				sce->is_symlink = 1;
		}

		/*
		 * we assume "/" can not be symlink, so
		 * skip the symlink stuff if our path is /
		 **/
		else if (buffer_string_length(name) > 1) {
			buffer *dname;
			char *s_cur;

			dname = buffer_init();
			buffer_copy_buffer(dname, name);

			while ((s_cur = strrchr(dname->ptr, '/'))) {
				buffer_string_set_length(dname, s_cur - dname->ptr);
				if (dname->ptr == s_cur) {
#ifdef DEBUG_STAT_CACHE
					log_error_write(srv, __FILE__, __LINE__, "s", "reached /");
#endif
					break;
				}
#ifdef DEBUG_STAT_CACHE
				log_error_write(srv, __FILE__, __LINE__, "sbs",
						"checking if", dname, "is a symlink");
#endif
				if (stat_cache_lstat(srv, dname, &lst)  == 0) {
					sce->is_symlink = 1;
#ifdef DEBUG_STAT_CACHE
					log_error_write(srv, __FILE__, __LINE__, "sb",
							"found symlink", dname);
#endif
					break;
				};
			};
			buffer_free(dname);
		};
	};
#endif

	if (S_ISREG(st.st_mode)) {
		/* determine mimetype */
		buffer_reset(sce->content_type);
#if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
		if (con->conf.use_xattr) {
			stat_cache_attr_get(sce->content_type, name->ptr, srv->srvconf.xattr_name->ptr);
		}
#endif
		/* xattr did not set a content-type. ask the config */
		if (buffer_string_is_empty(sce->content_type)) {
			size_t namelen = buffer_string_length(name);

			for (k = 0; k < con->conf.mimetypes->used; k++) {
				data_string *ds = (data_string *)con->conf.mimetypes->data[k];
				buffer *type = ds->key;
				size_t typelen = buffer_string_length(type);

				if (buffer_is_empty(type)) continue;

				/* check if the right side is the same */
				if (typelen > namelen) continue;

				if (0 == strncasecmp(name->ptr + namelen - typelen, type->ptr, typelen)) {
					buffer_copy_buffer(sce->content_type, ds->value);
					break;
				}
			}
		}
		etag_create(sce->etag, &(sce->st), con->etag_flags);
	} else if (S_ISDIR(st.st_mode)) {
		etag_create(sce->etag, &(sce->st), con->etag_flags);
	}

#ifdef HAVE_FAM_H
	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
		/* is this directory already registered ? */
		if (NULL == fam_dir) {
			fam_dir = fam_dir_entry_init();

			buffer_copy_buffer(fam_dir->name, sc->dir_name);

			fam_dir->version = 1;

			fam_dir->req = calloc(1, sizeof(FAMRequest));
			force_assert(NULL != fam_dir);

			if (0 != FAMMonitorDirectory(&sc->fam, fam_dir->name->ptr,
						     fam_dir->req, fam_dir)) {

				log_error_write(srv, __FILE__, __LINE__, "sbsbs",
						"monitoring dir failed:",
						fam_dir->name, 
						"file:", name,
						FamErrlist[FAMErrno]);

				fam_dir_entry_free(&sc->fam, fam_dir);
				fam_dir = NULL;
			} else {
				int osize = splaytree_size(sc->dirs);

				/* already splayed dir_ndx */
				if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) {
					/* hash collision: replace old entry */
					fam_dir_entry_free(&sc->fam, sc->dirs->data);
					sc->dirs->data = fam_dir;
				} else {
					sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir);
					force_assert(osize == (splaytree_size(sc->dirs) - 1));
				}

				force_assert(sc->dirs);
				force_assert(sc->dirs->data == fam_dir);
			}
		}

		/* bind the fam_fc to the stat() cache entry */

		if (fam_dir) {
			sce->dir_version = fam_dir->version;
		}
	}
#endif

	*ret_sce = sce;

	return HANDLER_GO_ON;
}

int stat_cache_open_rdonly_fstat (server *srv, connection *con, buffer *name, struct stat *st) {
	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
	 * not any intermediate symlinks along the path)*/
	#ifndef O_BINARY
	#define O_BINARY 0
	#endif
	#ifndef O_LARGEFILE
	#define O_LARGEFILE 0
	#endif
	#ifndef O_NOCTTY
	#define O_NOCTTY 0
	#endif
	#ifndef O_NONBLOCK
	#define O_NONBLOCK 0
	#endif
	#ifndef O_NOFOLLOW
	#define O_NOFOLLOW 0
	#endif
	const int oflags = O_BINARY | O_LARGEFILE | O_NOCTTY | O_NONBLOCK
			 | (con->conf.follow_symlink ? 0 : O_NOFOLLOW);
	const int fd = open(name->ptr, O_RDONLY | oflags);
	if (fd >= 0) {
		if (0 == fstat(fd, st)) {
			return fd;
		} else {
			close(fd);
		}
	}
	UNUSED(srv); /*(might log_error_write(srv, ...) in the future)*/
	return -1;
}

/**
 * remove stat() from cache which havn't been stat()ed for
 * more than 10 seconds
 *
 *
 * walk though the stat-cache, collect the ids which are too old
 * and remove them in a second loop
 */

static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) {
	stat_cache_entry *sce;

	if (!t) return 0;

	stat_cache_tag_old_entries(srv, t->left, keys, ndx);
	stat_cache_tag_old_entries(srv, t->right, keys, ndx);

	sce = t->data;

	if (srv->cur_ts - sce->stat_ts > 2) {
		keys[(*ndx)++] = t->key;
	}

	return 0;
}

int stat_cache_trigger_cleanup(server *srv) {
	stat_cache *sc;
	size_t max_ndx = 0, i;
	int *keys;

	sc = srv->stat_cache;

	if (!sc->files) return 0;

	keys = calloc(1, sizeof(int) * sc->files->size);
	force_assert(NULL != keys);

	stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx);

	for (i = 0; i < max_ndx; i++) {
		int ndx = keys[i];
		splay_tree *node;

		sc->files = splaytree_splay(sc->files, ndx);

		node = sc->files;

		if (node && (node->key == ndx)) {
#ifdef DEBUG_STAT_CACHE
			size_t j;
			int osize = splaytree_size(sc->files);
			stat_cache_entry *sce = node->data;
#endif
			stat_cache_entry_free(node->data);
			sc->files = splaytree_delete(sc->files, ndx);

#ifdef DEBUG_STAT_CACHE
			for (j = 0; j < ctrl.used; j++) {
				if (ctrl.ptr[j] == ndx) {
					ctrl.ptr[j] = ctrl.ptr[--ctrl.used];
					break;
				}
			}

			force_assert(osize - 1 == splaytree_size(sc->files));
#endif
		}
	}

	free(keys);

	return 0;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>