[RFC][PATCH] Inotify kernel API

Amy Griffis amy.griffis at hp.com
Tue Aug 23 20:37:38 UTC 2005


Attached is a patch (against Linus' git tree) that implements a basic
kernel API for inotify.  Here is a description of the patch:

The Inotify kernel API provides these functions:

    inotify_init - initialize an inotify instance
    inotify_add_watch - add a watch on an inode
    inotify_ignore - remove a watch on an inode
    inotify_free - destroy an inotify instance

The kernel API differs from the userspace API in the following ways:

- Instead of a file descriptor, inotify_init() returns a pointer to
  struct inotify_dev, which is an incomplete type to kernel consumers.

- The consumer provides a callback to inotify_init(), which is used
  for filesystem event notification instead of the kevents used for
  userspace.

- Watches are added on inodes rather than paths.

- inotify_add_watch() takes a callback argument, which is used to
  provide the consumer with a quick-access method back to its own data
  structure, rather than needing to hash the watch descriptor or walk
  a list.

- The path is given to the event callback as an additional argument
  rather than being appended to the inotify_event structure;
  inotify_event.len is unused.

- User-based limits on number of watches, etc. are ignored.

Here is a list of other things I've been working on that are not
included in this patch:

- Adding inode information to the event callback.

- Allowing for adding/removing inotify watches from an event callback.

I've also sketched out some data structures and written some prototype
audit code that makes use of this patch.

Please take a look and let me know what you think!

Regards,
Amy
-------------- next part --------------
diff -r 8ecff93e704a -r 58e1301e9661 fs/inotify.c
--- a/fs/inotify.c	Thu Aug 18 19:53:59 2005
+++ b/fs/inotify.c	Thu Aug 18 23:19:52 2005
@@ -83,14 +83,18 @@
 	wait_queue_head_t 	wq;		/* wait queue for i/o */
 	struct idr		idr;		/* idr mapping wd -> watch */
 	struct semaphore	sem;		/* protects this bad boy */
-	struct list_head 	events;		/* list of queued events */
 	struct list_head	watches;	/* list of watches */
 	atomic_t		count;		/* reference count */
+	u32			last_wd;	/* the last wd allocated */
+	/* userland consumer API */
+	struct list_head 	events;		/* list of queued events */
 	struct user_struct	*user;		/* user who opened this dev */
 	unsigned int		queue_size;	/* size of the queue (bytes) */
 	unsigned int		event_count;	/* number of pending events */
 	unsigned int		max_events;	/* maximum number of events */
-	u32			last_wd;	/* the last wd allocated */
+	/* kernel consumer API */
+	void (*callback)(struct inotify_event *, const char *,
+			 void *); 		/* event callback */
 };
 
 /*
@@ -122,6 +126,7 @@
 	struct inode		*inode;	/* associated inode */
 	s32 			wd;	/* watch descriptor */
 	u32			mask;	/* event mask for this watch */
+	void		*callback_arg;	/* callback argument - kernel API */
 };
 
 #ifdef CONFIG_SYSCTL
@@ -173,8 +178,10 @@
 static inline void put_inotify_dev(struct inotify_device *dev)
 {
 	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
+		if (dev->user) {
+			atomic_dec(&dev->user->inotify_devs);
+			free_uid(dev->user);
+		}
 		kfree(dev);
 	}
 }
@@ -341,6 +348,23 @@
 }
 
 /*
+ * inotify_callback_event - notify kernel consumers of events
+ */
+static void inotify_callback_event(struct inotify_device *dev, 
+				  struct inotify_watch *watch, 
+				  u32 mask, u32 cookie, const char *name)
+{
+	struct inotify_event event;
+
+	event.wd = watch->wd;
+	event.mask = mask;
+	event.cookie = cookie;
+	event.len = 0; /* kernel consumers don't need length */
+
+	dev->callback(&event, name, watch->callback_arg);
+}
+
+/*
  * inotify_dev_get_wd - returns the next WD for use by the given dev
  *
  * Callers must hold dev->sem.  This function can sleep.
@@ -383,12 +407,13 @@
  * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
  */
 static struct inotify_watch *create_watch(struct inotify_device *dev,
-					  u32 mask, struct inode *inode)
+					  u32 mask, struct inode *inode,
+					  void *callback_arg)
 {
 	struct inotify_watch *watch;
 	int ret;
 
-	if (atomic_read(&dev->user->inotify_watches) >=
+	if (dev->user && atomic_read(&dev->user->inotify_watches) >=
 			inotify_max_user_watches)
 		return ERR_PTR(-ENOSPC);
 
@@ -404,6 +429,7 @@
 
 	dev->last_wd = watch->wd;
 	watch->mask = mask;
+	watch->callback_arg = callback_arg;
 	atomic_set(&watch->count, 0);
 	INIT_LIST_HEAD(&watch->d_list);
 	INIT_LIST_HEAD(&watch->i_list);
@@ -421,7 +447,8 @@
 	/* bump our own count, corresponding to our entry in dev->watches */
 	get_inotify_watch(watch);
 
-	atomic_inc(&dev->user->inotify_watches);
+	if (dev->user)
+		atomic_inc(&dev->user->inotify_watches);
 
 	return watch;
 }
@@ -453,7 +480,8 @@
 	list_del(&watch->i_list);
 	list_del(&watch->d_list);
 
-	atomic_dec(&dev->user->inotify_watches);
+	if (dev->user)
+		atomic_dec(&dev->user->inotify_watches);
 	idr_remove(&dev->idr, watch->wd);
 	put_inotify_watch(watch);
 }
@@ -471,7 +499,10 @@
  */
 static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
 {
-	inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
+	if (dev->callback)
+		inotify_callback_event(dev, watch, IN_IGNORED, 0, NULL);
+	else
+		inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
 	remove_watch_no_event(watch, dev);
 }
 
@@ -484,7 +515,172 @@
 	return !list_empty(&inode->inotify_watches);
 }
 
-/* Kernel API */
+/* Kernel consumer API */
+
+/*
+ * inotify_init - allocates and initializes an inotify device.
+ */
+struct inotify_device * 
+inotify_init(void (*callback)(struct inotify_event *, const char *, void *))
+{
+	struct inotify_device *dev;
+
+	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+	if (unlikely(!dev))
+		return NULL;
+
+	idr_init(&dev->idr);
+	INIT_LIST_HEAD(&dev->events);
+	INIT_LIST_HEAD(&dev->watches);
+	init_waitqueue_head(&dev->wq);
+	sema_init(&dev->sem, 1);
+	dev->event_count = 0;
+	dev->queue_size = 0;
+	dev->max_events = inotify_max_queued_events;
+	dev->user = NULL;	/* set in sys_inotify_init */
+	dev->last_wd = 0;
+	dev->callback = callback;
+	atomic_set(&dev->count, 0);
+	get_inotify_dev(dev);
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(inotify_init);
+
+/*
+ * inotify_free - clean up and free an inotify device.
+ */
+int inotify_free(struct inotify_device *dev)
+{
+	/*
+	 * Destroy all of the watches on this device.  Unfortunately, not very
+	 * pretty.  We cannot do a simple iteration over the list, because we
+	 * do not know the inode until we iterate to the watch.  But we need to
+	 * hold inode->inotify_sem before dev->sem.  The following works.
+	 */
+	while (1) {
+		struct inotify_watch *watch;
+		struct list_head *watches;
+		struct inode *inode;
+
+		down(&dev->sem);
+		watches = &dev->watches;
+		if (list_empty(watches)) {
+			up(&dev->sem);
+			break;
+		}
+		watch = list_entry(watches->next, struct inotify_watch, d_list);
+		get_inotify_watch(watch);
+		up(&dev->sem);
+
+		inode = watch->inode;
+		down(&inode->inotify_sem);
+		down(&dev->sem);
+		remove_watch_no_event(watch, dev);
+		up(&dev->sem);
+		up(&inode->inotify_sem);
+		put_inotify_watch(watch);
+	}
+
+	/* destroy all of the events on this device */
+	down(&dev->sem);
+	while (!list_empty(&dev->events))
+		inotify_dev_event_dequeue(dev);
+	up(&dev->sem);
+
+	/* free this device: the put matching the get in inotify_init() */
+	put_inotify_dev(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inotify_free);
+
+/*
+ * inotify_add_watch - add a watch to this inotify instance.
+ */
+s32 inotify_add_watch(struct inotify_device *dev, struct inode *inode, 
+		      u32 mask, void *callback_arg)
+{
+	struct inotify_watch *watch, *old;
+	int ret;
+
+	down(&inode->inotify_sem);
+	down(&dev->sem);
+
+	/* don't let consumers set invalid bits: we don't want flags set */
+	mask &= IN_ALL_EVENTS;
+	if (unlikely(!mask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Handle the case of re-adding a watch on an (inode,dev) pair that we
+	 * are already watching.  We just update the mask and callback_arg and
+	 * return its wd.
+	 */
+	old = inode_find_dev(inode, dev);
+	if (unlikely(old)) {
+		old->mask = mask;
+		old->callback_arg = callback_arg;
+		ret = old->wd;
+		goto out;
+	}
+
+	watch = create_watch(dev, mask, inode, callback_arg);
+	if (unlikely(IS_ERR(watch))) {
+		ret = PTR_ERR(watch);
+		goto out;
+	}
+
+	/* Add the watch to the device's and the inode's list */
+	list_add(&watch->d_list, &dev->watches);
+	list_add(&watch->i_list, &inode->inotify_watches);
+	ret = watch->wd;
+out:
+	up(&dev->sem);
+	up(&inode->inotify_sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(inotify_add_watch);
+
+/*
+ * inotify_ignore - remove a given wd from this inotify instance.
+ *
+ * Can sleep.
+ */
+int inotify_ignore(struct inotify_device *dev, s32 wd)
+{
+	struct inotify_watch *watch;
+	struct inode *inode;
+
+	down(&dev->sem);
+	watch = idr_find(&dev->idr, wd);
+	if (unlikely(!watch)) {
+		up(&dev->sem);
+		return -EINVAL;
+	}
+	get_inotify_watch(watch);
+	inode = watch->inode;
+	up(&dev->sem);
+
+	down(&inode->inotify_sem);
+	down(&dev->sem);
+
+	/* make sure that we did not race */
+	watch = idr_find(&dev->idr, wd);
+	if (likely(watch))
+		remove_watch(watch, dev);
+
+	up(&dev->sem);
+	up(&inode->inotify_sem);
+	put_inotify_watch(watch);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inotify_ignore);
+
+/* Kernel producer API */
 
 /**
  * inotify_inode_queue_event - queue an event to all watches on this inode
@@ -508,7 +704,12 @@
 			struct inotify_device *dev = watch->dev;
 			get_inotify_watch(watch);
 			down(&dev->sem);
-			inotify_dev_queue_event(dev, watch, mask, cookie, name);
+			if (dev->callback)
+				inotify_callback_event(dev, watch, mask, 
+						       cookie, name);
+			else
+				inotify_dev_queue_event(dev, watch, mask,
+							cookie, name);
 			if (watch_mask & IN_ONESHOT)
 				remove_watch_no_event(watch, dev);
 			up(&dev->sem);
@@ -622,7 +823,12 @@
 		list_for_each_entry_safe(watch, next_w, watches, i_list) {
 			struct inotify_device *dev = watch->dev;
 			down(&dev->sem);
-			inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
+			if (dev->callback)
+				inotify_callback_event(dev, watch, IN_UNMOUNT, 
+						       0, NULL);
+			else
+				inotify_dev_queue_event(dev, watch, IN_UNMOUNT,
+							0, NULL);
 			remove_watch(watch, dev);
 			up(&dev->sem);
 		}
@@ -748,83 +954,7 @@
 
 static int inotify_release(struct inode *ignored, struct file *file)
 {
-	struct inotify_device *dev = file->private_data;
-
-	/*
-	 * Destroy all of the watches on this device.  Unfortunately, not very
-	 * pretty.  We cannot do a simple iteration over the list, because we
-	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_sem before dev->sem.  The following works.
-	 */
-	while (1) {
-		struct inotify_watch *watch;
-		struct list_head *watches;
-		struct inode *inode;
-
-		down(&dev->sem);
-		watches = &dev->watches;
-		if (list_empty(watches)) {
-			up(&dev->sem);
-			break;
-		}
-		watch = list_entry(watches->next, struct inotify_watch, d_list);
-		get_inotify_watch(watch);
-		up(&dev->sem);
-
-		inode = watch->inode;
-		down(&inode->inotify_sem);
-		down(&dev->sem);
-		remove_watch_no_event(watch, dev);
-		up(&dev->sem);
-		up(&inode->inotify_sem);
-		put_inotify_watch(watch);
-	}
-
-	/* destroy all of the events on this device */
-	down(&dev->sem);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	up(&dev->sem);
-
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
-
-	return 0;
-}
-
-/*
- * inotify_ignore - remove a given wd from this inotify instance.
- *
- * Can sleep.
- */
-static int inotify_ignore(struct inotify_device *dev, s32 wd)
-{
-	struct inotify_watch *watch;
-	struct inode *inode;
-
-	down(&dev->sem);
-	watch = idr_find(&dev->idr, wd);
-	if (unlikely(!watch)) {
-		up(&dev->sem);
-		return -EINVAL;
-	}
-	get_inotify_watch(watch);
-	inode = watch->inode;
-	up(&dev->sem);
-
-	down(&inode->inotify_sem);
-	down(&dev->sem);
-
-	/* make sure that we did not race */
-	watch = idr_find(&dev->idr, wd);
-	if (likely(watch))
-		remove_watch(watch, dev);
-
-	up(&dev->sem);
-	up(&inode->inotify_sem);
-	put_inotify_watch(watch);
-
-	return 0;
+	return inotify_free(file->private_data);
 }
 
 static long inotify_ioctl(struct file *file, unsigned int cmd,
@@ -878,11 +1008,14 @@
 		goto out_free_uid;
 	}
 
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+	dev = inotify_init(NULL);
 	if (unlikely(!dev)) {
 		ret = -ENOMEM;
 		goto out_free_uid;
 	}
+
+	dev->user = user;
+	atomic_inc(&user->inotify_devs);
 
 	filp->f_op = &inotify_fops;
 	filp->f_vfsmnt = mntget(inotify_mnt);
@@ -892,20 +1025,6 @@
 	filp->f_flags = O_RDONLY;
 	filp->private_data = dev;
 
-	idr_init(&dev->idr);
-	INIT_LIST_HEAD(&dev->events);
-	INIT_LIST_HEAD(&dev->watches);
-	init_waitqueue_head(&dev->wq);
-	sema_init(&dev->sem, 1);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	dev->last_wd = 0;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
-	atomic_inc(&user->inotify_devs);
 	fd_install(fd, filp);
 
 	return fd;
@@ -919,7 +1038,6 @@
 
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 {
-	struct inotify_watch *watch, *old;
 	struct inode *inode;
 	struct inotify_device *dev;
 	struct nameidata nd;
@@ -938,46 +1056,14 @@
 
 	ret = find_inode(path, &nd);
 	if (unlikely(ret))
-		goto fput_and_out;
+		goto out;
 
 	/* inode held in place by reference to nd; dev by fget on fd */
 	inode = nd.dentry->d_inode;
 	dev = filp->private_data;
 
-	down(&inode->inotify_sem);
-	down(&dev->sem);
-
-	/* don't let user-space set invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS;
-	if (unlikely(!mask)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Handle the case of re-adding a watch on an (inode,dev) pair that we
-	 * are already watching.  We just update the mask and return its wd.
-	 */
-	old = inode_find_dev(inode, dev);
-	if (unlikely(old)) {
-		old->mask = mask;
-		ret = old->wd;
-		goto out;
-	}
-
-	watch = create_watch(dev, mask, inode);
-	if (unlikely(IS_ERR(watch))) {
-		ret = PTR_ERR(watch);
-		goto out;
-	}
-
-	/* Add the watch to the device's and the inode's list */
-	list_add(&watch->d_list, &dev->watches);
-	list_add(&watch->i_list, &inode->inotify_watches);
-	ret = watch->wd;
+	ret = inotify_add_watch(dev, inode, mask, NULL);
 out:
-	up(&dev->sem);
-	up(&inode->inotify_sem);
 	path_release(&nd);
 fput_and_out:
 	fput_light(filp, fput_needed);
diff -r 8ecff93e704a -r 58e1301e9661 include/linux/inotify.h
--- a/include/linux/inotify.h	Thu Aug 18 19:53:59 2005
+++ b/include/linux/inotify.h	Thu Aug 18 23:19:52 2005
@@ -14,6 +14,9 @@
  *
  * When you are watching a directory, you will receive the filename for events
  * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd.
+ *
+ * When using inotify from the kernel, len will always be zero.  Instead you
+ * should check the path for non-NULL in your callback.
  */
 struct inotify_event {
 	__s32		wd;		/* watch descriptor */
@@ -68,6 +71,17 @@
 
 #ifdef CONFIG_INOTIFY
 
+/* Kernel consumer API */
+
+extern struct inotify_device *inotify_init(void (*)(struct inotify_event *, 
+						    const char *, void *));
+extern int inotify_free(struct inotify_device *);
+extern __s32 inotify_add_watch(struct inotify_device *, struct inode *, __u32,
+			       void *);
+extern int inotify_ignore(struct inotify_device *, __s32);
+
+/* Kernel producer API */
+
 extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
 				      const char *);
 extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
@@ -77,6 +91,8 @@
 extern u32 inotify_get_cookie(void);
 
 #else
+
+/* Kernel producer API stubs */
 
 static inline void inotify_inode_queue_event(struct inode *inode,
 					     __u32 mask, __u32 cookie,


More information about the Linux-audit mailing list