[dm-devel] RFC: dm-switch target [v4] - Example userland code

Jim Ramsay jim_ramsay at dell.com
Fri Aug 24 17:24:30 UTC 2012


On Fri, Aug 24, 2012 at 01:16:51PM -0400, Jim Ramsay wrote:
> I will be posting a sample userland application that demonstrates how to upload
> a page table via the netlink interface in a later message.

Here it is:

--- ptupload.c ---
/*
 * Copyright (c) 2010-2012 by Dell Inc.  All rights reserved.
 *
 * This file is released under the GPL.
 *
 * Description:
 *
 *     file:    ptupload.c
 *     authors: Kevin_OKelley at dell.com
 *              Jim_Ramsay at dell.com
 *              Vanshil_Shah at dell.com
 *
 * This file contains an example implementation for uploading a page table over
 * the netlink socket to the proposed "switch" target.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/select.h>
#include <sys/sysmacros.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>

#include "dm-switch.h"

/* Convenience macros for Netlink structure manipulation.
 * Every macro argument is parenthesized so that callers may pass arbitrary
 * expressions without operator-precedence surprises.
 */
#define GENLMSG_DATA(glh) ((void *)((char *)(glh) + GENL_HDRLEN))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
#define ALL_NL_HEADERS (NLMSG_HDRLEN + GENL_HDRLEN + NLA_HDRLEN)

/* Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Print the command-line help text, including a description of the expected
 * page table input format, to stdout.
 */
void usage(void)
{
	printf("Usage: ptupload <pagesize> <device> [file]\n\n"

  "Uploads the pagetable in the file given.\n\n"

  "Where:\n"
  "  pagesize - The size of each page, in sectors.\n"
  "  device   - The device to upload (path to device node, or 'major:minor')\n\n"

  "If no file is given, or the file is \"-\", expects the page table on STDIN\n\n"

  "Page table format:\n"
  "------------------\n"
  "The page table must be ASCII text, containing a list of page-to-path mappings.\n"
  "Each mapping is represented by a single hexadecimal digit, thus the maximum\n"
  "number of paths is 0xf (15).  Whitespace and non-hex characters are ignored.\n\n"

  "Assumes each path is used at least once in the map (or at least the highest-\n"
  "numbered path, since the total number of paths is inferred from the largest\n"
  "entry).\n\n");
}

/* Open a raw generic-netlink socket and bind it to a zeroed local netlink
 * address (no multicast groups).
 *
 * Returns the socket FD on success.  On failure a diagnostic is printed via
 * perror() and a negative number is returned; a socket that was opened but
 * could not be bound is closed before returning.
 */
int CreateNLSocket()
{
	struct sockaddr_nl addr;
	int sock;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (sock < 0) {
		perror("Unable to create netlink socket");
		return sock;
	}

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;
	addr.nl_groups = 0;

	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
		return sock;

	perror("Unable to bind netlink socket");
	close(sock);
	return -1;
}

/* Read the dynamically-assigned GENL family ID that the dm_switch module
 * publishes in sysfs.
 *
 * Returns the parsed integer, or 0 if the file cannot be opened or does not
 * begin with a decimal number (a diagnostic is printed in either case).
 */
int getFamilyId()
{
	int id = 0;
	int matched;
	FILE *fp = fopen("/sys/module/dm_switch/familyid", "r");

	if (!fp) {
		perror("Cannot open family id file");
		return 0;
	}

	matched = fscanf(fp, "%d", &id);
	fclose(fp);

	if (matched != 1) {
		printf("Could not read or parse family id file\n");
		return 0;
	}
	return id;
}

/* Compare the version reported by the loaded dm-switch module (read from
 * sysfs as "major.minor.release") against the SWITCH_VERSION_MAJ and
 * SWITCH_VERSION_MIN this program was compiled with.  The third component is
 * parsed but not compared.
 *
 * Returns 0 when major and minor both match, -1 otherwise (file missing,
 * unparsable, or mismatching); a diagnostic is printed on every failure.
 */
int checkVersion()
{
	int v_maj, v_min, v_rel;
	int fields;
	FILE *fp = fopen("/sys/module/dm_switch/version", "r");

	if (fp == NULL) {
		perror("Cannot open dm-switch version file");
		return -1;
	}

	fields = fscanf(fp, "%d.%d.%d", &v_maj, &v_min, &v_rel);
	fclose(fp);

	if (fields != 3) {
		printf("Invalid or unreadable version\n");
		return -1;
	}

	if (v_maj == SWITCH_VERSION_MAJ && v_min == SWITCH_VERSION_MIN)
		return 0;

	printf("Version mismatch: Expecting %d.%d.*, read %d.%d.%d instead\n",
		SWITCH_VERSION_MAJ, SWITCH_VERSION_MIN,
		v_maj, v_min, v_rel);
	return -1;
}


/* Assembles the Netlink and Generic Netlink headers around one payload and
 * sends the resulting message to the kernel.
 *
 *   socket      - netlink socket FD to send on
 *   familyid    - generic netlink family id, used as the message type
 *   payload     - data copied into the single netlink attribute
 *   payloadSize - number of bytes to copy from 'payload'
 *
 * Returns >= 0 (the sendto() result) on success, <0 on failure with errno set
 * appropriately.  A payload that would not fit in the static send buffer, or
 * whose attribute length would not fit the 16-bit nla_len field, is rejected
 * with EMSGSIZE instead of overrunning the buffer.
 */
int SendPayload(int socket, int familyid, struct IpcPgTable *payload, size_t payloadSize)
{
	static char IpcSendBuffer[MAX_IPC_MSG_LEN];
	static int seq = 0;

	struct nlmsghdr *n;
	struct genlmsghdr *g;
	struct nlattr *na;
	struct sockaddr_nl nladdr;
	size_t attr_len, msg_len;
	int r;

	/* Check the 16-bit attribute limit first so the additions below
	 * cannot wrap. */
	if (payloadSize > (size_t)UINT16_MAX - NLA_HDRLEN) {
		printf("Payload of %lu bytes is too large for a netlink attribute\n",
			(unsigned long)payloadSize);
		errno = EMSGSIZE;
		return -1;
	}
	attr_len = payloadSize + NLA_HDRLEN;
	msg_len = NLMSG_LENGTH(GENL_HDRLEN) + NLMSG_ALIGN(attr_len);
	if (msg_len > sizeof(IpcSendBuffer)) {
		printf("Message of %lu bytes does not fit the %lu-byte send buffer\n",
			(unsigned long)msg_len,
			(unsigned long)sizeof(IpcSendBuffer));
		errno = EMSGSIZE;
		return -1;
	}

	/* The buffer is reused between calls: clear the part about to be sent
	 * so reserved header fields and alignment padding are always zero. */
	memset(IpcSendBuffer, 0, msg_len);

	/* Main Netlink message header */
	n = (struct nlmsghdr*)IpcSendBuffer;
	n->nlmsg_len = msg_len;
	n->nlmsg_type = familyid;
	n->nlmsg_flags = NLM_F_REQUEST;
	n->nlmsg_seq = seq++;
	n->nlmsg_pid = getpid();

	/* Generic netlink header */
	g = (struct genlmsghdr*)NLMSG_DATA(n);
	g->cmd = NETLINK_CMD_GET_PAGE_TBL;
	g->version = 0;

	/* nlattr message header */
	na = (struct nlattr*)GENLMSG_DATA(g);
	na->nla_len = attr_len;
	na->nla_type = 1;

	/* Copy the actual payload into nlattr data region */
	memcpy(NLA_DATA(na), payload, payloadSize);

	/* A zeroed sockaddr_nl with only the family set is the destination */
	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	r = sendto(socket, IpcSendBuffer, n->nlmsg_len, 0,
		(struct sockaddr*)&nladdr, sizeof(nladdr));
	if (r < 0) {
		perror("Failed to send message to kernel");
	}

	return r;
}

/*
 * Returns the dm-switch IpcResponse (which is a pointer into a static buffer)
 * or 'NULL' on receive failure.
 */
struct IpcResponse *RecvMsg(int socket)
{
	static char IpcRecvBuffer[MAX_IPC_MSG_LEN];
	struct nlmsghdr *n = (struct nlmsghdr*)IpcRecvBuffer;
	struct genlmsghdr *g = (struct genlmsghdr*)NLMSG_DATA(n);
	struct nlattr *na = (struct nlattr*)GENLMSG_DATA(g);
	struct IpcResponse *resp = (struct IpcResponse*)NLA_DATA(na);

	fd_set readfds;
	struct timeval timeout;
	int rsp;

	/* Wait up to 1s for a response */
	FD_ZERO(&readfds);
	FD_SET(socket, &readfds);
	memset(&timeout, 0, sizeof(timeout));
	timeout.tv_sec = 1;
	rsp = select(socket + 1, &readfds, NULL, NULL, &timeout );

	if (rsp < 0) {
		perror("Error calling select() on netlink socket");
		return NULL;
	}
	else if (rsp == 0) {
		printf("Timeout waiting for response\n");
		errno = ETIMEDOUT;
		return NULL;
	}

	/* Issue a non-blocking read */
	rsp = recv(socket, IpcRecvBuffer, MAX_IPC_MSG_LEN, MSG_DONTWAIT);
	if (rsp < 0) {
		perror("Error from recv()");
		return NULL;
	}

	/* Validate response message */
	if (n->nlmsg_type == NLMSG_ERROR) {
	  printf("Error from netlink socket\n");
	  errno = EIO;
	  return NULL;
	}
	if (!NLMSG_OK((n), (unsigned int)rsp)) {
		printf("Invalid reply message from netlink socket\n");
		errno = EINVAL;
		return NULL;
	}

	return resp;
}

/* Given a page size, major and minor device node information, number of total
 * devices, and array of page table entries, constructs the appropriate netlink
 * message and sends the bit-packed page table (in pieces if necessary) to the
 * kernel driver.
 *
 *   pagesize  - accepted for interface compatibility; not used here
 *   maj, min  - device numbers placed in every message
 *   devcount  - number of paths (1..16); selects the bits used per entry
 *   table     - one path index per byte
 *   total_pte - number of entries in 'table'
 *
 * Each chunk is sent and then acknowledged by the kernel before the next one
 * is built.
 *
 * Returns 0 on success, -1 on failure with errno set appropriately.  The
 * payload buffer and the socket are released on every exit path.
 */
int upload(uint32_t pagesize, uint32_t maj, uint32_t min, uint16_t devcount,
	const uint8_t *table, size_t total_pte)
{
	int socket, familyid, r = 0;
	/* bits[n - 1] is the number of bits needed to encode n paths */
	static const uint32_t bits[] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 };
	struct IpcPgTable *payload = NULL;
	struct IpcResponse *response;
	const size_t header = sizeof(*payload) - sizeof(payload->ptbl_buff[0]);
	const size_t max_payload = MAX_IPC_MSG_LEN - ALL_NL_HEADERS;
	size_t remaining = total_pte;
	uint32_t pte_bits, pte_fields, pte_max;
	const uint8_t *src;
	uint8_t pte_mask;
	int saved_errno;

	(void)pagesize;

	/* devcount indexes bits[]; anything outside 1..16 would read out of
	 * bounds */
	if (devcount == 0 || devcount > sizeof(bits) / sizeof(bits[0])) {
		printf("Invalid path count %lu\n", (unsigned long)devcount);
		errno = EINVAL;
		r = -1;
		goto out_error;
	}

	/* Ensure the loaded switch module is one we can talk to */
	if (checkVersion() < 0) {
		errno = EINVAL;
		r = -1;
		goto out_error;
	}
	familyid = getFamilyId();
	if (familyid <= 0) {
		errno = EINVAL;
		r = -1;
		goto out_error;
	}

	socket = CreateNLSocket();
	if (socket < 0) {
		r = -1;
		goto out_error;
	}

	pte_bits = bits[devcount - 1];
	if (pte_bits == 0) {
		/* A single path needs zero bits, but zero would make the
		 * division below trap; use one bit per entry instead.
		 * NOTE(review): assumes the kernel side accepts pte_bits == 1
		 * with dev_count == 1 - confirm. */
		pte_bits = 1;
	}
	pte_fields = 32 / pte_bits;
	pte_max = CHUNK_PTE_COUNT;
	pte_mask = (1 << pte_bits) - 1;

	/* Zeroed so that any field not explicitly set below is sent as 0 */
	payload = calloc(1, max_payload);
	if (payload == NULL) {
		printf("Could not allocate %lu bytes for payload\n",
			(unsigned long)max_payload);
		r = -1;
		errno = ENOMEM;
		goto out_close;
	}
	payload->opcode = OPCODE_PAGE_TABLE_UPLOAD;
	payload->dev_major = maj;
	payload->dev_minor = min;
	payload->page_total = total_pte;
	payload->dev_count = devcount;
	payload->pte_bits = pte_bits;

	printf("Uploading %zu entries, packed %lu-per-word (%lu paths @ %lu bits)\n",
	       total_pte, (unsigned long)pte_fields, (unsigned long)devcount,
	       (unsigned long)pte_bits);

	src = table;
	while (remaining > 0) {
		size_t i;
		uint32_t pages = MIN(remaining, pte_max);
		uint32_t words = (pages + pte_fields - 1) / pte_fields;
		uint32_t bytes = words * sizeof(uint32_t);
		uint32_t msg_offset = total_pte - remaining;

		/* Never write past the buffer allocated above */
		if (header + bytes > max_payload) {
			printf("Chunk of %lu bytes exceeds maximum payload of %lu bytes\n",
				(unsigned long)(header + bytes),
				(unsigned long)max_payload);
			errno = EMSGSIZE;
			r = -1;
			goto out_close;
		}

		printf("  Sending %lu/%lu pages, starting at offset %lu\n",
			(unsigned long)pages, (unsigned long)total_pte,
			(unsigned long)msg_offset);

		payload->total_len = header + bytes;
		payload->userland[0] = time(NULL);
		payload->userland[1] = msg_offset + pages;
		payload->page_offset = msg_offset;
		payload->page_count = pages;

		/* Bit-packing:
		 * Fields are packed with the least significant fields in the
		 * low-order bytes so the kernel can use a division remainder
		 * to find the byte offset, then just shift to line up the
		 * proper value.
		 *
		 * NOTE(review): toPack is limited by the entries remaining in
		 * the whole table, not by this chunk's 'pages', so when
		 * CHUNK_PTE_COUNT is not a multiple of pte_fields the last
		 * word of a chunk is filled with entries beyond 'pages'.
		 * Presumably this keeps chunk offsets word-aligned - confirm
		 * against the kernel side before changing it.
		 */
		for (i = 0; i < words; ++i) {
			size_t j;
			size_t toPack = MIN(remaining, pte_fields);
			uint32_t word = 0;
			for (j = 0; j < toPack; ++j) {
				/* Shift as unsigned: the top field can reach
				 * bit 31 */
				word |= (uint32_t)(*(src++) & pte_mask) << (pte_bits * j);
				remaining--;
			}
			payload->ptbl_buff[i] = word;
		}

		/* Send IPC */
		r = SendPayload(socket, familyid, payload, payload->total_len);
		if (r < 0) {
			perror("Send failed");
			r = -1;
			goto out_close;
		}

		/* Wait for response */
		response = RecvMsg(socket);
		if (!response) {
			r = -1;
			perror("No response");
			goto out_close;
		}
		if (response->status != 0) {
			printf("Error from kernel module: %s (%d)\n",
				response->err_str, response->status);
			errno = EIO;
			r = -1;
			goto out_close;
		}
		printf("  Send successful.\n");
	}

	/* SendPayload() leaves a byte count in r; the contract is 0 */
	r = 0;

out_close:
	/* Cleanup must not disturb the errno reported to the caller */
	saved_errno = errno;
	free(payload);		/* free(NULL) is a no-op */
	close(socket);
	errno = saved_errno;
out_error:
	return r;
}

/* Entry point: parse <pagesize> <device> [file], read the hexadecimal page
 * table from the file (or stdin), and hand it to upload().
 *
 * Exit status: 0 on success, 1 on usage error, 2 on invalid input or an
 * unreadable device/file, 3 on allocation failure, 4 when the upload fails.
 */
int main(int argc, char *argv[])
{
	char *sizestring, *device, *filename;
	FILE *input;
	unsigned long long parsedsize;
	unsigned int scan_maj, scan_min;
	uint32_t pagesize, maj, min;
	uint16_t devcount=0;
	uint8_t *pagetable;
	size_t allocsize, count;
	int inputchar;	/* int, not char, so EOF is distinguishable from data */
	int rc;

	/* Only touch argv[] once argc says the entries exist */
	if (argc < 3) {
		usage();
		return 1;
	}
	sizestring = argv[1];
	device = argv[2];
	filename = (argc > 3) ? argv[3] : NULL;

	/* Reject 0 and anything that would be truncated in a uint32_t */
	parsedsize = strtoull(sizestring, NULL, 0);
	if (parsedsize == 0 || parsedsize > UINT32_MAX) {
		fprintf(stderr, "%s: Invalid page size\n", sizestring);
		return 2;
	}
	pagesize = (uint32_t)parsedsize;

	/* Accept either "major:minor" or a path to a block device node */
	if (sscanf(device, "%u:%u", &scan_maj, &scan_min) == 2) {
		maj = scan_maj;
		min = scan_min;
	} else {
		struct stat s;
		if (stat(device, &s) == -1) {
			perror(device);
			return 2;
		}
		if (!S_ISBLK(s.st_mode)) {
			fprintf(stderr, "%s: Not a block device\n", device);
			return 2;
		}
		maj = major(s.st_rdev);
		min = minor(s.st_rdev);
	}

	if (filename == NULL || strcmp(filename,"-") == 0) {
		input = stdin;
		filename = NULL;
	} else {
		input = fopen(filename, "r");
		if (input == NULL) {
			perror(filename);
			return 2;
		}
	}

	printf("Page size: %lu (0x%lx) sectors\n",
	       (unsigned long)pagesize, (unsigned long)pagesize);
	printf("Device is: %lu:%lu\n", (unsigned long)maj, (unsigned long)min);
	printf("Reading page table from: %s\n",
		(input == stdin) ? "STDIN" : filename);

	allocsize = CHUNK_PTE_COUNT;

	pagetable = malloc(allocsize);
	if (pagetable == NULL) {
		fprintf(stderr, "Could not allocate %zu bytes for pagetable\n",
			allocsize);
		if (input != stdin)
			fclose(input);
		return 3;
	}

	/* One table entry per hex digit; every other character is skipped.
	 * The path count is inferred from the largest digit seen. */
	count = 0;
	while ((inputchar = fgetc(input)) != EOF) {
		if (inputchar >= '0' && inputchar <= '9') {
			pagetable[count] = (uint8_t)(inputchar - '0');
		} else if (inputchar >= 'a' && inputchar <= 'f') {
			pagetable[count] = (uint8_t)(inputchar - 'a' + 0xa);
		} else if (inputchar >= 'A' && inputchar <= 'F') {
			pagetable[count] = (uint8_t)(inputchar - 'A' + 0xa);
		} else {
			continue;
		}
		if (pagetable[count] >= devcount) {
			devcount = pagetable[count] + 1;
		}

		count++;

		if (count == allocsize) {
			/* Grow via a temporary so the old buffer survives a
			 * failed realloc() */
			uint8_t *grown = realloc(pagetable, allocsize * 2);
			if (grown == NULL) {
				fprintf(stderr, "Could not grow memory for pagetable to %zu bytes\n",
					allocsize * 2);
				/* Best effort: continue with what was read */
				break;
			}
			pagetable = grown;
			allocsize *= 2;
			printf("  (Reallocated up to %zu bytes)\n", allocsize);
		}
	}

	/* fgetc() returns EOF for read errors too; do not upload a table that
	 * was cut short by one */
	if (ferror(input)) {
		perror((input == stdin) ? "STDIN" : filename);
		if (input != stdin)
			fclose(input);
		free(pagetable);
		return 2;
	}
	if (input != stdin)
		fclose(input);

	if (count == 0) {
		printf("No data to send\n");
		free(pagetable);
		return 2;
	}

	rc = upload(pagesize, maj, min, devcount, pagetable, count);
	free(pagetable);

	/* Only a negative result is treated as failure, so the exit status is
	 * right whether upload() reports success as 0 or as a positive value */
	return (rc < 0) ? 4 : 0;
}
------------------

-- 
Jim Ramsay




More information about the dm-devel mailing list