From: Aaron Young Date: Tue, 18 Aug 2015 19:10:23 +0000 (-0700) Subject: SPARC64: PORT LDOMS TO UEK4 X-Git-Tag: v4.1.12-92~283^2^2~2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=6c87154b63230bc5e35c5df133e7ecfadf47b828;p=users%2Fjedix%2Flinux-maple.git SPARC64: PORT LDOMS TO UEK4 Initial port of LDoms code to UEK4. NOTE: due to UEK4 kernel issue(s) encountered during testing, this port has NOT been fully tested. Signed-off-by: Aaron Young Orabug: 21644721 (cherry picked from commit 6dfe4cf1cc02dbea298480804d030850bfef1ab3) Conflicts: arch/sparc/kernel/ds.c drivers/tty/Kconfig drivers/tty/Makefile (cherry picked from commit c398fd2a3c18f6385eb4db80305ab693027a58d5) Conflicts: drivers/tty/Kconfig drivers/tty/Makefile Signed-off-by: Allen Pais --- diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index fb23fd6b186a..d8b26c882bdb 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -103,3 +103,7 @@ CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_LIBCRC32C=m +CONFIG_VCC=m +CONFIG_VLDC=m +CONFIG_VLDS=m +CONFIG_VDS=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 6b68f12f29db..98bb934751de 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -241,3 +241,7 @@ CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC16=m CONFIG_LIBCRC32C=m +CONFIG_VCC=m +CONFIG_VLDC=m +CONFIG_VLDS=m +CONFIG_VDS=m diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h index 6e9004aa6f25..37b61235fe8a 100644 --- a/arch/sparc/include/asm/ldc.h +++ b/arch/sparc/include/asm/ldc.h @@ -24,6 +24,9 @@ struct ldc_channel_config { u32 mtu; unsigned int rx_irq; unsigned int tx_irq; + u64 rx_ino; + u64 tx_ino; + u64 dev_handle; u8 mode; #define LDC_MODE_RAW 0x00 #define LDC_MODE_UNRELIABLE 0x01 @@ -48,6 +51,8 @@ struct ldc_channel_config { #define LDC_STATE_READY 0x03 #define LDC_STATE_CONNECTED 0x04 +#define LDC_PACKET_SIZE 64 + struct ldc_channel; /* Allocate state for a channel. */ @@ -72,6 +77,11 @@ int ldc_connect(struct ldc_channel *lp); int ldc_disconnect(struct ldc_channel *lp); int ldc_state(struct ldc_channel *lp); +void ldc_set_state(struct ldc_channel *lp, u8 state); +int ldc_mode(struct ldc_channel *lp); +void ldc_print(struct ldc_channel *lp); +int ldc_rx_reset(struct ldc_channel *lp); +void ldc_clr_reset(struct ldc_channel *lp); /* Read and write operations. Only valid when the link is up. 
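 *
 * A minimal retry loop around ldc_write(), for illustration only (this is
 * what the ds driver later in this patch does in ds_ldc_send_msg(); the
 * bound of 1000 tries is that driver's choice, not part of this API):
 *
 *	int rv = -EINVAL;
 *	int limit = 1000;
 *
 *	while (limit-- > 0) {
 *		rv = ldc_write(lp, buf, len);
 *		if (rv != -EAGAIN)
 *			break;
 *		udelay(1);
 *	}
 *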
*/ int ldc_write(struct ldc_channel *lp, const void *buf, @@ -137,4 +147,12 @@ void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len, struct ldc_trans_cookie *cookies, int ncookies); +int ldc_tx_space_available(struct ldc_channel *lp, unsigned long size); + +int ldc_rx_data_available(struct ldc_channel *lp); + +void ldc_enable_hv_intr(struct ldc_channel *lp); + +void ldc_disable_hv_intr(struct ldc_channel *lp); + #endif /* _SPARC64_LDC_H */ diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h index aebeb88f70db..d4821f062fda 100644 --- a/arch/sparc/include/asm/mdesc.h +++ b/arch/sparc/include/asm/mdesc.h @@ -16,6 +16,7 @@ struct mdesc_handle *mdesc_grab(void); void mdesc_release(struct mdesc_handle *); #define MDESC_NODE_NULL (~(u64)0) +#define MDESC_MAX_STR_LEN 256 u64 mdesc_node_by_name(struct mdesc_handle *handle, u64 from_node, const char *name); @@ -62,8 +63,11 @@ u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); void mdesc_update(void); struct mdesc_notifier_client { - void (*add)(struct mdesc_handle *handle, u64 node); - void (*remove)(struct mdesc_handle *handle, u64 node); + void (*add)(struct mdesc_handle *handle, u64 node, + const char *node_name); + void (*remove)(struct mdesc_handle *handle, u64 node, + const char *node_name); + const char *node_name; struct mdesc_notifier_client *next; @@ -71,6 +75,22 @@ struct mdesc_notifier_client { void mdesc_register_notifier(struct mdesc_notifier_client *client); +union md_node_info { + struct vdev_port { + char name[MDESC_MAX_STR_LEN]; /* name (property) */ + u64 id; /* id */ + u64 parent_cfg_hdl; /* parent config handle */ + } vdev_port; + struct ds_port { + u64 id; /* id */ + } ds_port; +}; +u64 mdesc_get_node(struct mdesc_handle *hp, char *node_name, + union md_node_info *node_info); +int mdesc_get_node_info(struct mdesc_handle *hp, u64 node, + char *node_name, union md_node_info *node_info); + + void mdesc_fill_in_cpu_data(cpumask_t *mask); void mdesc_populate_present_mask(cpumask_t *mask); void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask); diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 8174f6cdbbbb..e990d29b64a1 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -52,6 +52,8 @@ struct vio_ver_info { #define VDEV_NETWORK_SWITCH 0x02 #define VDEV_DISK 0x03 #define VDEV_DISK_SERVER 0x04 +#define VDEV_CONSOLE_CON 0x05 +#define VDEV_VLDC 0x06 u8 resv1[3]; u64 resv2[5]; @@ -100,6 +102,17 @@ struct vio_dring_data { u64 __par4[2]; }; +/* + * VIO Common header for inband descriptor messages. + * + * Clients will then combine this header with a device specific payload. + */ +struct vio_desc_data { + struct vio_msg_tag tag; + u64 seq; + u64 desc_handle; +}; + struct vio_dring_hdr { u8 state; #define VIO_DESC_FREE 0x01 @@ -162,6 +175,30 @@ struct vio_disk_desc { struct ldc_trans_cookie cookies[0]; }; +struct vio_disk_dring_payload { + u64 req_id; + u8 operation; + u8 slice; + u16 resv1; + u32 status; + u64 offset; + u64 size; + u32 ncookies; + u32 resv2; + struct ldc_trans_cookie cookies[0]; +}; + +/* + * VIO disk inband descriptor message. + * + * For clients that do not use descriptor rings, the descriptor contents + * are sent as part of an inband message. 
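+ *
+ * Illustration only (not code from this patch): a client could fill in
+ * such a message roughly as follows, where next_seq, handle, req_id and
+ * the VD_OP_BREAD operation code are hypothetical:
+ *
+ *	struct vio_disk_desc_inband msg = { };
+ *
+ *	msg.hdr.tag.type = VIO_TYPE_DATA;
+ *	msg.hdr.tag.stype = VIO_SUBTYPE_INFO;
+ *	msg.hdr.tag.stype_env = VIO_DESC_DATA;
+ *	msg.hdr.seq = next_seq++;
+ *	msg.hdr.desc_handle = handle;
+ *	msg.payload.req_id = req_id;
+ *	msg.payload.operation = VD_OP_BREAD;
+ *	msg.payload.offset = offset;
+ *	msg.payload.size = size;
+ *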
+ */ +struct vio_disk_desc_inband { + struct vio_desc_data hdr; + struct vio_disk_dring_payload payload; +}; + #define VIO_DISK_VNAME_LEN 8 #define VIO_DISK_ALABEL_LEN 128 #define VIO_DISK_NUM_PART 8 @@ -282,6 +319,14 @@ struct vio_dring_state { struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES]; }; +#define VIO_TAG_SIZE (sizeof(struct vio_msg_tag)) +#define VIO_VCC_MTU_SIZE (LDC_PACKET_SIZE - 8) + +struct vio_vcc { + struct vio_msg_tag tag; + char data[VIO_VCC_MTU_SIZE]; +}; + static inline void *vio_dring_cur(struct vio_dring_state *dr) { return dr->base + (dr->entry_size * dr->prod); @@ -316,24 +361,32 @@ static inline u32 vio_dring_prev(struct vio_dring_state *dr, u32 index) } #define VIO_MAX_TYPE_LEN 32 +#define VIO_MAX_NAME_LEN 32 #define VIO_MAX_COMPAT_LEN 64 struct vio_dev { - u64 mp; struct device_node *dp; + char node_name[VIO_MAX_NAME_LEN]; char type[VIO_MAX_TYPE_LEN]; char compat[VIO_MAX_COMPAT_LEN]; int compat_len; u64 dev_no; + unsigned long port_id; unsigned long channel_id; unsigned int tx_irq; unsigned int rx_irq; u64 rx_ino; + u64 dev_handle; + u64 tx_ino; + + /* MD specific data used to match the vdev in the MD */ + union md_node_info md_node_info; + struct device dev; }; @@ -346,6 +399,7 @@ struct vio_driver { void (*shutdown)(struct vio_dev *dev); unsigned long driver_data; struct device_driver driver; + bool no_irq; }; struct vio_version { @@ -489,5 +543,6 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, void vio_port_up(struct vio_driver_state *vio); int vio_set_intr(unsigned long dev_ino, int state); +u64 vio_vdev_node(struct mdesc_handle *hp, struct vio_dev *vdev); #endif /* _SPARC64_VIO_H */ diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 973c04531cec..944620d1e07e 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1,8 +1,11 @@ -/* ds.c: Domain Services driver for Logical Domains +/* + * ds.c: Sun4v LDOMs Domain Services Driver * * Copyright (C) 2007, 2008 David S. Miller + * Copyright (C) 2015 Oracle. All rights reserved. */ - +#include +#include #include #include #include @@ -10,10 +13,16 @@ #include #include #include +#include #include #include +#include #include #include +#include +#include +#include +#include #include #include @@ -24,20 +33,275 @@ #include "kernel.h" +/* + * Def to enable kernel timer bug workaround. + * See additional comments below. + */ +#define DS_KERNEL_TIMER_BUG_WAR 1 + +/* + * Theory of operation: + * + * Domain Services provide a protocol for a logical domain (ldom) to provide + * or use a service to/from another ldom or the SP. For a given service there is + * a provider and a client. The provider and client can share a service across + * a LDC or directly in the case of a "loopback" service on the same local + * domain. For example, a guest ldom can provide a shutdown service to the + * control domain (the client) to allow the control domain to use the service + * to shutdown the guest. On the control domain, the kernel can provide + * the shutdown service to the domain manager software in loopback mode to + * allow the domain manager to shutdown the local control domain. + * Several software entities can provide or use domain services: OBP, SP, + * user-level logical domain manager and kernel driver (this module). + * After establishing a domain service protocol link between two entities, + * many services can be shared on the link. 
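+ *
+ * In outline, the registration sequence described next looks like this
+ * on the wire (illustrative; the message layouts are defined later in
+ * this file):
+ *
+ *	initiator                     peer
+ *	DS_INIT_REQ (ds_ver_req)  ->
+ *	                          <-  DS_INIT_ACK (ds_ver_ack)
+ *	DS_REG_REQ  (ds_reg_req)  ->
+ *	                          <-  DS_REG_ACK (ds_reg_ack)
+ *	DS_DATA     (ds_data_req) <->  DS_DATA (ds_data_req)
+ *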
Services advertise
+ their availability by sending a service registration request containing
+ a service id (a string identifying the service) and a generated numerical
+ handle (a value to use to identify the service connection after the
+ connection has been established). A service request is acknowledged
+ (ACK'd) by the other end of the link if the service is supported.
+ Once the service registration is ACK'd, the service connection is
+ established and service protocol packets can be exchanged by
+ both entities (client and provider) on either side of the link.
+ This driver can execute in the control domain, guest domains or both.
+ It contains a set of builtin services associated with the "primary" (or
+ control) domain. The driver also contains an API which allows external
+ domain services to be registered with the driver. This API can be utilized by
+ another kernel driver to provide/use services. The API can also be used by
+ another kernel driver (e.g. vlds) to provide user-level domain services.
+
+ */
+
+static unsigned int dsdbg_level;
+module_param(dsdbg_level, uint, S_IRUGO|S_IWUSR);
+
 #define DRV_MODULE_NAME "ds"
 #define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "1.0"
-#define DRV_MODULE_RELDATE "Jul 11, 2007"
-static char version[] =
- DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
-MODULE_DESCRIPTION("Sun LDOM domain services driver");
+#define XSTR(s) STR(s)
+#define STR(s) #s
+#define DRV_MODULE_VERSION XSTR(DS_MAJOR_VERSION) "." XSTR(DS_MINOR_VERSION)
+
+static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION "\n";
+
+#define dprintk(fmt, args...) do {\
+if (dsdbg_level > 0)\
+ printk(KERN_ERR "%s: %s: " fmt, DRV_MODULE_NAME, __func__, ##args);\
+} while (0)
+
+MODULE_AUTHOR("Oracle");
+MODULE_DESCRIPTION("Sun4v LDOM domain services driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
+#define LDC_IRQ_NAME_MAX 32
+
+#define DS_DEFAULT_BUF_SIZE 4096
+#define DS_DEFAULT_MTU 4096
+
+#define DS_PRIMARY_ID 0
+
+#define DS_INVALID_HANDLE 0xFFFFFFFFFFFFFFFFUL
+
+/*
+ * The DS spec mentions that a DS handle is just any random number.
+ * However, the Solaris code uses some conventions to identify server
+ * and consumer handles, based on the setting of some bits in the
+ * handle. We have to use the same convention to be compatible with
+ * services from Solaris.
+ */
+#define DS_HDL_ISCLIENT_BIT 0x80000000ull
+#define DS_HDL_ISCNTRLD_BIT 0x40000000ull
+
+/* Globals to identify the local ldom handle */
+u64 ds_local_ldom_handle;
+bool ds_local_ldom_handle_set;
+
+/* Global driver data struct for data common to all ds devices. */
+struct ds_driver_data {
+
+ /* list of all ds devices */
+ struct list_head ds_dev_list;
+ int num_ds_dev_list;
+
+};
+struct ds_driver_data ds_data;
+static DEFINE_SPINLOCK(ds_data_lock); /* protect ds_data */
+
+/*
+ * For each DS port, a timer fires every DS_REG_TIMER_FREQ
+ * milliseconds to attempt to register services on that DS port.
+ */
+#define DS_REG_TIMER_FREQ 100 /* in ms */
+
+/* Timeout to wait for responses for sp-token and var-config DS requests */
+#define DS_RESPONSE_TIMEOUT 10 /* in seconds */
+
+#ifdef DS_KERNEL_TIMER_BUG_WAR
+/*
+ * Define a partial type for ldc_channel so the compiler knows
+ * how to indirect ds->lp->lock. This must match the definition in ldc.c
+ * (which should probably be moved to ldc.h).
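+ *
+ * With this partial type the driver can, hypothetically, take the
+ * channel lock directly around timer work, e.g. (sketch only; the real
+ * workaround code is further below and may differ):
+ *
+ *	unsigned long flags;
+ *
+ *	spin_lock_irqsave(&ds->lp->lock, flags);
+ *	mod_timer(&ds->ds_reg_tmr,
+ *		  jiffies + msecs_to_jiffies(DS_REG_TIMER_FREQ));
+ *	spin_unlock_irqrestore(&ds->lp->lock, flags);
+ *
+ * This is only safe if 'lock' keeps the same offset (first member) here
+ * and in the real struct ldc_channel in ldc.c.
+ *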
+ */
+struct ldc_channel {
+ /* Protects all operations that depend upon channel state. */
+ spinlock_t lock;
+};
+#endif /* DS_KERNEL_TIMER_BUG_WAR */
+
+/*
+ * DS device structure. There is one of these probed/created per
+ * domain-services-port node in the MD.
+ * On a guest ldom, there is typically just one primary ds device
+ * for services provided from/to the "primary".
+ * On the primary ldom, there can be several ds devices - typically
+ * one for the SP, primary and each guest ldom.
+ */
+struct ds_dev {
+ /* link into the global driver data dev list */
+ struct list_head list;
+
+ /* protect this ds_dev */
+ spinlock_t ds_lock;
+
+ /* number of references to this ds_dev on the callout queue */
+ u64 co_ref_cnt;
+
+ /* flag to indicate if this ds_dev is active */
+ bool active;
+
+ /* flag to indicate if this is a domain DS (versus the SP DS) */
+ bool is_domain;
+
+ /* LDC connection info for this ds_dev */
+ struct ldc_channel *lp;
+ u8 hs_state;
+ u64 id;
+ u64 handle;
+
+ /* negotiated DS version */
+ ds_ver_t neg_vers;
+
+ /* LDC receive data buffer for this ds_dev */
+ u8 *rcv_buf;
+ int rcv_buf_len;
+
+ /* service registration timer */
+ struct timer_list ds_reg_tmr;
+
+ u32 next_service_handle;
+
+ /* list of local service providers registered with this ds_dev */
+ struct list_head service_provider_list;
+
+ /* list of local service clients registered with this ds_dev */
+ struct list_head service_client_list;
+
+ /* list of work items queued for processing (by callout thread) */
+ struct list_head callout_list;
+
+};
+
+/* ds_dev hs_state values */
+#define DS_HS_LDC_DOWN 0x00
+#define DS_HS_START 0x01
+#define DS_HS_COMPLETE 0x02
+
+/*
+ * LDC interrupts are not blocked by spin_lock_irqsave(). So, for any
+ * lock which the LDC interrupt handler (ds_event) obtains, we must
+ * explicitly disable the LDC interrupt before grabbing the lock
+ * throughout the driver (and re-enable the interrupt after releasing
+ * the lock). This is to prevent a deadlock where the interrupt handler
+ * waits indefinitely for a lock which is held by another thread on the
+ * same CPU.
+ *
+ * The reason behind this is as follows:
+ * spin_lock_irqsave() raises the PIL to level 14 which effectively
+ * blocks interrupt_level_n traps (for n < 15). However, LDC
+ * interrupts are not interrupt_level_n traps. They are dev_mondo traps,
+ * so they are not impacted by the PIL.
+ */
+
+#define LOCK_DS_DEV(ds, flags) do {\
+ ldc_disable_hv_intr((ds)->lp); \
+ spin_lock_irqsave(&((ds)->ds_lock), (flags)); \
+} while (0);
+
+#define UNLOCK_DS_DEV(ds, flags) do {\
+ spin_unlock_irqrestore(&((ds)->ds_lock), flags); \
+ ldc_enable_hv_intr((ds)->lp); \
+} while (0);
+
+/*
+ * Generic service info structure used to describe
+ * a provider service or local client service.
+ */
+struct ds_service_info {
+ /* link into a ds_dev service list */
+ struct list_head list;
+
+ /* id of the service */
+ char *id;
+
+ /* supported max version */
+ ds_ver_t vers;
+
+ /* callback ops for reg/unreg and data */
+ ds_ops_t ops;
+
+ /* registration state */
+ u64 reg_state;
+
+ /* registration timeout */
+ u64 svc_reg_timeout;
+
+ /* connection negotiated version */
+ ds_ver_t neg_vers;
+
+ /*
+ * Flag to indicate if the service is
+ * a client or provider. This flag should always
+ * correspond to the list this service_info
+ * is in (i.e. in the client or provider service
+ * list in the ds_dev).
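+ *
+ * For example (illustrative helper, not part of this patch), a loopback
+ * client/provider pair is recognizable by a shared con_handle and
+ * opposite is_client values:
+ *
+ *	static bool ds_svc_is_loopback_peer(struct ds_service_info *a,
+ *					    struct ds_service_info *b)
+ *	{
+ *		return a->is_loopback && b->is_loopback &&
+ *		       a->con_handle == b->con_handle &&
+ *		       a->is_client != b->is_client;
+ *	}
+ *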
+ */ + bool is_client; + + /* Flag to indicate if the service is a builtin service */ + bool is_builtin; + + /* + * Service is in loopback mode. + * Loopback mode allows a service provider and client + * which reside on the same/local host to connect directly + * (without using a LDC). + */ + bool is_loopback; + + /* flag to indicate if this service is connected */ + bool is_connected; + + /* Unique handle associated with this service */ + u64 handle; + + /* Handle used for service connection. */ + u64 con_handle; + +}; + +/* service_info reg_states */ +#define DS_REG_STATE_UNREG 0x00 +#define DS_REG_STATE_REG_SENT 0x01 +#define DS_REG_STATE_REGISTERED_LDC 0x02 +#define DS_REG_STATE_REGISTERED_LOOPBACK 0x03 + +/* + * DS service data structures + */ + struct ds_msg_tag { - __u32 type; + u32 type; #define DS_INIT_REQ 0x00 #define DS_INIT_ACK 0x01 #define DS_INIT_NACK 0x02 @@ -50,7 +314,12 @@ struct ds_msg_tag { #define DS_DATA 0x09 #define DS_NACK 0x0a - __u32 len; + u32 len; +}; + +struct ds_msg { + struct ds_msg_tag tag; + char payload[0]; }; /* Result codes */ @@ -61,397 +330,1242 @@ struct ds_msg_tag { #define DS_TYPE_UNKNOWN 0x04 struct ds_version { - __u16 major; - __u16 minor; + u16 major; + u16 minor; +}; + +struct ds_ver_req_payload { + struct ds_version ver; }; struct ds_ver_req { - struct ds_msg_tag tag; - struct ds_version ver; + struct ds_msg_tag tag; + struct ds_ver_req_payload payload; +}; + +struct ds_ver_ack_payload { + u16 minor; }; struct ds_ver_ack { - struct ds_msg_tag tag; - __u16 minor; + struct ds_msg_tag tag; + struct ds_ver_ack_payload payload; +}; + +struct ds_ver_nack_payload { + u16 major; }; struct ds_ver_nack { - struct ds_msg_tag tag; - __u16 major; + struct ds_msg_tag tag; + struct ds_ver_nack_payload payload; +}; + +struct ds_reg_req_payload { + u64 handle; + u16 major; + u16 minor; + char svc_id[0]; }; struct ds_reg_req { - struct ds_msg_tag tag; - __u64 handle; - __u16 major; - __u16 minor; - char svc_id[0]; + struct ds_msg_tag tag; + struct ds_reg_req_payload payload; +}; + +struct ds_reg_ack_payload { + u64 handle; + u16 minor; }; struct ds_reg_ack { - struct ds_msg_tag tag; - __u64 handle; - __u16 minor; + struct ds_msg_tag tag; + struct ds_reg_ack_payload payload; +}; + +struct ds_reg_nack_payload { + u64 handle; + u64 result; + u16 major; }; struct ds_reg_nack { - struct ds_msg_tag tag; - __u64 handle; - __u16 major; + struct ds_msg_tag tag; + struct ds_reg_nack_payload payload; +}; + +struct ds_unreg_req_payload { + u64 handle; }; struct ds_unreg_req { - struct ds_msg_tag tag; - __u64 handle; + struct ds_msg_tag tag; + struct ds_unreg_req_payload payload; +}; + +struct ds_unreg_ack_payload { + u64 handle; }; struct ds_unreg_ack { - struct ds_msg_tag tag; - __u64 handle; + struct ds_msg_tag tag; + struct ds_unreg_ack_payload payload; +}; + +struct ds_unreg_nack_payload { + u64 handle; }; struct ds_unreg_nack { - struct ds_msg_tag tag; - __u64 handle; + struct ds_msg_tag tag; + struct ds_unreg_nack_payload payload; }; -struct ds_data { - struct ds_msg_tag tag; - __u64 handle; +struct ds_data_req_payload { + u64 handle; + char data[0]; +}; + +struct ds_data_req { + struct ds_msg_tag tag; + struct ds_data_req_payload payload; +}; + +#define DS_DATA_REQ_DSIZE(req) \ + ((req)->tag.len - sizeof(struct ds_data_req_payload)) + +struct ds_data_nack_payload { + u64 handle; + u64 result; }; struct ds_data_nack { - struct ds_msg_tag tag; - __u64 handle; - __u64 result; + struct ds_msg_tag tag; + struct ds_data_nack_payload payload; +}; + +struct 
ds_unknown_msg_payload { + u64 handle; /* ??? */ +}; + +struct ds_unknown_msg { + struct ds_msg_tag tag; + struct ds_unknown_msg_payload payload; +}; + +struct ds_md_update_req { + u64 req_num; +}; + +struct ds_md_update_res { + u64 req_num; + u32 result; +}; + +struct ds_shutdown_req { + u64 req_num; + u32 ms_delay; +}; + +struct ds_shutdown_res { + u64 req_num; + u32 result; + char reason[1]; +}; + +struct ds_panic_req { + u64 req_num; +}; + +struct ds_panic_res { + u64 req_num; + u32 result; + char reason[1]; +}; + +struct ds_pri_msg { + u64 req_num; + u64 type; +#define DS_PRI_REQUEST 0x00 +#define DS_PRI_DATA 0x01 +#define DS_PRI_UPDATE 0x02 +}; + +struct ds_var_hdr { + u32 type; +#define DS_VAR_SET_REQ 0x00 +#define DS_VAR_DELETE_REQ 0x01 +#define DS_VAR_SET_RESP 0x02 +#define DS_VAR_DELETE_RESP 0x03 +}; + +struct ds_var_set_msg { + struct ds_var_hdr hdr; + char name_and_value[0]; +}; + +struct ds_var_delete_msg { + struct ds_var_hdr hdr; + char name[0]; +}; + +struct ds_var_resp { + struct ds_var_hdr hdr; + u32 result; +#define DS_VAR_SUCCESS 0x00 +#define DS_VAR_NO_SPACE 0x01 +#define DS_VAR_INVALID_VAR 0x02 +#define DS_VAR_INVALID_VAL 0x03 +#define DS_VAR_NOT_PRESENT 0x04 +}; + +struct ds_sp_token_msg { + u64 req_num; + u64 type; + __u8 service[]; +#define DS_SPTOK_REQUEST 0x01 +}; + +struct ds_sp_token_resp { + u64 req_num; + u32 result; + u32 ip_addr; + u32 portid; + __u8 token[DS_SPTOK_TOKEN_LEN]; +#define DS_SP_TOKEN_RES_OK 0x00 +#define DS_SP_TOKEN_RES_SVC_UNKNOWN 0x01 +#define DS_SP_TOKEN_RES_SVC_UNAVAIL 0x02 +#define DS_SP_TOKEN_RES_DOWN 0x03 +}; + +#ifdef CONFIG_HOTPLUG_CPU +struct dr_cpu_tag { + u64 req_num; + u32 type; +#define DR_CPU_CONFIGURE 0x43 +#define DR_CPU_UNCONFIGURE 0x55 +#define DR_CPU_FORCE_UNCONFIGURE 0x46 +#define DR_CPU_STATUS 0x53 + +/* Responses */ +#define DR_CPU_OK 0x6f +#define DR_CPU_ERROR 0x65 + + u32 num_records; +}; + +struct dr_cpu_resp_entry { + u32 cpu; + u32 result; +#define DR_CPU_RES_OK 0x00 +#define DR_CPU_RES_FAILURE 0x01 +#define DR_CPU_RES_BLOCKED 0x02 +#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03 +#define DR_CPU_RES_NOT_IN_MD 0x04 + + u32 stat; +#define DR_CPU_STAT_NOT_PRESENT 0x00 +#define DR_CPU_STAT_UNCONFIGURED 0x01 +#define DR_CPU_STAT_CONFIGURED 0x02 + + u32 str_off; }; +#endif /* CONFIG_HOTPLUG_CPU */ -struct ds_info; -struct ds_cap_state { - __u64 handle; - void (*data)(struct ds_info *dp, - struct ds_cap_state *cp, - void *buf, int len); +/* + * Builtin services provided directly by this module. 
+ */
+struct ds_builtin_service {
+ /* service id */
+ char *id;
- const char *service_id;
+ /* supported max version */
+ ds_ver_t vers;
- u8 state;
-#define CAP_STATE_UNKNOWN 0x00
-#define CAP_STATE_REG_SENT 0x01
-#define CAP_STATE_REGISTERED 0x02
+ /* callback ops for this service */
+ ds_ops_t ops;
 };
-static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp,
- void *buf, int len);
-static void domain_shutdown_data(struct ds_info *dp,
- struct ds_cap_state *cp,
- void *buf, int len);
-static void domain_panic_data(struct ds_info *dp,
- struct ds_cap_state *cp,
- void *buf, int len);
+/* Prototypes for the builtin service callbacks */
+static void ds_md_update_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
+static void ds_dom_shutdown_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
+static void ds_dom_panic_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
 #ifdef CONFIG_HOTPLUG_CPU
-static void dr_cpu_data(struct ds_info *dp,
- struct ds_cap_state *cp,
- void *buf, int len);
+static void ds_dr_cpu_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
 #endif
-static void ds_pri_data(struct ds_info *dp,
- struct ds_cap_state *cp,
- void *buf, int len);
-static void ds_var_data(struct ds_info *dp,
- struct ds_cap_state *cp,
- void *buf, int len);
-
-static struct ds_cap_state ds_states_template[] = {
+static void ds_var_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
+static void ds_sp_token_data_cb(ds_cb_arg_t arg,
+ ds_svc_hdl_t hdl, void *buf, size_t len);
+/*
+ * Each service can have a unique supported maj/min version, but for
+ * now we set them all to the same supported maj/min value below.
+ */
+#define DS_CAP_MAJOR 1
+#define DS_CAP_MINOR 0
+
+/*
+ * Builtin service providers connected to the primary domain. These
+ * service providers are started on any domain, and they are connected
+ * and consumed by the primary domain.
+ */
+static struct ds_builtin_service ds_primary_builtin_template[] = {
+ {
- .service_id = "md-update",
- .data = md_update_data,
+ .id = "md-update",
+ .vers = {DS_CAP_MAJOR, DS_CAP_MINOR},
+ .ops = {NULL,
+ NULL,
+ ds_md_update_data_cb},
 },
 {
- .service_id = "domain-shutdown",
- .data = domain_shutdown_data,
+ .id = "domain-shutdown",
+ .vers = {DS_CAP_MAJOR, DS_CAP_MINOR},
+ .ops = {NULL,
+ NULL,
+ ds_dom_shutdown_data_cb},
 },
 {
- .service_id = "domain-panic",
- .data = domain_panic_data,
+ .id = "domain-panic",
+ .vers = {DS_CAP_MAJOR, DS_CAP_MINOR},
+ .ops = {NULL,
+ NULL,
+ ds_dom_panic_data_cb},
 },
+
 #ifdef CONFIG_HOTPLUG_CPU
 {
- .service_id = "dr-cpu",
- .data = dr_cpu_data,
+ .id = "dr-cpu",
+ .vers = {DS_CAP_MAJOR, DS_CAP_MINOR},
+ .ops = {NULL,
+ NULL,
+ ds_dr_cpu_data_cb},
 },
 #endif
+
+ /*
+ * var-config effectively behaves as a service client. But all kernel
+ * ds services are defined as providers, no matter if they actually
+ * behave as a server or as a client.
+ */
 {
- .service_id = "pri",
- .data = ds_pri_data,
+ .id = "var-config",
+ .vers = {DS_CAP_MAJOR, DS_CAP_MINOR},
+ .ops = {NULL,
+ NULL,
+ ds_var_data_cb},
 },
+};
+
+/*
+ * Builtin service clients connected to the SP. These services are
+ * started only on the primary domain (which is the only domain connected
+ * to the SP). They are connected to the SP, which is the consumer of
+ * these services.
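+ *
+ * As an illustration (not code from this patch), a requester holding the
+ * sp-token service handle could format a token request with the
+ * ds_sp_token_msg layout defined above and send it with ds_cap_send(),
+ * which is defined later in this file:
+ *
+ *	size_t len = sizeof(struct ds_sp_token_msg) + strlen(name) + 1;
+ *	struct ds_sp_token_msg *msg = kzalloc(len, GFP_KERNEL);
+ *
+ *	if (msg) {
+ *		msg->req_num = req_num;
+ *		msg->type = DS_SPTOK_REQUEST;
+ *		memcpy(msg->service, name, strlen(name) + 1);
+ *		rv = ds_cap_send(hdl, msg, len);
+ *		kfree(msg);
+ *	}
+ *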
+ */ +static struct ds_builtin_service ds_sp_builtin_template[] = { + { - .service_id = "var-config", - .data = ds_var_data, + .id = "var-config-backup", + .vers = {DS_CAP_MAJOR, DS_CAP_MINOR}, + .ops = {NULL, + NULL, + ds_var_data_cb}, }, { - .service_id = "var-config-backup", - .data = ds_var_data, + .id = "sp-token", + .vers = {DS_CAP_MAJOR, DS_CAP_MINOR}, + .ops = {NULL, + NULL, + ds_sp_token_data_cb}, }, }; -static DEFINE_SPINLOCK(ds_lock); - -struct ds_info { - struct ldc_channel *lp; - u8 hs_state; -#define DS_HS_START 0x01 -#define DS_HS_DONE 0x02 +/* prototypes for local functions */ +static void ds_unregister_ldc_services(struct ds_dev *ds); +static struct ds_service_info *ds_find_service_client_handle( + struct ds_dev *ds, u64 handle); +static struct ds_service_info *ds_find_service_provider_handle( + struct ds_dev *ds, u64 handle); +static struct ds_service_info *ds_find_service_client_con_handle( + struct ds_dev *ds, u64 handle); +static struct ds_service_info *ds_find_service_provider_con_handle( + struct ds_dev *ds, u64 handle); +static struct ds_service_info *ds_find_service_provider_id(struct ds_dev *ds, + char *svc_id); +static void ds_remove_service_provider(struct ds_dev *ds, + struct ds_service_info *provider_svc_info); +static struct ds_service_info *ds_add_service_provider(struct ds_dev *ds, + char *id, ds_ver_t vers, ds_ops_t *ops, bool is_builtin); +static struct ds_service_info *ds_find_service_client_id(struct ds_dev *ds, + char *svc_id); +static struct ds_service_info *ds_add_service_client(struct ds_dev *ds, + char *id, ds_ver_t vers, ds_ops_t *ops, bool is_builtin); +static void ds_remove_service_client(struct ds_dev *ds, + struct ds_service_info *client_svc_info); +static int ds_service_unreg(struct ds_dev *ds, u64 handle); +static void ds_disconnect_service_client(struct ds_dev *ds, + struct ds_service_info *client_svc_info); +static void ds_disconnect_service_provider(struct ds_dev *ds, + struct ds_service_info *provider_svc_info); + +#define LDOMS_DEBUG_LEVEL_SETUP "ldoms_debug_level=" +#define LDOMS_MAX_DEBUG_LEVEL 7 +unsigned int ldoms_debug_level; +EXPORT_SYMBOL(ldoms_debug_level); + +module_param(ldoms_debug_level, uint, S_IRUGO|S_IWUSR); + +static int __init ldoms_debug_level_setup(char *level_str) +{ + unsigned long level; - u64 id; + if (!level_str) + return -EINVAL; - void *rcv_buf; - int rcv_buf_len; + level = simple_strtoul(level_str, NULL, 0); - struct ds_cap_state *ds_states; - int num_ds_states; + if (level < LDOMS_MAX_DEBUG_LEVEL) + ldoms_debug_level = level; - struct ds_info *next; -}; + return 1; -static struct ds_info *ds_info_list; +} +__setup(LDOMS_DEBUG_LEVEL_SETUP, ldoms_debug_level_setup); -static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle) +static void ds_reset(struct ds_dev *ds) { - unsigned int index = handle >> 32; + dprintk("entered.\n"); - if (index >= dp->num_ds_states) - return NULL; - return &dp->ds_states[index]; -} + ds->hs_state = DS_HS_LDC_DOWN; -static struct ds_cap_state *find_cap_by_string(struct ds_info *dp, - const char *name) -{ - int i; + ds_unregister_ldc_services(ds); - for (i = 0; i < dp->num_ds_states; i++) { - if (strcmp(dp->ds_states[i].service_id, name)) - continue; + /* Disconnect the LDC */ + ldc_disconnect(ds->lp); - return &dp->ds_states[i]; - } - return NULL; + /* clear the LDC RESET flag so that the LDC can reconnect */ + ldc_clr_reset(ds->lp); } -static int __ds_send(struct ldc_channel *lp, void *data, int len) +static int ds_ldc_send_msg(struct ldc_channel *lp, void *data, int len) 
{ - int err, limit = 1000; + int rv, limit = 1000; - err = -EINVAL; + rv = -EINVAL; while (limit-- > 0) { - err = ldc_write(lp, data, len); - if (!err || (err != -EAGAIN)) + rv = ldc_write(lp, data, len); + if (rv != -EAGAIN) break; udelay(1); } - return err; + return rv; } -static int ds_send(struct ldc_channel *lp, void *data, int len) +static int ds_ldc_send_payload(struct ldc_channel *lp, u32 type, + void *data, int len) { - unsigned long flags; - int err; + struct ds_msg *msg; + size_t msglen; + gfp_t alloc_flags; + int rv; + + /* This function can be called in either process or atomic mode */ + if (in_atomic()) + alloc_flags = GFP_ATOMIC; + else + alloc_flags = GFP_KERNEL; + msglen = sizeof(struct ds_msg) + len; + msg = kzalloc(msglen, alloc_flags); + if (msg == NULL) + return -ENOMEM; - spin_lock_irqsave(&ds_lock, flags); - err = __ds_send(lp, data, len); - spin_unlock_irqrestore(&ds_lock, flags); + msg->tag.type = type; + msg->tag.len = len; + memcpy(msg->payload, data, len); - return err; -} + rv = ds_ldc_send_msg(lp, msg, msglen); -struct ds_md_update_req { - __u64 req_num; -}; + kfree(msg); -struct ds_md_update_res { - __u64 req_num; - __u32 result; -}; + return rv; +} -static void md_update_data(struct ds_info *dp, - struct ds_cap_state *cp, - void *buf, int len) +static void ds_send_data_nack(struct ds_dev *ds, u64 handle, u64 result) { - struct ldc_channel *lp = dp->lp; - struct ds_data *dpkt = buf; - struct ds_md_update_req *rp; - struct { - struct ds_data data; - struct ds_md_update_res res; - } pkt; - - rp = (struct ds_md_update_req *) (dpkt + 1); + struct ds_data_nack_payload req; + int rv; - pr_info("ds-%llu: Machine description update.\n", dp->id); - - mdesc_update(); + dprintk("entered.\n"); - memset(&pkt, 0, sizeof(pkt)); - pkt.data.tag.type = DS_DATA; - pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); - pkt.data.handle = cp->handle; - pkt.res.req_num = rp->req_num; - pkt.res.result = DS_OK; + req.handle = handle; + req.result = result; - ds_send(lp, &pkt, sizeof(pkt)); + rv = ds_ldc_send_payload(ds->lp, DS_NACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); } -struct ds_shutdown_req { - __u64 req_num; - __u32 ms_delay; +struct ds_callout_entry_hdr { + struct list_head list; + u8 type; + struct ds_dev *ds; }; - -struct ds_shutdown_res { - __u64 req_num; - __u32 result; - char reason[1]; +/* callout queue entry types */ +#define DS_QTYPE_DATA 0x1 +#define DS_QTYPE_REG 0x2 +#define DS_QTYPE_UNREG 0x3 + +/* callout queue entry for data cb */ +struct ds_callout_data_entry { + struct ds_callout_entry_hdr hdr; + u8 data_req_type; + u64 req[0]; +}; +/* data_req_type field types */ +#define DS_DTYPE_CLIENT_REQ 0x1 +#define DS_DTYPE_PROVIDER_REQ 0x2 +#define DS_DTYPE_LDC_REQ 0x3 + +/* callout queue entry for reg or unreg cb */ +struct ds_callout_reg_entry { + struct ds_callout_entry_hdr hdr; + u64 hdl; }; -static void domain_shutdown_data(struct ds_info *dp, - struct ds_cap_state *cp, - void *buf, int len) +static struct ds_service_info *ds_callout_data_get_service( + struct ds_dev *ds, u8 data_req_type, u64 hdl) { - struct ldc_channel *lp = dp->lp; - struct ds_data *dpkt = buf; - struct ds_shutdown_req *rp; - struct { - struct ds_data data; - struct ds_shutdown_res res; - } pkt; + struct ds_service_info *svc_info; - rp = (struct ds_shutdown_req *) (dpkt + 1); + /* + * Find the provider or client service to which + * a data message is intended to be sent. 
+ * If the original request was from a client, find + * a provider handle. If the original request was + * from a provider, find a client handle. If the + * original request was from a LDC, look for either. + * This check is required to support a loopback + * connection where both a client and provider + * connected in loopback mode have the same con_handle. + */ - pr_info("ds-%llu: Shutdown request from LDOM manager received.\n", - dp->id); + svc_info = NULL; - memset(&pkt, 0, sizeof(pkt)); - pkt.data.tag.type = DS_DATA; - pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); - pkt.data.handle = cp->handle; - pkt.res.req_num = rp->req_num; - pkt.res.result = DS_OK; - pkt.res.reason[0] = 0; + if (data_req_type == DS_DTYPE_CLIENT_REQ || + data_req_type == DS_DTYPE_LDC_REQ) + svc_info = ds_find_service_provider_con_handle(ds, hdl); - ds_send(lp, &pkt, sizeof(pkt)); + if (!svc_info && + (data_req_type == DS_DTYPE_PROVIDER_REQ || + data_req_type == DS_DTYPE_LDC_REQ)) + svc_info = ds_find_service_client_con_handle(ds, hdl); + + if (!svc_info || !svc_info->is_connected) { + + if (!svc_info) + dprintk("ds-%llu: Data received for " + "unknown handle %llu\n", ds->id, hdl); + else + dprintk("ds-%llu: Data received for " + "unconnected handle %llu\n", ds->id, hdl); + + /* + * If this was a LDC data packet, nack it. + * NOTE: If this was a loopback data packet, + * we should always find a connected target + * service and never execute this code. In + * the unlikely event that the loopback + * connection has been disconnected while the + * data packet is "in-flight", the packet will + * just be ignored and ignoring the packet is + * probably appropriate in that case. + */ + if (data_req_type == DS_DTYPE_LDC_REQ) + ds_send_data_nack(ds, hdl, DS_INV_HDL); + + return NULL; + } + + return svc_info; - orderly_poweroff(true); } -struct ds_panic_req { - __u64 req_num; -}; +static struct ds_service_info *ds_callout_reg_get_service( + struct ds_dev *ds, u8 type, u64 hdl) +{ + struct ds_service_info *svc_info; -struct ds_panic_res { - __u64 req_num; - __u32 result; - char reason[1]; -}; + svc_info = ds_find_service_provider_handle(ds, hdl); + if (svc_info == NULL) { + + svc_info = ds_find_service_client_handle(ds, hdl); + if (svc_info == NULL) { + dprintk("ds-%llu: %s cb request received for " + "unknown handle %llu\n", ds->id, + (type == DS_QTYPE_REG) ? "Reg" : "Unreg", hdl); + return NULL; + } + } -static void domain_panic_data(struct ds_info *dp, - struct ds_cap_state *cp, - void *buf, int len) + return svc_info; + +} + +static void ds_do_callout_processing(void) { - struct ldc_channel *lp = dp->lp; - struct ds_data *dpkt = buf; - struct ds_panic_req *rp; - struct { - struct ds_data data; - struct ds_panic_res res; - } pkt; + unsigned long flags; + unsigned long ds_flags; + struct ds_dev *ds; + struct ds_callout_entry_hdr *qhdrp; + struct ds_callout_entry_hdr *tmp; + struct ds_callout_reg_entry *rentry; + struct ds_callout_data_entry *dentry; + struct ds_service_info *svc_info; + struct ds_data_req *data_req; + void (*reg_cb)(ds_cb_arg_t, ds_svc_hdl_t, ds_ver_t *); + void (*unreg_cb)(ds_cb_arg_t, ds_svc_hdl_t); + void (*data_cb)(ds_cb_arg_t, ds_svc_hdl_t, void *, size_t); + ds_cb_arg_t cb_arg; + ds_ver_t neg_vers; + u64 hdl; + LIST_HEAD(todo); + + dprintk("ds: CPU[%d]: callout processing START\n", smp_processor_id()); + + /* + * Merge all the ds_dev callout lists into a + * single local todo list for processing. The + * ds_dev callout lists are re-initialized to empty. 
+ * We do this because we cannot hold any driver locks + * while we process the entries (and make callbacks) + * because it's possible that the callbacks could + * call back into this driver and attempt to re-acquire + * the lock(s) resulting in deadlock. + */ + spin_lock_irqsave(&ds_data_lock, flags); + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + LOCK_DS_DEV(ds, ds_flags) + list_splice_tail_init(&ds->callout_list, &todo); + UNLOCK_DS_DEV(ds, ds_flags) + } + spin_unlock_irqrestore(&ds_data_lock, flags); + + list_for_each_entry_safe(qhdrp, tmp, &todo, list) { + + LOCK_DS_DEV(qhdrp->ds, ds_flags) + /* + * If the ds this entry references + * has been deactivated, skip it. + * If this is the last reference to it, + * free the ds. + */ + qhdrp->ds->co_ref_cnt--; + + if (unlikely(!qhdrp->ds->active)) { + + UNLOCK_DS_DEV(qhdrp->ds, ds_flags) - rp = (struct ds_panic_req *) (dpkt + 1); + if (qhdrp->ds->co_ref_cnt == 0) + kfree(qhdrp->ds); - pr_info("ds-%llu: Panic request from LDOM manager received.\n", - dp->id); + list_del(&qhdrp->list); + kfree(qhdrp); - memset(&pkt, 0, sizeof(pkt)); - pkt.data.tag.type = DS_DATA; - pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); - pkt.data.handle = cp->handle; - pkt.res.req_num = rp->req_num; - pkt.res.result = DS_OK; - pkt.res.reason[0] = 0; + continue; + } + + if (qhdrp->type == DS_QTYPE_DATA) { + /* process data entry */ + dentry = (struct ds_callout_data_entry *)qhdrp; + data_req = (struct ds_data_req *) dentry->req; + ds = dentry->hdr.ds; + + svc_info = ds_callout_data_get_service(ds, + dentry->data_req_type, data_req->payload.handle); + + if (unlikely(svc_info == NULL)) { + UNLOCK_DS_DEV(ds, ds_flags) + list_del(&qhdrp->list); + kfree(qhdrp); + continue; + } + + /* + * We unlock the ds_dev before we make the data + * callback to enforce the rule that no locks be held + * when making callbacks. However, this opens a timing + * hole where a service unregistration could come in + * between releasing the lock and making the callback + * rendering the svc_info * stale/freed. So, copy + * over the svc_info fields into locals before we + * free the lock to close this very unlikely but + * possible hole. + */ + hdl = svc_info->handle; + data_cb = svc_info->ops.ds_data_cb; + cb_arg = svc_info->ops.cb_arg; + + UNLOCK_DS_DEV(ds, ds_flags) + + /* + * We strip off the DS protocol header (ds_data_req) + * portion of the data for the callback to receive. + * Since tag->len includes the handle (a u64) of the + * ds_data_req + the payload, we must subtract an extra + * u64 from the len. This is per spec. + */ + data_cb(cb_arg, hdl, data_req->payload.data, + DS_DATA_REQ_DSIZE(data_req)); + + } else { + /* process reg/ureg entry */ + rentry = (struct ds_callout_reg_entry *)qhdrp; + ds = rentry->hdr.ds; + + svc_info = ds_callout_reg_get_service(ds, + rentry->hdr.type, rentry->hdl); + + if (unlikely(svc_info == NULL)) { + UNLOCK_DS_DEV(ds, ds_flags) + list_del(&qhdrp->list); + kfree(qhdrp); + continue; + } + + /* + * We unlock the ds_dev before we make the reg/unreg + * callback to enforce the rule that no locks be held + * when making callbacks. However, this opens a timing + * hole where a service unregistration could come in + * between releasing the lock and making the callback + * rendering the svc_info * stale/freed. So, copy + * over the svc_info fields into locals before we + * free the lock to close this very unlikely but + * possible hole. 
+ */ + hdl = svc_info->handle; + reg_cb = svc_info->ops.ds_reg_cb; + unreg_cb = svc_info->ops.ds_unreg_cb; + cb_arg = svc_info->ops.cb_arg; + neg_vers = svc_info->neg_vers; + + UNLOCK_DS_DEV(ds, ds_flags) + + if (rentry->hdr.type == DS_QTYPE_REG) { + if (reg_cb != NULL) + reg_cb(cb_arg, hdl, &neg_vers); + } else { + if (unreg_cb != NULL) + unreg_cb(cb_arg, hdl); + } - ds_send(lp, &pkt, sizeof(pkt)); + } + + /* done processing the entry, remove it from the list */ + list_del(&qhdrp->list); + kfree(qhdrp); + } - panic("PANIC requested by LDOM manager."); + dprintk("ds: CPU[%d]: callout processing END\n", smp_processor_id()); } -#ifdef CONFIG_HOTPLUG_CPU -struct dr_cpu_tag { - __u64 req_num; - __u32 type; -#define DR_CPU_CONFIGURE 0x43 -#define DR_CPU_UNCONFIGURE 0x55 -#define DR_CPU_FORCE_UNCONFIGURE 0x46 -#define DR_CPU_STATUS 0x53 +static DECLARE_WAIT_QUEUE_HEAD(ds_wait); -/* Responses */ -#define DR_CPU_OK 0x6f -#define DR_CPU_ERROR 0x65 +static int ds_callout_thread(void *__unused) +{ + DEFINE_WAIT(wait); + unsigned long flags; + struct ds_dev *ds; + bool work_to_do; - __u32 num_records; -}; + while (1) { + prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE); -struct dr_cpu_resp_entry { - __u32 cpu; - __u32 result; -#define DR_CPU_RES_OK 0x00 -#define DR_CPU_RES_FAILURE 0x01 -#define DR_CPU_RES_BLOCKED 0x02 -#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03 -#define DR_CPU_RES_NOT_IN_MD 0x04 + work_to_do = false; + spin_lock_irqsave(&ds_data_lock, flags); + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + if (!list_empty(&ds->callout_list)) { + work_to_do = true; + break; + } + } + spin_unlock_irqrestore(&ds_data_lock, flags); - __u32 stat; -#define DR_CPU_STAT_NOT_PRESENT 0x00 -#define DR_CPU_STAT_UNCONFIGURED 0x01 -#define DR_CPU_STAT_CONFIGURED 0x02 + if (!work_to_do) + schedule(); - __u32 str_off; -}; + finish_wait(&ds_wait, &wait); + + if (kthread_should_stop()) + break; + + ds_do_callout_processing(); + } + + return 0; +} -static void __dr_cpu_send_error(struct ds_info *dp, - struct ds_cap_state *cp, - struct ds_data *data) +static int ds_submit_reg_cb(struct ds_dev *ds, u64 hdl, u8 type) { - struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); - struct { - struct ds_data data; - struct dr_cpu_tag tag; - } pkt; - int msg_len; + struct ds_callout_reg_entry *rentry; + gfp_t alloc_flags; - memset(&pkt, 0, sizeof(pkt)); - pkt.data.tag.type = DS_DATA; - pkt.data.handle = cp->handle; - pkt.tag.req_num = tag->req_num; - pkt.tag.type = DR_CPU_ERROR; - pkt.tag.num_records = 0; + /* This function can be called in either process or atomic mode */ + if (in_atomic()) + alloc_flags = GFP_ATOMIC; + else + alloc_flags = GFP_KERNEL; + + rentry = kzalloc(sizeof(struct ds_callout_reg_entry), alloc_flags); + if (!rentry) + return -ENOMEM; + + rentry->hdr.type = type; + rentry->hdr.ds = ds; + rentry->hdl = hdl; - msg_len = (sizeof(struct ds_data) + - sizeof(struct dr_cpu_tag)); + list_add_tail(&rentry->hdr.list, &ds->callout_list); + ds->co_ref_cnt++; - pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + dprintk("ds-%llu: Added %s item to work queue " + "(co_ref_cnt=%llu)\n", ds->id, + (rentry->hdr.type == DS_QTYPE_REG) ? 
"Reg" : "Unreg", + ds->co_ref_cnt); - __ds_send(dp->lp, &pkt, msg_len); + wake_up(&ds_wait); + + return 0; } -static void dr_cpu_send_error(struct ds_info *dp, - struct ds_cap_state *cp, - struct ds_data *data) +static int ds_submit_data_cb(struct ds_dev *ds, struct ds_msg_tag *pkt, + u8 data_type) { - unsigned long flags; + struct ds_callout_data_entry *dentry; + u64 pktlen; + gfp_t alloc_flags; + + pktlen = (sizeof(struct ds_msg_tag) + pkt->len); + + /* + * Data packets are added to our data thread's + * data work queue for later processing. + */ + + /* This function can be called in either process or atomic mode */ + if (in_atomic()) + alloc_flags = GFP_ATOMIC; + else + alloc_flags = GFP_KERNEL; + + dentry = kzalloc(sizeof(struct ds_callout_data_entry) + pktlen, + alloc_flags); + if (!dentry) + return -ENOMEM; + + dentry->hdr.type = DS_QTYPE_DATA; + dentry->hdr.ds = ds; + dentry->data_req_type = data_type; + memcpy(&dentry->req, pkt, pktlen); + + list_add_tail(&dentry->hdr.list, &ds->callout_list); + ds->co_ref_cnt++; + + dprintk("ds-%llu: Added data item (type=%u) to work queue " + "(co_ref_cnt=%llu)\n", ds->id, pkt->type, ds->co_ref_cnt); + + wake_up(&ds_wait); + + return 0; +} + +/* + * External service registration interface functions + */ +int ds_cap_init(ds_capability_t *cap, ds_ops_t *ops, u32 flags, + u64 domain_hdl, ds_svc_hdl_t *hdlp) +{ + struct ds_dev *ds; + struct ds_service_info *svc_info = NULL; + unsigned long data_flags = 0; + unsigned long ds_flags = 0; + bool is_domain; + + dprintk("entered.\n"); + + /* validate args */ + if (cap == NULL || ops == NULL) { + pr_err("%s: Error: NULL argument(s) received\n", __func__); + return -EINVAL; + } + + /* flags must be set to PROVIDER or CLIENT but not both. */ + if (!(flags & DS_CAP_IS_PROVIDER || flags & DS_CAP_IS_CLIENT) || + (flags & DS_CAP_IS_PROVIDER && flags & DS_CAP_IS_CLIENT)) { + pr_err("%s: Error: Invalid flags argument received %u\n", + __func__, flags); + return -EINVAL; + } + + /* data callback must be specified, other ops callbacks can be NULL */ + if (ops->ds_data_cb == NULL) { + pr_err("%s: Error: data callback op must be present\n", + __func__); + return -EINVAL; + } + + is_domain = ((flags & DS_TARGET_IS_DOMAIN) != 0); + + /* Find the ds_dev associated with domain_hdl. */ + spin_lock_irqsave(&ds_data_lock, data_flags); + ds = NULL; + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + + LOCK_DS_DEV(ds, ds_flags) + + if ((is_domain && ds->is_domain && ds->handle == domain_hdl) || + (!is_domain && !ds->is_domain)) + break; + + UNLOCK_DS_DEV(ds, ds_flags) + } + spin_unlock_irqrestore(&ds_data_lock, data_flags); + + if (ds == NULL) { + pr_err("%s: Error: dom_hdl %llu (domain=%d) DS " + "port not found\n", __func__, domain_hdl, + ((flags & DS_TARGET_IS_DOMAIN) != 0)); + return -ENODEV; + } + + if (flags & DS_CAP_IS_PROVIDER) { + + /* Check if there is already a registered service provider */ + svc_info = ds_find_service_provider_id(ds, cap->svc_id); + if (svc_info != NULL) { + if (svc_info->is_connected && !svc_info->is_builtin) { + pr_err("%s: Error: service provider %s " + "already registered\n", __func__, + cap->svc_id); + UNLOCK_DS_DEV(ds, ds_flags) + return -EBUSY; + } else { + /* + * Existing service is not connected or is + * a builtin (i.e. allow external to override + * builtin). Remove the service. 
+ */ + ds_remove_service_provider(ds, svc_info); + } + } + + svc_info = ds_add_service_provider(ds, cap->svc_id, cap->vers, + ops, false); + + if (svc_info == NULL) { + pr_err("ds-%llu: %s: Failed to add service " + "provider %s", ds->id, __func__, cap->svc_id); + UNLOCK_DS_DEV(ds, ds_flags) + return -ENOMEM; + } + + } else if (flags & DS_CAP_IS_CLIENT) { + + /* Check if there is already a registered service client */ + svc_info = ds_find_service_client_id(ds, cap->svc_id); + if (svc_info != NULL) { + if (svc_info->is_connected && !svc_info->is_builtin) { + pr_err("%s: Error: service client %s " + "already registered\n", __func__, + cap->svc_id); + UNLOCK_DS_DEV(ds, ds_flags) + return -EBUSY; + } else { + /* + * Existing service is not connected or is + * a builtin (i.e. allow external to override + * builtin). Remove the service. + */ + ds_remove_service_client(ds, svc_info); + } + } + + svc_info = ds_add_service_client(ds, cap->svc_id, cap->vers, + ops, false); + + if (svc_info == NULL) { + pr_err("ds-%llu: %s: Failed to add service " + "client %s", ds->id, __func__, cap->svc_id); + UNLOCK_DS_DEV(ds, ds_flags) + return -ENOMEM; + } + } + + /* populate the unique handle to passed in hdlp argument */ + *hdlp = (ds_svc_hdl_t)svc_info->handle; + + dprintk("ds-%llu: Registered %s service (%llx), client=%d\n", + ds->id, svc_info->id, svc_info->handle, svc_info->is_client); + + UNLOCK_DS_DEV(ds, ds_flags) + + return 0; + +} +EXPORT_SYMBOL(ds_cap_init); + +int ds_cap_fini(ds_svc_hdl_t hdl) +{ + struct ds_dev *ds; + struct ds_service_info *svc_info, *tmp; + unsigned long flags = 0; + unsigned long ds_flags = 0; + + dprintk("entered.\n"); + + /* validate args */ + if (hdl == 0) { + pr_err("%s: Error: hdl argument received is 0\n", __func__); + return -EINVAL; + } + + /* Find and remove all services associated with hdl. 
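+ *
+ * For reference, an external module's end-to-end use of the ds_cap_*
+ * API might look like this sketch (all my_* names are hypothetical):
+ *
+ *	static void my_data_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl,
+ *			       void *buf, size_t len)
+ *	{
+ *		pr_info("my-svc: received %zu bytes\n", len);
+ *	}
+ *
+ *	static ds_svc_hdl_t my_hdl;
+ *
+ *	static int my_attach(u64 domain_hdl)
+ *	{
+ *		ds_capability_t cap = { .svc_id = "my-svc", .vers = {1, 0} };
+ *		ds_ops_t ops = { .ds_data_cb = my_data_cb };
+ *
+ *		return ds_cap_init(&cap, &ops,
+ *				   DS_CAP_IS_PROVIDER | DS_TARGET_IS_DOMAIN,
+ *				   domain_hdl, &my_hdl);
+ *	}
+ *
+ * with ds_cap_send(my_hdl, buf, len) to transmit, and ds_cap_fini(my_hdl)
+ * invoking the removal below.
+ *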
*/ + + spin_lock_irqsave(&ds_data_lock, flags); + + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + + LOCK_DS_DEV(ds, ds_flags) + + list_for_each_entry_safe(svc_info, tmp, + &ds->service_provider_list, list) { + if (svc_info->handle == (u64)hdl) + ds_remove_service_provider(ds, svc_info); + } + + list_for_each_entry_safe(svc_info, tmp, + &ds->service_client_list, list) { + if (svc_info->handle == (u64)hdl) + ds_remove_service_client(ds, svc_info); + } + + UNLOCK_DS_DEV(ds, ds_flags) + } + + spin_unlock_irqrestore(&ds_data_lock, flags); + + return 0; + +} +EXPORT_SYMBOL(ds_cap_fini); + +int ds_cap_send(ds_svc_hdl_t hdl, void *buf, size_t buflen) +{ + struct ds_dev *ds; + struct ds_service_info *svc_info; + unsigned long flags = 0; + unsigned long ds_flags = 0; + struct ds_data_req *hdr; + int msglen; + u8 type; + int rv; + + dprintk("entered.\n"); + + /* validate args */ + if (hdl == 0) { + pr_err("%s: Error: hdl argument received is 0\n", __func__); + return -EINVAL; + } + + if (buf == NULL) { + pr_err("%s: Error: Invalid NULL buffer argument\n", __func__); + return -EINVAL; + } + + if (buflen == 0) + return 0; + + /* Find the service uniquely identified by hdl */ + + svc_info = NULL; + + spin_lock_irqsave(&ds_data_lock, flags); + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + + LOCK_DS_DEV(ds, ds_flags) + + svc_info = ds_find_service_provider_handle(ds, (u64)hdl); + if (svc_info == NULL) + svc_info = ds_find_service_client_handle(ds, + (u64)hdl); + + /* if we found the hdl, break but do not release the ds_lock */ + if (svc_info != NULL) + break; + + UNLOCK_DS_DEV(ds, ds_flags) + } + + spin_unlock_irqrestore(&ds_data_lock, flags); + + if (svc_info == NULL) { + pr_err("%s: Error: no service found " + "for handle %llx\n", __func__, hdl); + return -ENODEV; + } + + if (!svc_info->is_connected) { + pr_err("%s: Error: Service %s not connected.\n", __func__, + svc_info->id); + UNLOCK_DS_DEV(ds, ds_flags) + return -ENODEV; + } + + /* build the data packet containing the data */ + msglen = sizeof(struct ds_data_req) + buflen; + hdr = kzalloc(msglen, GFP_KERNEL); + if (hdr == NULL) { + pr_err("ds-%llu: %s: failed to alloc mem for data msg.\n", + ds->id, __func__); + UNLOCK_DS_DEV(ds, ds_flags) + return -ENOMEM; + } + hdr->tag.type = DS_DATA; + hdr->tag.len = sizeof(struct ds_data_req_payload) + buflen; + hdr->payload.handle = svc_info->con_handle; + (void) memcpy(hdr->payload.data, buf, buflen); + + if (svc_info->is_loopback) { + /* + * If the service is connected via loopback, submit the + * packet to our local work queue. + */ + type = (svc_info->is_client) ? 
DS_DTYPE_CLIENT_REQ + : DS_DTYPE_PROVIDER_REQ; + rv = ds_submit_data_cb(ds, (struct ds_msg_tag *)hdr, type); + if (rv < 0) + pr_err("ds-%llu: %s: ds_submit_data_cb failed.\n ", + ds->id, __func__); + } else { + /* send the data out to the LDC */ + rv = ds_ldc_send_msg(ds->lp, (void *)hdr, msglen); + if (rv <= 0) { + pr_err("ds-%llu: %s: ldc_send failed.(%d)\n ", + ds->id, __func__, rv); + rv = -EIO; + } else { + rv = 0; + } + } + + kfree(hdr); + + UNLOCK_DS_DEV(ds, ds_flags) + + return rv; +} +EXPORT_SYMBOL(ds_cap_send); + +/* + * Builtin service callback routines + */ + +static void ds_md_update_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) +{ + struct ds_dev *ds = (struct ds_dev *)arg; + struct ds_md_update_req *rp; + struct ds_md_update_res res; + + dprintk("entered.\n"); + + rp = (struct ds_md_update_req *)buf; + + pr_alert("ds-%llu: Machine description update.\n", ds->id); + + mdesc_update(); + + res.req_num = rp->req_num; + res.result = DS_OK; + + ds_cap_send(handle, &res, sizeof(struct ds_md_update_res)); +} + +static void ds_dom_shutdown_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) +{ + struct ds_dev *ds = (struct ds_dev *)arg; + struct ds_shutdown_req *rp; + struct ds_shutdown_res res; + + dprintk("entered.\n"); + + rp = (struct ds_shutdown_req *)buf; + + pr_alert("ds-%llu: Shutdown request received.\n", ds->id); + + res.req_num = rp->req_num; + res.result = DS_OK; + res.reason[0] = 0; + + ds_cap_send(handle, &res, sizeof(struct ds_shutdown_res)); + + /* give a message to the console if the delay is greater than 1 sec. */ + if (rp->ms_delay > 1000) { + pr_alert("ds-%llu: Shutting down in %u seconds.\n", + ds->id, rp->ms_delay/1000); + /* delay for specified ms before shutdown */ + mdelay(rp->ms_delay); + } + + + orderly_poweroff(true); +} + +static void ds_dom_panic_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) +{ + struct ds_dev *ds = (struct ds_dev *)arg; + struct ds_panic_req *rp; + struct ds_panic_res res; + + dprintk("entered.\n"); + + rp = (struct ds_panic_req *)buf; + + pr_alert("ds-%llu: Panic request received.\n", ds->id); + + res.req_num = rp->req_num; + res.result = DS_OK; + res.reason[0] = 0; + + ds_cap_send(handle, &res, sizeof(struct ds_panic_res)); + + panic("PANIC requested.\n"); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void __dr_cpu_send_error(struct ds_dev *ds, + u64 handle, struct dr_cpu_tag *tag) +{ + struct dr_cpu_tag resp_tag; + + dprintk("entered.\n"); - spin_lock_irqsave(&ds_lock, flags); - __dr_cpu_send_error(dp, cp, data); - spin_unlock_irqrestore(&ds_lock, flags); + resp_tag.req_num = tag->req_num; + resp_tag.type = DR_CPU_ERROR; + resp_tag.num_records = 0; + + ds_cap_send(handle, &resp_tag, sizeof(struct dr_cpu_tag)); } #define CPU_SENTINEL 0xffffffff @@ -460,6 +1574,8 @@ static void purge_dups(u32 *list, u32 num_ents) { unsigned int i; + dprintk("entered.\n"); + for (i = 0; i < num_ents; i++) { u32 cpu = list[i]; unsigned int j; @@ -476,25 +1592,19 @@ static void purge_dups(u32 *list, u32 num_ents) static int dr_cpu_size_response(int ncpus) { - return sizeof(struct ds_data) + - sizeof(struct dr_cpu_tag) + + return sizeof(struct dr_cpu_tag) + (sizeof(struct dr_cpu_resp_entry) * ncpus); } -static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, +static void dr_cpu_init_response(struct dr_cpu_tag *tag, u64 req_num, u64 handle, int resp_len, int ncpus, cpumask_t *mask, u32 default_stat) { struct dr_cpu_resp_entry *ent; - struct dr_cpu_tag *tag; int i, cpu; - tag = 
(struct dr_cpu_tag *) (resp + 1); ent = (struct dr_cpu_resp_entry *) (tag + 1); - resp->tag.type = DS_DATA; - resp->tag.len = resp_len - sizeof(struct ds_msg_tag); - resp->handle = handle; tag->req_num = req_num; tag->type = DR_CPU_OK; tag->num_records = ncpus; @@ -509,14 +1619,12 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, BUG_ON(i != ncpus); } -static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus, +static void dr_cpu_mark(struct dr_cpu_tag *tag, int cpu, int ncpus, u32 res, u32 stat) { struct dr_cpu_resp_entry *ent; - struct dr_cpu_tag *tag; int i; - tag = (struct dr_cpu_tag *) (resp + 1); ent = (struct dr_cpu_resp_entry *) (tag + 1); for (i = 0; i < ncpus; i++) { @@ -528,12 +1636,13 @@ static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus, } } -static int dr_cpu_configure(struct ds_info *dp, struct ds_cap_state *cp, - u64 req_num, cpumask_t *mask) +static int __cpuinit dr_cpu_configure(struct ds_dev *ds, + u64 handle, u64 req_num, cpumask_t *mask) { - struct ds_data *resp; + struct dr_cpu_tag *resp; int resp_len, ncpus, cpu; - unsigned long flags; + + dprintk("entered.\n"); ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); @@ -541,7 +1650,7 @@ static int dr_cpu_configure(struct ds_info *dp, struct ds_cap_state *cp, if (!resp) return -ENOMEM; - dr_cpu_init_response(resp, req_num, cp->handle, + dr_cpu_init_response(resp, req_num, handle, resp_len, ncpus, mask, DR_CPU_STAT_CONFIGURED); @@ -551,29 +1660,27 @@ static int dr_cpu_configure(struct ds_info *dp, struct ds_cap_state *cp, for_each_cpu(cpu, mask) { int err; - pr_info("ds-%llu: Starting cpu %d...\n", dp->id, cpu); + dprintk("ds-%llu: Starting cpu %d...\n", ds->id, cpu); err = cpu_up(cpu); if (err) { - __u32 res = DR_CPU_RES_FAILURE; - __u32 stat = DR_CPU_STAT_UNCONFIGURED; + u32 res = DR_CPU_RES_FAILURE; + u32 stat = DR_CPU_STAT_UNCONFIGURED; if (!cpu_present(cpu)) { /* CPU not present in MD */ - res = DR_CPU_RES_NOT_IN_MD; stat = DR_CPU_STAT_NOT_PRESENT; } else if (err == -ENODEV) { /* CPU did not call in successfully */ res = DR_CPU_RES_CPU_NOT_RESPONDING; } - pr_info("ds-%llu: CPU startup failed err=%d\n", dp->id, err); + pr_err("ds-%llu: CPU startup failed err=%d\n", ds->id, + err); dr_cpu_mark(resp, cpu, ncpus, res, stat); } } - spin_lock_irqsave(&ds_lock, flags); - __ds_send(dp->lp, resp, resp_len); - spin_unlock_irqrestore(&ds_lock, flags); + ds_cap_send(handle, resp, resp_len); kfree(resp); @@ -583,14 +1690,13 @@ static int dr_cpu_configure(struct ds_info *dp, struct ds_cap_state *cp, return 0; } -static int dr_cpu_unconfigure(struct ds_info *dp, - struct ds_cap_state *cp, - u64 req_num, - cpumask_t *mask) +static int dr_cpu_unconfigure(struct ds_dev *ds, + u64 handle, u64 req_num, cpumask_t *mask) { - struct ds_data *resp; + struct dr_cpu_tag *resp; int resp_len, ncpus, cpu; - unsigned long flags; + + dprintk("entered.\n"); ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); @@ -598,14 +1704,14 @@ static int dr_cpu_unconfigure(struct ds_info *dp, if (!resp) return -ENOMEM; - dr_cpu_init_response(resp, req_num, cp->handle, + dr_cpu_init_response(resp, req_num, handle, resp_len, ncpus, mask, DR_CPU_STAT_UNCONFIGURED); for_each_cpu(cpu, mask) { int err; - pr_info("ds-%llu: Shutting down cpu %d...\n", dp->id, cpu); + pr_info("ds-%llu: Shutting down cpu %d...\n", ds->id, cpu); err = cpu_down(cpu); if (err) dr_cpu_mark(resp, cpu, ncpus, @@ -613,26 +1719,26 @@ static int dr_cpu_unconfigure(struct ds_info *dp, DR_CPU_STAT_CONFIGURED); } - 
spin_lock_irqsave(&ds_lock, flags); - __ds_send(dp->lp, resp, resp_len); - spin_unlock_irqrestore(&ds_lock, flags); + ds_cap_send(handle, resp, resp_len); kfree(resp); return 0; } -static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, - int len) +static void __cpuinit ds_dr_cpu_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) { - struct ds_data *data = buf; - struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + struct ds_dev *ds = (struct ds_dev *)arg; + struct dr_cpu_tag *tag = (struct dr_cpu_tag *)buf; u32 *cpu_list = (u32 *) (tag + 1); u64 req_num = tag->req_num; cpumask_t mask; unsigned int i; int err; + dprintk("entered.\n"); + switch (tag->type) { case DR_CPU_CONFIGURE: case DR_CPU_UNCONFIGURE: @@ -640,7 +1746,7 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, break; default: - dr_cpu_send_error(dp, cp, data); + __dr_cpu_send_error(ds, handle, tag); return; } @@ -656,630 +1762,2226 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, } if (tag->type == DR_CPU_CONFIGURE) - err = dr_cpu_configure(dp, cp, req_num, &mask); + err = dr_cpu_configure(ds, handle, req_num, &mask); else - err = dr_cpu_unconfigure(dp, cp, req_num, &mask); + err = dr_cpu_unconfigure(ds, handle, req_num, &mask); if (err) - dr_cpu_send_error(dp, cp, data); + __dr_cpu_send_error(ds, handle, tag); } #endif /* CONFIG_HOTPLUG_CPU */ -struct ds_pri_msg { - __u64 req_num; - __u64 type; -#define DS_PRI_REQUEST 0x00 -#define DS_PRI_DATA 0x01 -#define DS_PRI_UPDATE 0x02 -}; +static DEFINE_MUTEX(ds_var_mutex); +static DECLARE_COMPLETION(ds_var_config_cb_complete); +static DEFINE_MUTEX(ds_var_complete_mutex); +static int ds_var_response; -static void ds_pri_data(struct ds_info *dp, - struct ds_cap_state *cp, - void *buf, int len) +static void ds_var_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) { - struct ds_data *dpkt = buf; - struct ds_pri_msg *rp; + struct ds_var_resp *rp; - rp = (struct ds_pri_msg *) (dpkt + 1); + dprintk("entered.\n"); - pr_info("ds-%llu: PRI REQ [%llx:%llx], len=%d\n", dp->id, rp->req_num, - rp->type, len); -} + rp = (struct ds_var_resp *)buf; -struct ds_var_hdr { - __u32 type; -#define DS_VAR_SET_REQ 0x00 -#define DS_VAR_DELETE_REQ 0x01 -#define DS_VAR_SET_RESP 0x02 -#define DS_VAR_DELETE_RESP 0x03 -}; + dprintk("hdr.type = %u\n", rp->hdr.type); + dprintk("result = %u\n", rp->result); -struct ds_var_set_msg { - struct ds_var_hdr hdr; - char name_and_value[0]; -}; + if (rp->hdr.type != DS_VAR_SET_RESP && + rp->hdr.type != DS_VAR_DELETE_RESP) + return; -struct ds_var_delete_msg { - struct ds_var_hdr hdr; - char name[0]; -}; + ds_var_response = rp->result; + wmb(); -struct ds_var_resp { - struct ds_var_hdr hdr; - __u32 result; -#define DS_VAR_SUCCESS 0x00 -#define DS_VAR_NO_SPACE 0x01 -#define DS_VAR_INVALID_VAR 0x02 -#define DS_VAR_INVALID_VAL 0x03 -#define DS_VAR_NOT_PRESENT 0x04 -}; + mutex_lock(&ds_var_complete_mutex); + complete(&ds_var_config_cb_complete); + mutex_unlock(&ds_var_complete_mutex); +} -static DEFINE_MUTEX(ds_var_mutex); -static int ds_var_doorbell; -static int ds_var_response; +static DEFINE_MUTEX(ds_sp_token_mutex); +static DECLARE_COMPLETION(ds_sp_token_cb_complete); +static DEFINE_MUTEX(ds_sp_token_complete_mutex); +static u32 ds_sp_token_resp_result; +static u64 ds_sp_token_resp_req_num; +static u64 ds_sp_token_next_req_num; +static ds_sptok_t ds_sp_token_data; -static void ds_var_data(struct ds_info *dp, - struct ds_cap_state *cp, - void 
*buf, int len) +static void ds_sp_token_data_cb(ds_cb_arg_t arg, + ds_svc_hdl_t handle, void *buf, size_t len) { - struct ds_data *dpkt = buf; - struct ds_var_resp *rp; + struct ds_dev *ds = (struct ds_dev *)arg; + struct ds_sp_token_resp *rp; + + dprintk("entered.\n"); + + rp = (struct ds_sp_token_resp *)buf; + + dprintk("ds-%llu: SP TOKEN REQ [%llx:%x], len=%lu ip_addr=%x (%d.%d)" + "portid=%d\n", ds->id, rp->req_num, rp->result, len, rp->ip_addr, + (rp->ip_addr & 0xFF00) >> 8, rp->ip_addr & 0xFF, rp->portid); + + dprintk("[%x:%x...0x%x...:%x].\n", (__u8)rp->token[0], + (__u8)rp->token[1], (__u8)rp->token[11], (__u8)rp->token[19]); + + (void) memcpy(&ds_sp_token_data, &(rp->ip_addr), sizeof(ds_sptok_t)); + ds_sp_token_resp_result = rp->result; + ds_sp_token_resp_req_num = rp->req_num; + wmb(); + + mutex_lock(&ds_sp_token_complete_mutex); + complete(&ds_sp_token_cb_complete); + mutex_unlock(&ds_sp_token_complete_mutex); + +} + +/* + * Helper functions + */ + +static u64 ds_get_service_timeout(void) +{ + u8 random_byte; + u64 timeout_cnt; + + /* + * Return a random number of jiffies that is + * between 3000 and 9000ms in the future. + * XXX - make these values configurable. + */ + get_random_bytes(&random_byte, 1); + timeout_cnt = (((random_byte % 7) + 3)); + + return jiffies + msecs_to_jiffies(timeout_cnt * 1000); + +} + +static struct ds_service_info *ds_find_connected_prov_service(char *svc_id) +{ + struct ds_dev *ds; + unsigned long flags; + unsigned long ds_flags = 0; + struct ds_service_info *svc_info; + + spin_lock_irqsave(&ds_data_lock, flags); + + list_for_each_entry(ds, &ds_data.ds_dev_list, list) { + + LOCK_DS_DEV(ds, ds_flags) + + svc_info = ds_find_service_provider_id(ds, svc_id); + if (svc_info != NULL && svc_info->is_connected) { + UNLOCK_DS_DEV(ds, ds_flags) + spin_unlock_irqrestore(&ds_data_lock, flags); + return svc_info; + } + + UNLOCK_DS_DEV(ds, ds_flags) + } + + spin_unlock_irqrestore(&ds_data_lock, flags); + + return NULL; + +} + +static struct ds_service_info *ds_find_service_provider_id(struct ds_dev *ds, + char *svc_id) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_provider_list, list) { + if (strncmp(svc_info->id, svc_id, DS_MAX_SVC_NAME_LEN) == 0) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_service_provider_handle( + struct ds_dev *ds, u64 handle) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_provider_list, list) { + if (svc_info->handle == handle) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_service_provider_con_handle( + struct ds_dev *ds, u64 handle) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_provider_list, list) { + if (svc_info->con_handle == handle) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_service_client_id(struct ds_dev *ds, + char *svc_id) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_client_list, list) { + if (strncmp(svc_info->id, svc_id, DS_MAX_SVC_NAME_LEN) == 0) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_service_client_handle( + struct ds_dev *ds, u64 handle) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_client_list, list) { + if (svc_info->handle == handle) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_service_client_con_handle( + struct ds_dev 
*ds, u64 handle) +{ + struct ds_service_info *svc_info; + + list_for_each_entry(svc_info, &ds->service_client_list, list) { + if (svc_info->con_handle == handle) + return svc_info; + } + + return NULL; +} + +static struct ds_service_info *ds_find_lb_service_peer(struct ds_dev *ds, + struct ds_service_info *svc_info) +{ + struct ds_service_info *peer_svc_info; + + /* if the service is a client, find a provider with the same id */ + if (svc_info->is_client) { + peer_svc_info = ds_find_service_provider_id(ds, svc_info->id); + if (peer_svc_info && peer_svc_info->reg_state == + DS_REG_STATE_REGISTERED_LOOPBACK) + return peer_svc_info; + } else { + peer_svc_info = ds_find_service_client_id(ds, svc_info->id); + if (peer_svc_info && peer_svc_info->reg_state == + DS_REG_STATE_REGISTERED_LOOPBACK) + return peer_svc_info; + } + + return NULL; +} + + +static u64 ds_get_new_service_handle(struct ds_dev *ds, bool is_client) +{ + + u64 handle; + + /* + * Solaris uses a couple of bits in the handle as flags. + * See DS_HDL_ISCLIENT_BIT, DS_HDL_ISCNTRLD_BIT. + * So, to avoid using these bits in a handle we only use the + * bottom 30 bits. This will help avoid issues on mixed + * systems running both Linux and Solaris domains. + */ + + /* handle wrap at DS_HDL_ISCNTRLD_BIT - don't use 0 */ + if (ds->next_service_handle == DS_HDL_ISCNTRLD_BIT) + ds->next_service_handle = 1; + + handle = (ds->id << 32) | ds->next_service_handle++; + + /* + * If the service is a client service, set the ISCLIENT + * bit which is an indication (or "ping") to the other end + * to send a REG_REQ for the provider service. + */ + if (is_client) + handle |= DS_HDL_ISCLIENT_BIT; + + return handle; + +} + +static struct ds_service_info *ds_add_service_provider(struct ds_dev *ds, + char *id, ds_ver_t vers, ds_ops_t *ops, bool is_builtin) +{ + struct ds_service_info *svc_info; + + dprintk("entered.\n"); + + svc_info = kzalloc(sizeof(struct ds_service_info), GFP_KERNEL); + if (unlikely(svc_info == NULL)) + return NULL; + + svc_info->id = kmemdup(id, (strlen(id) + 1), GFP_KERNEL); + svc_info->vers = vers; + svc_info->ops = *ops; + svc_info->is_client = false; + svc_info->is_builtin = is_builtin; + svc_info->is_loopback = false; + svc_info->is_connected = false; + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = ds_get_service_timeout(); + + /* + * Get a service handle to use to reference this svc_info. + * This handle is also used to send a REG_REQ for this service.
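 + * (ds_service_reg() below copies this same handle into the REG_REQ + * payload, so the peer's REG_ACK/REG_NACK can be matched back to + * this svc_info via ds_find_service_provider_handle() or + * ds_find_service_client_handle().)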
+ */ + svc_info->handle = ds_get_new_service_handle(ds, false); + svc_info->con_handle = 0; + + /* init the ops arg for builtin services to the ds */ + if (is_builtin) + svc_info->ops.cb_arg = ds; + + list_add_tail(&svc_info->list, &ds->service_provider_list); + + return svc_info; +} + +static void ds_remove_service_provider(struct ds_dev *ds, + struct ds_service_info *provider_svc_info) +{ + dprintk("entered.\n"); + + if (provider_svc_info->is_connected) + ds_disconnect_service_provider(ds, provider_svc_info); + + kfree(provider_svc_info->id); + list_del(&provider_svc_info->list); + kfree(provider_svc_info); + +} + +static struct ds_service_info *ds_add_service_client(struct ds_dev *ds, + char *id, ds_ver_t vers, ds_ops_t *ops, bool is_builtin) +{ + struct ds_service_info *svc_info; + + dprintk("entered.\n"); + + svc_info = kzalloc(sizeof(struct ds_service_info), GFP_KERNEL); + if (unlikely(svc_info == NULL)) + return NULL; + + svc_info->id = kmemdup(id, (strlen(id) + 1), GFP_KERNEL); + svc_info->vers = vers; + svc_info->ops = *ops; + svc_info->is_client = true; + svc_info->is_builtin = is_builtin; + svc_info->is_loopback = false; + svc_info->is_connected = false; + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = ds_get_service_timeout(); + + /* Get a service handle to use to reference this svc_info. */ + svc_info->handle = ds_get_new_service_handle(ds, true); + svc_info->con_handle = 0; + + /* init the ops arg for builtin services to the ds */ + if (is_builtin) + svc_info->ops.cb_arg = ds; + + list_add_tail(&svc_info->list, &ds->service_client_list); + + return svc_info; +} + +static void ds_remove_service_client(struct ds_dev *ds, + struct ds_service_info *client_svc_info) +{ + dprintk("entered.\n"); + + /* If the service is connected, send an unreg message */ + if (client_svc_info->is_connected) + ds_disconnect_service_client(ds, client_svc_info); + + kfree(client_svc_info->id); + list_del(&client_svc_info->list); + kfree(client_svc_info); + +} + +static void ds_connect_service_client(struct ds_dev *ds, u64 handle, + u16 major, u16 minor, struct ds_service_info *client_svc_info) +{ + dprintk("entered.\n"); + + /* assign the client to the service */ + client_svc_info->is_loopback = false; + client_svc_info->con_handle = handle; + client_svc_info->neg_vers.major = major; + client_svc_info->neg_vers.minor = minor; + client_svc_info->reg_state = DS_REG_STATE_REGISTERED_LDC; + client_svc_info->is_connected = true; + + /* submit the register callback */ + (void) ds_submit_reg_cb(ds, client_svc_info->handle, DS_QTYPE_REG); +} + +static void ds_disconnect_service_client(struct ds_dev *ds, + struct ds_service_info *client_svc_info) +{ + struct ds_service_info *peer_svc_info; + int rv; + + dprintk("entered.\n"); + + peer_svc_info = NULL; + + if (client_svc_info->reg_state == DS_REG_STATE_REGISTERED_LOOPBACK) { + peer_svc_info = ds_find_lb_service_peer(ds, client_svc_info); + } else if (client_svc_info->reg_state == DS_REG_STATE_REGISTERED_LDC) { + rv = ds_service_unreg(ds, client_svc_info->con_handle); + if (rv != 0) { + pr_err("ds-%llu: %s: failed to send UNREG_REQ for " + "handle %llx (%d)\n", ds->id, __func__, + client_svc_info->con_handle, rv); + } + } + client_svc_info->is_loopback = false; + client_svc_info->con_handle = 0; + client_svc_info->neg_vers.major = 0; + client_svc_info->neg_vers.minor = 0; + client_svc_info->reg_state = DS_REG_STATE_UNREG; + client_svc_info->is_connected = false; + client_svc_info->svc_reg_timeout = ds_get_service_timeout(); + + /*
submit the unregister callback */ + (void) ds_submit_reg_cb(ds, client_svc_info->handle, DS_QTYPE_UNREG); + + /* if it was a loopback connection, disconnect the peer */ + if (peer_svc_info) + ds_disconnect_service_provider(ds, peer_svc_info); +} + +static void ds_connect_service_provider(struct ds_dev *ds, u64 handle, + u16 major, u16 minor, struct ds_service_info *provider_svc_info) +{ + dprintk("entered.\n"); + + /* register the provider */ + provider_svc_info->is_loopback = false; + provider_svc_info->con_handle = handle; + provider_svc_info->neg_vers.major = major; + provider_svc_info->neg_vers.minor = minor; + provider_svc_info->reg_state = DS_REG_STATE_REGISTERED_LDC; + provider_svc_info->is_connected = true; + + /* submit the register callback */ + (void) ds_submit_reg_cb(ds, provider_svc_info->handle, DS_QTYPE_REG); + +} + +static void ds_disconnect_service_provider(struct ds_dev *ds, + struct ds_service_info *provider_svc_info) +{ + struct ds_service_info *peer_svc_info; + int rv; + + dprintk("entered.\n"); + + peer_svc_info = NULL; + if (provider_svc_info->reg_state == DS_REG_STATE_REGISTERED_LOOPBACK) { + peer_svc_info = ds_find_lb_service_peer(ds, provider_svc_info); + } else if (provider_svc_info->reg_state == + DS_REG_STATE_REGISTERED_LDC) { + rv = ds_service_unreg(ds, provider_svc_info->con_handle); + if (rv != 0) { + pr_err("ds-%llu: %s: failed to send UNREG_REQ for " + "handle %llx (%d)\n", ds->id, __func__, + provider_svc_info->con_handle, rv); + } + } + provider_svc_info->is_loopback = false; + provider_svc_info->con_handle = 0; + provider_svc_info->neg_vers.major = 0; + provider_svc_info->neg_vers.minor = 0; + provider_svc_info->reg_state = DS_REG_STATE_UNREG; + provider_svc_info->is_connected = false; + provider_svc_info->svc_reg_timeout = ds_get_service_timeout(); + + /* submit the unregister callback */ + (void) ds_submit_reg_cb(ds, provider_svc_info->handle, DS_QTYPE_UNREG); + + /* if it was a loopback connection, disconnect the peer */ + if (peer_svc_info) + ds_disconnect_service_client(ds, peer_svc_info); +} + +static int ds_connect_loopback_service(struct ds_dev *ds, + struct ds_service_info *svc_info, + struct ds_service_info *peer_svc_info) +{ + ds_ver_t neg_vers; + + dprintk("entered.\n"); + + /* First check to make sure the versions are compatible */ + if (svc_info->vers.major != peer_svc_info->vers.major) { + pr_err("ds-%llu: failed to connect loopback service %s due to " + "version incompatibility (%llu, %llu)\n", ds->id, + svc_info->id, svc_info->vers.major, + peer_svc_info->vers.major); + return -EINVAL; + } + + /* create the negotiated version */ + neg_vers.minor = min_t(u64, svc_info->vers.minor, + peer_svc_info->vers.minor); + neg_vers.major = svc_info->vers.major; + + /* establish the loopback connection */ + svc_info->is_loopback = true; + svc_info->neg_vers = neg_vers; + svc_info->reg_state = DS_REG_STATE_REGISTERED_LOOPBACK; + svc_info->con_handle = svc_info->handle; + svc_info->is_connected = true; + peer_svc_info->is_loopback = true; + peer_svc_info->neg_vers = neg_vers; + peer_svc_info->reg_state = DS_REG_STATE_REGISTERED_LOOPBACK; + peer_svc_info->con_handle = svc_info->handle; + peer_svc_info->is_connected = true; + + /* submit the register callbacks */ + (void) ds_submit_reg_cb(ds, svc_info->handle, DS_QTYPE_REG); + (void) ds_submit_reg_cb(ds, peer_svc_info->handle, DS_QTYPE_REG); + + return 0; +} + +static void ds_unregister_ldc_services(struct ds_dev *ds) +{ + struct ds_service_info *svc_info; + + dprintk("entered.\n"); + +
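+ /* Only services in DS_REG_STATE_REGISTERED_LDC are disconnected + * here; loopback-registered services are left alone. */ +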
list_for_each_entry(svc_info, &ds->service_provider_list, list) { + if (svc_info->reg_state == DS_REG_STATE_REGISTERED_LDC) + ds_disconnect_service_provider(ds, svc_info); + } + + list_for_each_entry(svc_info, &ds->service_client_list, list) { + if (svc_info->reg_state == DS_REG_STATE_REGISTERED_LDC) + ds_disconnect_service_client(ds, svc_info); + } + +} + +static void ds_reregister_ldc_services(struct ds_dev *ds) +{ + struct ds_service_info *svc_info; + + dprintk("entered.\n"); + + list_for_each_entry(svc_info, &ds->service_provider_list, list) { + if (svc_info->reg_state == DS_REG_STATE_REG_SENT) { + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = ds_get_service_timeout(); + } + } + + list_for_each_entry(svc_info, &ds->service_client_list, list) { + if (svc_info->reg_state == DS_REG_STATE_REG_SENT) { + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = ds_get_service_timeout(); + } + } + +} + +static void ds_remove_services(struct ds_dev *ds) +{ + struct ds_service_info *svc_info, *tmp; + + dprintk("entered.\n"); + + list_for_each_entry_safe(svc_info, tmp, + &ds->service_provider_list, list) { + ds_remove_service_provider(ds, svc_info); + } + + list_for_each_entry_safe(svc_info, tmp, + &ds->service_client_list, list) { + ds_remove_service_client(ds, svc_info); + } + +} + +/* + * DS Kernel Interface functions + */ +void ldom_set_var(const char *var, const char *value) +{ + struct ds_service_info *svc_info; + union { + struct ds_var_set_msg msg; + char all[512]; + } payload; + char *base, *p; + int msg_len; + int rv; + + dprintk("entered.\n"); + + if (var == NULL) { + pr_err("%s: Invalid NULL variable name argument.\n", __func__); + return; + } + + if (value == NULL) { + pr_err("%s: Invalid NULL variable value argument.\n", __func__); + return; + } + + if (strlen(var) > 254) { + pr_err("%s: Variable name too long.\n", __func__); + return; + } + + if (strlen(value) > 254) { + pr_err("%s: Variable value too long.\n", __func__); + return; + } + + svc_info = ds_find_connected_prov_service("var-config"); + if (svc_info == NULL) + svc_info = ds_find_connected_prov_service("var-config-backup"); + + if (svc_info == NULL) { + pr_err("%s: var-config and var-config-backup service " + "not registered. Failed to set (%s) variable " + "to (%s).\n", __func__, var, value); + return; + } + + dprintk("%s: found %s client service\n", __func__, svc_info->id); + + memset(&payload, 0, sizeof(payload)); + payload.msg.hdr.type = DS_VAR_SET_REQ; + base = p = &payload.msg.name_and_value[0]; + strcpy(p, var); + p += strlen(var) + 1; + strcpy(p, value); + p += strlen(value) + 1; + + msg_len = (sizeof(struct ds_var_set_msg) + (p - base)); + msg_len = (msg_len + 3) & ~3; + + mutex_lock(&ds_var_mutex); + + ds_var_response = -1; + wmb(); + + /* + * (re)init the completion var to help guarantee + * responses are for this request (and not an older + * request which came in late). Use a mutex to protect + * against the possibility of re-initializing at the same time + * as the callout thread calling complete() in the callback. 
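 + * The sequence below is therefore: reset ds_var_response, re-init + * ds_var_config_cb_complete under ds_var_complete_mutex, ds_cap_send(), + * then wait_for_completion_timeout(); ds_var_data_cb() takes the same + * mutex before calling complete().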
+ */ + mutex_lock(&ds_var_complete_mutex); + init_completion(&ds_var_config_cb_complete); + mutex_unlock(&ds_var_complete_mutex); + + rv = ds_cap_send(svc_info->handle, &payload, msg_len); + + if (!rv) { + /* wait for response here */ + wait_for_completion_timeout(&ds_var_config_cb_complete, + (DS_RESPONSE_TIMEOUT * HZ)); + } + + if (ds_var_response != DS_VAR_SUCCESS) + pr_err("%s: var-config [%s:%s] failed, response(%d).\n", + __func__, var, value, ds_var_response); + + mutex_unlock(&ds_var_mutex); + + return; + +} + +static int ldom_req_sp_token(const char *service_name, u32 *sp_token_result, + ds_sptok_t *sp_token_data) +{ + struct ds_service_info *svc_info; + struct ds_sp_token_msg *payload; + int svc_len; /* length of service_name string */ + int payload_len; /* length of ds_sp_token_msg payload */ + int rv; + + dprintk("entered.\n"); + + if (service_name == NULL) { + pr_err("%s: Invalid NULL service name argument.\n", __func__); + return -EINVAL; + } + + svc_info = ds_find_connected_prov_service("sp-token"); + if (svc_info == NULL) { + pr_err("%s: sp-token service not registered.\n", __func__); + return -EIO; + } + + svc_len = (service_name == NULL || *service_name == '\0') ? 0 : + strlen(service_name) + 1; + if (svc_len > DS_MAX_SVC_NAME_LEN) { + pr_err("%s: service name '%s' too long.\n", + __func__, service_name); + return -EINVAL; + } + + payload_len = sizeof(struct ds_sp_token_msg) + svc_len; + payload = kzalloc(payload_len, GFP_KERNEL); + if (payload == NULL) { + pr_err("%s: failed to alloc mem for msg.\n", __func__); + return -ENOMEM; + } + + payload->type = DS_SPTOK_REQUEST; + (void) memcpy(payload->service, service_name, svc_len); + + mutex_lock(&ds_sp_token_mutex); + + payload->req_num = ds_sp_token_next_req_num; + + dprintk("%s: sizeof ds_sp_token_msg=%lu svclen=%d.\n", + __func__, sizeof(struct ds_sp_token_msg), svc_len); + dprintk("req_num %llu: payload(%p): type[0x%llx] svc[%s].\n", + payload->req_num, payload, payload->type, payload->service); + + /* set init values */ + ds_sp_token_resp_req_num = ~0; + ds_sp_token_resp_result = ~0; + wmb(); + + /* + * (re)init the completion var to help guarantee + * responses are for this request (and not an older + * request which came in late). Use a mutex to protect + * against the possibility of re-initializing at the same time + * as the callout thread calling complete() in the callback. + */ + mutex_lock(&ds_sp_token_complete_mutex); + init_completion(&ds_sp_token_cb_complete); + mutex_unlock(&ds_sp_token_complete_mutex); + + rv = ds_cap_send(svc_info->handle, payload, payload_len); + + kfree(payload); + + if (!rv) { + + while (1) { + /* wait for response here */ + rv = wait_for_completion_timeout( + &ds_sp_token_cb_complete, + (DS_RESPONSE_TIMEOUT * HZ)); + + if (!rv) { + pr_err("%s: set-token failed: no reply.\n", + __func__); + rv = -ETIMEDOUT; + break; + } + + /* got a reply, validate it */ + + /* If the response wasn't for this request, try again */ + if (ds_sp_token_resp_req_num != + ds_sp_token_next_req_num) { + continue; + } + + /* if we didn't get a valid reply, abort */ + if (ds_sp_token_resp_result != DS_SP_TOKEN_RES_OK) { + pr_err("%s: set-token failed [%d].\n", __func__, + ds_sp_token_resp_result); + rv = -EIO; + break; + } else { + /* + * Got a valid response. + * Copy the response/result to caller. 
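 + * (ds_sp_token_data was filled in by ds_sp_token_data_cb(), which + * copies the response fields starting at ip_addr into it.)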
+ */ + *sp_token_result = ds_sp_token_resp_result; + *sp_token_data = ds_sp_token_data; + rv = 0; + break; + } + } + } + + /* increment sequence number for next caller - wrap at ~0 */ + if (++ds_sp_token_next_req_num == ~0) + ds_sp_token_next_req_num = 0; + + mutex_unlock(&ds_sp_token_mutex); + + return rv; +} + +static char full_boot_str[256] __aligned(32); +static int reboot_data_supported; + +void ldom_reboot(const char *boot_command) +{ + dprintk("entered.\n"); + + /* + * Don't bother with any of this if the boot_command + * is empty. + */ + if (boot_command && strlen(boot_command)) { + unsigned long len; + + strcpy(full_boot_str, "boot "); + strcpy(full_boot_str + strlen("boot "), boot_command); + len = strlen(full_boot_str); + + if (reboot_data_supported) { + unsigned long ra = kimage_addr_to_ra(full_boot_str); + unsigned long hv_ret; + + hv_ret = sun4v_reboot_data_set(ra, len); + if (hv_ret != HV_EOK) + pr_err("%s: Unable to set reboot " + "data hv_ret=%lu\n", __func__, hv_ret); + } else { + ldom_set_var("reboot-command", full_boot_str); + } + } + sun4v_mach_sir(); +} + +void ldom_power_off(void) +{ + dprintk("entered.\n"); + + sun4v_mach_exit(0); +} + +static int ds_handle_data_nack(struct ds_dev *ds, struct ds_msg_tag *pkt) +{ + int rv; + struct ds_data_nack *data_nack; + + dprintk("entered.\n"); + + data_nack = (struct ds_data_nack *)pkt; + + switch (data_nack->payload.result) { + case DS_INV_HDL: + + pr_err("ds-%llu: received INV_HDL data NACK for " + "handle %llx\n", ds->id, data_nack->payload.handle); + + /* + * If we got back a DS_INV_HDL data nack, it means + * the other side could not find a handle associated + * with a data packet we sent to it. So, we interpret this + * to mean the other side's client has gone away, so we + * send an unregister request to clean things up. + */ + rv = ds_service_unreg(ds, data_nack->payload.handle); + if (rv != 0) { + pr_err("ds-%llu: failed to send UNREG_REQ for " + "handle %llx on data NACK (%d)\n", ds->id, + data_nack->payload.handle, rv); + } + + break; + + case DS_TYPE_UNKNOWN: + + /* + * If we got back a TYPE_UNKNOWN, it means the other side + * got an unknown msg_type from a pkt we sent to it. Maybe + * it's an older/buggy driver? What to do? + */ + pr_err("ds-%llu: received UNKNOWN data NACK for " + "handle %llx\n", ds->id, data_nack->payload.handle); + rv = 0; + + break; + }; + + return rv; +} + +static int ds_data_msg(struct ds_dev *ds, struct ds_msg_tag *pkt) +{ + int rv; + struct ds_unknown_msg *unknown_msg; + + dprintk("entered.\n"); + + switch (pkt->type) { + case DS_DATA: + rv = ds_submit_data_cb(ds, pkt, DS_DTYPE_LDC_REQ); + break; + case DS_NACK: + rv = ds_handle_data_nack(ds, pkt); + break; + default: + /* + * XXX - If we receive an unknown msg_type, per spec, + * we are supposed to send back a nack with the handle. + * However, since this is an unknown msg_type, + * we don't know how to retrieve the handle from the msg! + * (a deficiency with the protocol). Let's just hope + * the handle is the first 8 bytes of the payload...?
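 + * That assumption is what the ds_unknown_msg cast below encodes: + * payload.handle is presumed to overlay the first 8 bytes after + * the message tag.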
+ */ + unknown_msg = (struct ds_unknown_msg *)pkt; + ds_send_data_nack(ds, unknown_msg->payload.handle, + DS_TYPE_UNKNOWN); + rv = 0; + }; + + return rv; +} + +static int ds_service_reg(struct ds_dev *ds, struct ds_service_info *svc_info) +{ + int rv; + int payload_len; + struct { + struct ds_reg_req_payload req; + u8 id_buf[256]; + } pbuf; + + dprintk("entered.\n"); + + payload_len = (sizeof(struct ds_reg_req_payload) + + strlen(svc_info->id) + 1); + + /* adjust for 4 bytes of default padding of ds_reg_req_payload */ + payload_len -= 4; + + memset(&pbuf, 0, sizeof(pbuf)); + pbuf.req.handle = svc_info->handle; /* use the unique handle */ + pbuf.req.major = svc_info->vers.major; + pbuf.req.minor = svc_info->vers.minor; + strcpy(pbuf.req.svc_id, svc_info->id); + + rv = ds_ldc_send_payload(ds->lp, DS_REG_REQ, &pbuf, payload_len); + + if (rv > 0) + dprintk("ds-%llu: DS_REG_REQ sent for %s service (%llu.%llu), " + "hdl=(%llx)\n", ds->id, svc_info->id, svc_info->vers.major, + svc_info->vers.minor, svc_info->handle); + + return (rv <= 0); +} + +static int ds_service_unreg(struct ds_dev *ds, u64 handle) +{ + struct ds_unreg_req_payload req; + int rv; + + dprintk("entered.\n"); + + req.handle = handle; + + rv = ds_ldc_send_payload(ds->lp, DS_UNREG_REQ, &req, sizeof(req)); + + return (rv <= 0); +} + +static void ds_service_ack(struct ds_dev *ds, u64 handle, u16 minor) +{ + struct ds_reg_ack_payload req; + int rv; + + dprintk("entered.\n"); + + req.handle = handle; + req.minor = minor; + + rv = ds_ldc_send_payload(ds->lp, DS_REG_ACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); +} + +static void ds_service_nack(struct ds_dev *ds, u64 handle, u64 result, + u16 major) +{ + struct ds_reg_nack_payload req; + int rv; + + dprintk("entered.\n"); + + req.handle = handle; + req.result = result; + req.major = major; + + rv = ds_ldc_send_payload(ds->lp, DS_REG_NACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); + +} + +static void ds_service_unreg_ack(struct ds_dev *ds, u64 handle) +{ + struct ds_unreg_ack_payload req; + int rv; + + dprintk("entered.\n"); + + req.handle = handle; + + rv = ds_ldc_send_payload(ds->lp, DS_UNREG_ACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); + +} + +/* + * Process DS service registration packets received from LDC. + */ +static int ds_handshake_reg(struct ds_dev *ds, struct ds_msg_tag *pkt) +{ + int rv; + u16 neg_svc_minor; + struct ds_reg_req *reg_req = NULL; + struct ds_reg_ack *reg_ack = NULL; + struct ds_reg_nack *reg_nack = NULL; + struct ds_unreg_req *unreg_req = NULL; + struct ds_unreg_ack *unreg_ack = NULL; + struct ds_unreg_nack *unreg_nack = NULL; + struct ds_service_info *svc_info; + + dprintk("entered.\n"); + + rv = 0; + + if (ds->hs_state != DS_HS_COMPLETE) { + /* + * We should not be getting service registration type + * packets unless the HS has been established, so reset + * to get back to a sane state. + */ + pr_err("ds-%llu: ds_handshake_reg: received REG packet " + "but HS is not complete!\n", ds->id); + goto conn_reset; + } + + /* + * In HS_COMPLETE state, we expect only the following service + * registration packets: + * DS_REG_REQ: The other end of the LDC is requesting registration + * of a service. + * Action: + * If we have a provider or client registered for + * this service, ACK with the supported minor and + * connect the service. 
+ * Use major sent in request and lowest minor. + * If we don't have a registered service, NACK it. + * DS_REG_ACK: The other end of the LDC has ACK'd our request to + * register a service. + * Action: + * Use the handle sent in the ACK. + * Use the major sent with the original request and + * lowest minor. + * DS_REG_NACK: The other end of the LDC has NACK'd our request + * to register a service. + * + * DS_UNREG_REQ: + * DS_UNREG_ACK: + * DS_UNREG_NACK: Behave according to the spec. + */ + + if (pkt->type == DS_REG_REQ) { + + /* Other end has sent a register request */ + + reg_req = (struct ds_reg_req *)pkt; + + /* + * For compatibility with Solaris ldoms on mixed + * systems, if we receive a REG_REQ with the + * DS_HDL_ISCLIENT_BIT, it is an indication (or "ping") + * to send a REG_REQ for any provider services for this + * svc_id. + */ + + if (reg_req->payload.handle & DS_HDL_ISCLIENT_BIT) { + + dprintk("ds-%llu: Received REG_REQ 'ping' " + "for %s service", ds->id, + reg_req->payload.svc_id); + + /* + * If there is a provider service in SENT + * state (which means the service never got + * connected), put it back into UNREG state + * so it will be registered again. + */ + svc_info = ds_find_service_provider_id(ds, + reg_req->payload.svc_id); + if (svc_info != NULL && + svc_info->reg_state == DS_REG_STATE_REG_SENT) { + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = + ds_get_service_timeout(); + } + + goto done; + + } + + /* check if there is a registered service for this request */ + svc_info = ds_find_service_client_id(ds, + reg_req->payload.svc_id); + if (svc_info == NULL) { + svc_info = ds_find_service_provider_id(ds, + reg_req->payload.svc_id); + if (svc_info == NULL) { + /* There is no registered service */ + dprintk("ds-%llu: no service registered for " + "REG_REQ service %s (%llx)\n", ds->id, + reg_req->payload.svc_id, + reg_req->payload.handle); + + /* NACK it */ + ds_service_nack(ds, reg_req->payload.handle, + DS_INV_HDL, 0); + + goto done; + } + } + + /* Found a registered service */ + + if (svc_info->is_connected) { + /* service is already registered */ + ds_service_nack(ds, reg_req->payload.handle, + DS_REG_DUP, 0); + goto done; + } + + if (reg_req->payload.major != svc_info->vers.major) { + /* service version is incompatible */ + ds_service_nack(ds, reg_req->payload.handle, + DS_REG_VER_NACK, 0); + goto done; + } + + neg_svc_minor = min_t(u16, (u16)svc_info->vers.minor, + reg_req->payload.minor); + + if (svc_info->is_client) + ds_connect_service_client(ds, reg_req->payload.handle, + reg_req->payload.major, neg_svc_minor, svc_info); + else + ds_connect_service_provider(ds, reg_req->payload.handle, + reg_req->payload.major, neg_svc_minor, svc_info); + + /* ACK the init request */ + ds_service_ack(ds, reg_req->payload.handle, + (u16)svc_info->vers.minor); + + dprintk("ds-%llu: Registered %s %s service (%llx) " + "version %llu.%llu to (%llx).\n", ds->id, + (svc_info->is_client ? 
"Client" : "Provider"), + svc_info->id, svc_info->handle, + svc_info->neg_vers.major, + svc_info->neg_vers.minor, + svc_info->con_handle); + + } else if (pkt->type == DS_REG_ACK) { + + /* other end has ACK'd our reg request */ + + reg_ack = (struct ds_reg_ack *)pkt; + + svc_info = ds_find_service_provider_handle(ds, + reg_ack->payload.handle); + if (svc_info == NULL) { + svc_info = ds_find_service_client_handle(ds, + reg_ack->payload.handle); + + if (svc_info == NULL) { + /* no service for this handle */ + pr_err("ds-%llu: REG ACK for unknown " + "handle %llx\n", ds->id, + reg_ack->payload.handle); + goto done; + } + } + + if (svc_info->reg_state != DS_REG_STATE_REG_SENT) { + pr_err("ds-%llu: REG ACK for %s service in " + "%llu state (%llx)\n", ds->id, svc_info->id, + svc_info->reg_state, reg_ack->payload.handle); + goto done; + } + + /* Use the lowest negotiated DS minor version */ + neg_svc_minor = min_t(u16, reg_ack->payload.minor, + svc_info->vers.minor); + + if (svc_info->is_client) + ds_connect_service_client(ds, reg_ack->payload.handle, + svc_info->vers.major, neg_svc_minor, svc_info); + else + ds_connect_service_provider(ds, reg_ack->payload.handle, + svc_info->vers.major, neg_svc_minor, svc_info); + + + dprintk("ds-%llu: Registered %s service " + "version %llu.%llu (%llx).\n", ds->id, + svc_info->id, svc_info->neg_vers.major, + svc_info->neg_vers.minor, svc_info->handle); + + } else if (pkt->type == DS_REG_NACK) { + + /* other end has NACK'd our reg request */ + + reg_nack = (struct ds_reg_nack *)pkt; + + svc_info = ds_find_service_provider_handle(ds, + reg_nack->payload.handle); + if (svc_info == NULL) { + svc_info = ds_find_service_client_handle(ds, + reg_nack->payload.handle); + if (svc_info == NULL) { + /* No service for this handle */ + pr_err("ds-%llu: REG NACK for " + "unknown handle %llx\n", + ds->id, reg_nack->payload.handle); + goto done; + } + } + + if (svc_info->reg_state != DS_REG_STATE_REG_SENT) { + pr_err("ds-%llu: REG NACK for %s service in " + "%llu state (%llx)\n", ds->id, svc_info->id, + svc_info->reg_state, reg_nack->payload.handle); + goto done; + } + + /* + * If a service is NACK'd for any reason we simply put + * the service into UNREG state. At some point in the + * future, the service registration will be re-tried + * by the timer thread. + */ + svc_info->reg_state = DS_REG_STATE_UNREG; + svc_info->svc_reg_timeout = ds_get_service_timeout(); + + dprintk("ds-%llu: Registration nack'd for %s service " + "(%llx). Result=%llu. Major=%u\n", ds->id, svc_info->id, + reg_nack->payload.handle, reg_nack->payload.result, + reg_nack->payload.major); + + } else if (pkt->type == DS_UNREG_REQ) { + + /* other end has sent a unregister request */ + + unreg_req = (struct ds_unreg_req *)pkt; + + /* unregister any service associated with the handle */ + + /* see if service registered */ + svc_info = ds_find_service_client_con_handle(ds, + unreg_req->payload.handle); + if (svc_info == NULL) { + svc_info = ds_find_service_provider_con_handle(ds, + unreg_req->payload.handle); + + if (svc_info == NULL) { + /* There is no service */ + + pr_err("ds-%llu: no service registered for " + "UNREG_REQ handle %llx\n", ds->id, + unreg_req->payload.handle); + + /* + * Our service could have been unregistered and + * removed. Go ahead and ACK it. This allows + * the other side to still clean up properly. 
+ */ + ds_service_unreg_ack(ds, + unreg_req->payload.handle); + + goto done; + } + } + + + if (svc_info->reg_state != DS_REG_STATE_REGISTERED_LDC) { + pr_err("ds-%llu: UNREG_REQ for %s service in " + "%llu state (%llx)\n", ds->id, svc_info->id, + svc_info->reg_state, unreg_req->payload.handle); + goto done; + } + + dprintk("ds-%llu: Unregistered %s service (%llx) " + "from (%llx).\n", ds->id, svc_info->id, + svc_info->con_handle, unreg_req->payload.handle); + + if (svc_info->is_client) + ds_disconnect_service_client(ds, svc_info); + else + ds_disconnect_service_provider(ds, svc_info); + + /* ACK the unreg request */ + ds_service_unreg_ack(ds, unreg_req->payload.handle); + + } else if (pkt->type == DS_UNREG_ACK) { + + /* Got an ACK to our UNREG_REQ */ + + unreg_ack = (struct ds_unreg_ack *)pkt; + + svc_info = ds_find_service_client_con_handle(ds, + unreg_ack->payload.handle); + if (svc_info == NULL) { + svc_info = ds_find_service_provider_con_handle(ds, + unreg_ack->payload.handle); + if (svc_info == NULL) { + /* + * There is no service for this handle. + * It's possible the service was + * unregistered and removed. + */ + dprintk("ds-%llu: UNREG ACK for unknown " + "handle %llx\n", ds->id, + unreg_ack->payload.handle); + goto done; + } + } + + dprintk("ds-%llu: Unregistered %s service (%llx).\n", + ds->id, svc_info->id, unreg_ack->payload.handle); + + if (svc_info->is_client) + ds_disconnect_service_client(ds, svc_info); + else + ds_disconnect_service_provider(ds, svc_info); + + } else if (pkt->type == DS_UNREG_NACK) { + + /* Got a NACK to our UNREG_REQ */ + + unreg_nack = (struct ds_unreg_nack *)pkt; + + /* XXX - what to do on an unreg NACK??? */ + + dprintk("ds-%llu: Received UNREG_NACK for (%llx).\n", + ds->id, unreg_nack->payload.handle); + + } else { + /* Unexpected packet type. Reset to get back to a sane state. */ + goto conn_reset; + } + +done: + return 0; + +conn_reset: + + ds_reset(ds); + + return -ECONNRESET; +} + +static int ds_is_local_ds(struct ds_dev *ds) +{ + struct mdesc_handle *hp; + u64 cd_node; + u64 anode; + u64 target; + const u64 *local_handle; + + if (!ds_local_ldom_handle_set) { + /* + * Find the virtual-domain-service node under the + * channel-devices node in the MD which + * contains the vlds-domain-handle property. + * This is the "local" ldom handle. + * Cache it in ds_local_ldom_handle global var. + */ + hp = mdesc_grab(); + if (hp) { + /* get the channel-devices node in the MD */ + cd_node = mdesc_node_by_name(hp, MDESC_NODE_NULL, + "channel-devices"); + if (cd_node != MDESC_NODE_NULL) { + /* + * For each node under it, look for the + * virtual-device node which contains the + * vlds-domain-handle property.
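 + * (Concretely: walk the MDESC_ARC_TYPE_FWD arcs out of the + * channel-devices node and probe each arc target with + * mdesc_get_property(); any target carrying the property ends + * up cached in ds_local_ldom_handle.)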
+ */ + mdesc_for_each_arc(anode, hp, cd_node, + MDESC_ARC_TYPE_FWD) { + + target = mdesc_arc_target(hp, anode); + + local_handle = mdesc_get_property(hp, + target, "vlds-domain-handle", NULL); + if (local_handle != NULL) { + ds_local_ldom_handle = + *local_handle; + ds_local_ldom_handle_set = true; + } + } + } - rp = (struct ds_var_resp *) (dpkt + 1); + mdesc_release(hp); + } + } - if (rp->hdr.type != DS_VAR_SET_RESP && - rp->hdr.type != DS_VAR_DELETE_RESP) - return; + if (ds_local_ldom_handle_set && + ds->handle == ds_local_ldom_handle) { + return 1; + } - ds_var_response = rp->result; - wmb(); - ds_var_doorbell = 1; + return 0; } -void ldom_set_var(const char *var, const char *value) +static void ds_timer_register_service(struct ds_dev *ds, + struct ds_service_info *svc_info) { - struct ds_cap_state *cp; - struct ds_info *dp; - unsigned long flags; + struct ds_service_info *peer_svc_info; + int rv; - spin_lock_irqsave(&ds_lock, flags); - cp = NULL; - for (dp = ds_info_list; dp; dp = dp->next) { - struct ds_cap_state *tmp; + /* Check if the service is allowed to register yet */ + if (jiffies < svc_info->svc_reg_timeout) + return; - tmp = find_cap_by_string(dp, "var-config"); - if (tmp && tmp->state == CAP_STATE_REGISTERED) { - cp = tmp; - break; - } - } - if (!cp) { - for (dp = ds_info_list; dp; dp = dp->next) { - struct ds_cap_state *tmp; + if (svc_info->reg_state != DS_REG_STATE_UNREG) + return; - tmp = find_cap_by_string(dp, "var-config-backup"); - if (tmp && tmp->state == CAP_STATE_REGISTERED) { - cp = tmp; - break; + /* We have a service ready to be registered. */ + + /* + * First check to see if there is a local unconnected loopback peer + * for this service id and if so, connect it in loopback mode. + * NOTE: we only allow loopback connections on the "local" DS port. + */ + if (ds_is_local_ds(ds)) { + if (svc_info->is_client) + peer_svc_info = ds_find_service_provider_id(ds, + svc_info->id); + else + peer_svc_info = ds_find_service_client_id(ds, + svc_info->id); + + if (peer_svc_info && !peer_svc_info->is_connected) { + rv = ds_connect_loopback_service(ds, svc_info, + peer_svc_info); + if (rv == 0) { + dprintk("ds-%llu: Registered loopback " + "service %s (%llu)\n", ds->id, + svc_info->id, svc_info->con_handle); + return; + } else { + pr_err("ds-%llu: failed to connect " + "loopback %s service\n", ds->id, + svc_info->id); } + /* fallthrough and attempt LDC registration? */ } } - spin_unlock_irqrestore(&ds_lock, flags); - - if (cp) { - union { - struct { - struct ds_data data; - struct ds_var_set_msg msg; - } header; - char all[512]; - } pkt; - char *base, *p; - int msg_len, loops; - - if (strlen(var) + strlen(value) + 2 > - sizeof(pkt) - sizeof(pkt.header)) { - printk(KERN_ERR PFX - "contents length: %zu, which more than max: %lu," - "so could not set (%s) variable to (%s).\n", - strlen(var) + strlen(value) + 2, - sizeof(pkt) - sizeof(pkt.header), var, value); + + /* Only attempt LDC registration if the HS is complete */ + if (ds->hs_state == DS_HS_COMPLETE) { + rv = ds_service_reg(ds, svc_info); + if (rv == 0) { + svc_info->reg_state = DS_REG_STATE_REG_SENT; + /* + * Clear the reg SENT timeout. + * We don't retry unless the LDC is reconnected. + * Or if we receive a client "ping" for the service. 
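 + * (Both recovery paths appear elsewhere in this patch: + * ds_reregister_ldc_services() moves REG_SENT services back to + * UNREG after a new handshake, and the DS_HDL_ISCLIENT_BIT "ping" + * case in ds_handshake_reg() does the same for a single service.)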
+ */ + svc_info->svc_reg_timeout = 0; return; + } else { + dprintk("ds-%llu: failed to send REG_REQ for " + " \"%s\" service (%d)\n", ds->id, + svc_info->id, rv); } + } - memset(&pkt, 0, sizeof(pkt)); - pkt.header.data.tag.type = DS_DATA; - pkt.header.data.handle = cp->handle; - pkt.header.msg.hdr.type = DS_VAR_SET_REQ; - base = p = &pkt.header.msg.name_and_value[0]; - strcpy(p, var); - p += strlen(var) + 1; - strcpy(p, value); - p += strlen(value) + 1; - - msg_len = (sizeof(struct ds_data) + - sizeof(struct ds_var_set_msg) + - (p - base)); - msg_len = (msg_len + 3) & ~3; - pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); - - mutex_lock(&ds_var_mutex); - - spin_lock_irqsave(&ds_lock, flags); - ds_var_doorbell = 0; - ds_var_response = -1; - - __ds_send(dp->lp, &pkt, msg_len); - spin_unlock_irqrestore(&ds_lock, flags); - - loops = 1000; - while (ds_var_doorbell == 0) { - if (loops-- < 0) - break; - barrier(); - udelay(100); - } + /* + * We failed to register the service. + * Try again in the future. + */ + svc_info->svc_reg_timeout = ds_get_service_timeout(); +} - mutex_unlock(&ds_var_mutex); +static void ds_exec_reg_timer(unsigned long data) +{ + struct ds_dev *ds = (struct ds_dev *)data; + unsigned long flags; + struct ds_service_info *svc_info; + int rv; - if (ds_var_doorbell == 0 || - ds_var_response != DS_VAR_SUCCESS) - pr_info("ds-%llu: var-config [%s:%s] failed, response(%d).\n", - dp->id, var, value, ds_var_response); - } else { - pr_info("var-config not registered so could not set (%s) variable to (%s).\n", - var, value); +#ifdef DS_KERNEL_TIMER_BUG_WAR + /* + * There appears to be a bug in the UEK kernel where + * timers can execute on a CPU where local interrupts + * have been disabled. Deadlocks have been observed + * where the DS registration timer (ds_reg_tmr) can + * execute on a CPU, interrupting a thread on the CPU + * which is holding the ds->ds_lock or the ds->lp->lock + * resulting in a deadlock when the timer attempts + * to grab the lock. As a workaround, the timer handler will + * first check if the locks are held and if so, simply + * reschedule the timer and exit (without grabbing the + * locks - thus avoiding the deadlock). the kernel needs + * to be fixed at some point since executing timers + * on CPUs with local interrupts disabled is a violation + * of spin_lock_irqsave() semantics. + */ + if (spin_is_locked(&ds->ds_lock) || spin_is_locked(&ds->lp->lock)) { + mod_timer(&ds->ds_reg_tmr, + jiffies + msecs_to_jiffies(DS_REG_TIMER_FREQ)); + return; } -} +#endif /* DS_KERNEL_TIMER_BUG_WAR */ -static char full_boot_str[256] __aligned(32); -static int reboot_data_supported; + LOCK_DS_DEV(ds, flags) -void ldom_reboot(const char *boot_command) -{ - /* Don't bother with any of this if the boot_command - * is empty. + /* + * Walk through the services for this ds and for those + * which are not yet registered, (re)send a REG_REQ. 
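 + * ds_timer_register_service() enforces the per-service backoff: any + * service whose svc_reg_timeout (a randomized 3-9 second deadline + * from ds_get_service_timeout()) has not yet expired is skipped on + * this pass.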
*/ - if (boot_command && strlen(boot_command)) { - unsigned long len; + list_for_each_entry(svc_info, &ds->service_provider_list, list) + ds_timer_register_service(ds, svc_info); - snprintf(full_boot_str, sizeof(full_boot_str), "boot %s", - boot_command); - len = strlen(full_boot_str); + list_for_each_entry(svc_info, &ds->service_client_list, list) + ds_timer_register_service(ds, svc_info); - if (reboot_data_supported) { - unsigned long ra = kimage_addr_to_ra(full_boot_str); - unsigned long hv_ret; + /* reset the timer to fire again in DS_REG_TIMER_FREQ ms */ + rv = mod_timer(&ds->ds_reg_tmr, + jiffies + msecs_to_jiffies(DS_REG_TIMER_FREQ)); - hv_ret = sun4v_reboot_data_set(ra, len); - if (hv_ret != HV_EOK) - pr_err("SUN4V: Unable to set reboot data hv_ret=%lu\n", - hv_ret); - } else { - ldom_set_var("reboot-command", full_boot_str); - } - } - sun4v_mach_sir(); -} + UNLOCK_DS_DEV(ds, flags) -void ldom_power_off(void) -{ - sun4v_mach_exit(0); } -static void ds_conn_reset(struct ds_info *dp) +static void ds_start_service_reg_timer(struct ds_dev *ds) { - pr_err("ds-%llu: ds_conn_reset() from %pf\n", dp->id, - __builtin_return_address(0)); + int rv; + + dprintk("entered.\n"); + + setup_timer(&ds->ds_reg_tmr, ds_exec_reg_timer, + (unsigned long)ds); + + /* kick off the first timer in DS_REG_TIMER_FREQ ms */ + rv = mod_timer(&ds->ds_reg_tmr, + jiffies + msecs_to_jiffies(DS_REG_TIMER_FREQ)); + + if (rv) + pr_err("ds-%llu: Error setting ds registration timer", + ds->id); } -static unsigned long long register_services(struct ds_info *dp) + +/* + * NOTE: All kernel ds services are defined as providers, no matter if + * they actually behave as a server or as client. + */ +static void ds_add_builtin_services(struct ds_dev *ds, + struct ds_builtin_service *ds_builtin_service_template, + int num_template_services) { - struct ldc_channel *lp = dp->lp; + + struct ds_service_info *svc_info; int i; - unsigned long long nreg = 0; - - for (i = 0; i < dp->num_ds_states; i++) { - struct { - struct ds_reg_req req; - u8 id_buf[256]; - } pbuf; - struct ds_cap_state *cp = &dp->ds_states[i]; - int err, msg_len; - u64 new_count; - - if (cp->state == CAP_STATE_REGISTERED) - continue; - nreg |= (1 << i); + dprintk("entered.\n"); - /* solaris service domains think 0x80000000 indicates clients */ - new_count = sched_clock() & 0x7fffffff; - cp->handle = ((u64) i << 32) | new_count; + /* walk the builtin service provider array and add to the ds */ + for (i = 0; i < num_template_services; i++) { - msg_len = (sizeof(struct ds_reg_req) + - strlen(cp->service_id)); + /* + * If there is already a registered service provider + * for this id, skip it since there can only be 1 + * service provider per ds/service id. 
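 + * (The ds_find_service_provider_id() lookup below enforces this; + * an already-registered provider takes precedence over the builtin + * template entry.)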
+ */ + svc_info = ds_find_service_provider_id(ds, + ds_builtin_service_template[i].id); - memset(&pbuf, 0, sizeof(pbuf)); - pbuf.req.tag.type = DS_REG_REQ; - pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag)); - pbuf.req.handle = cp->handle; - pbuf.req.major = 1; - pbuf.req.minor = 0; - strcpy(pbuf.req.svc_id, cp->service_id); + if (svc_info != NULL) + continue; - err = __ds_send(lp, &pbuf, msg_len); - if (err > 0) - cp->state = CAP_STATE_REG_SENT; + /* if no existing service provider, add the builtin */ + svc_info = ds_add_service_provider(ds, + ds_builtin_service_template[i].id, + ds_builtin_service_template[i].vers, + &ds_builtin_service_template[i].ops, + true); + + if (svc_info == NULL) + pr_err("ds-%llu: Failed to add builtin " + "provider service %s", ds->id, + ds_builtin_service_template[i].id); } - return nreg; -} -static struct timer_list ds_reg_tmr; -static int reg_cnt; +} -static void ds_run_timer(unsigned long data) +static int ds_init_req(struct ds_dev *ds) { - unsigned long flags; - unsigned long long ret; - struct ds_info *dp = (struct ds_info *)data; + struct ds_ver_req_payload req; + int rv; - spin_lock_irqsave(&ds_lock, flags); - ret = register_services(dp); - ++reg_cnt; - spin_unlock_irqrestore(&ds_lock, flags); + dprintk("entered.\n"); - if (!ret) - return; + /* send a DS version init request */ + req.ver.major = DS_MAJOR_VERSION; + req.ver.minor = DS_MINOR_VERSION; - if (reg_cnt > 5) { - int i; - for (i = 0; i < dp->num_ds_states; i++) - if (ret & (1 << i)) { - struct ds_cap_state *cp = &dp->ds_states[i]; - pr_err("ds-%llu: registration of \"%s\" failed\n", - dp->id, cp->service_id); - } - } else { - ret = mod_timer(&ds_reg_tmr, jiffies + msecs_to_jiffies(3000)); - if (ret) - pr_err("ds-%llu: Error setting timer callback\n", - dp->id); - } + rv = ds_ldc_send_payload(ds->lp, DS_INIT_REQ, &req, sizeof(req)); + + return (rv <= 0); } -static void ds_setup_retry_timer(struct ds_info *dp) +static void ds_init_ack(struct ds_dev *ds) { - int ret; + struct ds_ver_ack_payload req; + int rv; - /* - * "reliable" ldc communication will not catch if ack/nack's are - * not received for service registering attempts. retry via timer. - */ - setup_timer(&ds_reg_tmr, ds_run_timer, (unsigned long)dp); + dprintk("entered.\n"); + + req.minor = DS_MINOR_VERSION; + + rv = ds_ldc_send_payload(ds->lp, DS_INIT_ACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); - ret = mod_timer(&ds_reg_tmr, jiffies + msecs_to_jiffies(2000));; - if (ret) - pr_err("ds-%llu: Error setting ds registration retry timer\n", - dp->id); } -static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt) +static void ds_init_nack(struct ds_dev *ds, u16 major) { + struct ds_ver_nack_payload req; + int rv; - if (dp->hs_state == DS_HS_START) { - if (pkt->type != DS_INIT_ACK) - goto conn_reset; + dprintk("entered.\n"); - dp->hs_state = DS_HS_DONE; - ds_setup_retry_timer(dp); - return register_services(dp); - } + req.major = major; - if (dp->hs_state != DS_HS_DONE) - goto conn_reset; + rv = ds_ldc_send_payload(ds->lp, DS_INIT_NACK, &req, sizeof(req)); + if (rv <= 0) + pr_err("ds-%llu: %s: ldc_send failed. (%d)\n ", ds->id, + __func__, rv); - if (pkt->type == DS_REG_ACK) { - struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt; - struct ds_cap_state *cp = find_cap(dp, ap->handle); +} - if (!cp) { - pr_err("ds-%llu: REG ACK for unknown handle %llx\n", - dp->id, ap->handle); - return 0; +/* Process DS init packets received from LDC. 
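 + * The init exchange negotiates the version of the DS protocol + * itself; per-service versions are negotiated separately in + * ds_handshake_reg().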
*/ +static int ds_handshake_init(struct ds_dev *ds, struct ds_msg_tag *pkt) +{ + struct ds_ver_req *init_req; + struct ds_ver_ack *init_ack; + u16 neg_ds_major; + u16 neg_ds_minor; + + dprintk("entered.\n"); + + if (ds->hs_state != DS_HS_START) { + + if (ds->hs_state == DS_HS_COMPLETE) { + /* + * If an INIT type pkt comes through while in + * HS_COMPLETE state, it could be an extraneous packet + * left over from a (simultaneous) handshake. So, we + * will just ignore it since the connection has already + * been established. No need to error out. + */ + goto done; + } + + /* Invalid state, reset to get sane again */ + goto conn_reset; + } + + /* + * In the DS_HS_START state, the only valid pkt types are: + * DS_INIT_REQ: Other end of LDC is requesting INIT of DS. + * Action: + * If the sent major is compatible, ACK + * with supported minor. + * Use major sent in request and lowest minor. + * DS_INIT_ACK: Other end of LDC has ack'd our DS INIT request. + * Action: + * Use major sent in original INIT_REQ and + * lowest minor. + * DS_INIT_NACK: Other end of LDC nack'd our DS INIT request. + * Action: + * Remain in HS_START state. Other side could try to + * init the DS (with an acceptable major #). + */ + + if (pkt->type == DS_INIT_REQ) { + + init_req = (struct ds_ver_req *)pkt; + + /* Check if the major is compatible */ + + /* NOTE - we currently only support DS_MAJOR_VERSION. */ + if (init_req->payload.ver.major != DS_MAJOR_VERSION) { + /* + * Incompatible major, NACK it. But remain in + * HS_START state since it's possible our + * INIT_REQ will still be successfully ACK'd.
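 + * (Handshakes may run from both ends simultaneously: each side + * can send its own INIT_REQ, so NACKing the peer's request says + * nothing about the fate of ours.)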
+ */ + ds_init_nack(ds, 0); + goto done; + } -struct ds_queue_entry { - struct list_head list; - struct ds_info *dp; - int req_len; - int __pad; - u64 req[0]; -}; + /* Use the requested DS major version */ + neg_ds_major = init_req->payload.ver.major; -static void process_ds_work(void) -{ - struct ds_queue_entry *qp, *tmp; - unsigned long flags; - LIST_HEAD(todo); + /* Use the lowest negotiated DS minor version */ + neg_ds_minor = min_t(u16, init_req->payload.ver.minor, + DS_MINOR_VERSION); - spin_lock_irqsave(&ds_lock, flags); - list_splice_init(&ds_work_list, &todo); - spin_unlock_irqrestore(&ds_lock, flags); + /* ACK the init request */ + ds_init_ack(ds); - list_for_each_entry_safe(qp, tmp, &todo, list) { - struct ds_data *dpkt = (struct ds_data *) qp->req; - struct ds_info *dp = qp->dp; - struct ds_cap_state *cp = find_cap(dp, dpkt->handle); - int req_len = qp->req_len; + } else if (pkt->type == DS_INIT_ACK) { - if (!cp) { - pr_err("ds-%llu: Data for unknown handle %llu\n", - dp->id, dpkt->handle); + init_ack = (struct ds_ver_ack *)pkt; - spin_lock_irqsave(&ds_lock, flags); - __send_ds_nack(dp, dpkt->handle); - spin_unlock_irqrestore(&ds_lock, flags); - } else { - cp->data(dp, cp, dpkt, req_len); - } + /* Use the major version we sent in the INIT request */ + neg_ds_major = DS_MAJOR_VERSION; + + /* Use the lowest negotiated DS minor version */ + neg_ds_minor = min_t(u16, init_ack->payload.minor, + DS_MINOR_VERSION); - list_del(&qp->list); - kfree(qp); + } else if (pkt->type == DS_INIT_NACK) { + /* + * If we get a NACK, per spec, we could try another + * request with an alternate major number. However, for now, + * we do not and we just remain in HS_START state. + * We remain in START state so the other end could + * still potentially make/complete a HS init request. + * If code is ever added in the future to retry the INIT_REQ + * with an alternate major, per spec, the code should use the + * major returned in the NACK. + */ + goto done; + + } else { + + /* Unexpected packet type. Reset to get back to a sane state. */ + goto conn_reset; } -} -static int ds_thread(void *__unused) -{ - DEFINE_WAIT(wait); + /* assign the negotiated maj/min for the DS connection */ + ds->neg_vers.major = (u64)neg_ds_major; + ds->neg_vers.minor = (u64)neg_ds_minor; - while (1) { - prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE); - if (list_empty(&ds_work_list)) - schedule(); - finish_wait(&ds_wait, &wait); + /* Handshake established, move to complete state */ + ds->hs_state = DS_HS_COMPLETE; - if (kthread_should_stop()) - break; + /* + * If there were any services which failed to + * register before, then try to re-register them. 
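 + * (ds_reregister_ldc_services() only flips REG_SENT services back + * to UNREG; the registration timer then re-sends their REG_REQs.)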
+ */ + ds_reregister_ldc_services(ds); - process_ds_work(); - } + dprintk("ds-%llu: DS INIT HS Complete Version=%llu.%llu.\n", ds->id, + ds->neg_vers.major, ds->neg_vers.minor); +done: return 0; + +conn_reset: + + ds_reset(ds); + + return -ECONNRESET; + } -static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len) +static int ds_handshake_msg(struct ds_dev *ds, struct ds_msg_tag *pkt) { - struct ds_data *dpkt = (struct ds_data *) pkt; - struct ds_queue_entry *qp; - qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC); - if (!qp) { - __send_ds_nack(dp, dpkt->handle); - } else { - qp->dp = dp; - memcpy(&qp->req, pkt, len); - list_add_tail(&qp->list, &ds_work_list); - wake_up(&ds_wait); + dprintk("entered.\n"); + + dprintk("ds-%llu: ds_handshake: hs_state=%d, pkt_type = %d\n", ds->id, + ds->hs_state, pkt->type); + + if (ds->hs_state == DS_HS_LDC_DOWN) { + + /* We should not be getting HS packets until the LDC is UP */ + + pr_err("ds-%llu: ds_handshake: received HS packet " + "but LDC is down!\n", ds->id); + + /* reset the connection to get back to a sane state */ + goto conn_reset; } - return 0; -} -static void ds_up(struct ds_info *dp) -{ - struct ldc_channel *lp = dp->lp; - struct ds_ver_req req; - int err; + switch (pkt->type) { + case DS_INIT_REQ: + case DS_INIT_ACK: + case DS_INIT_NACK: + + /* handle ds initialization packets */ + return ds_handshake_init(ds, pkt); + + case DS_REG_REQ: + case DS_REG_ACK: + case DS_REG_NACK: + case DS_UNREG_REQ: + case DS_UNREG_ACK: + case DS_UNREG_NACK: + + /* handle service registration packets */ + return ds_handshake_reg(ds, pkt); - req.tag.type = DS_INIT_REQ; - req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag); - req.ver.major = 1; - req.ver.minor = 0; + default: + /* Invalid pkt type */ + pr_err("ds-%llu: Invalid pkt received %d\n", ds->id, pkt->type); + return -EINVAL; + } + +conn_reset: - err = __ds_send(lp, &req, sizeof(req)); - if (err > 0) - dp->hs_state = DS_HS_START; + ds_reset(ds); + + return -ECONNRESET; } -static void ds_reset(struct ds_info *dp) +static void ds_up(struct ds_dev *ds) { - int i; + int rv; - dp->hs_state = 0; + dprintk("entered.\n"); - for (i = 0; i < dp->num_ds_states; i++) { - struct ds_cap_state *cp = &dp->ds_states[i]; + /* reset the HS state machine */ + ds->hs_state = DS_HS_START; - cp->state = CAP_STATE_UNKNOWN; - } + /* send a DS init request */ + rv = ds_init_req(ds); + + if (rv != 0) + pr_err("ds-%llu: failed to send DS_INIT_REQ (%d)\n", + ds->id, rv); } static void ds_event(void *arg, int event) { - struct ds_info *dp = arg; - struct ldc_channel *lp = dp->lp; + struct ds_dev *ds = arg; unsigned long flags; - int err; + int rv; - spin_lock_irqsave(&ds_lock, flags); + dprintk("ds-%llu: CPU[%d] event received = %d\n", ds->id, + smp_processor_id(), event); + + /* + * NOTE - we don't use the UN/LOCK_DS_DEV macros here + * since we do not need to disable the HV interrupt - since + * we are in the interrupt handler. 
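 + * (A plain spin_lock_irqsave() on ds->ds_lock is enough at this + * point; the extra HV-interrupt masking done by the macros would + * be redundant inside the handler.)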
+ */ + spin_lock_irqsave(&ds->ds_lock, flags); if (event == LDC_EVENT_UP) { - ds_up(dp); - spin_unlock_irqrestore(&ds_lock, flags); + ds_up(ds); + spin_unlock_irqrestore(&ds->ds_lock, flags); return; } if (event == LDC_EVENT_RESET) { - ds_reset(dp); - spin_unlock_irqrestore(&ds_lock, flags); + ds_reset(ds); + spin_unlock_irqrestore(&ds->ds_lock, flags); return; } if (event != LDC_EVENT_DATA_READY) { - pr_warn("ds-%llu: Unexpected LDC event %d\n", dp->id, event); - spin_unlock_irqrestore(&ds_lock, flags); + pr_err("ds-%llu: Unexpected LDC event %d\n", ds->id, event); + spin_unlock_irqrestore(&ds->ds_lock, flags); return; } - err = 0; + rv = 0; while (1) { struct ds_msg_tag *tag; - err = ldc_read(lp, dp->rcv_buf, sizeof(*tag)); + rv = ldc_read(ds->lp, ds->rcv_buf, sizeof(*tag)); - if (unlikely(err < 0)) { - if (err == -ECONNRESET) - ds_conn_reset(dp); + if (unlikely(rv < 0)) { + if (rv == -ECONNRESET) + ds_reset(ds); break; } - if (err == 0) + + if (rv == 0) break; - tag = dp->rcv_buf; - err = ldc_read(lp, tag + 1, tag->len); + tag = (struct ds_msg_tag *)ds->rcv_buf; - if (unlikely(err < 0)) { - if (err == -ECONNRESET) - ds_conn_reset(dp); + /* Make sure the read won't overrun our buffer */ + if (tag->len > (DS_DEFAULT_BUF_SIZE - + sizeof(struct ds_msg_tag))) { + pr_err("ds-%llu: %s: msg tag length too big.\n", + ds->id, __func__); + ds_reset(ds); break; } - if (err < tag->len) + + rv = ldc_read(ds->lp, tag + 1, tag->len); + + if (unlikely(rv < 0)) { + if (rv == -ECONNRESET) + ds_reset(ds); break; + } - if (tag->type < DS_DATA) - err = ds_handshake(dp, dp->rcv_buf); - else - err = ds_data(dp, dp->rcv_buf, - sizeof(*tag) + err); - if (err == -ECONNRESET) + if (rv < tag->len) break; + + if (tag->type < DS_DATA) { + dprintk("ds-%llu: hs data received (%d bytes)\n", + ds->id, rv); + rv = ds_handshake_msg(ds, + (struct ds_msg_tag *)ds->rcv_buf); + } else { + dprintk("ds-%llu: data received (%d bytes)\n", + ds->id, rv); + /* only process data if the HS is complete */ + if (ds->hs_state == DS_HS_COMPLETE) { + rv = ds_data_msg(ds, + (struct ds_msg_tag *)ds->rcv_buf); + } else { + /* just eat the data packet */ + pr_err("ds-%llu: %s: received data for " + "unconnected DS - ignored.\n", + ds->id, __func__); + rv = 0; + } + } + + if (unlikely(rv < 0)) { + + if (rv == -ECONNRESET) + break; + + pr_err("ds-%llu: %s: failed process data " + "packet rv = %d\n", ds->id, __func__, rv); + } + } + + spin_unlock_irqrestore(&ds->ds_lock, flags); +} + +static long ds_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + ds_ioctl_sptok_data_t __user *uarg; + u32 major_version; + u32 minor_version; + u32 sp_token_result; + ds_sptok_t sp_token_data; + char service_name[DS_MAX_SVC_NAME_LEN]; + int rv; + + dprintk("entered.\n"); + + rv = 0; + + switch (cmd) { + case DS_SPTOK_GET: + pr_info("%s Getting sp-token\n", __func__); + uarg = (ds_ioctl_sptok_data_t __user *)arg; + if (get_user(major_version, &uarg->major_version) != 0 || + get_user(minor_version, &uarg->minor_version) != 0 || + copy_from_user(service_name, &uarg->service_name, + DS_MAX_SVC_NAME_LEN)) { + return -EFAULT; + } + if ((major_version > DS_MAJOR_VERSION) || + (major_version == DS_MAJOR_VERSION && + minor_version > DS_MINOR_VERSION)) { + pr_err("%s Invalid version number %u.%u\n", + __func__, major_version, minor_version); + return -EINVAL; + } + rv = ldom_req_sp_token(service_name, &sp_token_result, + &sp_token_data); + if (!rv && sp_token_result == DS_SP_TOKEN_RES_OK) { + dprintk("Copying sp token to userland\n"); + if 
(copy_to_user(&uarg->sp_tok, + (void *)&sp_token_data, + sizeof(struct ds_sptok))) { + rv = -EFAULT; + } + } + break; + default: + pr_err("%s Invalid cmd (%d)\n", __func__, cmd); + rv = -EINVAL; } - spin_unlock_irqrestore(&ds_lock, flags); + return rv; } static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) { - static int ds_version_printed; struct ldc_channel_config ds_cfg = { .event = ds_event, - .mtu = 4096, + .mtu = DS_DEFAULT_MTU, .mode = LDC_MODE_STREAM, }; struct mdesc_handle *hp; struct ldc_channel *lp; - struct ds_info *dp; + struct ds_dev *ds; const u64 *val; - int err, i; + char ds_irq_name[LDC_IRQ_NAME_MAX]; + unsigned long flags; + unsigned long ds_flags = 0; + bool is_sp; + u64 node; + int rv; - if (ds_version_printed++ == 0) - pr_info("%s", version); + dprintk("entered.\n"); - dp = kzalloc(sizeof(*dp), GFP_KERNEL); - err = -ENOMEM; - if (!dp) + ds = kzalloc(sizeof(struct ds_dev), GFP_KERNEL); + rv = -ENOMEM; + if (unlikely(!ds)) goto out_err; + spin_lock_init(&ds->ds_lock); + + INIT_LIST_HEAD(&ds->service_provider_list); + INIT_LIST_HEAD(&ds->service_client_list); + INIT_LIST_HEAD(&ds->callout_list); + + ds->co_ref_cnt = 0; + ds->active = true; + hp = mdesc_grab(); - val = mdesc_get_property(hp, vdev->mp, "id", NULL); - if (val) - dp->id = *val; - mdesc_release(hp); - dp->rcv_buf = kzalloc(4096, GFP_KERNEL); - if (!dp->rcv_buf) - goto out_free_dp; + node = vio_vdev_node(hp, vdev); + if (node == MDESC_NODE_NULL) { + dprintk("ds: Failed to get vdev MD node.\n"); + mdesc_release(hp); + rv = -ENXIO; + goto out_free_ds; + } + + val = mdesc_get_property(hp, node, "id", NULL); + if (val == NULL) { + mdesc_release(hp); + rv = -ENXIO; + goto out_free_ds; + } else { + ds->id = *val; + } - dp->rcv_buf_len = 4096; + /* The SP DS port is identified by a unique ldc-ids property */ + val = mdesc_get_property(hp, node, "ldc-ids", NULL); + is_sp = (val != NULL); - dp->ds_states = kmemdup(ds_states_template, - sizeof(ds_states_template), GFP_KERNEL); - if (!dp->ds_states) - goto out_free_rcv_buf; + val = mdesc_get_property(hp, node, "vlds-remote-domain-handle", + NULL); + if (val == NULL) { + /* Not all DS ports have a handle (such as the SP DS port). */ + ds->handle = DS_INVALID_HANDLE; + } else { + ds->handle = *val; + } + + mdesc_release(hp); - dp->num_ds_states = ARRAY_SIZE(ds_states_template); + /* If this is not the SP DS, then this is a domain DS */ + ds->is_domain = !is_sp; - for (i = 0; i < dp->num_ds_states; i++) - dp->ds_states[i].handle = ((u64)i << 32); + ds->rcv_buf = kzalloc(DS_DEFAULT_BUF_SIZE, GFP_KERNEL); + if (unlikely(!ds->rcv_buf)) + goto out_free_ds; + ds->rcv_buf_len = DS_DEFAULT_BUF_SIZE; + + ds->hs_state = DS_HS_LDC_DOWN; + + ds_cfg.debug = 0; ds_cfg.tx_irq = vdev->tx_irq; ds_cfg.rx_irq = vdev->rx_irq; + ds_cfg.rx_ino = vdev->rx_ino; + ds_cfg.tx_ino = vdev->tx_ino; + ds_cfg.dev_handle = vdev->dev_handle; - lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); + /* create the irq name for the ldc */ + (void) scnprintf(ds_irq_name, LDC_IRQ_NAME_MAX, "DS-%llu", ds->handle); + + lp = ldc_alloc(vdev->channel_id, &ds_cfg, ds, ds_irq_name); if (IS_ERR(lp)) { - err = PTR_ERR(lp); - goto out_free_ds_states; + rv = PTR_ERR(lp); + goto out_free_rcv_buf; } - dp->lp = lp; + ds->lp = lp; - err = ldc_bind(lp); - if (err) + /* + * As soon as we bind the LDC, we can start getting + * events. So grab the ds_lock here and hold it + * until we are done initializing the ds. 
+ */ + LOCK_DS_DEV(ds, ds_flags) + + rv = ldc_bind(lp); + if (rv) { + UNLOCK_DS_DEV(ds, ds_flags) goto out_free_ldc; + } - spin_lock_irq(&ds_lock); - dp->next = ds_info_list; - ds_info_list = dp; - spin_unlock_irq(&ds_lock); + (void) ldc_connect(ds->lp); - return err; + dev_set_drvdata(&vdev->dev, ds); -out_free_ldc: - ldc_free(dp->lp); + ds->next_service_handle = 1; /* start assigning handles from 1 */ + + /* add primary builtin services */ + if (ds->id == DS_PRIMARY_ID) + ds_add_builtin_services(ds, ds_primary_builtin_template, + ARRAY_SIZE(ds_primary_builtin_template)); + + /* add SP builtin services */ + if (is_sp) + ds_add_builtin_services(ds, ds_sp_builtin_template, + ARRAY_SIZE(ds_sp_builtin_template)); -out_free_ds_states: - kfree(dp->ds_states); + /* add the ds_dev to the global ds_data device list */ + spin_lock_irqsave(&ds_data_lock, flags); + list_add_tail(&ds->list, &ds_data.ds_dev_list); + ds_data.num_ds_dev_list++; + spin_unlock_irqrestore(&ds_data_lock, flags); + + /* + * begin the process of registering services. + * Note - we do this here to allow loopback services + * even if the DS LDC connection/handshake fails to establish. + */ + ds_start_service_reg_timer(ds); + + dprintk("ds-%llu: probe successful for domain %llu (channel_id=%lu).\n", + ds->id, ds->handle, vdev->channel_id); + + UNLOCK_DS_DEV(ds, ds_flags) + + return rv; + +out_free_ldc: + ldc_free(ds->lp); out_free_rcv_buf: - kfree(dp->rcv_buf); + kfree(ds->rcv_buf); -out_free_dp: - kfree(dp); +out_free_ds: + kfree(ds); out_err: - return err; + return rv; } static int ds_remove(struct vio_dev *vdev) { + struct ds_dev *ds; + struct ds_callout_entry_hdr *qhdrp; + struct ds_callout_entry_hdr *tmp; + unsigned long flags; + unsigned long ds_flags; + + dprintk("entered.\n"); + + ds = dev_get_drvdata(&vdev->dev); + + if (ds == NULL) + return 0; + + /* + * Lock the global ds_dev list to prevent another thread + * from finding the ds in the list while we are removing it. + */ + spin_lock_irqsave(&ds_data_lock, flags); + + /* + * Lock down the ds_dev to prevent removing it + * while being used by another thread. + */ + LOCK_DS_DEV(ds, ds_flags) + + /* remove the ds_dev from the global ds_data device list */ + list_del(&ds->list); + ds_data.num_ds_dev_list--; + + del_timer(&ds->ds_reg_tmr); + + ds_remove_services(ds); + + ds->hs_state = DS_HS_LDC_DOWN; + + ldc_disconnect(ds->lp); + + ldc_unbind(ds->lp); + + ldc_free(ds->lp); + + kfree(ds->rcv_buf); + + /* free any entries left on the callout list */ + list_for_each_entry_safe(qhdrp, tmp, &ds->callout_list, list) { + list_del(&qhdrp->list); + kfree(qhdrp); + ds->co_ref_cnt--; + } + + dprintk("ds-%llu: removing domain %llu (co_ref_cnt=%llu)\n", + ds->id, ds->handle, ds->co_ref_cnt); + + /* + * When the callout thread processes work entries, it + * creates a local list of entries which can contain + * references to this ds. So, we maintain + * a ds reference count for entries on the callout todo list. + * If there are no outstanding references to this ds, free + * the ds now (it's safely locked down). If there are outstanding + * references (because the callout thread is currently processing them), + * allow the callout thread to clean things up - we do not want to + * remove the ds here since the callout thread will reference it. + */ + if (ds->co_ref_cnt == 0) { + UNLOCK_DS_DEV(ds, ds_flags); + kfree(ds); + } else { + /* + * Mark the ds_dev as inactive. + * ds_dev will be cleaned up by the + * callout processing. 
+ */ + ds->active = false; + UNLOCK_DS_DEV(ds, ds_flags) + } + + spin_unlock_irqrestore(&ds_data_lock, flags); + return 0; } @@ -1294,22 +3996,54 @@ static struct vio_driver ds_driver = { .id_table = ds_match, .probe = ds_probe, .remove = ds_remove, - .name = "ds", + .name = DRV_MODULE_NAME, +}; + +static struct file_operations ds_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = ds_fops_ioctl +}; + +static struct miscdevice ds_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRV_MODULE_NAME, + .fops = &ds_fops }; static int __init ds_init(void) { unsigned long hv_ret, major, minor; + struct task_struct *callout_task; + int err; + + /* set the default ldoms debug level */ + dsdbg_level = ldoms_debug_level; + + dprintk("%s", version); + + INIT_LIST_HEAD(&ds_data.ds_dev_list); + ds_data.num_ds_dev_list = 0; + + err = misc_register(&ds_miscdev); + if (err) + return err; + + dprintk("minor is %d.\n", ds_miscdev.minor); if (tlb_type == hypervisor) { hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor); if (hv_ret == HV_EOK) { - pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n", - major, minor); + dprintk("SUN4V: Reboot data supported " + "(maj=%lu,min=%lu).\n", major, minor); reboot_data_supported = 1; } } - kthread_run(ds_thread, NULL, "kldomd"); + + callout_task = kthread_run(ds_callout_thread, NULL, "ldoms-ds"); + if (IS_ERR(callout_task)) { + misc_deregister(&ds_miscdev); + return PTR_ERR(callout_task); + } return vio_register_driver(&ds_driver); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 1ae5eb1bb045..d6ac8969d315 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -34,7 +34,6 @@ static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; -#define LDC_PACKET_SIZE 64 /* Packet header layout for unreliable and reliable mode frames. * When in RAW mode, packets are simply straight 64-byte payloads @@ -178,6 +177,8 @@ do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \ printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \ } while (0) +#define LDC_ABORT(lp) ldc_abort((lp), __func__) + static const char *state_to_str(u8 state) { switch (state) { @@ -196,15 +197,6 @@ static const char *state_to_str(u8 state) } } -static void ldc_set_state(struct ldc_channel *lp, u8 state) -{ - ldcdbg(STATE, "STATE (%s) --> (%s)\n", - state_to_str(lp->state), - state_to_str(state)); - - lp->state = state; -} - static unsigned long __advance(unsigned long off, unsigned long num_entries) { off += LDC_PACKET_SIZE; @@ -516,11 +508,12 @@ static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt) return err; } -static int ldc_abort(struct ldc_channel *lp) +static int ldc_abort(struct ldc_channel *lp, const char *msg) { unsigned long hv_err; - ldcdbg(STATE, "ABORT\n"); + ldcdbg(STATE, "ABORT[%s]\n", msg); + ldc_print(lp); /* We report but do not act upon the hypervisor errors because * there really isn't much we can do if they fail at this point. 
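The hunks above fold the call site's name into every abort: the LDC_ABORT() wrapper passes __func__ into ldc_abort(), so the ABORT[...] debug line names the function that triggered the channel reset. Below is a minimal, self-contained sketch of the same call-site tagging pattern; the demo_abort/DEMO_ABORT/process_frame names are hypothetical illustrations, not part of the patch.

#include <stdio.h>

/* the abort path logs who requested the abort, then returns an error */
static int demo_abort(int chan_id, const char *caller)
{
	printf("ID[%d] ABORT[%s]\n", chan_id, caller);
	return -1;
}

/* expands at each call site with that site's function name */
#define DEMO_ABORT(id)	demo_abort((id), __func__)

static int process_frame(void)
{
	/* logs "ID[42] ABORT[process_frame]" */
	return DEMO_ABORT(42);
}

int main(void)
{
	return process_frame() == -1 ? 0 : 1;
}

Using a macro rather than a shared helper function is what makes __func__ expand to the caller's name; inside a helper it would always expand to the helper's own name.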
@@ -605,7 +598,7 @@ static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp) } } if (err) - return ldc_abort(lp); + return LDC_ABORT(lp); return 0; } @@ -618,13 +611,13 @@ static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp) if (lp->hs_state == LDC_HS_GOTVERS) { if (lp->ver.major != vp->major || lp->ver.minor != vp->minor) - return ldc_abort(lp); + return LDC_ABORT(lp); } else { lp->ver = *vp; lp->hs_state = LDC_HS_GOTVERS; } if (send_rts(lp)) - return ldc_abort(lp); + return LDC_ABORT(lp); return 0; } @@ -635,17 +628,17 @@ static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp) unsigned long new_tail; if (vp->major == 0 && vp->minor == 0) - return ldc_abort(lp); + return LDC_ABORT(lp); vap = find_by_major(vp->major); if (!vap) - return ldc_abort(lp); + return LDC_ABORT(lp); p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS, vap, sizeof(*vap), &new_tail); if (!p) - return ldc_abort(lp); + return LDC_ABORT(lp); return send_tx_packet(lp, p, new_tail); } @@ -668,7 +661,7 @@ static int process_version(struct ldc_channel *lp, return process_ver_nack(lp, vp); default: - return ldc_abort(lp); + return LDC_ABORT(lp); } } @@ -681,13 +674,13 @@ static int process_rts(struct ldc_channel *lp, if (p->stype != LDC_INFO || lp->hs_state != LDC_HS_GOTVERS || p->env != lp->cfg.mode) - return ldc_abort(lp); + return LDC_ABORT(lp); lp->snd_nxt = p->seqid; lp->rcv_nxt = p->seqid; lp->hs_state = LDC_HS_SENTRTR; if (send_rtr(lp)) - return ldc_abort(lp); + return LDC_ABORT(lp); return 0; } @@ -700,7 +693,7 @@ static int process_rtr(struct ldc_channel *lp, if (p->stype != LDC_INFO || p->env != lp->cfg.mode) - return ldc_abort(lp); + return LDC_ABORT(lp); lp->snd_nxt = p->seqid; lp->hs_state = LDC_HS_COMPLETE; @@ -723,7 +716,7 @@ static int process_rdx(struct ldc_channel *lp, if (p->stype != LDC_INFO || !(rx_seq_ok(lp, p->seqid))) - return ldc_abort(lp); + return LDC_ABORT(lp); lp->rcv_nxt = p->seqid; @@ -750,14 +743,14 @@ static int process_control_frame(struct ldc_channel *lp, return process_rdx(lp, p); default: - return ldc_abort(lp); + return LDC_ABORT(lp); } } static int process_error_frame(struct ldc_channel *lp, struct ldc_packet *p) { - return ldc_abort(lp); + return LDC_ABORT(lp); } static int process_data_ack(struct ldc_channel *lp, @@ -776,12 +769,45 @@ static int process_data_ack(struct ldc_channel *lp, return 0; } if (head == lp->tx_tail) - return ldc_abort(lp); + return LDC_ABORT(lp); } return 0; } +void ldc_enable_hv_intr(struct ldc_channel *lp) +{ + unsigned long flags; + + spin_lock_irqsave(&lp->lock, flags); + + ldcdbg(RX, "ldc_enable_hv_intr: dh=%llu, ino=%llu\n", + lp->cfg.dev_handle, lp->cfg.rx_ino); + sun4v_vintr_set_valid(lp->cfg.dev_handle, lp->cfg.rx_ino, + HV_INTR_ENABLED); + + spin_unlock_irqrestore(&lp->lock, flags); + +} +EXPORT_SYMBOL(ldc_enable_hv_intr); + + +void ldc_disable_hv_intr(struct ldc_channel *lp) +{ + unsigned long flags; + + spin_lock_irqsave(&lp->lock, flags); + + ldcdbg(RX, "ldc_disable_hv_intr: dh=%llu, ino=%llu\n", + lp->cfg.dev_handle, lp->cfg.rx_ino); + sun4v_vintr_set_valid(lp->cfg.dev_handle, lp->cfg.rx_ino, + HV_INTR_DISABLED); + + spin_unlock_irqrestore(&lp->lock, flags); + +} +EXPORT_SYMBOL(ldc_disable_hv_intr); + static void send_events(struct ldc_channel *lp, unsigned int event_mask) { if (event_mask & LDC_EVENT_RESET) @@ -820,16 +846,21 @@ static irqreturn_t ldc_rx(int irq, void *dev_id) lp->hs_state = LDC_HS_COMPLETE; ldc_set_state(lp, LDC_STATE_CONNECTED); - event_mask |= LDC_EVENT_UP; - - 
orig_state = lp->chan_state; + /* + * Generate an LDC_EVENT_UP event if the channel + * was not already up. + */ + if (orig_state != LDC_CHANNEL_UP) { + event_mask |= LDC_EVENT_UP; + orig_state = lp->chan_state; + } } /* If we are in reset state, flush the RX queue and ignore * everything. */ if (lp->flags & LDC_FLAG_RESET) { - (void) __set_rx_head(lp, lp->rx_tail); + (void) ldc_rx_reset(lp); goto out; } @@ -880,7 +911,7 @@ handshake_complete: break; default: - err = ldc_abort(lp); + err = LDC_ABORT(lp); break; } @@ -895,7 +926,7 @@ handshake_complete: err = __set_rx_head(lp, new); if (err < 0) { - (void) ldc_abort(lp); + (void) LDC_ABORT(lp); break; } if (lp->hs_state == LDC_HS_COMPLETE) @@ -936,7 +967,14 @@ static irqreturn_t ldc_tx(int irq, void *dev_id) lp->hs_state = LDC_HS_COMPLETE; ldc_set_state(lp, LDC_STATE_CONNECTED); - event_mask |= LDC_EVENT_UP; + /* + * Generate an LDC_EVENT_UP event if the channel + * was not already up. + */ + if (orig_state != LDC_CHANNEL_UP) { + event_mask |= LDC_EVENT_UP; + orig_state = lp->chan_state; + } } spin_unlock_irqrestore(&lp->lock, flags); @@ -1342,6 +1380,14 @@ int ldc_bind(struct ldc_channel *lp) lp->hs_state = LDC_HS_OPEN; ldc_set_state(lp, LDC_STATE_BOUND); + if (lp->cfg.mode == LDC_MODE_RAW) { + /* + * There is no handshake in RAW mode, so handshake + * is completed. + */ + lp->hs_state = LDC_HS_COMPLETE; + } + spin_unlock_irqrestore(&lp->lock, flags); return 0; @@ -1447,12 +1493,62 @@ int ldc_state(struct ldc_channel *lp) } EXPORT_SYMBOL(ldc_state); +void ldc_set_state(struct ldc_channel *lp, u8 state) +{ + ldcdbg(STATE, "STATE (%s) --> (%s)\n", + state_to_str(lp->state), + state_to_str(state)); + + lp->state = state; +} +EXPORT_SYMBOL(ldc_set_state); + +int ldc_mode(struct ldc_channel *lp) +{ + return lp->cfg.mode; +} +EXPORT_SYMBOL(ldc_mode); + +int ldc_rx_reset(struct ldc_channel *lp) +{ + return __set_rx_head(lp, lp->rx_tail); +} +EXPORT_SYMBOL(ldc_rx_reset); + +void ldc_clr_reset(struct ldc_channel *lp) +{ + lp->flags &= ~LDC_FLAG_RESET; +} +EXPORT_SYMBOL(ldc_clr_reset); + +void ldc_print(struct ldc_channel *lp) +{ + pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n" + "\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n" + "\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n" + "\trcv_nxt=%u snd_nxt=%u\n", + __func__, lp->id, lp->flags, state_to_str(lp->state), + lp->chan_state, lp->hs_state, + lp->rx_head, lp->rx_tail, lp->rx_num_entries, + lp->tx_head, lp->tx_tail, lp->tx_num_entries, + lp->rcv_nxt, lp->snd_nxt); +} +EXPORT_SYMBOL(ldc_print); + static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size) { struct ldc_packet *p; - unsigned long new_tail; + unsigned long new_tail, hv_err; int err; + hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail, + &lp->chan_state); + if (unlikely(hv_err)) + return -EBUSY; + + if (unlikely(lp->chan_state != LDC_CHANNEL_UP)) + return LDC_ABORT(lp); + if (size > LDC_PACKET_SIZE) return -EMSGSIZE; @@ -1483,7 +1579,7 @@ static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size) &lp->rx_tail, &lp->chan_state); if (hv_err) - return ldc_abort(lp); + return LDC_ABORT(lp); if (lp->chan_state == LDC_CHANNEL_DOWN || lp->chan_state == LDC_CHANNEL_RESETTING) @@ -1526,7 +1622,7 @@ static int write_nonraw(struct ldc_channel *lp, const void *buf, return -EBUSY; if (unlikely(lp->chan_state != LDC_CHANNEL_UP)) - return ldc_abort(lp); + return LDC_ABORT(lp); if (!tx_has_space_for(lp, size)) return -EAGAIN; @@ -1592,9 +1688,9 @@ static int rx_bad_seq(struct ldc_channel *lp, 
struct ldc_packet *p, if (err) return err; - err = __set_rx_head(lp, lp->rx_tail); + err = ldc_rx_reset(lp); if (err < 0) - return ldc_abort(lp); + return LDC_ABORT(lp); return 0; } @@ -1607,7 +1703,7 @@ static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p) return err; } if (p->stype & LDC_NACK) - return ldc_abort(lp); + return LDC_ABORT(lp); return 0; } @@ -1627,7 +1723,7 @@ static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head) &lp->rx_tail, &lp->chan_state); if (hv_err) - return ldc_abort(lp); + return LDC_ABORT(lp); if (lp->chan_state == LDC_CHANNEL_DOWN || lp->chan_state == LDC_CHANNEL_RESETTING) @@ -1650,7 +1746,7 @@ static int rx_set_head(struct ldc_channel *lp, unsigned long head) int err = __set_rx_head(lp, head); if (err < 0) - return ldc_abort(lp); + return LDC_ABORT(lp); lp->rx_head = head; return 0; @@ -1689,7 +1785,7 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) &lp->rx_tail, &lp->chan_state); if (hv_err) - return ldc_abort(lp); + return LDC_ABORT(lp); if (lp->chan_state == LDC_CHANNEL_DOWN || lp->chan_state == LDC_CHANNEL_RESETTING) @@ -1733,9 +1829,14 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) lp->rcv_nxt = p->seqid; + /* + * If this is a control-only packet, there is nothing + * else to do but advance the rx queue since the packet + * was already processed above. + */ if (!(p->type & LDC_DATA)) { new = rx_advance(lp, new); - goto no_data; + break; } if (p->stype & (LDC_ACK | LDC_NACK)) { err = data_ack_nack(lp, p); @@ -1871,6 +1972,25 @@ static const struct ldc_mode_ops stream_ops = { .read = read_stream, }; +int ldc_tx_space_available(struct ldc_channel *lp, unsigned long size) +{ + unsigned long flags; + + spin_lock_irqsave(&lp->lock, flags); + + /* tx_has_space_for() works for all modes */ + if (!tx_has_space_for(lp, size)) { + spin_unlock_irqrestore(&lp->lock, flags); + return 0; + } + + spin_unlock_irqrestore(&lp->lock, flags); + + return 1; + +} +EXPORT_SYMBOL(ldc_tx_space_available); + int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size) { unsigned long flags; @@ -1895,11 +2015,36 @@ int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size) } EXPORT_SYMBOL(ldc_write); +int ldc_rx_data_available(struct ldc_channel *lp) +{ + unsigned long flags; + + spin_lock_irqsave(&lp->lock, flags); + + if (lp->cfg.mode == LDC_MODE_STREAM && lp->mssbuf_len > 0) { + spin_unlock_irqrestore(&lp->lock, flags); + return 1; + } + + if (lp->rx_head == lp->rx_tail) { + spin_unlock_irqrestore(&lp->lock, flags); + return 0; + } + + spin_unlock_irqrestore(&lp->lock, flags); + + return 1; + +} +EXPORT_SYMBOL(ldc_rx_data_available); + int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) { unsigned long flags; int err; + ldcdbg(RX, "ldc_read: entered size=%d\n", size); + if (!buf) return -EINVAL; @@ -1915,6 +2060,9 @@ int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) spin_unlock_irqrestore(&lp->lock, flags); + ldcdbg(RX, "ldc_read:mode=%d, head=%lu, tail=%lu rv=%d\n", + lp->cfg.mode, lp->rx_head, lp->rx_tail, err); + return err; } EXPORT_SYMBOL(ldc_read); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index b9fe42450308..4d6242ab5f34 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -75,6 +75,58 @@ struct mdesc_handle { struct mdesc_hdr mdesc; }; +typedef int (*mdesc_node_info_f)(struct mdesc_handle *, u64, + union md_node_info *); +typedef bool (*mdesc_node_match_f)(union 
md_node_info *, union md_node_info *); + +struct md_node_ops { + char *name; + mdesc_node_info_f get_info; + mdesc_node_match_f node_match; +}; + +static int get_vdev_port_node_info(struct mdesc_handle *md, u64 node, + union md_node_info *node_info); +static bool vdev_port_node_match(union md_node_info *a_node_info, + union md_node_info *b_node_info); +static int get_ds_port_node_info(struct mdesc_handle *md, u64 node, + union md_node_info *node_info); +static bool ds_port_node_match(union md_node_info *a_node_info, + union md_node_info *b_node_info); + +/* supported node types which can be registered */ +static struct md_node_ops md_node_ops_table[] = { + {"virtual-device-port", get_vdev_port_node_info, vdev_port_node_match}, + {"domain-services-port", get_ds_port_node_info, ds_port_node_match}, + {NULL, NULL, NULL} +}; + +void mdesc_get_node_ops(char *node_name, mdesc_node_info_f *node_info_f, + mdesc_node_match_f *node_match_f) +{ + int i; + mdesc_node_info_f get_info_func; + mdesc_node_match_f node_match_func; + + get_info_func = NULL; + node_match_func = NULL; + + if (node_name != NULL) { + for (i = 0; md_node_ops_table[i].name != NULL; i++) { + if (strcmp(md_node_ops_table[i].name, node_name) == 0) { + get_info_func = md_node_ops_table[i].get_info; + node_match_func = + md_node_ops_table[i].node_match; + break; + } + } + } + + *node_info_f = get_info_func; + *node_match_f = node_match_func; + +} + static void mdesc_handle_init(struct mdesc_handle *hp, unsigned int handle_size, void *base) @@ -130,26 +182,26 @@ static struct mdesc_mem_ops memblock_mdesc_ops = { static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) { unsigned int handle_size; - struct mdesc_handle *hp; - unsigned long addr; void *base; handle_size = (sizeof(struct mdesc_handle) - sizeof(struct mdesc_hdr) + mdesc_size); + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT); + if (base) { + struct mdesc_handle *hp; + unsigned long addr; - /* - * Allocation has to succeed because mdesc update would be missed - * and such events are not retransmitted. 
- */ - base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); - addr = (unsigned long)base; - addr = (addr + 15UL) & ~15UL; - hp = (struct mdesc_handle *) addr; + addr = (unsigned long)base; + addr = (addr + 15UL) & ~15UL; + hp = (struct mdesc_handle *) addr; - mdesc_handle_init(hp, handle_size, base); + mdesc_handle_init(hp, handle_size, base); + return hp; + } + + return NULL; - return hp; } static void mdesc_kfree(struct mdesc_handle *hp) @@ -220,15 +272,35 @@ static struct mdesc_notifier_client *client_list; void mdesc_register_notifier(struct mdesc_notifier_client *client) { u64 node; + int i; + bool supported; mutex_lock(&mdesc_mutex); + + /* check to see if the node is supported for registration */ + supported = false; + for (i = 0; md_node_ops_table[i].name != NULL; i++) { + if (strcmp(md_node_ops_table[i].name, client->node_name) == 0) { + supported = true; + break; + } + } + + if (!supported) { + printk(KERN_ERR "MD: %s: %s node not supported\n", + __func__, client->node_name); + mutex_unlock(&mdesc_mutex); + return; + } + client->next = client_list; client_list = client; mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name) - client->add(cur_mdesc, node); + client->add(cur_mdesc, node, client->node_name); mutex_unlock(&mdesc_mutex); + } static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node) @@ -250,59 +322,128 @@ static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node) return id; } +static int get_vdev_port_node_info(struct mdesc_handle *md, u64 node, + union md_node_info *node_info) +{ + const u64 *idp; + const u64 *parent_cfg_hdlp; + const char *name; + + /* + * Virtual device nodes are distinguished by: + * 1. "id" property + * 2. "name" property + * 3. parent node "cfg-handle" property + */ + idp = mdesc_get_property(md, node, "id", NULL); + name = mdesc_get_property(md, node, "name", NULL); + parent_cfg_hdlp = parent_cfg_handle(md, node); + + if (!idp || !name || !parent_cfg_hdlp) + return -1; + + node_info->vdev_port.id = *idp; + strncpy(node_info->vdev_port.name, name, MDESC_MAX_STR_LEN); + node_info->vdev_port.parent_cfg_hdl = *parent_cfg_hdlp; + + return 0; +} + +static bool vdev_port_node_match(union md_node_info *a_node_info, + union md_node_info *b_node_info) +{ + if (a_node_info->vdev_port.id != b_node_info->vdev_port.id) + return false; + + if (a_node_info->vdev_port.parent_cfg_hdl != + b_node_info->vdev_port.parent_cfg_hdl) + return false; + + if (strncmp(a_node_info->vdev_port.name, + b_node_info->vdev_port.name, MDESC_MAX_STR_LEN) != 0) + return false; + + return true; + +} + +static int get_ds_port_node_info(struct mdesc_handle *md, u64 node, + union md_node_info *node_info) +{ + const u64 *idp; + + /* DS port nodes use the "id" property to distinguish them */ + idp = mdesc_get_property(md, node, "id", NULL); + if (!idp) + return -1; + + node_info->ds_port.id = *idp; + + return 0; +} + + +static bool ds_port_node_match(union md_node_info *a_node_info, + union md_node_info *b_node_info) +{ + if (a_node_info->ds_port.id != b_node_info->ds_port.id) + return false; + + return true; +} + /* Run 'func' on nodes which are in A but not in B. 
*/ static void invoke_on_missing(const char *name, - struct mdesc_handle *a, - struct mdesc_handle *b, - void (*func)(struct mdesc_handle *, u64)) + struct mdesc_handle *a, + struct mdesc_handle *b, + void (*func)(struct mdesc_handle *, u64, const char *node_name)) { - u64 node; + u64 a_node; + u64 b_node; + union md_node_info a_node_info; + union md_node_info b_node_info; + mdesc_node_info_f get_info_func; + mdesc_node_match_f node_match_func; + int rv; + bool found; + + /* Find the get_info and node_match ops for the given node name */ + mdesc_get_node_ops((char *)name, &get_info_func, &node_match_func); + + /* If we didn't find a match, the node type is not supported */ + if (get_info_func == NULL || node_match_func == NULL) { + printk(KERN_ERR "MD: %s: %s node type is not supported\n", + __func__, name); + return; + } - mdesc_for_each_node_by_name(a, node, name) { - int found = 0, is_vdc_port = 0; - const char *name_prop; - const u64 *id; - u64 fnode; - - name_prop = mdesc_get_property(a, node, "name", NULL); - if (name_prop && !strcmp(name_prop, "vdc-port")) { - is_vdc_port = 1; - id = parent_cfg_handle(a, node); - } else - id = mdesc_get_property(a, node, "id", NULL); - - if (!id) { - printk(KERN_ERR "MD: Cannot find ID for %s node.\n", - (name_prop ? name_prop : name)); + mdesc_for_each_node_by_name(a, a_node, name) { + + found = false; + + rv = get_info_func(a, a_node, &a_node_info); + if (rv != 0) { + printk(KERN_ERR "MD: %s: Cannot find 1 or more required " + "match properties for %s node.\n", __func__, name); continue; } - mdesc_for_each_node_by_name(b, fnode, name) { - const u64 *fid; - - if (is_vdc_port) { - name_prop = mdesc_get_property(b, fnode, - "name", NULL); - if (!name_prop || - strcmp(name_prop, "vdc-port")) - continue; - fid = parent_cfg_handle(b, fnode); - if (!fid) { - printk(KERN_ERR "MD: Cannot find ID " - "for vdc-port node.\n"); - continue; - } - } else - fid = mdesc_get_property(b, fnode, - "id", NULL); - - if (*id == *fid) { - found = 1; + /* Check each node in B for node matching a_node */ + mdesc_for_each_node_by_name(b, b_node, name) { + + rv = get_info_func(b, b_node, &b_node_info); + if (rv != 0) + continue; + + if (node_match_func(&a_node_info, &b_node_info)) { + found = true; break; } } + if (!found) - func(a, node); + func(a, a_node, name); + } } @@ -368,6 +509,77 @@ out: mutex_unlock(&mdesc_mutex); } +u64 mdesc_get_node(struct mdesc_handle *hp, char *node_name, + union md_node_info *node_info) +{ + mdesc_node_info_f get_info_func; + mdesc_node_match_f node_match_func; + u64 hp_node; + union md_node_info hp_node_info; + int rv; + + if (hp == NULL || node_name == NULL || node_info == NULL) + return MDESC_NODE_NULL; + + /* Find the ops for the given node name */ + mdesc_get_node_ops(node_name, &get_info_func, &node_match_func); + + /* If we didn't find a node_match func, the node is not supported */ + if (get_info_func == NULL || node_match_func == NULL) { + printk(KERN_ERR "MD: %s: %s node is not supported\n", + __func__, node_name); + return -EINVAL; + } + + mdesc_for_each_node_by_name(hp, hp_node, node_name) { + + rv = get_info_func(hp, hp_node, &hp_node_info); + if (rv != 0) + continue; + + if (node_match_func(node_info, &hp_node_info)) + break; + } + + return hp_node; + +} +EXPORT_SYMBOL(mdesc_get_node); + +int mdesc_get_node_info(struct mdesc_handle *hp, u64 node, char *node_name, + union md_node_info *node_info) +{ + mdesc_node_info_f get_info_func; + mdesc_node_match_f node_match_func; + int rv; + + if (hp == NULL || node == MDESC_NODE_NULL || + 
node_name == NULL || node_info == NULL) + return -EINVAL; + + /* Find the get_info op for the given node name */ + mdesc_get_node_ops(node_name, &get_info_func, &node_match_func); + + /* If we didn't find a get_info_func, the node name is not supported */ + if (get_info_func == NULL) { + printk(KERN_ERR "MD: %s: %s node is not supported\n", + __func__, node_name); + return -EINVAL; + } + + rv = get_info_func(hp, node, node_info); + if (rv != 0) { + printk(KERN_ERR "MD: %s: Cannot find 1 or more required " + "match properties for %s node.\n", __func__, node_name); + return -1; + } + + return 0; + +} +EXPORT_SYMBOL(mdesc_get_node_info); + + static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { return (struct mdesc_elem *) (mdesc + 1); diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index cb5789c9f961..4e02b3df6d57 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -62,14 +62,31 @@ static int vio_device_probe(struct device *dev) struct vio_dev *vdev = to_vio_dev(dev); struct vio_driver *drv = to_vio_driver(dev->driver); const struct vio_device_id *id; - int error = -ENODEV; + int error; + + if (!drv->probe) + return -ENODEV; + + id = vio_match_device(drv->id_table, vdev); + if (!id) + return -ENODEV; + + /* alloc irqs (unless the driver specified not to) */ + if (!drv->no_irq) { + if (vdev->tx_irq == 0 && vdev->tx_ino != ~0UL) + vdev->tx_irq = + sun4v_build_virq(vdev->dev_handle, + vdev->tx_ino); + + if (vdev->rx_irq == 0 && vdev->rx_ino != ~0UL) + vdev->rx_irq = + sun4v_build_virq(vdev->dev_handle, + vdev->rx_ino); - if (drv->probe) { - id = vio_match_device(drv->id_table, vdev); - if (id) - error = drv->probe(vdev, id); } + error = drv->probe(vdev, id); + return error; } @@ -78,9 +95,17 @@ static int vio_device_remove(struct device *dev) struct vio_dev *vdev = to_vio_dev(dev); struct vio_driver *drv = to_vio_driver(dev->driver); - if (drv->remove) + if (drv->remove) { + return drv->remove(vdev); + /* + * Ideally, we would remove/deallocate tx/rx virqs + * here - however, there are currently no support + * routines to do so at the moment. TBD + */ + } + return 1; } @@ -163,11 +188,55 @@ static struct device_node *cdev_node; static struct vio_dev *root_vdev; static u64 cdev_cfg_handle; +static const u64 *vio_cfg_handle(struct mdesc_handle *hp, u64 node) +{ + const u64 *cfg_handle; + u64 a; + + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + return cfg_handle; +} + +/* + * vio_dev_node + * Find the node in the current MD which matches the + * given vio_dev. This must be done dynamically since the + * node value can change if the MD is updated. + * NOTE: the MD must be locked, using mdesc_grab(), + * when calling this routine! 
+ */ +u64 vio_vdev_node(struct mdesc_handle *hp, struct vio_dev *vdev) +{ + u64 node; + + if (vdev == NULL) + return MDESC_NODE_NULL; + + node = mdesc_get_node(hp, vdev->node_name, &vdev->md_node_info); + + return node; + +} +EXPORT_SYMBOL(vio_vdev_node); + static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, struct vio_dev *vdev) { u64 a; + vdev->tx_ino = ~0UL; + vdev->rx_ino = ~0UL; + vdev->channel_id = ~0UL; mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { const u64 *chan_id; const u64 *irq; @@ -177,18 +246,19 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, irq = mdesc_get_property(hp, target, "tx-ino", NULL); if (irq) - vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + vdev->tx_ino = *irq; irq = mdesc_get_property(hp, target, "rx-ino", NULL); - if (irq) { - vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + if (irq) vdev->rx_ino = *irq; - } chan_id = mdesc_get_property(hp, target, "id", NULL); if (chan_id) vdev->channel_id = *chan_id; } + + vdev->dev_handle = cdev_cfg_handle; + } int vio_set_intr(unsigned long dev_ino, int state) @@ -201,14 +271,13 @@ int vio_set_intr(unsigned long dev_ino, int state) EXPORT_SYMBOL(vio_set_intr); static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, - struct device *parent) + char *node_name, struct device *parent) { - const char *type, *compat, *bus_id_name; + const char *type, *compat; struct device_node *dp; struct vio_dev *vdev; int err, tlen, clen; const u64 *id, *cfg_handle; - u64 a; type = mdesc_get_property(hp, mp, "device-type", &tlen); if (!type) { @@ -218,7 +287,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, tlen = strlen(type) + 1; } } - if (tlen > VIO_MAX_TYPE_LEN) { + if (tlen > VIO_MAX_TYPE_LEN || strlen(type) >= VIO_MAX_TYPE_LEN) { printk(KERN_ERR "VIO: Type string [%s] is too long.\n", type); return NULL; @@ -226,31 +295,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, id = mdesc_get_property(hp, mp, "id", NULL); - cfg_handle = NULL; - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { - u64 target; - - target = mdesc_arc_target(hp, a); - cfg_handle = mdesc_get_property(hp, target, - "cfg-handle", NULL); - if (cfg_handle) - break; - } - - bus_id_name = type; - if (!strcmp(type, "domain-services-port")) - bus_id_name = "ds"; - - /* - * 20 char is the old driver-core name size limit, which is no more. - * This check can probably be removed after review and possible - * adaption of the vio users name length handling. 
- */ - if (strlen(bus_id_name) >= 20 - 4) { - printk(KERN_ERR "VIO: bus_id_name [%s] is too long.\n", - bus_id_name); - return NULL; - } + cfg_handle = vio_cfg_handle(hp, mp); compat = mdesc_get_property(hp, mp, "device-type", &clen); if (!compat) { @@ -267,7 +312,6 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, return NULL; } - vdev->mp = mp; memcpy(vdev->type, type, tlen); if (compat) memcpy(vdev->compat, compat, clen); @@ -275,22 +319,23 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, memset(vdev->compat, 0, sizeof(vdev->compat)); vdev->compat_len = clen; - vdev->channel_id = ~0UL; - vdev->tx_irq = ~0; - vdev->rx_irq = ~0; + vdev->port_id = ~0UL; + vdev->tx_irq = 0; + vdev->rx_irq = 0; vio_fill_channel_info(hp, mp, vdev); if (!id) { - dev_set_name(&vdev->dev, "%s", bus_id_name); + dev_set_name(&vdev->dev, "%s", type); vdev->dev_no = ~(u64)0; } else if (!cfg_handle) { - dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id); + dev_set_name(&vdev->dev, "%s-%llu", type, *id); vdev->dev_no = *id; } else { - dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name, + dev_set_name(&vdev->dev, "%s-%llu-%llu", type, *cfg_handle, *id); vdev->dev_no = *cfg_handle; + vdev->port_id = *id; } vdev->dev.parent = parent; @@ -312,7 +357,27 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, } vdev->dp = dp; - printk(KERN_INFO "VIO: Adding device %s\n", dev_name(&vdev->dev)); + /* + * node_name is NULL for the parent/channel-devices node and + * the parent doesn't require the MD node info. + */ + if (node_name != NULL) { + + strncpy(vdev->node_name, node_name, VIO_MAX_NAME_LEN); + + err = mdesc_get_node_info(hp, mp, node_name, + &vdev->md_node_info); + if (err) { + printk(KERN_ERR "VIO: Could not get MD node " + "info %s, err=%d\n", dev_name(&vdev->dev), err); + kfree(vdev); + return NULL; + } + } + + printk(KERN_INFO "VIO: Adding device %s (tx_ino = %llx, " + "rx_ino = %llx)\n", dev_name(&vdev->dev), vdev->tx_ino, + vdev->rx_ino); err = device_register(&vdev->dev); if (err) { @@ -328,26 +393,42 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, return vdev; } -static void vio_add(struct mdesc_handle *hp, u64 node) +static void vio_add(struct mdesc_handle *hp, u64 node, + const char *node_name) { - (void) vio_create_one(hp, node, &root_vdev->dev); + (void) vio_create_one(hp, node, (char *)node_name, &root_vdev->dev); } +struct vio_remove_node_data { + struct mdesc_handle *hp; + u64 node; +}; + static int vio_md_node_match(struct device *dev, void *arg) { struct vio_dev *vdev = to_vio_dev(dev); + u64 node; + struct vio_remove_node_data *node_data; - if (vdev->mp == (u64) arg) - return 1; + node_data = (struct vio_remove_node_data *)arg; + + node = vio_vdev_node(node_data->hp, vdev); - return 0; + if (node == node_data->node) + return 1; + else + return 0; } -static void vio_remove(struct mdesc_handle *hp, u64 node) +static void vio_remove(struct mdesc_handle *hp, u64 node, const char *node_name) { struct device *dev; + struct vio_remove_node_data node_data; + + node_data.hp = hp; + node_data.node = node; - dev = device_find_child(&root_vdev->dev, (void *) node, + dev = device_find_child(&root_vdev->dev, (void *)&node_data, vio_md_node_match); if (dev) { printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); @@ -368,7 +449,8 @@ static struct mdesc_notifier_client vio_device_notifier = { * under "openboot" that we should not mess with as aparently that is * reserved exclusively for OBP use. 
*/ -static void vio_add_ds(struct mdesc_handle *hp, u64 node) +static void vio_add_ds(struct mdesc_handle *hp, u64 node, + const char *node_name) { int found; u64 a; @@ -385,7 +467,8 @@ static void vio_add_ds(struct mdesc_handle *hp, u64 node) } if (found) - (void) vio_create_one(hp, node, &root_vdev->dev); + (void) vio_create_one(hp, node, (char *)node_name, + &root_vdev->dev); } static struct mdesc_notifier_client vio_ds_notifier = { @@ -452,7 +535,7 @@ static int __init vio_init(void) cdev_cfg_handle = *cfg_handle; - root_vdev = vio_create_one(hp, root, NULL); + root_vdev = vio_create_one(hp, root, NULL, NULL); err = -ENODEV; if (!root_vdev) { printk(KERN_ERR "VIO: Could not create root device.\n"); diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 526fcb5d8ce9..aff57cdd10b0 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -113,7 +113,7 @@ void vio_link_state_change(struct vio_driver_state *vio, int event) break; case VDEV_DISK_SERVER: vio->dr_state = VIO_DR_STATE_RXREQ; - break; + return; /* VDS never initiates a handshake */ } start_handshake(vio); } else if (event == LDC_EVENT_RESET) { @@ -222,7 +222,11 @@ static int send_rdx(struct vio_driver_state *vio) static int send_attr(struct vio_driver_state *vio) { - return vio->ops->send_attr(vio); + if (vio->ops && vio->ops->send_attr) + return vio->ops->send_attr(vio); + + return -EINVAL; + } static struct vio_version *find_by_major(struct vio_driver_state *vio, @@ -282,6 +286,7 @@ static int process_ver_info(struct vio_driver_state *vio, ver.minor = vap->minor; pkt->minor = ver.minor; pkt->tag.stype = VIO_SUBTYPE_ACK; + pkt->dev_class = vio->dev_class; viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n", pkt->major, pkt->minor); err = send_ctrl(vio, &pkt->tag, sizeof(*pkt)); @@ -373,20 +378,23 @@ static int process_attr(struct vio_driver_state *vio, void *pkt) if (!(vio->hs_state & VIO_HS_GOTVERS)) return handshake_failure(vio); - err = vio->ops->handle_attr(vio, pkt); - if (err < 0) { - return handshake_failure(vio); - } else { - vio->hs_state |= VIO_HS_GOT_ATTR; + if (vio->ops && vio->ops->handle_attr) { + err = vio->ops->handle_attr(vio, pkt); + if (err < 0) { + return handshake_failure(vio); + } else { + vio->hs_state |= VIO_HS_GOT_ATTR; - if ((vio->dr_state & VIO_DR_STATE_TXREQ) && - !(vio->hs_state & VIO_HS_SENT_DREG)) { - if (send_dreg(vio) < 0) - return handshake_failure(vio); + if ((vio->dr_state & VIO_DR_STATE_TXREQ) && + !(vio->hs_state & VIO_HS_SENT_DREG)) { + if (send_dreg(vio) < 0) + return handshake_failure(vio); - vio->hs_state |= VIO_HS_SENT_DREG; + vio->hs_state |= VIO_HS_SENT_DREG; + } } } + return 0; } @@ -646,10 +654,14 @@ int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt) err = process_unknown(vio, pkt); break; } + if (!err && vio->hs_state != prev_state && - (vio->hs_state & VIO_HS_COMPLETE)) - vio->ops->handshake_complete(vio); + (vio->hs_state & VIO_HS_COMPLETE)) { + + if (vio->ops && vio->ops->handshake_complete) + vio->ops->handshake_complete(vio); + } return err; } @@ -724,6 +736,10 @@ int vio_ldc_alloc(struct vio_driver_state *vio, cfg.tx_irq = vio->vdev->tx_irq; cfg.rx_irq = vio->vdev->rx_irq; + cfg.rx_ino = vio->vdev->rx_ino; + cfg.tx_ino = vio->vdev->tx_ino; + cfg.dev_handle = vio->vdev->dev_handle; + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -764,7 +780,11 @@ void vio_port_up(struct vio_driver_state *vio) } if (!err) { - err = ldc_connect(vio->lp); + if (ldc_mode(vio->lp) == 
LDC_MODE_RAW) + ldc_set_state(vio->lp, LDC_STATE_CONNECTED); + else + err = ldc_connect(vio->lp); + if (err) printk(KERN_WARNING "%s: Port %lu connect failed, " "err=%d\n", @@ -798,16 +818,22 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, case VDEV_NETWORK_SWITCH: case VDEV_DISK: case VDEV_DISK_SERVER: + case VDEV_CONSOLE_CON: + case VDEV_VLDC: break; default: return -EINVAL; } - if (!ops->send_attr || - !ops->handle_attr || - !ops->handshake_complete) - return -EINVAL; + if (dev_class == VDEV_NETWORK || + dev_class == VDEV_NETWORK_SWITCH || + dev_class == VDEV_DISK || + dev_class == VDEV_DISK_SERVER) { + if (!ops || !ops->send_attr || !ops->handle_attr || + !ops->handshake_complete) + return -EINVAL; + } if (!ver_table || ver_table_size < 0) return -EINVAL; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 3ccef9eba6f9..fc6eb17192a9 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -570,4 +570,11 @@ config BLK_DEV_RSXX To compile this driver as a module, choose M here: the module will be called rsxx. +config VDS + tristate "Sun virtual disk server (VDS)" + depends on SUN_LDOMS + default m + help + Support for Sun logical domain disks. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9cc6c18a1c7e..b80fd295da42 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o +obj-$(CONFIG_VDS) += vds/ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 4b911ed96ea3..2c59f2778064 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -866,11 +866,19 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) struct vdc_port *port; int err; const u64 *ldc_timeout; + u64 node; print_version(); hp = mdesc_grab(); + node = vio_vdev_node(hp, vdev); + if (node == MDESC_NODE_NULL) { + printk(KERN_ERR PFX "Failed to get vdev MD node.\n"); + err = -ENXIO; + goto err_out_release_mdesc; + } + err = -ENODEV; if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) { printk(KERN_ERR PFX "Port id [%llu] too large.\n", @@ -899,7 +907,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) * a readahead I/O first, and once that fails it will try to read a * single page. */ - ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL); + ldc_timeout = mdesc_get_property(hp, node, "vdc-timeout", NULL); port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0; setup_timer(&port->ldc_reset_timer, vdc_ldc_reset_timer, (unsigned long)port); diff --git a/drivers/block/vds/Makefile b/drivers/block/vds/Makefile new file mode 100644 index 000000000000..102f0766dc30 --- /dev/null +++ b/drivers/block/vds/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_VDS) := vds.o + +vds-y := vds_blk.o vds_efi.o vds_io.o vds_label.o vds_main.o vds_reg.o \ + vds_vtoc.o + diff --git a/drivers/block/vds/vds.h b/drivers/block/vds/vds.h new file mode 100644 index 000000000000..28be0e2a26da --- /dev/null +++ b/drivers/block/vds/vds.h @@ -0,0 +1,166 @@ +/* + * vds.h: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+struct vds_part {
+ sector_t start;
+ sector_t size;
+};
+
+#define VDS_MAXPART 128 /* max # of logical partitions */
+#define DK_LABEL_SIZE 512 /* size of disk label */
+
+struct vds_port {
+ struct vio_driver_state vio;
+ u8 flags;
+ u8 xfer_mode;
+ u8 media_type;
+ u8 label_type;
+ u8 npart;
+ u64 max_xfer_size;
+ u64 vdisk_size;
+ u32 vdisk_bsize;
+ u32 msglen;
+ u64 seq;
+ const char *path;
+ void *msgbuf;
+ struct vds_be_ops *be_ops; /* backend ops */
+ void *be_data;
+ struct mutex label_lock;
+ char label[DK_LABEL_SIZE]; /* for vtoc/gpt */
+ struct vds_part *part;
+ struct vio_disk_geom *geom;
+ struct vio_disk_vtoc *vtoc;
+ struct workqueue_struct *ioq;
+ struct workqueue_struct *rtq;
+};
+
+#define VDS_PORT_SEQ 0x1
+
+static inline struct vds_port *to_vds_port(struct vio_driver_state *vio)
+{
+ return container_of(vio, struct vds_port, vio);
+}
+
+struct vds_io;
+
+/*
+ * Backend interface.
+ */
+struct vds_be_ops {
+ int (*init)(struct vds_port *port);
+ void (*fini)(struct vds_port *port);
+ int (*rw)(struct vds_io *io);
+ int (*flush)(struct vds_port *port);
+};
+
+struct vds_be_ops *vds_blk_get_ops(void);
+struct vds_be_ops *vds_reg_get_ops(void);
+
+int vds_be_init(struct vds_port *port);
+void vds_be_fini(struct vds_port *port);
+
+/*
+ * Label interface.
+ */
+void vds_label_init(struct vds_port *port);
+void vds_label_fini(struct vds_port *port);
+void vds_label_reset(struct vds_port *port);
+void vds_label_clear_part(struct vds_port *port);
+int vds_label_get_vtoc(struct vds_port *port);
+int vds_label_get_start(struct vds_port *port, int slice, sector_t *start);
+int vds_label_chk_iso(struct vds_port *port, bool *iso);
+
+int vds_efi_get(struct vds_port *port, sector_t lba, size_t len, void *data);
+int vds_efi_set(struct vds_port *port, sector_t lba, size_t len, void *data);
+int vds_efi_clear(struct vds_port *port);
+int vds_efi_validate(struct vds_port *port);
+
+int vds_vtoc_get(struct vds_port *port);
+int vds_vtoc_set(struct vds_port *port, struct vio_disk_vtoc *vtoc);
+int vds_vtoc_clear(struct vds_port *port);
+
+#define vds_label_lock(p, v) \
+ do { \
+ vdsdbg(LOCK, "label lock\n"); \
+ mutex_lock(&(p)->label_lock); \
+ } while (0)
+
+#define vds_label_unlock(p, v) \
+ do { \
+ vdsdbg(LOCK, "label unlock\n"); \
+ mutex_unlock(&(p)->label_lock); \
+ } while (0)
+
+#define VDS_LABEL_NONE 0
+#define VDS_LABEL_VTOC 1
+#define VDS_LABEL_EFI 2
+
+#define VDS_EFI_GPT 1
+
+/*
+ * Solaris ENOTSUP error. Solaris vdisk expects to receive this error
+ * when getting the vtoc or geometry of a disk with an EFI label.
+ */ +#define VDS_ENOTSUP 48 + +#define ONE_MEGABYTE (1ULL << 20) +#define ONE_GIGABYTE (1ULL << 30) + +#define vds_vio_lock(v, f) \ + do { \ + vdsdbg(LOCK, "%s: lock\n", __func__); \ + spin_lock_irqsave(&(v)->lock, (f)); \ + } while (0) + +#define vds_vio_unlock(v, f) \ + do { \ + vdsdbg(LOCK, "%s: unlock\n", __func__); \ + spin_unlock_irqrestore(&(v)->lock, (f)); \ + } while (0) + +#define VDS_DEBUG_INIT 0x01 +#define VDS_DEBUG_HS 0x02 +#define VDS_DEBUG_DATA 0x04 +#define VDS_DEBUG_LOCK 0x08 +#define VDS_DEBUG_WQ 0x10 +#define VDS_DEBUG_MEM 0x20 +#define VDS_DEBUG_IOC 0x40 +#define VDS_DEBUG_FLUSH 0x80 +#define VDS_DEBUG_IO 0x100 +#define VDS_DEBUG_BIO 0x200 +#define VDS_DEBUG_FIO 0x400 + +extern int vds_dbg; +extern int vds_dbg_ldc; +extern int vds_dbg_vio; + +#define vdsdbg(TYPE, f, a...) \ + do { \ + if (vds_dbg & VDS_DEBUG_##TYPE) \ + pr_info("vds: ID[%lu] %s " f, \ + vio->vdev->channel_id, __func__, ## a); \ + } while (0) + +#define vdsmsg(type, f, a...) \ + pr_##type("%s: " f, __func__, ## a); diff --git a/drivers/block/vds/vds_blk.c b/drivers/block/vds/vds_blk.c new file mode 100644 index 000000000000..42bc4a321466 --- /dev/null +++ b/drivers/block/vds/vds_blk.c @@ -0,0 +1,202 @@ +/* + * vds_blk.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" + +#define VDS_FMODE (FMODE_READ | FMODE_WRITE | FMODE_EXCL) + +static int vds_blk_init(struct vds_port *port) +{ + struct block_device *bdev; + + bdev = blkdev_get_by_path(port->path, VDS_FMODE, (void *)port); + if (IS_ERR(bdev)) + return (int)(PTR_ERR(bdev)); + + port->vdisk_bsize = bdev_logical_block_size(bdev); + port->vdisk_size = i_size_read(bdev->bd_inode) / port->vdisk_bsize; + port->max_xfer_size = to_bytes(blk_queue_get_max_sectors( + bdev_get_queue(bdev), 0)) / port->vdisk_bsize; + + port->be_data = bdev; + + return 0; +} + +static void vds_blk_fini(struct vds_port *port) +{ + struct block_device *bdev = port->be_data; + + if (bdev) + blkdev_put(bdev, VDS_FMODE); +} + +static void vds_blk_end_io(struct bio *bio, int error) +{ + struct vds_io *io = bio->bi_private; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + unsigned long flags; + int done; + + vdsdbg(BIO, "bio_put(%p), count=%d\n", bio, atomic_read(&io->count)); + bio_put(bio); + + if (error) { + vdsmsg(err, "IO error (%d)\n", error); + if (!io->error) + io->error = error; + } + + /* + * Make sure complete() is called atomically for + * io.count == 0 and the IO operation is completely + * finished in case vds_event checks io.count. + */ + BUG_ON(atomic_read(&io->count) <= 0); + vdsdbg(LOCK, "lock\n"); + spin_lock_irqsave(&port->vio.lock, flags); + vdsdbg(WQ, "cpu=%d work=%p\n", smp_processor_id(), &io->vds_work); + done = atomic_dec_and_test(&io->count); + mb(); /* XXX need barrier? 
*/
+ if (done)
+ complete(&io->event);
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ vdsdbg(LOCK, "unlock\n");
+}
+
+static int vds_blk_rw(struct vds_io *io)
+{
+ int i;
+ int rw;
+ int done;
+ int err = 0;
+ struct bio *bio;
+ struct page *page, *pages;
+ unsigned npages;
+ unsigned long len;
+ unsigned long biolen, biomax;
+ sector_t offset, size, resid;
+ struct blk_plug plug;
+ struct vio_driver_state *vio = io->vio;
+ struct vds_port *port = to_vds_port(vio);
+ struct block_device *bdev = port->be_data;
+
+ vdsdbg(BIO, "(0x%p, %lld, %ld, %d)\n", io->pages, io->size,
+ io->offset, io->rw);
+
+ rw = io->rw;
+ size = to_sector(io->size);
+ offset = io->offset;
+ pages = io->pages;
+ npages = io->npages;
+ len = npages << PAGE_SHIFT;
+
+ rw |= REQ_SYNC; /* device IO is always sync */
+ resid = size;
+ i = 0;
+
+ BUG_ON(atomic_read(&io->count));
+ atomic_set(&io->count, 1);
+ init_completion(&io->event);
+
+ /*
+ * Tell the driver to coalesce bio operations if possible.
+ */
+ blk_start_plug(&plug);
+
+ biomax = port->max_xfer_size * port->vdisk_bsize;
+
+ /*
+ * Break up the request into bio operations and submit them.
+ */
+ while (resid) {
+ bio = bio_alloc(GFP_NOIO, npages);
+ bio->bi_iter.bi_sector = offset + (size - resid);
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = vds_blk_end_io;
+ bio->bi_private = io;
+
+ for (biolen = 0; resid; biolen += len) {
+ int rv;
+
+ /*
+ * Try and add as many pages as possible.
+ */
+ BUG_ON(biolen > biomax);
+ len = min(PAGE_SIZE, to_bytes(resid));
+ len = min(len, biomax - biolen);
+ if (!len)
+ break;
+ page = pages + i;
+
+ /*
+ * XXX Can offset be non-zero?
+ */
+ rv = bio_add_page(bio, page, len, 0);
+ vdsdbg(BIO, "bio_add_page(%p, %p, %lx)=%d\n",
+ bio, page, len, rv);
+ vdsdbg(BIO, "bi_sector=%lu, bi_size=%u\n",
+ bio->bi_iter.bi_sector, bio->bi_iter.bi_size);
+
+ if (!rv) {
+ vdsmsg(err,
+ "bio_add_page: resid=%ld biolen=%ld\n",
+ resid, biolen);
+ err = -EIO;
+ break;
+ }
+
+ i++;
+ npages--;
+ resid -= to_sector(len);
+ vdsdbg(BIO, "npages=%d, resid=%lu\n", npages, resid);
+ }
+
+ if (err)
+ break;
+
+ atomic_inc(&io->count);
+ mb(); /* XXX need barrier? */
+ vdsdbg(BIO, "submit_bio(%d, %p) count=%d\n",
+ rw, bio, atomic_read(&io->count));
+ submit_bio(rw, bio);
+ }
+
+ blk_finish_plug(&plug); /* let the bio ops go... */
+
+ /*
+ * If the last bio completes after the dec_and_test check
+ * wait_for_completion() should not block and just return.
+ */
+ done = atomic_dec_and_test(&io->count);
+ mb(); /* XXX need barrier? */
+ if (!done)
+ wait_for_completion(&io->event);
+ vdsdbg(BIO, "io complete count=%d\n", atomic_read(&io->count));
+
+ return err;
+}
+
+static int vds_blk_flush(struct vds_port *port)
+{
+ struct block_device *bdev = port->be_data;
+
+ return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
+}
+
+struct vds_be_ops vds_blk_ops = {
+ vds_blk_init,
+ vds_blk_fini,
+ vds_blk_rw,
+ vds_blk_flush,
+};
+
+struct vds_be_ops *vds_blk_get_ops()
+{
+ return &vds_blk_ops;
+}
diff --git a/drivers/block/vds/vds_efi.c b/drivers/block/vds/vds_efi.c
new file mode 100644
index 000000000000..43c929c4390c
--- /dev/null
+++ b/drivers/block/vds/vds_efi.c
@@ -0,0 +1,239 @@
+/*
+ * vds_efi.c: LDOM Virtual Disk Server.
+ *
+ * Copyright (C) 2014 Oracle. All rights reserved.
+ */ + +#include "vds.h" +#include "vds_io.h" +#include <../block/partitions/check.h> +#include <../block/partitions/efi.h> +#include <linux/crc32.h> +#include <linux/slab.h> + +#define VDS_EFI_GPE_LEN(port, nparts) \ + roundup((sizeof(gpt_entry) * (nparts)), (port)->vdisk_bsize) + +/* + * Return a 32-bit CRC of the contents of the buffer. + * + * The seed is 0xffffffff and the result is XORed with 0xffffffff + * because this is what the Itanium firmware expects. + */ +static unsigned int vds_efi_crc32(const unsigned char *s, unsigned int len) +{ + return crc32(~0L, (void *)s, len) ^ ~0L; +} + +/* + * vds_efi_crc_check + * + * Compute the CRC on the range of memory specified by (addr, len) + * and return whether that CRC value matches the value stored at + * the location referenced by crc_field. + */ +static int vds_efi_crc_check(u32 *crc_field, unsigned char *addr, u32 len) +{ + u32 crc_stored; + u32 crc_computed; + int rv = 0; + + crc_stored = *crc_field; + *crc_field = cpu_to_le32(0); + crc_computed = vds_efi_crc32(addr, len); + *crc_field = crc_stored; + + if (le32_to_cpu(crc_stored) != crc_computed) { + vdsmsg(warn, + "Bad EFI CRC: (stored, computed): (0x%x, 0x%x)\n", + crc_stored, crc_computed); + rv = -EINVAL; + } + + return rv; +} + +/* + * Check that an EFI GPT is valid. This function should be called with a raw + * EFI GPT, i.e. GPT data should be in little endian format as indicated in + * the EFI specification and should not have been swapped to match the + * system endianness. + */ +static int vds_efi_check_gpt(struct vio_driver_state *vio, + gpt_header *gpt, size_t block_size) +{ + if (gpt->signature != cpu_to_le64(GPT_HEADER_SIGNATURE)) { + vdsdbg(IOC, "Bad EFI signature: 0x%llx != 0x%llx\n", + (long long)gpt->signature, + (long long)cpu_to_le64(GPT_HEADER_SIGNATURE)); + return -EINVAL; + } + + /* + * check CRC of the header; the size of the header should + * never be larger than one block + */ + if (le32_to_cpu(gpt->header_size) > block_size) { + vdsmsg(warn, "Header (%u bytes) larger than one block (%u)\n", + le32_to_cpu(gpt->header_size), + (unsigned int)block_size); + return -EINVAL; + } + + return vds_efi_crc_check(&gpt->header_crc32, + (unsigned char *)gpt, le32_to_cpu(gpt->header_size)); +} + +static void vds_efi_update_part(struct vds_port *port, gpt_entry *gpe) +{ + int i; + u64 start, end; + + vds_label_clear_part(port); + + for (i = 0; i < port->npart; i++) { + + start = le64_to_cpu(gpe[i].starting_lba); + end = le64_to_cpu(gpe[i].ending_lba); + + if (start && end) { + port->part[i].start = start; + port->part[i].size = end - start + 1; + } + } +} + +static int vds_efi_update(struct vds_port *port, gpt_header *gpt) +{ + int rv; + u32 nparts; + size_t gpe_len; + sector_t lba; + gpt_entry *gpe = NULL; + struct vio_driver_state *vio = &port->vio; + + /* + * Validate GPT and update partition info. + */ + rv = vds_efi_check_gpt(vio, gpt, port->vdisk_bsize); + if (rv) { + vdsdbg(IOC, "bad EFI GPT\n"); + return rv; + } + + lba = le64_to_cpu(gpt->partition_entry_lba); + nparts = le32_to_cpu(gpt->num_partition_entries); + + /* + * If the number of partitions represented in the GPT + * Header is larger than what is created by convention, + * force the vdisk subsystem to use the conventional value. + * + * Note that we do not force a fatal error. The vdisk + * client will not be able to access partitions beyond + * the specified value, but the vdisk client will also + * not fail on operations that access an EFI disk having + * a large number of unused partitions.
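+ * + * For example, a GPT created with the common default of 128 entries + * is clamped here to VDS_MAXPART, and only that many partitions are + * exposed to the client.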
+ */ + nparts = min_t(u32, nparts, VDS_MAXPART); + port->npart = nparts; + + gpe_len = VDS_EFI_GPE_LEN(port, nparts); + if (gpe_len) { + gpe = kzalloc(gpe_len, GFP_KERNEL); + if (!gpe) { + port->npart = 0; + return -ENOMEM; + } + + rv = vds_read(port, (void *)gpe, lba, gpe_len); + if (rv) { + kfree(gpe); + port->npart = 0; + return rv; + } + + vds_efi_update_part(port, gpe); + kfree(gpe); + } + + port->label_type = VDS_LABEL_EFI; + + return 0; +} + +/* + * Get the EFI GPT or GPE from the disk backend. The on-disk GPT and GPE + * are stored in little endian format and this function converts selected + * fields using the endianness of the system for its internal use but the + * client data is returned unmodified. + * + * The number of partitions in an EFI GPT can be larger than what the vdisk + * subsystem supports. Return the smaller of what is in the label and what + * the vdisk subsystem supports. + */ +int vds_efi_validate(struct vds_port *port) +{ + int rv; + struct vio_driver_state *vio = &port->vio; + + rv = vds_read(port, port->label, VDS_EFI_GPT, DK_LABEL_SIZE); + + if (!rv) + rv = vds_efi_update(port, (gpt_header *)port->label); + + if (rv) + vdsdbg(IOC, "failed: rv=%d\n", rv); + + return rv; +} + +inline int vds_efi_get(struct vds_port *port, sector_t lba, size_t len, + void *data) +{ + return vds_read(port, data, lba, len); +} + +int vds_efi_set(struct vds_port *port, sector_t lba, size_t len, void *data) +{ + int rv, err; + struct vio_driver_state *vio = &port->vio; + + vdsdbg(IOC, "data=%p lba=%lu len=%lu\n", data, lba, len); + + err = vds_write(port, data, lba, len); + + if (err) { + vdsmsg(err, "write EFI label failed: rv=%d\n", err); + } else if (lba == VDS_EFI_GPT) { + rv = vds_efi_validate(port); + if (rv) + /* + * To convert from EFI to VTOC, Solaris format(1M) + * clears the EFI signature, issues a GETGEOM command + * and puts the EFI signature back on the disk, so + * ignore invalid signature errors here just in case. + */ + vdsdbg(IOC, "read EFI label failed: rv=%d\n", rv); + } + + return err; +} + +int vds_efi_clear(struct vds_port *port) +{ + int rv; + struct vio_driver_state *vio = &port->vio; + + /* + * Clear primary and backup GPT. + */ + rv = vds_clear(port, VDS_EFI_GPT, port->vdisk_bsize); + if (rv) + return rv; + + rv = vds_clear(port, port->vdisk_size - 1, port->vdisk_bsize); + if (rv) + vdsdbg(IOC, "Clearing backup GPT failed rv=%d\n", rv); + + vds_label_reset(port); + + return 0; +} diff --git a/drivers/block/vds/vds_io.c b/drivers/block/vds/vds_io.c new file mode 100644 index 000000000000..d44bee69b9c0 --- /dev/null +++ b/drivers/block/vds/vds_io.c @@ -0,0 +1,622 @@ +/* + * vds_io.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" + +#define VDS_MAX_XFER_SIZE (128 * 1024) +#define VDS_RETRIES 5 +#define VDS_DEV_DELAY 1000000 /* 1 sec */ +#define VDS_SLICE_NONE 0xff + +static struct kmem_cache *vds_io_cache; +static int vds_ioc_size; +static char *vds_ioc_name = "vds_io"; + +int vds_io_init(void) +{ + int max_entry; + int max_cookies; + int max_dring_mode; + int max_desc_mode; + + /* + * Create a kmem_cache for vds_io allocations. + * + * The size of the cache object accommodates the largest possible + * IO transfer initiated from either dring or descriptor mode.
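+ * + * For example, with VDS_MAX_XFER_SIZE of 128K and the 8K pages typical + * on sparc64, a transfer spans at most 16 pages, so 17 cookies (one + * extra for a buffer that is not page aligned) are reserved, or + * VIO_MAX_RING_COOKIES if that is larger.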
+ */ + max_cookies = (roundup(VDS_MAX_XFER_SIZE, PAGE_SIZE) / PAGE_SIZE) + 1; + max_cookies = max(max_cookies, VIO_MAX_RING_COOKIES); + max_entry = max_cookies * sizeof(struct ldc_trans_cookie); + + max_dring_mode = LDC_PACKET_SIZE + sizeof(struct vio_disk_desc) + + max_entry; + max_desc_mode = sizeof(struct vio_disk_desc_inband) + max_entry; + + vds_ioc_size = sizeof(struct vds_io) + + max(max_dring_mode, max_desc_mode); + + vds_io_cache = kmem_cache_create(vds_ioc_name, vds_ioc_size, 0, + 0, NULL); + if (!vds_io_cache) { + vdsmsg(err, "Failed to create vds_io_cache\n"); + return -ENOMEM; + } + + return 0; +} + +void vds_io_fini(void) +{ + kmem_cache_destroy(vds_io_cache); +} + +/* + * Allocate a vds_io request structure. + * + * Allocate the structure from vds_io_cache if the total required + * space fits within a vds_io_cache object; otherwise use kmalloc(). + * + * XXX In principle, the kmalloc() method should not be required + * since vds_io_cache should accommodate the largest supported IO + * transfer size defined as VDS_MAX_XFER_SIZE. The max_xfer_size + * parameter is negotiated during the handshake and should be honored + * by all clients; however, it seems that OBP does not do that. + * This should not be an issue since VDS_MAX_XFER_SIZE should + * always be larger than any OBP transfer size but the kmalloc() + * option is there since an OBP transfer size > VDS_MAX_XFER_SIZE + * could theoretically cause memory corruption. + * + * The proper thing to do would be to nack a non-conforming transfer size. + */ +struct vds_io *vds_io_alloc(struct vio_driver_state *vio, + void (*func)(struct work_struct *)) +{ + struct vds_port *port = to_vds_port(vio); + struct vds_io *io; + int size; + + size = sizeof(*io) + port->msglen + vio->desc_buf_len; + vdsdbg(MEM, "size=%d ioc_size=%d\n", size, vds_ioc_size); + + if (size <= vds_ioc_size) { + io = kmem_cache_zalloc(vds_io_cache, GFP_ATOMIC); + + if (!io) + return NULL; + io->flags = VDS_IO_CACHE; + io->msgbuf = io->buf; + io->desc_buf = io->buf + port->msglen; + } else { + io = kzalloc(sizeof(*io), GFP_ATOMIC); + if (!io) + goto err; + io->msgbuf = kzalloc(port->msglen, GFP_ATOMIC); + if (!io->msgbuf) + goto err; + BUG_ON(!vio->desc_buf_len); + io->desc_buf = kzalloc(vio->desc_buf_len, GFP_ATOMIC); + if (!io->desc_buf) + goto err; + } + io->vio = vio; + if (func) + INIT_WORK(&io->vds_work, func); + + return io; + +err: + if (io) { + kfree(io->msgbuf); + kfree(io->desc_buf); + kfree(io); + } + + return NULL; +} + +void vds_io_free(struct vds_io *io) +{ + if (io->flags & VDS_IO_CACHE) { + kmem_cache_free(vds_io_cache, io); + } else { + kfree(io->msgbuf); + kfree(io->desc_buf); + kfree(io); + } +} + +static int vds_io_alloc_pages(struct vds_io *io, unsigned long len) +{ + struct vio_driver_state *vio = io->vio; + + BUG_ON(len % PAGE_SIZE != 0); + io->ord = get_order(len); + io->pages = alloc_pages(GFP_KERNEL | __GFP_COMP, io->ord); + if (!io->pages) + return -ENOMEM; + io->npages = len >> PAGE_SHIFT; + + vdsdbg(MEM, "ord=%d pages=%p npages=%d\n", io->ord, io->pages, + io->npages); + + return 0; +} + +static void vds_io_free_pages(struct vds_io *io) +{ + __free_pages(io->pages, io->ord); + + io->pages = NULL; + io->npages = 0; + io->ord = 0; +} + +void vds_io_enq(struct vds_io *io) +{ + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + vdsdbg(WQ, "cpu=%d\n", smp_processor_id()); + + BUG_ON(!in_interrupt()); + + if (io->flags & VDS_IO_FINI) + queue_work(port->rtq, &io->vds_work); + else + queue_work(port->ioq, &io->vds_work);
+} + +static int vds_io_rw(struct vds_io *io) +{ + int err; + void *buf; + unsigned long len; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + vdsdbg(IO, "(0x%p, %lld, %ld, %d)\n", io->addr, io->size, + io->offset, io->rw); + + if (!to_sector(io->size)) + return -EINVAL; + + if (!port->be_ops) + return -EIO; + + len = (unsigned long)roundup(io->size, PAGE_SIZE); + err = vds_io_alloc_pages(io, len); + if (err) + return err; + + buf = page_address(io->pages); + + BUG_ON(!buf); + BUG_ON(!io->addr); + + if (io->rw & WRITE) + memcpy(buf, io->addr, io->size); + + err = port->be_ops->rw(io); + + if (!err && !(io->rw & WRITE)) + memcpy(io->addr, buf, io->size); + + vds_io_free_pages(io); + + return err; +} + +/* + * Common routine for read/write/clear interfaces. + */ +static int vds_rw(struct vds_port *port, void *addr, sector_t offset, u64 size, + int rw) +{ + int rv = -ENOMEM; + struct vds_io *io; + struct vio_driver_state *vio = &port->vio; + + io = vds_io_alloc(vio, NULL); + if (io) { + io->addr = addr; + io->offset = offset; + io->size = size; + io->rw = rw; + rv = vds_io_rw(io); + vds_io_free(io); + } + + vdsdbg(IO, "addr=%p offset=%lu size=%llu rw=%d rv=%d\n", + addr, offset, size, rw, rv); + + return rv; +} + +inline int vds_read(struct vds_port *port, void *addr, sector_t off, u64 size) +{ + return vds_rw(port, addr, off, size, 0); +} + +inline int vds_write(struct vds_port *port, void *addr, sector_t off, u64 size) +{ + return vds_rw(port, addr, off, size, WRITE); +} + +inline int vds_clear(struct vds_port *port, sector_t offset, u64 size) +{ + int rv; + void *addr; + + addr = kzalloc(size, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + rv = vds_rw(port, addr, offset, size, WRITE); + + kfree(addr); + + return rv; +} + +static int vds_copy(struct vio_driver_state *vio, int dir, void *buf, + struct vio_disk_dring_payload *desc, u64 size, u64 offset) +{ + int rv, err; + + if (!size) + size = desc->size; + + rv = ldc_copy(vio->lp, dir, buf, size, offset, desc->cookies, + desc->ncookies); + if (rv > 0) { + if (rv == size) + err = 0; + else + err = -EIO; + } else + err = rv; + + vdsdbg(BIO, "dir=%d size=%llu offset=%llu rv=%d err=%d\n", + dir, size, offset, rv, err); + + return err; +} + +int vd_op_get_vtoc(struct vds_io *io) +{ + int rv; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + rv = vds_label_get_vtoc(port); + if (rv) + vdsdbg(IOC, "vds_label_get_vtoc rv=%d\n", rv); + + if (rv == 0 || rv == -EINVAL) + rv = vds_copy(vio, LDC_COPY_OUT, port->vtoc, io->desc, 0, 0); + + vdsdbg(IOC, "VD_OP_GET_VTOC ascii=%s\n", port->vtoc->ascii_label); + vdsdbg(IOC, "VD_OP_GET_VTOC rv=%d\n", rv); + + return rv; +} + +int vd_op_set_vtoc(struct vds_io *io) +{ + int rv = 0; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + vds_label_lock(port, vio); + + rv = vds_copy(vio, LDC_COPY_IN, port->vtoc, io->desc, 0, 0); + + if (rv == 0 && port->label_type == VDS_LABEL_EFI) + rv = vds_efi_clear(port); + + if (!rv) + rv = vds_vtoc_set(port, port->vtoc); + + vds_label_unlock(port, vio); + + vdsdbg(IOC, "VD_OP_SET_VTOC ascii=%s\n", port->vtoc->ascii_label); + vdsdbg(IOC, "VD_OP_SET_VTOC rv=%d\n", rv); + return rv; +} + +int vd_op_get_geom(struct vds_io *io) +{ + int rv; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + rv = vds_label_get_vtoc(port); + if (rv) + vdsdbg(IOC, "vds_label_get_vtoc rv=%d\n", rv); + + if (rv == 0 || rv == -EINVAL) { + struct 
vio_disk_geom *geom = port->geom; + + vdsdbg(IOC, "ncyl=%u nhd=%u nsec=%u\n", + geom->phy_cyl, geom->num_hd, geom->num_sec); + + rv = vds_copy(vio, LDC_COPY_OUT, geom, io->desc, 0, 0); + } + + vdsdbg(IOC, "VD_OP_GET_DISKGEOM rv=%d\n", rv); + + return rv; +} + +int vd_op_set_geom(struct vds_io *io) +{ + int rv; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + rv = vds_copy(vio, LDC_COPY_IN, port->geom, io->desc, 0, 0); + + vdsdbg(IOC, "VD_OP_SET_DISKGEOM rv=%d\n", rv); + + return rv; +} + +int vd_op_get_efi(struct vds_io *io) +{ + int rv; + size_t len; + void *data; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct vio_disk_efi efi_in; + struct vio_disk_efi *efi_out = NULL; + + rv = vds_copy(vio, LDC_COPY_IN, &efi_in, io->desc, sizeof(efi_in), 0); + if (rv) + goto done; + + vds_label_lock(port, vio); + + /* + * Adjust the required len by an additional VIO EFI header + * so that the returned results are contiguous and can be + * copied out all at once. + */ + len = efi_in.len + sizeof(struct vio_disk_efi); + efi_out = kzalloc(len, GFP_KERNEL); + if (efi_out) { + data = (void *)efi_out + sizeof(struct vio_disk_efi); + rv = vds_efi_get(port, efi_in.lba, efi_in.len, data); + } else + rv = -ENOMEM; + + if (!rv) { + efi_out->lba = efi_in.lba; + efi_out->len = efi_in.len; + rv = vds_copy(vio, LDC_COPY_OUT, efi_out, io->desc, len, 0); + } + + vds_label_unlock(port, vio); + +done: + vdsdbg(IOC, "VD_OP_GET_EFI rv=%d\n", rv); + kfree(efi_out); + + return rv; +} + +int vd_op_set_efi(struct vds_io *io) +{ + int rv; + struct vio_disk_efi *efi; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + + efi = kzalloc(roundup(io->desc->size, 8), GFP_KERNEL); + if (!efi) { + rv = -ENOMEM; + goto done; + } + + vds_label_lock(port, vio); + + rv = vds_copy(vio, LDC_COPY_IN, efi, io->desc, 0, 0); + + if (rv == 0 && port->label_type == VDS_LABEL_VTOC) + rv = vds_vtoc_clear(port); + + if (!rv) + rv = vds_efi_set(port, efi->lba, efi->len, efi->data); + + vds_label_unlock(port, vio); + +done: + vdsdbg(IOC, "VD_OP_SET_EFI rv=%d\n", rv); + kfree(efi); + + return rv; +} + +int vd_op_flush(struct vio_driver_state *vio) +{ + int rv; + struct vds_port *port = to_vds_port(vio); + + if (port->be_ops) { + flush_workqueue(port->ioq); + rv = port->be_ops->flush(port); + } else + rv = -EIO; + + vdsdbg(FLUSH, "VD_OP_FLUSH rv=%d\n", rv); + return rv; +} + +int vd_op_rw(struct vds_io *io) +{ + int err = 0; + u8 slice; + unsigned long len, dsz; + sector_t offset, size, start; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct vio_disk_dring_payload *desc; + void *buf; + + desc = io->desc; + + /* + * Get the request size and block offset. + */ + offset = to_sector((desc->offset * port->vdisk_bsize)); + size = to_sector(desc->size); + if (!size) { + io->ack = VIO_SUBTYPE_NACK; + goto done; + } + + /* + * If a slice is provided, make sure there is label info + * to read the slice offset from. + */ + slice = desc->slice; + if (slice != VDS_SLICE_NONE) { + err = vds_label_get_start(port, slice, &start); + if (err) { + io->ack = VIO_SUBTYPE_NACK; + goto done; + } + offset += start; + } + + /* + * Allocate pages for io. + * + * Calculate one page per cookie rather than using desc->size because, + * for example, a PAGE_SIZE request may be split across a number of + * cookies.
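+ * A request of one PAGE_SIZE whose client buffer is not page aligned + * arrives as two cookies and therefore needs two pages here.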
+ * + * XXX Coalesce cookies with contiguous addresses in order to + * reduce the number of page allocations and bio requests. + */ + len = (unsigned long)desc->ncookies * PAGE_SIZE; + dsz = (unsigned long)roundup(desc->size, PAGE_SIZE); + len = max(len, dsz); + err = vds_io_alloc_pages(io, len); + if (err) + goto done; + + buf = page_address(io->pages); + + if (io->rw & WRITE) { + err = vds_copy(vio, LDC_COPY_IN, buf, desc, 0, 0); + if (err) + goto free; + } + + /* + * Call the backend to perform the actual operation. + */ + io->size = desc->size; + io->offset = offset; + + if (port->be_ops) + err = port->be_ops->rw(io); + else + err = -EIO; + + if (!err && !(io->rw & WRITE)) + err = vds_copy(vio, LDC_COPY_OUT, buf, desc, 0, 0); + +free: + vds_io_free_pages(io); + + if (offset <= 1 && (io->rw & WRITE)) + vds_label_init(port); + +done: + return err; +} + +/* + * Backend operations. + */ +int vds_be_init(struct vds_port *port) +{ + int i, rv; + bool iso; + umode_t mode; + struct path path; + struct inode *inode; + struct vio_driver_state *vio = &port->vio; + + rv = kern_path(port->path, LOOKUP_FOLLOW, &path); + if (rv) + goto done; + + inode = path.dentry->d_inode; + mode = inode->i_mode; + path_put(&path); + + if (S_ISREG(mode)) + port->be_ops = vds_reg_get_ops(); + else if (S_ISBLK(mode)) + port->be_ops = vds_blk_get_ops(); + else + rv = -ENODEV; + + if (!rv) + for (i = 0; i < VDS_RETRIES; i++) { + rv = port->be_ops->init(port); + if (rv == 0 || rv != -EAGAIN) + break; + udelay(VDS_DEV_DELAY); + } + + vdsdbg(HS, "vdisk_blk_sz=%u vdisk_sz=%llu max_xfer_sz=%llu\n", + port->vdisk_bsize, port->vdisk_size, port->max_xfer_size); + + if (!(port->vdisk_bsize && port->vdisk_size && port->max_xfer_size)) { + rv = -EINVAL; + goto done; + } + + rv = vds_label_chk_iso(port, &iso); + if (rv) { + vdsmsg(err, "media check error\n"); + goto done; + } + + /* + * Indicate whether to call this a CD or DVD from the size + * of the ISO image (images for both drive types are stored + * in the ISO-9660 format). CDs can store up to just under 1 GB. + */ + if (!iso) + port->media_type = VD_MEDIA_TYPE_FIXED; + else if ((port->vdisk_size * port->vdisk_bsize) > ONE_GIGABYTE) + port->media_type = VD_MEDIA_TYPE_DVD; + else + port->media_type = VD_MEDIA_TYPE_CD; + + vds_label_init(port); + +done: + if (rv) + vdsmsg(err, "%s: init failed (%d)\n", port->path, rv); + + return rv; +} + +void vds_be_fini(struct vds_port *port) +{ + flush_workqueue(port->ioq); + vds_label_fini(port); + if (port->be_ops) { + port->be_ops->fini(port); + port->be_data = NULL; + } +} diff --git a/drivers/block/vds/vds_io.h b/drivers/block/vds/vds_io.h new file mode 100644 index 000000000000..6faf75202274 --- /dev/null +++ b/drivers/block/vds/vds_io.h @@ -0,0 +1,67 @@ +/* + * vds_io.h: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +struct vds_port; + +/* + * IO interface. + * + * I/O struct allocated dynamically per client request. + * A request is scheduled in interrupt context and executed later + * in a worker kernel thread in process context, on one of the + * port's dedicated workqueues (see vds_io_enq()). + * A client request may cause a number of bio operations which + * are tracked by count below.
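+ * The count is primed to 1 before any bio is submitted and dropped + * again once all bios have been issued, so the completion fires + * exactly once regardless of the order in which the bios finish.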
+ */ +struct vds_io { + int flags; + int ack; + int error; + u32 msglen; + atomic_t count; + void *msgbuf; + void *desc_buf; + struct vio_disk_dring_payload *desc; + struct vio_driver_state *vio; + int rw; + u64 size; + unsigned ord; + void *addr; + sector_t offset; + unsigned npages; + struct page *pages; + struct completion event; + struct work_struct vds_work; + char buf[0]; +}; + +#define VDS_IO_CACHE 0x1 +#define VDS_IO_INIT 0x2 +#define VDS_IO_FINI 0x4 + +int vds_io_init(void); +void vds_io_fini(void); +struct vds_io *vds_io_alloc(struct vio_driver_state *vio, + void (*func)(struct work_struct *)); +void vds_io_free(struct vds_io *io); +void vds_io_enq(struct vds_io *io); + +void *vds_get(struct vds_port *port, sector_t offset, u64 size); +int vds_clear(struct vds_port *port, sector_t offset, u64 size); +int vds_read(struct vds_port *port, void *addr, sector_t offset, u64 size); +int vds_write(struct vds_port *port, void *addr, sector_t offset, u64 size); + +/* + * VIO interface. + */ +int vd_op_get_vtoc(struct vds_io *io); +int vd_op_set_vtoc(struct vds_io *io); +int vd_op_get_geom(struct vds_io *io); +int vd_op_set_geom(struct vds_io *io); +int vd_op_get_efi(struct vds_io *io); +int vd_op_set_efi(struct vds_io *io); +int vd_op_flush(struct vio_driver_state *vio); +int vd_op_rw(struct vds_io *io); diff --git a/drivers/block/vds/vds_label.c b/drivers/block/vds/vds_label.c new file mode 100644 index 000000000000..bf08a10affa3 --- /dev/null +++ b/drivers/block/vds/vds_label.c @@ -0,0 +1,145 @@ +/* + * vds_label.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" +#include <linux/iso_fs.h> + +#define ISO_VOLDESC_SEC 16 /* 1st sector of volume descriptors */ + +inline void vds_label_clear_part(struct vds_port *port) +{ + memset(port->part, 0, sizeof(*port->part) * VDS_MAXPART); +} + +void vds_label_reset(struct vds_port *port) +{ + struct vio_driver_state *vio = &port->vio; + + vdsdbg(IOC, "media=%u label=%u\n", port->media_type, port->label_type); + vds_label_clear_part(port); + port->npart = 0; + port->label_type = VDS_LABEL_NONE; +} + +int vds_label_chk_iso(struct vds_port *port, bool *iso) +{ + int rv; + sector_t sec; + struct iso_volume_descriptor *vdp; + char iso_buf[ISOFS_BLOCK_SIZE]; + struct vio_driver_state *vio = &port->vio; + + /* + * Read the sector that should contain the 2nd ISO volume + * descriptor. The second field in this descriptor is called the + * Standard Identifier and is set to CD001 for a CD-ROM compliant + * to the ISO 9660 standard. + */ + sec = (ISO_VOLDESC_SEC * ISOFS_BLOCK_SIZE) / port->vdisk_bsize; + rv = vds_read(port, (void *)iso_buf, sec, ISOFS_BLOCK_SIZE); + if (rv) + goto done; + + vdp = (struct iso_volume_descriptor *)iso_buf; + + if (strncmp(vdp->id, ISO_STANDARD_ID, sizeof(vdp->id)) == 0) + *iso = 1; + else + *iso = 0; + +done: + vdsdbg(IOC, "media=%d rv=%d\n", port->media_type, rv); + return rv; +} + +/* + * Cache the label info since partition offsets are needed for + * IO requests against a particular slice vs. VDS_SLICE_NONE. + * + * A call to vds_label_init() unconditionally reads the label + * (VTOC/EFI) from the disk and caches the result if the read + * succeeds. + * + * Don't check for errors here since VDS_SLICE_NONE requests + * don't need partition offsets; instead any IO request requiring + * partition info will later fail.
+ */ +void vds_label_init(struct vds_port *port) +{ + struct vio_driver_state *vio = &port->vio; + int rv; + + /* + * Set the ops according to the label type (VTOC/EFI) + * and init as appropriate. Make sure ops is set + * atomically and cannot change while the label info is + * fetched. This is conceivably possible if multiple + * requests are processed in concurrent work threads. + */ + vds_label_lock(port, vio); + + if (port->npart) + vdsdbg(INIT, "existing partitions (%d).\n", port->npart); + + vds_label_reset(port); + + rv = vds_vtoc_get(port); + if (rv == -EINVAL) + rv = vds_efi_validate(port); + + if (rv) + vdsdbg(INIT, "unknown disk label\n"); + + vds_label_unlock(port, vio); +} + +void vds_label_fini(struct vds_port *port) +{ + struct vio_driver_state *vio = &port->vio; + + vds_label_lock(port, vio); + vds_label_reset(port); + vds_label_unlock(port, vio); +} + +int vds_label_get_vtoc(struct vds_port *port) +{ + int rv; + struct vio_driver_state *vio = &port->vio; + + vds_label_lock(port, vio); + + vds_label_reset(port); + + rv = vds_vtoc_get(port); + if (rv == -EINVAL) { + (void) vds_efi_validate(port); + if (port->label_type == VDS_LABEL_EFI) + rv = -VDS_ENOTSUP; + } + + vds_label_unlock(port, vio); + + return rv; +} + +int vds_label_get_start(struct vds_port *port, int slice, sector_t *start) +{ + struct vio_driver_state *vio = &port->vio; + int rv = -EIO; + + vds_label_lock(port, vio); + if (slice < port->npart) { + *start = port->part[slice].start; + rv = 0; + } + vds_label_unlock(port, vio); + + vdsdbg(IO, "(%d)=(%d, %lu)\n", slice, rv, *start); + + return rv; +} diff --git a/drivers/block/vds/vds_main.c b/drivers/block/vds/vds_main.c new file mode 100644 index 000000000000..e9eae09fba8b --- /dev/null +++ b/drivers/block/vds/vds_main.c @@ -0,0 +1,949 @@ +/* + * vds_main.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" + +#define DRV_MOD_NAME "vds" +#define DRV_MOD_VERSION "1.0" + +static char version[] = DRV_MOD_NAME ".c:v" DRV_MOD_VERSION "\n"; +MODULE_DESCRIPTION("LDOM virtual disk server driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MOD_VERSION); + +#define VDS_OPS (1 << VD_OP_BREAD | \ + 1 << VD_OP_BWRITE | \ + 1 << VD_OP_GET_VTOC | \ + 1 << VD_OP_SET_VTOC | \ + 1 << VD_OP_GET_DISKGEOM | \ + 1 << VD_OP_SET_DISKGEOM | \ + 1 << VD_OP_GET_EFI | \ + 1 << VD_OP_SET_EFI | \ + 1 << VD_OP_FLUSH) +/* + * XXX The recommended value is 0, but that creates worker threads which + * scale with ncpu and, because of some apparent flow control issues, + * cause SCSI timeouts; limit to 1 thread for now. + */ +int vds_wq = 1; +int vds_dbg; +int vds_dbg_ldc; +int vds_dbg_vio; + +module_param(vds_dbg, uint, 0664); +module_param(vds_dbg_ldc, uint, 0664); +module_param(vds_dbg_vio, uint, 0664); +module_param(vds_wq, uint, 0664); + +/* Ordered from largest major to lowest */ +static struct vio_version vds_versions[] = { + { .major = 1, .minor = 1 }, + { .major = 1, .minor = 0 }, +}; + +static void vds_handshake_complete(struct vio_driver_state *vio) +{ + struct vio_dring_state *dr; + + dr = &vio->drings[VIO_DRIVER_RX_RING]; + dr->snd_nxt = dr->rcv_nxt = 1; +} + +static int vds_handle_unknown(struct vds_port *port) +{ + struct vio_msg_tag *pkt = port->msgbuf; + + vdsmsg(err, "Received unknown msg [%02x:%02x:%04x:%08x]\n", + pkt->type, pkt->stype, pkt->stype_env, pkt->sid); + vdsmsg(err, "Resetting connection.\n"); + + ldc_disconnect(port->vio.lp); + + return -ECONNRESET; +} + +/* vio_driver_init() expects this.
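The attribute exchange is client-driven, so there is nothing for the server to send.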
*/ +static int vds_send_attr(struct vio_driver_state *vio) +{ + return 0; +} + +static int vds_handle_attr(struct vio_driver_state *vio, void *arg) +{ + struct vds_port *port = to_vds_port(vio); + struct vio_disk_attr_info *pkt = arg; + + /* checkpatch.pl doesn't like split format strings */ + vdsdbg(HS, "GOT ATTR stype[0x%x] stype_env[0x%x] ", + pkt->tag.stype, pkt->tag.stype_env); + + vdsdbg(HS, "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n", + pkt->xfer_mode, pkt->vdisk_block_size, pkt->max_xfer_size); + + if (pkt->tag.type != VIO_TYPE_CTRL || + pkt->tag.stype != VIO_SUBTYPE_INFO || + pkt->tag.stype_env != VIO_ATTR_INFO || + pkt->max_xfer_size == 0) { + vdsmsg(err, "%s: Attribute NACK\n", vio->name); + return -ECONNRESET; + } + + if (pkt->xfer_mode == VIO_DESC_MODE) { + struct vio_disk_attr_info tmp; + + /* + * vio_disk_dring_inband contains no cookies; need room + * for up to n cookies, where "n" is the number of full + * pages plus possibly one partial page required to cover + * "max_xfer_size". Add room for one more cookie if + * "max_xfer_size" isn't an integral multiple of the page size. + * Must first get the maximum transfer size in bytes. + */ + size_t max_xfer_bytes = pkt->vdisk_block_size ? + pkt->vdisk_block_size * pkt->max_xfer_size : + pkt->max_xfer_size; + + size_t max_inband_msglen = + sizeof(struct vio_disk_desc_inband) + + (((roundup(max_xfer_bytes, PAGE_SIZE) / PAGE_SIZE) + 1) * + sizeof(struct ldc_trans_cookie)); + + vdsdbg(HS, "DESC ATTR max_ibm=%ld\n", max_inband_msglen); + + /* + * Set the maximum expected message length to + * accommodate in-band-descriptor messages with all + * their cookies. + */ + vio->desc_buf_len = max_inband_msglen; + + /* + * Reallocate before responding to the message since + * the next request in the handshake will use this size + * and a small msgbuf would make the ldc read fail. 
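+ * The packet being handled lives in the old msgbuf, hence the + * temporary copy across the reallocation below.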
+ */ + tmp = *pkt; + kfree(port->msgbuf); + port->msglen = max_inband_msglen; + port->msgbuf = kzalloc(port->msglen, GFP_ATOMIC); + if (!port->msgbuf) { + vdsmsg(err, "%s: kzalloc failed\n", vio->name); + return -ECONNRESET; + } + memcpy(port->msgbuf, &tmp, sizeof(tmp)); + pkt = port->msgbuf; + + } + + port->xfer_mode = pkt->xfer_mode; + + pkt->vdisk_block_size = port->vdisk_bsize; + + /* XXX OBP doesn't seem to honor max_xfer_size */ + pkt->max_xfer_size = port->max_xfer_size; + pkt->vdisk_size = port->vdisk_size; + pkt->vdisk_type = VD_DISK_TYPE_DISK; + pkt->vdisk_mtype = port->media_type; + pkt->operations = VDS_OPS; + pkt->tag.stype = VIO_SUBTYPE_ACK; + pkt->tag.sid = vio_send_sid(vio); + + vdsdbg(HS, "SEND ATTR dksz[%llu] blksz[%u] max_xfer[%llu] ops[%llx]\n", + pkt->vdisk_size, pkt->vdisk_block_size, + pkt->max_xfer_size, pkt->operations); + + return vio_ldc_send(&port->vio, pkt, sizeof(*pkt)); +} + +static struct vio_driver_ops vds_vio_ops = { + .send_attr = vds_send_attr, + .handle_attr = vds_handle_attr, + .handshake_complete = vds_handshake_complete, +}; + +static void vds_reset(struct vio_driver_state *vio); +static void vds_evt_reset(struct vio_driver_state *vio); + +static int vds_dring_done(struct vds_io *io) +{ + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct vio_dring_data *pkt = io->msgbuf; + struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING]; + struct vio_disk_desc *desc; + int rv; + int idx; + + desc = io->desc_buf; + desc->status = io->error; + desc->hdr.state = VIO_DESC_DONE; + + vdsdbg(DATA, "DRING DONE [%08llx:%08x:%08x:%02x:%08llx:%08llx]\n", + pkt->dring_ident, + pkt->start_idx, + pkt->end_idx, + pkt->state, + pkt->seq, + port->seq); + + vdsdbg(DATA, + "DRING DONE" + " [%02x:%02x:%08llx:%02x:%02x:%04d:%08llx:%08llx:%08x]\n", + desc->hdr.state, + desc->hdr.ack, + desc->req_id, + desc->operation, + desc->slice, + desc->status, + desc->offset, + desc->size, + desc->ncookies); + + idx = pkt->start_idx; + rv = ldc_put_dring_entry(vio->lp, io->desc_buf, dr->entry_size, + (idx * dr->entry_size), dr->cookies, + dr->ncookies); + if (rv != dr->entry_size) + goto reset; + + /* + * If we successfully responded to the request (ack or nack), + * then return the actual IO operation return value, otherwise + * reset the connection. 
+ */ + pkt->tag.stype = io->ack; + rv = vio_ldc_send(vio, pkt, sizeof(*pkt)); + if (rv > 0) { + rv = io->error; + vds_io_free(io); + vdsdbg(DATA, "DRING RET %d\n", rv); + return rv; + } + +reset: + vdsmsg(err, "Reset VDS LDC rv[%d]\n", rv); + vds_reset(vio); + vds_io_free(io); + + vdsdbg(DATA, "DRING RESET\n"); + return -ECONNRESET; +} + +static int vds_desc_done(struct vds_io *io) +{ + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct vio_disk_desc_inband *pkt = io->msgbuf; + struct vio_desc_data *hdr = &pkt->hdr; + int rv; + + pkt->payload.status = io->error; + hdr->tag.stype = io->ack; + + vdsdbg(DATA, "DESC DONE [%02x:%02x:%04x:%08x:%08llx:%08llx:%08llx]\n", + hdr->tag.type, + hdr->tag.stype, + hdr->tag.stype_env, + hdr->tag.sid, + hdr->desc_handle, + hdr->seq, + port->seq); + + vdsdbg(DATA, "DESC DONE [%08llx:%02x:%02x:%04d:%08llx:%08llx:%08x]\n", + pkt->payload.req_id, + pkt->payload.operation, + pkt->payload.slice, + pkt->payload.status, + pkt->payload.offset, + pkt->payload.size, + pkt->payload.ncookies); + + rv = vio_ldc_send(vio, pkt, io->msglen); + if (rv <= 0) { + vdsmsg(err, "Reset VDS LDC rv[%d]\n", rv); + vds_reset(vio); + rv = -ECONNRESET; + } else { + rv = io->error; + } + + vds_io_free(io); + return rv; +} + +static void vds_get_desc(struct vds_io *io) +{ + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct vio_disk_dring_payload *desc = NULL; + + switch (port->xfer_mode) { + case VIO_DRING_MODE: { + struct vio_disk_desc *d = io->desc_buf; + desc = (struct vio_disk_dring_payload *)&d->req_id; + + vdsdbg(DATA, "DRING desc[%08llx:%08x:%08llx:%08llx]\n", + desc->size, desc->ncookies, + desc->cookies[0].cookie_addr, + desc->cookies[0].cookie_size); + break; + } + case VIO_DESC_MODE: { + int i; + struct vio_disk_desc_inband *d = io->desc_buf; + + desc = &d->payload; + for (i = 0; i < desc->ncookies; i++) + vdsdbg(DATA, "DESC desc[%08llx:%04x:%08llx:%08llx]\n", + desc->size, desc->ncookies, + desc->cookies[i].cookie_addr, + desc->cookies[i].cookie_size); + break; + } + default: + break; + } + + io->desc = desc; + return; +} + +/* + * Bottom half handshake routine. + */ +static void vds_bh_hs(struct work_struct *work) +{ + struct vds_io *io = container_of(work, struct vds_io, vds_work); + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + int err = 0; + + vdsdbg(HS, "%s\n", port->path); + + BUG_ON(in_interrupt()); + + if (io->flags & VDS_IO_INIT) + err = vds_be_init(port); + + if (!err) + err = vio_control_pkt_engine(vio, port->msgbuf); + + if (err) + vdsmsg(err, "%s: handshake failed (%d)\n", port->path, err); + + vds_io_free(io); +} + +/* + * Bottom half IO routine. 
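+ * + * Runs in process context on the port's io workqueue: it decodes the + * descriptor, dispatches on the requested operation and completes the + * request through the mode-specific done path.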
+ */ +static void vds_bh_io(struct work_struct *work) +{ + struct vds_io *io = container_of(work, struct vds_io, vds_work); + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + int err; + + BUG_ON(in_interrupt()); + + vds_get_desc(io); + BUG_ON(!io->desc); + + io->ack = VIO_SUBTYPE_ACK; + io->error = 0; + + switch (io->desc->operation) { + case VD_OP_BREAD: + err = vd_op_rw(io); + break; + case VD_OP_BWRITE: + io->rw = WRITE; + err = vd_op_rw(io); + break; + case VD_OP_GET_VTOC: + err = vd_op_get_vtoc(io); + break; + case VD_OP_SET_VTOC: + err = vd_op_set_vtoc(io); + break; + case VD_OP_GET_DISKGEOM: + err = vd_op_get_geom(io); + break; + case VD_OP_SET_DISKGEOM: + err = vd_op_set_geom(io); + break; + case VD_OP_GET_EFI: + err = vd_op_get_efi(io); + break; + case VD_OP_SET_EFI: + err = vd_op_set_efi(io); + break; + case VD_OP_FLUSH: + err = vd_op_flush(vio); + break; + default: + err = -ENOTSUPP; + break; + } + + if (io->ack == VIO_SUBTYPE_ACK && err != 0 && io->error == 0) + io->error = err > 0 ? err : -err; + + if (port->xfer_mode == VIO_DRING_MODE) + (void) vds_dring_done(io); + else if (port->xfer_mode == VIO_DESC_MODE) + (void) vds_desc_done(io); + else + BUG(); +} + +static void vds_reset(struct vio_driver_state *vio) +{ + struct vds_port *port = to_vds_port(vio); + unsigned long flags; + int err; + + vdsdbg(HS, "%s\n", port->path); + + BUG_ON(in_interrupt()); + + vds_vio_lock(vio, flags); + vds_be_fini(port); + + vio_link_state_change(vio, LDC_EVENT_RESET); + vio->desc_buf_len = 0; + + port->flags = 0; + kfree(port->msgbuf); + port->msglen = LDC_PACKET_SIZE; + port->msgbuf = kzalloc(port->msglen, GFP_ATOMIC); + if (!port->msgbuf) { + vdsmsg(err, "%s: kzalloc failed\n", vio->name); + goto done; + } + + err = ldc_connect(vio->lp); + if (err) + vdsmsg(warn, "%s: Port %lu connect failed, err=%d\n", + vio->name, vio->vdev->channel_id, err); + +done: + vds_vio_unlock(vio, flags); +} + +static void vds_bh_reset(struct work_struct *work) +{ + struct vds_io *io = container_of(work, struct vds_io, vds_work); + struct vio_driver_state *vio = io->vio; + + vds_io_free(io); + vds_reset(vio); + ldc_enable_hv_intr(vio->lp); +} + +static int vds_dring_io(struct vio_driver_state *vio) +{ + struct vds_port *port = to_vds_port(vio); + struct vio_dring_data *pkt = port->msgbuf; + struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING]; + struct vio_disk_desc *desc; + struct vds_io *io; + int reset = 0; + int rv; + int idx; + + vdsdbg(DATA, "DRING [%08llx:%08x:%08x:%02x:%08llx:%08llx]\n", + pkt->dring_ident, + pkt->start_idx, + pkt->end_idx, + pkt->state, + pkt->seq, + port->seq); + + io = vds_io_alloc(vio, vds_bh_io); + if (!io) + return -ENOMEM; + + memcpy(io->msgbuf, port->msgbuf, port->msglen); + + if ((port->flags & VDS_PORT_SEQ) && (pkt->seq != port->seq + 1)) { + vdsmsg(err, + "Message out of sequence seq[0x%llx] vds_seq[0x%llx]\n", + pkt->seq, port->seq); + goto err; + } + port->seq = pkt->seq; + port->flags |= VDS_PORT_SEQ; + reset = 1; + + if (port->xfer_mode != VIO_DRING_MODE) { + vdsmsg(err, "Invalid xfer mode pkt[0x%x] port[0x%x]\n", + pkt->tag.stype_env, port->xfer_mode); + goto err; + } + + idx = pkt->start_idx; + if (idx != pkt->end_idx) { + vdsmsg(err, + "Invalid idx start[%d] end[%d]\n", idx, pkt->end_idx); + goto err; + } + + rv = ldc_get_dring_entry(vio->lp, io->desc_buf, dr->entry_size, + (idx * dr->entry_size), dr->cookies, + dr->ncookies); + if (rv != dr->entry_size) + goto err; + + desc = (struct vio_disk_desc *)io->desc_buf; + + 
vdsdbg(DATA, + "DRING [%02x:%02x:%08llx:%02x:%02x:%04d:%08llx:%08llx:%08x]\n", + desc->hdr.state, + desc->hdr.ack, + desc->req_id, + desc->operation, + desc->slice, + desc->status, + desc->offset, + desc->size, + desc->ncookies); + + /* + * Queue the request. + */ + if (desc->hdr.state == VIO_DESC_READY) { + vds_io_enq(io); + return 0; + } + +err: + if (reset) { + vdsmsg(err, "Reset VDS LDC\n"); + vds_io_free(io); + vds_evt_reset(vio); + rv = -ECONNRESET; + } else { + vdsmsg(err, "NACK request io=%p\n", io); + io->ack = VIO_SUBTYPE_NACK; + io->error = 0; + rv = vds_dring_done(io); + } + return rv; +} + +static int vds_desc_io(struct vio_driver_state *vio, int msglen) +{ + struct vds_port *port = to_vds_port(vio); + struct vio_disk_desc_inband *pkt = port->msgbuf; + struct vio_desc_data *hdr = &pkt->hdr; + struct vds_io *io; + int rv; + + vdsdbg(DATA, "DESC [%02x:%02x:%04x:%08x:%08llx:%08llx:%08llx]\n", + hdr->tag.type, + hdr->tag.stype, + hdr->tag.stype_env, + hdr->tag.sid, + hdr->desc_handle, + hdr->seq, + port->seq); + + vdsdbg(DATA, "DESC [%08llx:%02x:%02x:%04d:%08llx:%08llx:%08x]\n", + pkt->payload.req_id, + pkt->payload.operation, + pkt->payload.slice, + pkt->payload.status, + pkt->payload.offset, + pkt->payload.size, + pkt->payload.ncookies); + + io = vds_io_alloc(vio, vds_bh_io); + if (!io) + return -ENOMEM; + + memcpy(io->msgbuf, port->msgbuf, msglen); + + if ((port->flags & VDS_PORT_SEQ) && (hdr->seq != port->seq + 1)) { + vdsmsg(err, + "Message out of sequence seq[0x%llx] vds_seq[0x%llx]\n", + hdr->seq, port->seq); +#if 0 + /* XXX OBP seems to send out of sequence messages */ + goto nack; +#endif + } + port->seq = hdr->seq; + port->flags |= VDS_PORT_SEQ; + + if (port->xfer_mode != VIO_DESC_MODE) { + vdsmsg(err, "Invalid xfer mode pkt[0x%x] port[0x%x]\n", + hdr->tag.stype_env, port->xfer_mode); + goto nack; + } + + /* + * Queue the request. 
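+ * In descriptor mode the in-band message itself carries the + * descriptor, so the whole message is copied into desc_buf for the + * worker thread.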
+ */ + memcpy(io->desc_buf, port->msgbuf, msglen); + io->msglen = msglen; + vds_io_enq(io); + + return 0; + +nack: + io->ack = VIO_SUBTYPE_NACK; + io->error = 0; + rv = vds_desc_done(io); + return rv; +} + +static void vds_evt_reset(struct vio_driver_state *vio) +{ + struct vds_io *io; + + vdsdbg(HS, "\n"); + + BUG_ON(!in_interrupt()); + + io = vds_io_alloc(vio, vds_bh_reset); + if (!io) + return; + + ldc_disable_hv_intr(vio->lp); + io->flags |= VDS_IO_FINI; + + vds_io_enq(io); +} + +static void vds_evt_up(struct vio_driver_state *vio) +{ + BUG_ON(!in_interrupt()); + + vio_link_state_change(vio, LDC_EVENT_UP); + /* this is needed in dring mode */ + vio->dr_state &= ~VIO_DR_STATE_RXREQ; +} + +static int +vds_evt_ctl(struct vio_driver_state *vio) +{ + struct vds_io *io; + + BUG_ON(!in_interrupt()); + + io = vds_io_alloc(vio, vds_bh_hs); + if (!io) + return -ENOMEM; + + if (vio->hs_state == VIO_HS_INVALID) + io->flags |= VDS_IO_INIT; + + vds_io_enq(io); + + return 0; +} + +static void vds_evt_data(struct vio_driver_state *vio) +{ + int rv; + int msglen; + struct vio_msg_tag *tag; + struct vds_port *port = to_vds_port(vio); + + BUG_ON(!in_interrupt()); + + while (1) { + rv = ldc_read(vio->lp, port->msgbuf, port->msglen); + vdsdbg(DATA, "ldc_read(%d)=%d\n", port->msglen, rv); + if (rv < 0) { + if (rv == -ECONNRESET) + vds_evt_reset(vio); + break; + } + if (rv == 0) + break; + tag = port->msgbuf; + vdsdbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", + tag->type, + tag->stype, + tag->stype_env, + tag->sid); + msglen = rv; + rv = vio_validate_sid(vio, tag); + if (rv < 0) + break; + switch (tag->type) { + case VIO_TYPE_CTRL: + /* + * This is needed in dring mode. + */ + if (tag->stype == VIO_SUBTYPE_INFO && + tag->stype_env == VIO_DRING_REG) + vio->dr_state |= VIO_DR_STATE_RXREQ; + rv = vds_evt_ctl(vio); + break; + case VIO_TYPE_DATA: + switch (tag->stype) { + case VIO_SUBTYPE_INFO: + switch (tag->stype_env) { + case VIO_DRING_DATA: + rv = vds_dring_io(vio); + break; + case VIO_DESC_DATA: + rv = vds_desc_io(vio, msglen); + break; + default: + rv = -EINVAL; + break; + } + break; + default: + rv = vds_handle_unknown(port); + break; + } + break; + default: + rv = vds_handle_unknown(port); + break; + } + if (rv < 0) + break; + } +} + +static void vds_event(void *arg, int event) +{ + unsigned long flags; + struct vds_port *port = arg; + struct vio_driver_state *vio = &port->vio; + + vdsdbg(DATA, "event=%d cpu=%d\n", event, smp_processor_id()); + + vds_vio_lock(vio, flags); + + switch (event) { + case LDC_EVENT_RESET: + vds_evt_reset(vio); + break; + case LDC_EVENT_UP: + vds_evt_up(vio); + break; + case LDC_EVENT_DATA_READY: + vds_evt_data(vio); + break; + default: + vdsmsg(warn, "Unexpected LDC event %d\n", event); + break; + } + + vds_vio_unlock(vio, flags); +} + +static struct ldc_channel_config vds_ldc_cfg = { + .event = vds_event, + .mtu = 64, + .mode = LDC_MODE_UNRELIABLE, +}; + +static ssize_t vds_sysfs_path_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + int rv; + unsigned long flags; + struct vds_port *port = dev_get_drvdata(device); + struct vio_driver_state *vio = &port->vio; + + vds_vio_lock(vio, flags); + rv = scnprintf(buf, PAGE_SIZE, "%s\n", port->path); + vds_vio_unlock(vio, flags); + + return rv; +} + +static DEVICE_ATTR(path, S_IRUSR, vds_sysfs_path_show, NULL); + +static struct attribute *vds_sysfs_entries[] = { + &dev_attr_path.attr, + NULL +}; + +static struct attribute_group vds_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = 
vds_sysfs_entries, +}; + +static void print_version(void) +{ + printk_once(KERN_INFO "%s", version); +} + +static int vds_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) +{ + struct mdesc_handle *hp; + struct vds_port *port; + struct vio_driver_state *vio; + const char *path; + u64 node; + int err; + + print_version(); + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) { + vdsmsg(err, "Cannot allocate vds_port.\n"); + return -ENOMEM; + } + + port->msglen = LDC_PACKET_SIZE; + port->msgbuf = kzalloc(port->msglen, GFP_KERNEL); + if (!port->msgbuf) { + err = -ENOMEM; + goto free_port; + } + + vio = &port->vio; + + err = vio_driver_init(vio, vdev, VDEV_DISK_SERVER, + vds_versions, ARRAY_SIZE(vds_versions), + &vds_vio_ops, (char *)dev_name(&vdev->dev)); + if (err) + goto free_msgbuf; + + vio->debug = vds_dbg_vio; + vds_ldc_cfg.debug = vds_dbg_ldc; + + err = vio_ldc_alloc(vio, &vds_ldc_cfg, port); + if (err) + goto free_msgbuf; + + hp = mdesc_grab(); + + node = vio_vdev_node(hp, vdev); + if (node == MDESC_NODE_NULL) { + err = -ENXIO; + mdesc_release(hp); + goto free_ldc; + } + + path = mdesc_get_property(hp, node, "vds-block-device", NULL); + if (!path) { + err = -ENXIO; + mdesc_release(hp); + goto free_ldc; + } + port->path = kstrdup(path, GFP_KERNEL); + mdesc_release(hp); + vdsdbg(INIT, "path=%s\n", path); + port->vtoc = kzalloc(roundup(sizeof(*port->vtoc), 8), GFP_KERNEL); + port->geom = kzalloc(roundup(sizeof(*port->geom), 8), GFP_KERNEL); + port->part = kzalloc(sizeof(*port->part) * VDS_MAXPART, GFP_KERNEL); + + /* + * The io and reset work queues are separate because the + * io work queue is flushed during reset which would hang + * if reset itself was scheduled on the io queue. + */ + port->ioq = alloc_workqueue("vds_io", WQ_UNBOUND, vds_wq); + port->rtq = alloc_ordered_workqueue("vds_reset", 0); + if (!port->ioq || !port->rtq) { + err = -ENXIO; + goto free_path; + } + + mutex_init(&port->label_lock); + + dev_set_drvdata(&vdev->dev, port); + + err = sysfs_create_group(&vdev->dev.kobj, &vds_attribute_group); + if (err) + goto free_path; + + vio_port_up(vio); + + return 0; + +free_path: + kfree(port->path); + kfree(port->vtoc); + kfree(port->geom); + kfree(port->part); + +free_ldc: + vio_ldc_free(vio); + +free_msgbuf: + kfree(port->msgbuf); + +free_port: + kfree(port); + + return err; +} + +static int vds_port_remove(struct vio_dev *vdev) +{ + struct vds_port *port = dev_get_drvdata(&vdev->dev); + struct vio_driver_state *vio = &port->vio; + + if (!port) + return 0; + + del_timer_sync(&vio->timer); + ldc_disconnect(vio->lp); /* XXX vds_port_down() */ + vio_ldc_free(vio); + sysfs_remove_group(&vdev->dev.kobj, &vds_attribute_group); + dev_set_drvdata(&vdev->dev, NULL); + + mutex_destroy(&port->label_lock); + kfree(port->path); + kfree(port->msgbuf); + kfree(port->vtoc); + kfree(port->geom); + kfree(port->part); + kfree(port); + + return 0; +} + +static const struct vio_device_id vds_port_match[] = { + { + .type = "vds-port", + }, + {}, +}; + +static struct vio_driver vds_port_driver = { + .id_table = vds_port_match, + .probe = vds_port_probe, + .remove = vds_port_remove, + .name = "vds_port", +}; + +static int __init vds_init(void) +{ + int rv; + + rv = vds_io_init(); + if (!rv) { + rv = vio_register_driver(&vds_port_driver); + if (rv < 0) + vds_io_fini(); + } + + return rv; +} + +static void __exit vds_exit(void) +{ + vio_unregister_driver(&vds_port_driver); + vds_io_fini(); +} + +module_init(vds_init); +module_exit(vds_exit); diff --git a/drivers/block/vds/vds_reg.c 
b/drivers/block/vds/vds_reg.c new file mode 100644 index 000000000000..b790f99e6f8a --- /dev/null +++ b/drivers/block/vds/vds_reg.c @@ -0,0 +1,86 @@ +/* + * vds_reg.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" + +static int vds_reg_init(struct vds_port *port) +{ + struct file *file; + + file = filp_open(port->path, O_RDWR | O_EXCL | O_LARGEFILE, 0); + if (IS_ERR(file)) + return (int)PTR_ERR(file); + + port->vdisk_bsize = 512; + port->vdisk_size = i_size_read(file_inode(file)) / + port->vdisk_bsize; + port->max_xfer_size = 1024; + + port->be_data = file; + + return 0; +} + +static void vds_reg_fini(struct vds_port *port) +{ + struct file *file = port->be_data; + + if (file) + filp_close(file, NULL); +} + +static int vds_reg_rw(struct vds_io *io) +{ + loff_t off; + ssize_t iosz; + void *addr; + struct vio_driver_state *vio = io->vio; + struct vds_port *port = to_vds_port(vio); + struct file *file = port->be_data; + + vdsdbg(FIO, "(0x%p, %lld, %ld, %d)\n", io->pages, io->size, + io->offset, io->rw); + + if (file == NULL) { + vdsmsg(err, "NULL file pointer for IO\n"); + return -EIO; + } + + addr = page_address(io->pages); + off = to_bytes(io->offset); + + if (io->rw & WRITE) + iosz = file->f_op->write(file, addr, io->size, &off); + else + iosz = file->f_op->read(file, addr, io->size, &off); + + if (iosz != io->size) { + vdsmsg(err, "file IO failed: iosz=%ld\n", iosz); + return -EIO; + } + + return 0; +} + +static int vds_reg_flush(struct vds_port *port) +{ + struct file *file = port->be_data; + + return vfs_fsync(file, 0); +} + +struct vds_be_ops vds_reg_ops = { + vds_reg_init, + vds_reg_fini, + vds_reg_rw, + vds_reg_flush, +}; + +struct vds_be_ops *vds_reg_get_ops() +{ + return &vds_reg_ops; +} diff --git a/drivers/block/vds/vds_vtoc.c b/drivers/block/vds/vds_vtoc.c new file mode 100644 index 000000000000..06b692a5417c --- /dev/null +++ b/drivers/block/vds/vds_vtoc.c @@ -0,0 +1,427 @@ +/* + * vds_vtoc.c: LDOM Virtual Disk Server. + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include "vds.h" +#include "vds_io.h" +#include "vds_vtoc.h" + +/* + * By Solaris convention, slice/partition 2 represents the entire disk; + * unfortunately, this convention does not appear to be codified. + */ +#define VDS_ENTIRE_DISK_SLICE 2 + +/* Number of backup labels */ +#define VDS_DSKIMG_NUM_BACKUP 5 + +static unsigned short vds_lbl2cksum(struct dk_label *label) +{ + int count; + unsigned short sum, *sp; + + count = (sizeof(struct dk_label)) / (sizeof(short)) - 1; + sp = (unsigned short *)label; + sum = 0; + while (count--) + sum ^= *sp++; + + return sum; +} + +static void +vds_vtoc_update_part(struct vds_port *port, struct dk_label *label) +{ + int i; + + vds_label_clear_part(port); + + for (i = 0; i < port->npart; i++) { + port->part[i].start = label->dkl_map[i].dkl_cylno * + label->dkl_nhead * label->dkl_nsect; + port->part[i].size = label->dkl_map[i].dkl_nblk; + } +} + +/* + * Function: + * vd_get_readable_size + * + * Description: + * Convert a given size in bytes to a human readable format in + * kilobytes, megabytes, gigabytes or terabytes. + * + * Parameters: + * full_size - the size to convert in bytes. + * size - the converted size. + * unit - the unit of the converted size: 'K' (kilobyte), + * 'M' (Megabyte), 'G' (Gigabyte), 'T' (Terabyte). 
+ * + * Return Code: + * none + */ +static void vd_get_readable_size(size_t full_size, size_t *size, char *unit) +{ + if (full_size < (1ULL << 20)) { + *size = full_size >> 10; + *unit = 'K'; /* Kilobyte */ + } else if (full_size < (1ULL << 30)) { + *size = full_size >> 20; + *unit = 'M'; /* Megabyte */ + } else if (full_size < (1ULL << 40)) { + *size = full_size >> 30; + *unit = 'G'; /* Gigabyte */ + } else { + *size = full_size >> 40; + *unit = 'T'; /* Terabyte */ + } +} + +/* + * Set the default label for a given disk size. This is used when the disk + * does not have a valid VTOC so that the user can get a valid default + * configuration. The default label has all slice sizes set to 0 (except + * slice 2 which is the entire disk) to force the user to write a valid + * label onto the disk image. + */ +static void vds_vtoc_set_default(struct vds_port *port, struct dk_label *label) +{ + char unit; + size_t size; + size_t bsize = port->vdisk_bsize; + size_t disk_size = port->vdisk_size * bsize; + struct vio_driver_state *vio = &port->vio; + + memset(label, 0, sizeof(struct dk_label)); + + /* + * Ideally we would like the cylinder size (nsect * nhead) to be the + * same whatever the disk size is. That way the VTOC label could be + * easily updated in case the disk size is increased (keeping the + * same cylinder size preserves the existing partitioning + * when updating the VTOC label). But it is not possible to have + * a fixed cylinder size and to cover all disk sizes. + * + * So we define different cylinder sizes depending on the disk size. + * The cylinder size is chosen so that we don't have too few cylinders + * for a small disk image, or so many on a big disk image that you + * waste space for backup superblocks or cylinder group structures. + * Also we must have a reasonable number of cylinders and sectors so + * that newfs can run using default values. + * + * +-----------+--------+---------+--------+ + * | disk_size | < 2MB | 2MB-8GB | >= 8GB | + * +-----------+--------+---------+--------+ + * | nhead | 1 | 1 | 96 | + * | nsect | 200 | 600 | 768 | + * +-----------+--------+---------+--------+ + * + * Other parameters are computed from these values: + * + * pcyl = disk_size / (nhead * nsect * 512) + * acyl = (pcyl > 2)? 2 : 0 + * ncyl = pcyl - acyl + * + * The maximum number of cylinders is 65535, so this allows defining a + * geometry for disk sizes up to 65535 * 96 * 768 * 512 = 2.24 TB + * which is more than enough to cover the maximum size allowed by the + * extended VTOC format (2TB).
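+ * + * For example, a 16 GB disk image falls in the >= 8GB column, so + * pcyl = 16 GB / (96 * 768 * 512) = 455, acyl = 2 and ncyl = 453.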
+ */ + + if (disk_size >= 8 * ONE_GIGABYTE) { + + label->dkl_nhead = 96; + label->dkl_nsect = 768; + + } else if (disk_size >= 2 * ONE_MEGABYTE) { + + label->dkl_nhead = 1; + label->dkl_nsect = 600; + + } else { + + label->dkl_nhead = 1; + label->dkl_nsect = 200; + } + + label->dkl_pcyl = disk_size / + (label->dkl_nsect * label->dkl_nhead * bsize); + + if (label->dkl_pcyl == 0) + label->dkl_pcyl = 1; + + label->dkl_acyl = 0; + + if (label->dkl_pcyl > 2) + label->dkl_acyl = 2; + + label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; + label->dkl_write_reinstruct = 0; + label->dkl_read_reinstruct = 0; + label->dkl_rpm = 7200; + label->dkl_apc = 0; + label->dkl_intrlv = 0; + + vdsdbg(IOC, "requested disk size: %ld bytes\n", disk_size); + vdsdbg(IOC, "setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, + label->dkl_nhead, label->dkl_nsect); + vdsdbg(IOC, "provided disk size: %lld bytes\n", (uint64_t) + (label->dkl_pcyl * label->dkl_nhead * + label->dkl_nsect * bsize)); + + vd_get_readable_size(disk_size, &size, &unit); + + /* + * We must have a correct label name, otherwise format(1M) will + * not recognize the disk as labeled. + */ + (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, + "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", + size, unit, + label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, + label->dkl_nsect); + + /* default VTOC */ + label->dkl_vtoc.v_version = V_EXTVERSION; + label->dkl_vtoc.v_nparts = V_NUMPAR; + label->dkl_vtoc.v_sanity = VTOC_SANE; + label->dkl_vtoc.v_part[VDS_ENTIRE_DISK_SLICE].p_tag = V_BACKUP; + label->dkl_map[VDS_ENTIRE_DISK_SLICE].dkl_cylno = 0; + label->dkl_map[VDS_ENTIRE_DISK_SLICE].dkl_nblk = label->dkl_ncyl * + label->dkl_nhead * label->dkl_nsect; + label->dkl_magic = DKL_MAGIC; + label->dkl_cksum = vds_lbl2cksum(label); +} + +/* + * Get the disk label. If the type is unknown, initialize a default label.
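+ * + * Returns 0 when a valid VTOC label was read, -EINVAL when a default + * label was installed instead, and another errno (with *lp set to + * NULL) on a read or checksum failure.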
+ */ +static int vds_vtoc_get_label(struct vds_port *port, struct dk_label **lp) +{ + int rv; + struct dk_label *label = (struct dk_label *)port->label; + struct vio_driver_state *vio = &port->vio; + + rv = vds_read(port, label, 0, DK_LABEL_SIZE); + if (rv) + return rv; + + if (label->dkl_magic != DKL_MAGIC) { + vdsdbg(IOC, "bad VTOC label magic %04x\n", label->dkl_magic); + rv = -EIO; + if (port->label_type == VDS_LABEL_NONE) { + vds_vtoc_set_default(port, label); + rv = -EINVAL; + } + } else if (label->dkl_cksum != vds_lbl2cksum(label)) { + vdsmsg(err, "bad VTOC label checksum\n"); + rv = -EIO; + } else { + vdsdbg(IOC, "VTOC magic=%04x\n", label->dkl_magic); + vdsdbg(IOC, "ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, + label->dkl_nhead, label->dkl_nsect); + rv = 0; + } + + if (rv != 0 && rv != -EINVAL) + label = NULL; + + *lp = label; + + return rv; +} + +static void +vds_vtoc_l2g(struct dk_label *label, struct vio_disk_geom *geom) +{ + geom->num_cyl = label->dkl_ncyl; + geom->alt_cyl = label->dkl_acyl; + geom->num_hd = label->dkl_nhead; + geom->num_sec = label->dkl_nsect; + geom->ifact = label->dkl_intrlv; + geom->apc = label->dkl_apc; + geom->rpm = label->dkl_rpm; + geom->phy_cyl = label->dkl_pcyl; + geom->rd_skip = label->dkl_read_reinstruct; + geom->wr_skip = label->dkl_write_reinstruct; +} + +static void +vds_vtoc_g2l(struct vio_disk_geom *geom, struct dk_label *label) +{ + label->dkl_ncyl = geom->num_cyl; + label->dkl_acyl = geom->alt_cyl; + label->dkl_nhead = geom->num_hd; + label->dkl_nsect = geom->num_sec; + label->dkl_intrlv = geom->ifact; + label->dkl_apc = geom->apc; + label->dkl_rpm = geom->rpm; + label->dkl_pcyl = geom->phy_cyl; + label->dkl_read_reinstruct = geom->rd_skip; + label->dkl_write_reinstruct = geom->wr_skip; + label->dkl_cksum = vds_lbl2cksum(label); +} + +/* + * Get the disk VTOC. If there is no valid label, + * set a default VTOC. + */ +/*ARGSUSED*/ +int vds_vtoc_get(struct vds_port *port) +{ + int i, rv; + struct dk_label *label; + struct vio_disk_vtoc *vtoc = port->vtoc; + + rv = vds_vtoc_get_label(port, &label); + if (!label) + return rv; + + memcpy(vtoc->volume_name, label->dkl_vtoc.v_volume, + VIO_DISK_VNAME_LEN); + memcpy(vtoc->ascii_label, label->dkl_asciilabel, LEN_DKL_ASCII); + vtoc->sector_size = 512; + vtoc->num_partitions = label->dkl_vtoc.v_nparts; + + for (i = 0; i < vtoc->num_partitions; i++) { + vtoc->partitions[i].id = label->dkl_vtoc.v_part[i].p_tag; + vtoc->partitions[i].perm_flags = + label->dkl_vtoc.v_part[i].p_flag; + vtoc->partitions[i].start_block = + label->dkl_map[i].dkl_cylno * + label->dkl_nhead * label->dkl_nsect; + vtoc->partitions[i].num_blocks = label->dkl_map[i].dkl_nblk; + } + + vds_vtoc_l2g(label, port->geom); + + /* + * Always update the cached copy, in case this is + * a shared disk and the label has been updated. + */ + if (!rv) { + port->label_type = VDS_LABEL_VTOC; + port->npart = label->dkl_vtoc.v_nparts; + vds_vtoc_update_part(port, label); + } + + return rv; +} + +static int +vds_vtoc_set_backup(struct vds_port *port, struct dk_label *label, bool clear) +{ + int rv; + sector_t blk, sec, cyl, head, cnt, nsect; + struct vio_driver_state *vio = &port->vio; + + /* + * Backup labels are on the last alternate cylinder's + * first five odd sectors.
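+ * For example, with the default geometry for a 16 GB image (ncyl=453, + * acyl=2, nhead=96, nsect=768, apc=0), the base block is + * 454 * 96 * 768 + 95 * 768 = 33545472, and the five copies go to + * odd sectors 1, 3, 5, 7 and 9 past it.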
+ */
+	if (label->dkl_acyl == 0) {
+		vdsdbg(IOC, "no alt cylinder, cannot store backup labels\n");
+		return 0;
+	}
+
+	cyl = label->dkl_ncyl + label->dkl_acyl - 1;
+	head = label->dkl_nhead - 1;
+	nsect = label->dkl_nsect;
+
+	blk = (cyl * ((label->dkl_nhead * nsect) - label->dkl_apc)) +
+	    (head * nsect);
+
+	if (clear == true)
+		memset(label, 0, sizeof(*label));
+
+	/*
+	 * Write the backup labels. Make sure we don't try to write past
+	 * the last cylinder.
+	 */
+	sec = 1;
+
+	for (cnt = 0; cnt < VDS_DSKIMG_NUM_BACKUP; cnt++) {
+
+		if (sec >= nsect) {
+			vdsdbg(IOC, "not enough sectors for backup labels\n");
+			return 0;
+		}
+
+		rv = vds_write(port, label, blk + sec, DK_LABEL_SIZE);
+		if (rv) {
+			vdsdbg(IOC, "error writing label at block %lu rv=%d\n",
+			    blk + sec, rv);
+			return rv;
+		}
+
+		vdsdbg(IOC, "wrote backup label at block %lu\n", blk + sec);
+		vdsdbg(IOC, "ncyl=%d nhead=%d nsec=%d\n",
+		    label->dkl_pcyl, label->dkl_nhead, label->dkl_nsect);
+
+		sec += 2;
+	}
+
+	return 0;
+}
+
+int vds_vtoc_set(struct vds_port *port, struct vio_disk_vtoc *vtoc)
+{
+	int i, rv;
+	struct dk_label *label;
+
+	rv = vds_vtoc_get_label(port, &label);
+	if (!label)
+		return rv;
+
+	vds_vtoc_g2l(port->geom, label);
+
+	memcpy(label->dkl_vtoc.v_volume, vtoc->volume_name,
+	    VIO_DISK_VNAME_LEN);
+	memcpy(label->dkl_asciilabel, vtoc->ascii_label, LEN_DKL_ASCII);
+	label->dkl_vtoc.v_nparts = vtoc->num_partitions;
+
+	for (i = 0; i < vtoc->num_partitions; i++) {
+		label->dkl_vtoc.v_part[i].p_tag = vtoc->partitions[i].id;
+		label->dkl_vtoc.v_part[i].p_flag =
+		    vtoc->partitions[i].perm_flags;
+		label->dkl_map[i].dkl_cylno = vtoc->partitions[i].start_block /
+		    (label->dkl_nhead * label->dkl_nsect);
+		label->dkl_map[i].dkl_nblk = vtoc->partitions[i].num_blocks;
+	}
+
+	label->dkl_cksum = vds_lbl2cksum(label);
+
+	rv = vds_write(port, label, 0, DK_LABEL_SIZE);
+
+	if (!rv) {
+		port->label_type = VDS_LABEL_VTOC;
+		port->npart = label->dkl_vtoc.v_nparts;
+		vds_vtoc_update_part(port, label);
+	}
+
+	/*
+	 * Errors from the backup label writes are not returned;
+	 * the return value reflects the primary label write.
+	 */
+	(void) vds_vtoc_set_backup(port, label, false);
+
+	return rv;
+}
+
+int vds_vtoc_clear(struct vds_port *port)
+{
+	int rv;
+	struct dk_label *label;
+
+	rv = vds_vtoc_get_label(port, &label);
+	if (!label)
+		return rv;
+
+	rv = vds_clear(port, 0, DK_LABEL_SIZE);
+	if (!rv) {
+		vds_label_reset(port);
+		(void) vds_vtoc_set_backup(port, label, true);
+	}
+
+	return rv;
+}
diff --git a/drivers/block/vds/vds_vtoc.h b/drivers/block/vds/vds_vtoc.h
new file mode 100644
index 000000000000..12f1e90f8ec5
--- /dev/null
+++ b/drivers/block/vds/vds_vtoc.h
@@ -0,0 +1,120 @@
+/*
+ * vds_vtoc.h: LDOM Virtual Disk Server.
+ *
+ * Copyright (C) 2014 Oracle. All rights reserved.
+ *
+ * Format of a Sun disk label.
+ * Resides in cylinder 0, head 0, sector 0.
+ *
+ * From Solaris dklabel.h
+ *
+ */
+
+#define	NDKMAP		8	/* # of logical partitions */
+#define	DKL_MAGIC	0xDABE	/* magic number */
+#define	LEN_DKL_ASCII	128	/* length of dkl_asciilabel */
+#define	LEN_DKL_VVOL	8	/* length of v_volume */
+
+
+/*
+ * partition headers: section 1
+ * Fixed size for on-disk dk_label
+ */
+struct dk_map32 {
+	uint32_t	dkl_cylno;	/* starting cylinder */
+	uint32_t	dkl_nblk;	/* number of blocks; if == 0, */
+					/* partition is undefined */
+};
+
+/*
+ * partition headers: section 2,
+ * brought over from AT&T SVr4 vtoc structure.
+ */ +struct dk_map2 { + uint16_t p_tag; /* ID tag of partition */ + uint16_t p_flag; /* permission flag */ +}; + +/* + * VTOC inclusions from AT&T SVr4 + * Fixed sized types for on-disk VTOC + */ +struct dk_vtoc { + uint32_t v_version; /* layout version */ + char v_volume[LEN_DKL_VVOL]; /* volume name */ + uint16_t v_nparts; /* number of partitions */ + struct dk_map2 v_part[NDKMAP]; /* partition hdrs, sec 2 */ + uint32_t v_bootinfo[3]; /* info needed by mboot */ + uint32_t v_sanity; /* to verify vtoc sanity */ + uint32_t v_reserved[10]; /* free space */ + int32_t v_timestamp[NDKMAP]; /* partition timestamp */ +}; + +/* + * define the amount of disk label padding needed to make + * the entire structure occupy 512 bytes. + */ +#define LEN_DKL_PAD (DK_LABEL_SIZE \ + - ((LEN_DKL_ASCII) + \ + (sizeof(struct dk_vtoc)) + \ + (sizeof(struct dk_map32) * NDKMAP) + \ + (14 * (sizeof(uint16_t))) + \ + (2 * (sizeof(uint16_t))))) + +struct dk_label { + char dkl_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ + struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */ + uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */ + uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */ + char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */ + uint16_t dkl_rpm; /* rotations per minute */ + uint16_t dkl_pcyl; /* # physical cylinders */ + uint16_t dkl_apc; /* alternates per cylinder */ + uint16_t dkl_obs1; /* obsolete */ + uint16_t dkl_obs2; /* obsolete */ + uint16_t dkl_intrlv; /* interleave factor */ + uint16_t dkl_ncyl; /* # of data cylinders */ + uint16_t dkl_acyl; /* # of alternate cylinders */ + uint16_t dkl_nhead; /* # of heads in this partition */ + uint16_t dkl_nsect; /* # of sectors per track */ + uint16_t dkl_obs3; /* obsolete */ + uint16_t dkl_obs4; /* obsolete */ + struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */ + uint16_t dkl_magic; /* identifies this label format */ + uint16_t dkl_cksum; /* xor checksum of sector */ +}; + +#define V_NUMPAR NDKMAP /* The number of partitions */ + /* (from dkio.h) */ + +#define VTOC_SANE 0x600DDEEE /* Indicates a sane VTOC */ +#define V_VERSION 0x01 /* layout version number */ +#define V_EXTVERSION V_VERSION /* extvtoc layout version number */ + +/* + * Partition identification tags + */ +#define V_UNASSIGNED 0x00 /* unassigned partition */ +#define V_BOOT 0x01 /* Boot partition */ +#define V_ROOT 0x02 /* Root filesystem */ +#define V_SWAP 0x03 /* Swap filesystem */ +#define V_USR 0x04 /* Usr filesystem */ +#define V_BACKUP 0x05 /* full disk */ +#define V_STAND 0x06 /* Stand partition */ +#define V_VAR 0x07 /* Var partition */ +#define V_HOME 0x08 /* Home partition */ +#define V_ALTSCTR 0x09 /* Alternate sector partition */ +#define V_CACHE 0x0a /* Obsolete (was for cachefs) */ + +/* The following partition identification tags apply to EFI/GPT labels only */ +#define V_RESERVED 0x0b /* SMI reserved data */ +#define V_SYSTEM 0x0c /* EFI/GPT system partition */ +#define V_BIOS_BOOT 0x18 /* BIOS Boot partition */ + +#define V_UNKNOWN 0xff /* Unknown partition */ + +/* + * Partition permission flags + */ +#define V_UNMNT 0x01 /* Unmountable partition */ +#define V_RONLY 0x10 /* Read only */ diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index a4af8221751e..ad999ac07521 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -609,6 +609,20 @@ config TILE_SROM device appear much like a simple EEPROM, and knows how to partition a single ROM for multiple purposes. 
+config VLDC + tristate "Logical Domains Virtual Channel" + depends on SUN_LDOMS + default m + help + Support for virtual channel under Logical Domains. + +config VLDS + tristate "Logical Domains Domain Services" + depends on SUN_LDOMS + default m + help + Support for domain services under Logical Domains. + source "drivers/char/xillybus/Kconfig" endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index d06cde26031b..f752fe274605 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -61,4 +61,8 @@ obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o obj-$(CONFIG_TILE_SROM) += tile-srom.o + +obj-$(CONFIG_VLDC) += vldc.o +obj-$(CONFIG_VLDS) += vlds.o + obj-$(CONFIG_XILLYBUS) += xillybus/ diff --git a/drivers/char/vldc.c b/drivers/char/vldc.c new file mode 100644 index 000000000000..ef8d266776ef --- /dev/null +++ b/drivers/char/vldc.c @@ -0,0 +1,1426 @@ +/* + * vldc.c: Sun4v Virtual LDC (Logical Domain Channel) Driver + * + * Copyright (C) 2014 Oracle. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define VLDC_DEBUG 1 /* force VLDC_DEBUG on for development */ + +#ifdef VLDC_DEBUG +static bool vldcdbg; +module_param(vldcdbg, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(vldcdbg, "Boolean to enable debug messages (0 == off, 1 == on)"); + +#define dprintk(fmt, args...) do {\ +if (vldcdbg)\ + printk(KERN_ERR "%s: " fmt, __func__, ##args);\ +} while (0) + +#else +#define dprintk(fmt, args...) +#endif /* VLDC_DEBUG */ + +#define DRV_NAME "vldc" +#define DRV_VERSION "1.0" +#define VLDC_DEVICE_NAME DRV_NAME + +#define VLDC_MINOR_BASE 0 +#define VLDC_MAX_DEVS 64 /* Arbitrary # - hopefully enough */ + +#define VLDC_DEFAULT_MTU 0x1000 /* default mtu size 4K */ +#define VLDC_MAX_MTU (256 * 1024) /* 256K */ +#define VLDC_DEFAULT_MODE LDC_MODE_RAW +#define VLDC_MAX_COOKIE (256 * 1024) /* 256K */ + +/* Time (in ms) to sleep waiting for write space to become available */ +#define VLDC_WRITE_BLOCK_SLEEP_DELAY 1 + +/* Timeout (in ms) to sleep waiting for LDC connection to complete */ +#define VLDC_CONNECTION_TIMEOUT 10000 + +static char driver_version[] = DRV_NAME ".c:v" DRV_VERSION "\n"; + +/* Global driver data struct for data common to all devices */ +struct vldc_driver_data { + struct list_head vldc_dev_list; /* list of all vldc devices */ + int num_vldc_dev_list; + struct class *chrdev_class; + dev_t devt; +}; +struct vldc_driver_data vldc_data; +static DEFINE_MUTEX(vldc_data_mutex); /* protect vldc_data */ + +/* + * VLDC device struct. Each vldc device which is probed + * will have one of these structs associated with it. + * Integer type fields which could possibly be accessed by more + * than 1 thread simultaneously are declared as type atomic_t + * to assure atomic access. 
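+ *
+ * A sketch of the single-open protocol built on is_released below
+ * (1 == closed/released, 0 == open), as implemented in vldc_fops_open():
+ *
+ *	if (!atomic_dec_and_test(&vldc->is_released)) {
+ *		atomic_inc(&vldc->is_released);	(undo: device busy)
+ *		return -EBUSY;
+ *	}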
+ */ +struct vldc_dev { + /* link into the global driver data dev list */ + struct list_head list; + + struct mutex vldc_mutex; + struct cdev cdev; + char *tx_buf; + char *rx_buf; + dev_t devt; + char *name; + struct device *device; + struct vio_dev *vdev; + struct ldc_channel *lp; + atomic_t mtu; + atomic_t mode; + + /* each device gets its own read cookie buf */ + void *cookie_read_buf; + + /* each device gets its own write cookie buf */ + void *cookie_write_buf; + + /* waitqueue for poll() or blocking read() operations */ + wait_queue_head_t waitqueue; + + /* atomic var to indicate if the device is released - i.e. not open */ + atomic_t is_released; + + /* atomic var to indicate if reset has been asserted on the device */ + atomic_t is_reset_asserted; +}; + +static bool vldc_will_write_block(struct vldc_dev *vldc, size_t count) +{ + if (atomic_read(&vldc->is_released) || + atomic_read(&vldc->is_reset_asserted)) { + /* device was released or reset, exit */ + return false; + } + + return !ldc_tx_space_available(vldc->lp, count); +} + +static int vldc_ldc_send(struct vldc_dev *vldc, void *data, int len) +{ + int err, limit = 1000; + + err = -EINVAL; + while (limit-- > 0) { + err = ldc_write(vldc->lp, data, len); + if (!err || (err != -EAGAIN)) + break; + udelay(1); + } + + return err; +} + +static ssize_t vldc_fops_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *off) +{ + struct vldc_dev *vldc; + int rv; + char *ubufp; + int nbytes_written; + int nbytes_left; + size_t size; + + dprintk("entered.\n"); + + /* validate args */ + if (filp == NULL || ubuf == NULL) + return -EINVAL; + + nbytes_written = 0; /* number of bytes written */ + + vldc = filp->private_data; + rv = 0; + + /* + * If the device has been released/closed + * or has been reset, exit with error. + */ + if (atomic_read(&vldc->is_released)) { + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + if (vldc_will_write_block(vldc, count) && + (filp->f_flags & O_NONBLOCK)) { + rv = -EAGAIN; + goto done; + } + + /* + * Loop here waiting for write space to become available. + * NOTE: we can't wait on an event here because there is no event + * to indicate that write space has become available. + */ + while (vldc_will_write_block(vldc, count)) { + msleep_interruptible(VLDC_WRITE_BLOCK_SLEEP_DELAY); + if (signal_pending(current)) { + /* task caught a signal during the sleep - abort. */ + rv = -EINTR; + goto done; + } + } + + /* + * Check again if the device has been released/closed + * or has been reset while we were waiting. 
+ */ + if (atomic_read(&vldc->is_released)) { + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + nbytes_left = count; /* number of bytes left to write */ + ubufp = (char *)ubuf; + + while (nbytes_left > 0) { + + /* NOTE: RAW mode can only write max size of LDC_PACKET_SIZE */ + if (atomic_read(&vldc->mode) == LDC_MODE_RAW) + size = min_t(int, LDC_PACKET_SIZE, nbytes_left); + else + size = min_t(int, atomic_read(&vldc->mtu), nbytes_left); + + if (copy_from_user(vldc->tx_buf, ubufp, size) != 0) { + rv = -EFAULT; + goto done; + } + + rv = vldc_ldc_send(vldc, vldc->tx_buf, size); + + dprintk("(%s) ldc_write() returns %d\n", vldc->name, rv); + + if (unlikely(rv < 0)) + break; + + if (unlikely(rv == 0)) + break; + + ubufp += rv; + nbytes_written += rv; + nbytes_left -= rv; + } + + /* Return any data written (even if we got a subsequent error) */ + if (nbytes_written > 0) + rv = nbytes_written; + +done: + + dprintk("(%s) num bytes written=%d, return value=%d\n", + vldc->name, nbytes_written, rv); + + return (ssize_t)rv; +} + +static bool vldc_will_read_block(struct vldc_dev *vldc) +{ + + if (atomic_read(&vldc->is_released) || + atomic_read(&vldc->is_reset_asserted)) { + /* device was released or reset, exit */ + return false; + } + + return !ldc_rx_data_available(vldc->lp); +} + +static ssize_t vldc_fops_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct vldc_dev *vldc; + int rv; + char *ubufp; + int nbytes_read; + int nbytes_left; + size_t size; + + dprintk("entered.\n"); + + /* validate args */ + if (filp == NULL || ubuf == NULL) + return -EINVAL; + + nbytes_read = 0; /* number of bytes read */ + + vldc = filp->private_data; + rv = 0; + + /* Per spec if reading 0 bytes, just return 0. */ + if (count == 0) { + rv = 0; + goto done; + } + + /* + * If the device has been released/closed or + * has been reset, exit with error. 
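+	 *
+	 * Illustrative consumer of these semantics (device path per
+	 * vldc_devnode(), all other names hypothetical):
+	 *
+	 *	fd = open("/dev/vldc/<port>", O_RDWR | O_NONBLOCK);
+	 *	n = read(fd, buf, len);
+	 *	if (n < 0 && errno == EAGAIN)
+	 *		... no data yet: poll(2) and retry ...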
+ */ + if (atomic_read(&vldc->is_released)) { + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + if (vldc_will_read_block(vldc) && (filp->f_flags & O_NONBLOCK)) { + rv = -EAGAIN; + goto done; + } + + /* + * NOTE: this will only wait if the vldc_will_read_block + * initially returns true + */ + rv = wait_event_interruptible(vldc->waitqueue, + !vldc_will_read_block(vldc)); + if (rv < 0) + goto done; + + /* + * Check again if the device has been released/closed + * or has been reset while we were waiting + */ + if (atomic_read(&vldc->is_released)) { + /* device was released, exit */ + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + nbytes_left = count; /* number of bytes left to read */ + ubufp = (char *)ubuf; + + /* read count bytes or until LDC has no more read data (or error) */ + while (nbytes_left > 0) { + + /* NOTE: RAW mode can only read min size of LDC_PACKET_SIZE */ + if (atomic_read(&vldc->mode) == LDC_MODE_RAW) + size = max_t(int, LDC_PACKET_SIZE, nbytes_left); + else + size = min_t(int, atomic_read(&vldc->mtu), nbytes_left); + + rv = ldc_read(vldc->lp, vldc->rx_buf, size); + + dprintk("(%s) ldc_read() returns %d\n", vldc->name, rv); + + if (unlikely(rv < 0)) + break; + + if (unlikely(rv == 0)) + break; + + if (copy_to_user(ubufp, vldc->rx_buf, rv) != 0) { + rv = -EFAULT; + goto done; + } + + ubufp += rv; + nbytes_read += rv; + nbytes_left -= rv; + } + + /* Return any data read (even if we got a subsequent error) */ + if (nbytes_read > 0) + rv = nbytes_read; + +done: + + dprintk("(%s) num bytes read=%d, return value=%d\n", + vldc->name, nbytes_read, rv); + + /* re-enable interrupts */ + ldc_enable_hv_intr(vldc->lp); + + return (ssize_t)rv; + +} + +static unsigned int vldc_fops_poll(struct file *filp, poll_table *wait) +{ + struct vldc_dev *vldc; + int mask; + + dprintk("entered.\n"); + + vldc = filp->private_data; + + /* + * XXX For the error cases, should return error codes or POLLHUP? + * If the device has been released/closed or has been reset, + * exit with error. 
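+	 *
+	 * A minimal (hypothetical) userspace wait loop against this
+	 * implementation; POLLOUT below means at least one MTU of
+	 * transmit space is available:
+	 *
+	 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
+	 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
+	 *		n = write(fd, buf, len);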
+ */ + if (atomic_read(&vldc->is_released)) + return -ENODEV; + + if (atomic_read(&vldc->is_reset_asserted)) + return -EIO; + + poll_wait(filp, &vldc->waitqueue, wait); + + /* + * Check again if the device has been released/closed + * or has been reset while we were waiting + */ + if (atomic_read(&vldc->is_released)) + return -ENODEV; + + if (atomic_read(&vldc->is_reset_asserted)) + return -EIO; + + mask = 0; + + if (!vldc_will_read_block(vldc)) + mask |= POLLIN | POLLRDNORM; + + /* Check that we can write at least MTU bytes */ + if (!vldc_will_write_block(vldc, (size_t)atomic_read(&vldc->mtu))) + mask |= POLLOUT | POLLWRNORM; + + return mask; +} + +static long vldc_read_cookie(struct vldc_dev *vldc, u64 src_addr, u64 dst_addr, + u64 len) +{ + struct ldc_trans_cookie cookie; + int rv; + char *ubufp; + u32 nbytes_read; + u32 nbytes_left; + + dprintk("entered.\n"); + + nbytes_read = 0; /* number of bytes read */ + + /* validate args */ + if (vldc == NULL || src_addr == 0 || dst_addr == 0) { + rv = -EINVAL; + goto done; + } + + dprintk("(%s) src_addr=0x%llx dst_addr=0x%llx len=0x%llx\n", + vldc->name, src_addr, dst_addr, len); + + if (atomic_read(&vldc->is_released)) { + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + if (len == 0) { + rv = 0; + goto done; + } + + if (unlikely(len > VLDC_MAX_COOKIE)) { + rv = -E2BIG; + goto done; + } + + rv = 0; + nbytes_left = (u32)len; /* number of bytes left to read */ + ubufp = (char *)src_addr; + + /* copy in len bytes or until LDC has no more read data (or error) */ + while (nbytes_left > 0) { + + cookie.cookie_addr = dst_addr; + cookie.cookie_size = nbytes_left; + + rv = ldc_copy(vldc->lp, LDC_COPY_IN, vldc->cookie_read_buf, + nbytes_left, 0, &cookie, 1); + + dprintk("(%s) ldc_copy() returns %d\n", vldc->name, rv); + + if (unlikely(rv < 0)) + goto done; + + if (unlikely(rv == 0)) + break; + + if (copy_to_user(ubufp, vldc->cookie_read_buf, rv) != 0) { + rv = -EFAULT; + goto done; + } + + ubufp += rv; + dst_addr += rv; + nbytes_read += rv; + nbytes_left -= rv; + } + + rv = nbytes_read; + +done: + + dprintk("(%s) num bytes read=%d, return value=%d\n", + vldc->name, nbytes_read, rv); + + return rv; + +} + +static long vldc_write_cookie(struct vldc_dev *vldc, u64 src_addr, u64 dst_addr, + u64 len) +{ + struct ldc_trans_cookie cookie; + int rv; + char *ubufp; + u32 nbytes_written; + u32 nbytes_left; + + dprintk("entered.\n"); + + nbytes_written = 0; /* number of bytes written */ + + /* validate args */ + if (vldc == NULL || src_addr == 0 || dst_addr == 0) { + rv = -EINVAL; + goto done; + } + + dprintk("(%s) src_addr=0x%llx dst_addr=0x%llx len=0x%llx\n", + vldc->name, src_addr, dst_addr, len); + + if (atomic_read(&vldc->is_released)) { + rv = -ENODEV; + goto done; + } + + if (atomic_read(&vldc->is_reset_asserted)) { + rv = -EIO; + goto done; + } + + if (len == 0) { + rv = 0; + goto done; + } + + if (unlikely(len > VLDC_MAX_COOKIE)) { + rv = -E2BIG; + goto done; + } + + rv = 0; + nbytes_left = (u32)len; /* number of bytes left to write */ + ubufp = (char *)src_addr; + + /* copy in len bytes or until LDC has no more read data (or error) */ + while (nbytes_left > 0) { + + if (copy_from_user(vldc->cookie_write_buf, + ubufp, nbytes_left) != 0) { + rv = -EFAULT; + goto done; + } + + cookie.cookie_addr = dst_addr; + cookie.cookie_size = nbytes_left; + + rv = ldc_copy(vldc->lp, LDC_COPY_OUT, vldc->cookie_write_buf, + nbytes_left, 0, &cookie, 1); + + dprintk("(%s) ldc_copy() returns %d\n", vldc->name, 
rv); + + if (unlikely(rv < 0)) + goto done; + + if (unlikely(rv == 0)) + break; + + ubufp += rv; + dst_addr += rv; + nbytes_written += rv; + nbytes_left -= rv; + } + + rv = nbytes_written; + +done: + + dprintk("(%s) num bytes written=%d, return value=%d\n", + vldc->name, nbytes_written, rv); + + return rv; + +} + +static long vldc_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + + struct vldc_dev *vldc; + struct vldc_data_t __user *uarg; + u64 src_addr; + u64 dst_addr; + u64 len; + int rv; + + dprintk("entered.\n"); + + rv = 0; + src_addr = 0; + dst_addr = 0; + len = 0; + + vldc = filp->private_data; + + /* get the arg for the read/write cookie ioctls */ + if (cmd == VLDC_IOCTL_READ_COOKIE || cmd == VLDC_IOCTL_WRITE_COOKIE) { + uarg = (struct vldc_data_t __user *)arg; + if (get_user(src_addr, &uarg->src_addr) != 0 || + get_user(dst_addr, &uarg->dst_addr) != 0 || + get_user(len, &uarg->length) != 0) { + rv = -EFAULT; + goto done; + } + } + + switch (cmd) { + case VLDC_IOCTL_READ_COOKIE: + + rv = vldc_read_cookie(vldc, src_addr, dst_addr, len); + + break; + + case VLDC_IOCTL_WRITE_COOKIE: + + rv = vldc_write_cookie(vldc, src_addr, dst_addr, len); + + break; + + default: + rv = -EINVAL; + break; + } + +done: + + return rv; + +} + +/* + * Event function does the following: + * 1. If data is ready from the LDC, indicate it + * in the corresponding device struct. + * 2. Wake up any (poll or read) waiters on this device + * + * NOTE - this routine is called in interrupt context. + */ +static void vldc_event(void *arg, int event) +{ + struct vldc_dev *vldc = arg; + + dprintk("entered.\n"); + + dprintk("%s: LDC event %d\n", vldc->name, event); + + if (event == LDC_EVENT_RESET) { + atomic_set(&vldc->is_reset_asserted, 1); + return; + } + + if (event == LDC_EVENT_UP) + return; + + if (unlikely(event != LDC_EVENT_DATA_READY)) { + dprintk("Unexpected LDC event %d\n", event); + return; + } + + /* + * disable interrupts until we have completed reading the data. + * NOTE: this will hold off all types of events including RESET + * until read has complete. If a device reset occurs within this + * window (while interrupts are disabled), attempts to read/write + * the device should/will fail at the LDC level (since a check is + * at that level - via an HV call - to first ensure the LDC is UP). + */ + + ldc_disable_hv_intr(vldc->lp); + + /* walkup any read or poll waiters */ + wake_up_interruptible(&vldc->waitqueue); + +} + + +static int vldc_connect(struct ldc_channel *lp) +{ + int timeout; + int state; + + /* no connection required in RAW mode */ + if (ldc_mode(lp) == LDC_MODE_RAW) + return 0; + + /* + * Issue a ldc_connect to make sure the handshake is initiated. + * NOTE: ldc_connect can fail if the LDC connection handshake + * completed since we called bind(). So, ignore + * ldc_connect() failures. + */ + (void) ldc_connect(lp); + + /* wait for the connection to complete */ + timeout = VLDC_CONNECTION_TIMEOUT; + do { + state = ldc_state(lp); + if (state == LDC_STATE_CONNECTED) + break; + msleep_interruptible(1); + } while (timeout-- > 0); + + if (state == LDC_STATE_CONNECTED) + return 0; + else + return -ETIMEDOUT; +} + +/* + * Open function does the following: + * 1. 
Alloc and bind LDC to the device (using sysfs parameters) + */ +static int vldc_fops_open(struct inode *inode, struct file *filp) +{ + struct vldc_dev *vldc; + char *tbuffer; + char *rbuffer; + char *crbuffer; + char *cwbuffer; + struct ldc_channel_config ldc_cfg; + struct ldc_channel *lp; + u32 mtu; + int rv; + int err; + bool ldc_bound; + + dprintk("entered.\n"); + + rv = 0; + ldc_bound = false; + tbuffer = NULL; + rbuffer = NULL; + crbuffer = NULL; + cwbuffer = NULL; + + vldc = container_of(inode->i_cdev, struct vldc_dev, cdev); + + /* just to be safe, if the device is in reset, deny the open. */ + if (atomic_read(&vldc->is_reset_asserted)) + return -EIO; + + dprintk("(%s)\n", vldc->name); + + /* + * We hold the vldc_mutex during the open to prevent + * a race with vldc_sysfs_mode_store() and vldc_sysfs_mtu_store(). + * See comments in those routines for more detail. + */ + mutex_lock(&vldc->vldc_mutex); + + /* + * Atomically test and mark the device as opened. + * This limits the usage of the device to one process at + * a time which is good enough for our purposes (and which + * simplifies locking). + */ + if (!atomic_dec_and_test(&vldc->is_released)) { + atomic_inc(&vldc->is_released); + dprintk("failed: Multiple open.\n"); + mutex_unlock(&vldc->vldc_mutex); + return -EBUSY; + } + + mutex_unlock(&vldc->vldc_mutex); + + mtu = (u32) atomic_read(&vldc->mtu); + + tbuffer = kzalloc(mtu, GFP_KERNEL); + if (tbuffer == NULL) { + dprintk("failed to allocate tbuffer.\n"); + rv = -ENOMEM; + goto error; + } + vldc->tx_buf = tbuffer; + + rbuffer = kzalloc(mtu, GFP_KERNEL); + if (rbuffer == NULL) { + dprintk("failed to allocate rbuffer.\n"); + rv = -ENOMEM; + goto error; + } + vldc->rx_buf = rbuffer; + + crbuffer = kzalloc(VLDC_MAX_COOKIE, GFP_KERNEL); + if (crbuffer == NULL) { + dprintk("failed to allocate crbuffer.\n"); + rv = -ENOMEM; + goto error; + } + vldc->cookie_read_buf = crbuffer; + + cwbuffer = kzalloc(VLDC_MAX_COOKIE, GFP_KERNEL); + if (cwbuffer == NULL) { + dprintk("failed to allocate cwbuffer.\n"); + rv = -ENOMEM; + goto error; + } + vldc->cookie_write_buf = cwbuffer; + + ldc_cfg.event = vldc_event; + ldc_cfg.mtu = mtu; + ldc_cfg.mode = atomic_read(&vldc->mode); + ldc_cfg.debug = 0; + ldc_cfg.tx_irq = vldc->vdev->tx_irq; + ldc_cfg.rx_irq = vldc->vdev->rx_irq; + ldc_cfg.rx_ino = vldc->vdev->rx_ino; + ldc_cfg.tx_ino = vldc->vdev->tx_ino; + ldc_cfg.dev_handle = vldc->vdev->dev_handle; + + /* Alloc and init the associated LDC */ + lp = ldc_alloc(vldc->vdev->channel_id, &ldc_cfg, vldc, vldc->name); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + dprintk("ldc_alloc() failed. 
err=%d\n", err); + rv = err; + goto error; + } + vldc->lp = lp; + + rv = ldc_bind(vldc->lp); + if (rv != 0) { + dprintk("ldc_bind() failed, err=%d.\n", rv); + goto error; + } + ldc_bound = true; + + rv = vldc_connect(vldc->lp); + if (rv != 0) { + dprintk("vldc_connect() failed, err=%d.\n", rv); + goto error; + } + + /* tuck away the vldc device for subsequent fops */ + filp->private_data = vldc; + + dprintk("Success.\n"); + + return 0; + +error: + + if (ldc_bound) + ldc_unbind(vldc->lp); + + if (vldc->lp != NULL) + ldc_free(vldc->lp); + + if (cwbuffer != NULL) + kfree(cwbuffer); + + if (crbuffer != NULL) + kfree(crbuffer); + + if (rbuffer != NULL) + kfree(rbuffer); + + if (tbuffer != NULL) + kfree(tbuffer); + + atomic_inc(&vldc->is_released); + + return rv; + +} + +static int vldc_fops_release(struct inode *inode, struct file *filp) +{ + struct vldc_dev *vldc; + + dprintk("entered.\n"); + + vldc = filp->private_data; + + ldc_unbind(vldc->lp); + + ldc_free(vldc->lp); + + kfree(vldc->cookie_write_buf); + + kfree(vldc->cookie_read_buf); + + kfree(vldc->rx_buf); + + kfree(vldc->tx_buf); + + /* mark the device as released */ + atomic_inc(&vldc->is_released); + + /* + * User must close and re-open the device to clear + * the reset asserted flag. + */ + atomic_set(&vldc->is_reset_asserted, 0); + + /* + * Wake up any rogue read or poll waiters. + * They will exit (with an error) since is_released is now set. + */ + wake_up_interruptible(&vldc->waitqueue); + + return 0; +} + +static const struct file_operations vldc_fops = { + .owner = THIS_MODULE, + .open = vldc_fops_open, + .release = vldc_fops_release, + .poll = vldc_fops_poll, + .read = vldc_fops_read, + .write = vldc_fops_write, + .unlocked_ioctl = vldc_fops_ioctl, +}; + +static int vldc_get_next_avail_minor(void) +{ + struct vldc_dev *vldc; + bool found; + int i; + + /* + * walk the vldc_dev_list list to find the next + * lowest available minor. + */ + mutex_lock(&vldc_data_mutex); + for (i = VLDC_MINOR_BASE; i < VLDC_MAX_DEVS; i++) { + found = false; + list_for_each_entry(vldc, &vldc_data.vldc_dev_list, list) { + if (i == MINOR(vldc->devt)) { + found = true; + break; + } + } + if (!found) { + /* found a free minor, use it */ + break; + } + } + mutex_unlock(&vldc_data_mutex); + + if (i == VLDC_MAX_DEVS) { + dprintk("no more minors left for allocation!\n"); + return -1; + } + + return i; +} + +static ssize_t vldc_sysfs_mode_show(struct device *device, + struct device_attribute *attr, char *buffer) +{ + struct vldc_dev *vldc; + + dprintk("entered.\n"); + + vldc = dev_get_drvdata(device); + + return scnprintf(buffer, PAGE_SIZE, "%d\n", atomic_read(&vldc->mode)); +} + +static ssize_t vldc_sysfs_mode_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vldc_dev *vldc; + unsigned int mode; + + dprintk("entered.\n"); + + if (sscanf(buf, "%ud", &mode) != 1) + return -EINVAL; + + /* validate the value from the user */ + if (!(mode == LDC_MODE_RAW || + mode == LDC_MODE_UNRELIABLE || + mode == LDC_MODE_STREAM)) { + return -EINVAL; + } + + vldc = dev_get_drvdata(device); + + /* + * Only allow the mode to be set if the device is closed. + * Use vldc_mutex to ensure that an open does not + * come in between the check for is_released and the set + * of the mode. 
+ */ + mutex_lock(&vldc->vldc_mutex); + + if (!atomic_read(&vldc->is_released)) { + /* can't change the mode while the device is open */ + mutex_unlock(&vldc->vldc_mutex); + return -EBUSY; + } + + atomic_set(&vldc->mode, mode); + + mutex_unlock(&vldc->vldc_mutex); + + dprintk("mode changed to %d.\n", mode); + + return strnlen(buf, count); +} + + +static ssize_t vldc_sysfs_mtu_show(struct device *device, + struct device_attribute *attr, char *buffer) +{ + struct vldc_dev *vldc; + + dprintk("entered.\n"); + + vldc = dev_get_drvdata(device); + + return scnprintf(buffer, PAGE_SIZE, "%d\n", atomic_read(&vldc->mtu)); +} + +static ssize_t vldc_sysfs_mtu_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vldc_dev *vldc; + unsigned int mtu; + int rv; + + dprintk("entered.\n"); + + rv = 0; + + if (sscanf(buf, "%ud", &mtu) != 1) + return -EINVAL; + + /* validate the value from the user */ + if (mtu < LDC_PACKET_SIZE || mtu > VLDC_MAX_MTU) + return -EINVAL; + + vldc = dev_get_drvdata(device); + + /* + * Only allow the mtu to be set if the device is closed. + * Use vldc_mutex to ensure that an open does not + * come in between the check for is_released and the set + * of the mtu. + */ + mutex_lock(&vldc->vldc_mutex); + + if (!atomic_read(&vldc->is_released)) { + /* can't change the mtu while the device is open */ + mutex_unlock(&vldc->vldc_mutex); + return -EBUSY; + } + + atomic_set(&vldc->mtu, mtu); + + mutex_unlock(&vldc->vldc_mutex); + + dprintk("mtu changed to %d.\n", mtu); + + return strnlen(buf, count); + +} + + + +static DEVICE_ATTR(mode, (S_IRUSR|S_IWUSR), vldc_sysfs_mode_show, + vldc_sysfs_mode_store); +static DEVICE_ATTR(mtu, (S_IRUSR|S_IWUSR), vldc_sysfs_mtu_show, + vldc_sysfs_mtu_store); + +static struct attribute *vldc_sysfs_entries[] = { + &dev_attr_mode.attr, + &dev_attr_mtu.attr, + NULL +}; + +static struct attribute_group vldc_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = vldc_sysfs_entries, +}; + +/* + * Probe function does the following: + * 1. Create/Init vldc_dev for newly probed device + * 2. Create /dev entry for the device + * 3. 
Create sysfs entries for the device + */ +static int vldc_probe(struct vio_dev *vdev, const struct vio_device_id *vio_did) +{ + struct vldc_dev *vldc; + struct mdesc_handle *hp; + const char *valstr; + const u64 *id; + int rv, slen; + dev_t devt; + struct device *device; + int next_minor; + bool created_sysfs_group; + u64 node; +#ifdef VLDC_DEBUG + unsigned char devt_buf[32]; +#endif + + dprintk("entered.\n"); + + vldc = NULL; + hp = NULL; + valstr = NULL; + devt = 0; + device = NULL; + created_sysfs_group = false; + + vldc = kzalloc(sizeof(struct vldc_dev), GFP_KERNEL); + if (vldc == NULL) { + dprintk("failed to allocate vldc_dev\n"); + rv = -ENOMEM; + goto error; + } + + mutex_init(&vldc->vldc_mutex); + + hp = mdesc_grab(); + + node = vio_vdev_node(hp, vdev); + if (node == MDESC_NODE_NULL) { + dprintk("Failed to get vdev MD node.\n"); + mdesc_release(hp); + rv = -ENXIO; + goto error; + } + + id = mdesc_get_property(hp, node, "id", NULL); + if (id == NULL) { + dprintk("failed to get id property.\n"); + mdesc_release(hp); + rv = -ENXIO; + goto error; + } + + /* get the name of the service this vldc-port provides */ + valstr = mdesc_get_property(hp, node, "vldc-svc-name", &slen); + if (valstr == NULL) { + dprintk("failed to get vldc-svc-name property.\n"); + mdesc_release(hp); + rv = -ENXIO; + goto error; + } + + mdesc_release(hp); + + vldc->name = kzalloc(slen+1, GFP_KERNEL); /* +1 for NUll byte */ + if (vldc->name == NULL) { + dprintk("failed to alloc vldc->name.\n"); + rv = -ENOMEM; + goto error; + } + memcpy(vldc->name, valstr, slen); + vldc->name[slen] = '\0'; + + dprintk("%s: cfg_handle=%llu, id=%llu\n", vldc->name, + vdev->dev_no, *id); + + init_waitqueue_head(&vldc->waitqueue); + + /* mark the device as initially released (e.g. closed) */ + atomic_set(&vldc->is_released, 1); + + /* clear the reset asserted flag */ + atomic_set(&vldc->is_reset_asserted, 0); + + dev_set_drvdata(&vdev->dev, vldc); + + /* create the devt for this device */ + next_minor = vldc_get_next_avail_minor(); + if (next_minor == -1) { + dprintk("vldc_get_next_avail_minor() failed.\n"); + rv = -ENXIO; + goto error; + } + devt = MKDEV(MAJOR(vldc_data.devt), next_minor); + vldc->devt = devt; + + dprintk("%s: dev_t=%s\n", vldc->name, format_dev_t(devt_buf, + vldc->devt)); + + /* + * Use the default mode and mtu for starters. 
+	 * They are exported via sysfs for modification by the user.
+	 */
+	atomic_set(&vldc->mode, VLDC_DEFAULT_MODE);
+	atomic_set(&vldc->mtu, VLDC_DEFAULT_MTU);
+
+	/* create/add the associated cdev */
+	cdev_init(&vldc->cdev, &vldc_fops);
+	vldc->cdev.owner = THIS_MODULE;
+	rv = cdev_add(&vldc->cdev, devt, 1);
+	if (rv != 0) {
+		dprintk("cdev_add() failed.\n");
+		devt = 0;
+		goto error;
+	}
+
+	/* create the associated /sys and /dev entries */
+	device = device_create(vldc_data.chrdev_class, &vdev->dev, devt,
+	    vldc, "%s", vldc->name);
+	if (IS_ERR(device)) {
+		dprintk("device_create() failed.\n");
+		rv = PTR_ERR(device);
+		device = NULL;
+		goto error;
+	}
+	vldc->device = device;
+
+	vldc->vdev = vdev;
+
+	rv = sysfs_create_group(&device->kobj, &vldc_attribute_group);
+	if (rv)
+		goto error;
+
+	created_sysfs_group = true;
+
+	/* add the vldc to the global vldc_data device list */
+	mutex_lock(&vldc_data_mutex);
+	list_add_tail(&vldc->list, &vldc_data.vldc_dev_list);
+	vldc_data.num_vldc_dev_list++;
+	mutex_unlock(&vldc_data_mutex);
+
+	dprintk("%s: probe successful\n", vldc->name);
+
+	return 0;
+
+error:
+
+	if (created_sysfs_group)
+		sysfs_remove_group(&device->kobj, &vldc_attribute_group);
+
+	if (device)
+		device_destroy(vldc_data.chrdev_class, devt);
+
+	if (devt)
+		cdev_del(&vldc->cdev);
+
+	if (vldc != NULL) {
+		kfree(vldc->name);
+		mutex_destroy(&vldc->vldc_mutex);
+		kfree(vldc);
+	}
+
+	dprintk("probe failed (rv=%d)\n", rv);
+
+	return rv;
+}
+
+static int vldc_free_vldc_dev(struct vldc_dev *vldc)
+{
+
+	dprintk("entered. (%s)\n", vldc->name);
+
+	mutex_lock(&vldc_data_mutex);
+	list_del(&vldc->list);
+	vldc_data.num_vldc_dev_list--;
+	mutex_unlock(&vldc_data_mutex);
+
+	sysfs_remove_group(&vldc->device->kobj, &vldc_attribute_group);
+	device_destroy(vldc_data.chrdev_class, vldc->devt);
+	cdev_del(&vldc->cdev);
+	kfree(vldc->name);
+	mutex_destroy(&vldc->vldc_mutex);
+	kfree(vldc);
+
+	return 0;
+}
+
+static int vldc_remove(struct vio_dev *vdev)
+{
+	int rv;
+	struct vldc_dev *vldc;
+
+	dprintk("entered.\n");
+
+	vldc = dev_get_drvdata(&vdev->dev);
+
+	if (vldc == NULL) {
+		dprintk("failed to get vldc_dev from vio_dev.\n");
+		rv = -ENXIO;
+	} else {
+		dprintk("removing (%s)\n", vldc->name);
+		rv = vldc_free_vldc_dev(vldc);
+	}
+
+	return rv;
+}
+
+static const struct vio_device_id vldc_match[] = {
+	{
+		.type = "vldc-port",
+	},
+	{},
+};
+
+static struct vio_driver vldc_driver = {
+	.id_table	= vldc_match,
+	.probe		= vldc_probe,
+	.remove		= vldc_remove,
+	.name		= VLDC_DEVICE_NAME,
+};
+
+static char *vldc_devnode(struct device *dev, umode_t *mode)
+{
+	if (mode != NULL)
+		*mode = 0600;
+
+	return kasprintf(GFP_KERNEL, "vldc/%s", dev_name(dev));
+}
+
+/*
+ * Init function does the following:
+ * 1. Init vldc_data struct fields
+ * 2. Register VIO driver
+ */
+static int __init vldc_init(void)
+{
+	int rv;
+#ifdef VLDC_DEBUG
+	unsigned char devt_buf[32];
+#endif
+
+	dprintk("entered.
(DEBUG enabled)\n"); + + printk(KERN_INFO "%s", driver_version); + + INIT_LIST_HEAD(&vldc_data.vldc_dev_list); + vldc_data.num_vldc_dev_list = 0; + + rv = alloc_chrdev_region(&vldc_data.devt, VLDC_MINOR_BASE, + VLDC_MAX_DEVS, VLDC_DEVICE_NAME); + if (rv < 0) { + dprintk("alloc_chrdev_region failed: %d\n", rv); + return rv; + } + + if (vldc_data.devt == (dev_t)0) { + dprintk("alloc_chrdev_region failed: (vldc_data.devt == 0)\n"); + rv = -ENXIO; + return rv; + } + + dprintk("dev_t allocated = %s\n", + format_dev_t(devt_buf, vldc_data.devt)); + + vldc_data.chrdev_class = class_create(THIS_MODULE, VLDC_DEVICE_NAME); + if (IS_ERR(vldc_data.chrdev_class)) { + rv = PTR_ERR(vldc_data.chrdev_class); + dprintk("class_create() failed: %d\n", rv); + vldc_data.chrdev_class = NULL; + goto error; + } + + /* set callback to create devices under /dev/vldc directory */ + vldc_data.chrdev_class->devnode = vldc_devnode; + + rv = vio_register_driver(&vldc_driver); + if (rv != 0) { + dprintk("vio_register_driver() failed: %d\n", rv); + goto error; + } + + return 0; + +error: + if (vldc_data.chrdev_class) + class_destroy(vldc_data.chrdev_class); + + if (vldc_data.devt) + unregister_chrdev_region(vldc_data.devt, VLDC_MAX_DEVS); + + return rv; +} + +static void __exit vldc_exit(void) +{ + + dprintk("entered.\n"); + + /* + * Note - vio_unregister_driver() will invoke a call to + * vldc_remove() for every successfully probed device. + */ + vio_unregister_driver(&vldc_driver); + + if (vldc_data.chrdev_class) + class_destroy(vldc_data.chrdev_class); + + if (vldc_data.devt) + unregister_chrdev_region(vldc_data.devt, VLDC_MAX_DEVS); +} + +module_init(vldc_init); +module_exit(vldc_exit); + +MODULE_AUTHOR("Oracle"); +MODULE_DESCRIPTION("Sun4v Virtual LDC Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + diff --git a/drivers/char/vlds.c b/drivers/char/vlds.c new file mode 100644 index 000000000000..fd65777deab7 --- /dev/null +++ b/drivers/char/vlds.c @@ -0,0 +1,2092 @@ +/* + * vlds.c: Sun4v LDOMs Virtual Domain Services Driver + * + * Copyright (C) 2015 Oracle. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned int ldoms_debug_level; +static unsigned int vldsdbg_level; +module_param(vldsdbg_level, uint, S_IRUGO|S_IWUSR); + +#define DRV_NAME "vlds" +#define DRV_VERSION "1.0" +#define VLDS_DEVICE_NAME DRV_NAME + +#define VLDS_MINOR_BASE 0 +#define VLDS_MAX_DEVS 65535 /* need one per guest domain - max is 2^20 */ +#define VLDS_MAX_MSG_SIZE (256 * 1024) + +#define VLDS_SP_INT_NAME DS_SP_NAME /* SP DS internal name */ +#define VLDS_SP_DEV_NAME "sp" /* SP DS device name */ +#define VLDS_PATH_MAX 256 + +#define VLDS_INVALID_HANDLE 0xFFFFFFFFFFFFFFFFUL + +static char driver_version[] = DRV_NAME ".c:v" DRV_VERSION "\n"; + +#define dprintk(fmt, args...) 
do {\ +if (vldsdbg_level > 0)\ + printk(KERN_ERR "%s: %s: " fmt, DRV_NAME, __func__, ##args);\ +} while (0) + +/* Global driver data struct for data common to all devices */ +struct vlds_driver_data { + struct list_head vlds_dev_list; /* list of all vlds devices */ + int num_vlds_dev_list; + struct class *chrdev_class; + dev_t devt; +}; +struct vlds_driver_data vlds_data; +static DEFINE_MUTEX(vlds_data_mutex); /* protect vlds_data */ + +struct vlds_dev { + /* link into the global driver data dev list */ + struct list_head list; + + struct mutex vlds_mutex; /* protect this vlds_dev */ + struct cdev cdev; + dev_t devt; + char *int_name; /* internal name for device */ + struct device *device; + u64 domain_handle; /* only valid for domain dev */ + + /* list of all services for this vlds device */ + struct list_head service_info_list; + +}; + +/* we maintain a global vlds_dev for the SP device */ +struct vlds_dev *sp_vlds; + +struct vlds_service_info { + /* link into the vlds_dev service info list */ + struct list_head list; + + /* name/id of the service */ + char *name; + + u64 state; + + u64 flags; + + /* the thread group id which is using this service */ + pid_t tgid; + + /* unique handle assigned to this service */ + u64 handle; + + /* version that was negotiated */ + vlds_ver_t neg_vers; + + /* Queue of received data messages for this service */ + struct list_head msg_queue; + u64 msg_queue_size; + +}; +#define VLDS_SVC_IS_CLIENT(svc) ((svc)->flags & VLDS_REG_CLIENT) +#define VLDS_SVC_IS_EVENT(svc) ((svc)->flags & VLDS_REG_EVENT) + +struct vlds_msg_data { + /* link into the vlds_service_info message queue */ + struct list_head list; + + size_t size; /* message data size */ + u8 data[0]; /* message data */ +}; +#define VLDS_MAX_MSG_LIST_NUM 16 + +/* + * If a process registers an event fd, we create an + * event_info to track events for the process. + */ +struct vlds_event_info { + /* link into the vlds_event_info_list */ + struct list_head list; + + /* the thread group id (i.e. 
pid) to which this event_info belongs */ + pid_t tgid; + + /* fd to signal process of received event - See eventfd(2) */ + int fd; + + /* List of received events */ + struct list_head event_list; +}; + +struct list_head vlds_event_info_list; +static DEFINE_MUTEX(vlds_event_info_list_mutex); + +struct vlds_event { + /* link into the vlds_event_info event_list */ + struct list_head list; + + /* service associated with the event */ + struct vlds_service_info *svc_info; + + /* type of event - reg/unreg/data */ + u64 type; + + /* negotiated version (for reg events) */ + vlds_ver_t neg_vers; +}; + +/* + * When holding multiple locks in this driver, locking + * MUST be consistently performed in this order: + * vlds_data_mutex + * vlds_dev->vlds_mutex + * vlds_event_info_list_mutex + */ + +/* vlds_event_info_list_mutex must be held */ +static int vlds_add_event_info(pid_t tgid, int fd) +{ + struct vlds_event_info *event_info; + + dprintk("called\n"); + + event_info = kzalloc(sizeof(struct vlds_event_info), GFP_KERNEL); + if (unlikely(event_info == NULL)) { + dprintk("failed to allocate event_info\n"); + return -ENOMEM; + } + + event_info->tgid = tgid; + event_info->fd = fd; + INIT_LIST_HEAD(&event_info->event_list); + + list_add_tail(&event_info->list, &vlds_event_info_list); + + return 0; + +} + +/* vlds_event_info_list_mutex must be held */ +static int vlds_get_event_info(pid_t tgid, + struct vlds_event_info **ret_event_info) +{ + struct vlds_event_info *event_info; + bool found; + + found = false; + list_for_each_entry(event_info, &vlds_event_info_list, list) { + if (event_info->tgid == tgid) { + found = true; + break; + } + } + + if (!found) + return -ENODEV; + + *ret_event_info = event_info; + + return 0; + +} + +/* vlds_event_info_list_mutex must be held */ +static void vlds_remove_event_info(pid_t tgid) +{ + struct vlds_event_info *event_info; + struct vlds_event *event; + struct vlds_event *next; + bool found; + + dprintk("called\n"); + + found = false; + list_for_each_entry(event_info, &vlds_event_info_list, list) { + if (event_info->tgid == tgid) { + found = true; + break; + } + } + + if (found) { + /* Remove all events queued on this event_info */ + list_for_each_entry_safe(event, next, &event_info->event_list, + list) { + list_del(&event->list); + kfree(event); + } + + list_del(&event_info->list); + kfree(event_info); + } + +} + +static int vlds_add_event(pid_t tgid, struct vlds_service_info *svc_info, + u64 type, vlds_ver_t *neg_vers) +{ + struct vlds_event_info *event_info; + struct vlds_event *event; + struct task_struct *utask; + struct file *efd_file; + struct eventfd_ctx *efd_ctx; + int rv; + + mutex_lock(&vlds_event_info_list_mutex); + + event_info = NULL; + rv = vlds_get_event_info(tgid, &event_info); + if (rv || event_info == NULL) { + /* + * If we failed to find an event_info, it probably just + * means the process did not register for events in favor + * of using polling - which is valid. 
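+	 *
+	 * Illustrative (hypothetical) consumer side of this scheme: the
+	 * process hands the driver an eventfd(2) descriptor and then
+	 * blocks on it:
+	 *
+	 *	int efd = eventfd(0, 0);
+	 *	... register efd (and the caller's tgid) with the driver ...
+	 *	uint64_t cnt;
+	 *	read(efd, &cnt, sizeof(cnt));	(wakes on eventfd_signal)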
+ */ + mutex_unlock(&vlds_event_info_list_mutex); + return 0; + } + + event = kzalloc(sizeof(struct vlds_event), GFP_KERNEL); + if (unlikely(event == NULL)) { + dprintk("failed to allocate event for " + "service %llx\n", svc_info->handle); + mutex_unlock(&vlds_event_info_list_mutex); + return -ENOMEM; + } else { + event->type = type; + event->svc_info = svc_info; + if (neg_vers != NULL) + event->neg_vers = *neg_vers; + + list_add_tail(&event->list, + &event_info->event_list); + } + + mutex_unlock(&vlds_event_info_list_mutex); + + /* + * Signal the process that there is an event pending + * This is tricky as it requires searching the task's + * file table for the entry corresponding to the event fd + * to get the event fd context. + */ + + rcu_read_lock(); + + /* Get the task struct */ + utask = pid_task(find_vpid(tgid), PIDTYPE_PID); + if (!utask) { + rcu_read_unlock(); + return -EIO; + } + + /* Get the file corresponding to event_info->fd */ + efd_file = fcheck_files(utask->files, event_info->fd); + if (!efd_file) { + rcu_read_unlock(); + return -EIO; + } + + /* Get the eventfd context associated with the file */ + efd_ctx = eventfd_ctx_fileget(efd_file); + if (!efd_ctx) { + rcu_read_unlock(); + return -EIO; + } + + /* signal the task by incrementing the counter by 1 */ + eventfd_signal(efd_ctx, 1); + + /* release the eventfd context */ + eventfd_ctx_put(efd_ctx); + + rcu_read_unlock(); + + return rv; + +} + +static struct vlds_event *vlds_get_event(struct vlds_event_info *event_info) +{ + + struct vlds_event *event; + + if (list_empty(&event_info->event_list)) + return NULL; + + event = list_first_entry(&event_info->event_list, + struct vlds_event, list); + + BUG_ON(event == NULL); + + return event; + +} + +static void vlds_remove_event(struct vlds_event_info *event_info, + struct vlds_event *event) +{ + if (event == NULL || list_empty(&event_info->event_list)) + return; + + /* Check here that the event is actually on the list? 
TBD */ + + list_del(&event->list); + + kfree(event); +} + +static void vlds_remove_svc_events(struct vlds_service_info *svc_info) +{ + struct vlds_event_info *event_info; + struct vlds_event *event; + struct vlds_event *next; + + mutex_lock(&vlds_event_info_list_mutex); + + list_for_each_entry(event_info, &vlds_event_info_list, list) { + + list_for_each_entry_safe(event, next, &event_info->event_list, + list) { + if (event->svc_info == svc_info) { + list_del(&event->list); + kfree(event); + } + } + } + + mutex_unlock(&vlds_event_info_list_mutex); +} + +static struct vlds_service_info *vlds_get_svc_info(struct vlds_dev *vlds, + char *svc_str, bool is_client) +{ + struct vlds_service_info *svc_info; + + list_for_each_entry(svc_info, &vlds->service_info_list, list) { + if (!strncmp(svc_info->name, svc_str, VLDS_MAX_NAMELEN) && + VLDS_SVC_IS_CLIENT(svc_info) == is_client) { + return svc_info; + } + } + + return NULL; +} + +static struct vlds_service_info *vlds_get_svc_info_hdl(struct vlds_dev *vlds, + u64 hdl) +{ + struct vlds_service_info *svc_info; + + list_for_each_entry(svc_info, &vlds->service_info_list, list) { + if (svc_info->handle == hdl) + return svc_info; + } + + return NULL; +} + +/* Add a message to a service message queue */ +static int vlds_add_msg(struct vlds_service_info *svc_info, void *buf, + size_t buflen) +{ + struct vlds_msg_data *msg_data; + + /* check if we've reached the max num of queued messages */ + if (svc_info->msg_queue_size > VLDS_MAX_MSG_LIST_NUM) + return -ENOSPC; + + /* make sure the message size isn't too large */ + if (buflen > VLDS_MAX_MSG_SIZE) + return -EFBIG; + + /* we don't allow enqueing zero length messages */ + if (buflen == 0) + return -EINVAL; + + /* allocate/copy a buffer for the message */ + msg_data = kzalloc(sizeof(struct vlds_msg_data) + buflen, GFP_KERNEL); + if (unlikely(msg_data == NULL)) + return -ENOMEM; + + /* copy the message/size */ + memcpy(msg_data->data, buf, buflen); + msg_data->size = buflen; + + /* add it to the queue */ + list_add_tail(&msg_data->list, &svc_info->msg_queue); + + svc_info->msg_queue_size++; + + return 0; +} + +/* + * Get a message (data and size) from a service message queue. + * NOTE: the message remains on the queue. + */ +static struct vlds_msg_data *vlds_get_msg(struct vlds_service_info *svc_info) +{ + struct vlds_msg_data *msg_data; + + if (list_empty(&svc_info->msg_queue)) { + /* + * TBD: Block instead of return here + * (unless NONBLOCK flag specified). + */ + return NULL; + } + + msg_data = list_first_entry(&svc_info->msg_queue, struct vlds_msg_data, + list); + + BUG_ON(msg_data == NULL); + + return msg_data; +} + +/* Dequeue a message from a service message queue. */ +static void vlds_dequeue_msg(struct vlds_service_info *svc_info, + struct vlds_msg_data *msg_data) +{ + if (msg_data == NULL || list_empty(&svc_info->msg_queue)) + return; + + /* Check here that the message is actually on the queue? 
TBD */
+
+	list_del(&msg_data->list);
+
+	kfree(msg_data);
+
+	svc_info->msg_queue_size--;
+}
+
+static void vlds_free_msg_queue(struct vlds_service_info *svc_info)
+{
+	struct vlds_msg_data *msg_data;
+	struct vlds_msg_data *next;
+
+	list_for_each_entry_safe(msg_data, next, &svc_info->msg_queue,
+	    list) {
+
+		list_del(&msg_data->list);
+
+		kfree(msg_data);
+
+		svc_info->msg_queue_size--;
+	}
+
+}
+
+/*
+ * Callback ops
+ */
+static void
+vlds_ds_reg_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl, ds_ver_t *ver)
+{
+	struct vlds_dev *vlds;
+	struct vlds_service_info *svc_info;
+	int rv;
+
+	dprintk("entered.\n");
+
+	vlds = (struct vlds_dev *)arg;
+
+	mutex_lock(&vlds->vlds_mutex);
+
+	svc_info = vlds_get_svc_info_hdl(vlds, hdl);
+	if (svc_info == NULL) {
+		dprintk("%s: received invalid handle (%llx)\n",
+		    vlds->int_name, hdl);
+		mutex_unlock(&vlds->vlds_mutex);
+		return;
+	}
+
+	svc_info->neg_vers.vlds_major = (u16)ver->major;
+	svc_info->neg_vers.vlds_minor = (u16)ver->minor;
+	svc_info->state = VLDS_HDL_STATE_CONNECTED;
+
+	/*
+	 * if the service requires events,
+	 * add an event to the process's event_info queue
+	 */
+	if (VLDS_SVC_IS_EVENT(svc_info)) {
+		rv = vlds_add_event(svc_info->tgid, svc_info,
+		    VLDS_EVENT_TYPE_REG, &svc_info->neg_vers);
+		if (rv) {
+			/* just give an error if we failed to add the event */
+			pr_err("%s: failed to create registration event "
+			    "(%llx)\n", vlds->int_name, hdl);
+		}
+	}
+
+	dprintk("%s: service %s registered version (%u.%u) hdl=%llx\n",
+	    vlds->int_name, svc_info->name, svc_info->neg_vers.vlds_major,
+	    svc_info->neg_vers.vlds_minor, hdl);
+
+	mutex_unlock(&vlds->vlds_mutex);
+
+}
+
+static void
+vlds_ds_unreg_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl)
+{
+	struct vlds_dev *vlds;
+	struct vlds_service_info *svc_info;
+	int rv;
+
+	dprintk("entered.\n");
+
+	vlds = (struct vlds_dev *)arg;
+
+	mutex_lock(&vlds->vlds_mutex);
+
+	svc_info = vlds_get_svc_info_hdl(vlds, hdl);
+	if (svc_info == NULL) {
+		dprintk("%s: received invalid handle (%llx)\n",
+		    vlds->int_name, hdl);
+		mutex_unlock(&vlds->vlds_mutex);
+		return;
+	}
+
+	svc_info->neg_vers.vlds_major = 0;
+	svc_info->neg_vers.vlds_minor = 0;
+	svc_info->state = VLDS_HDL_STATE_DISCONNECTED;
+
+	/*
+	 * if the service requires events,
+	 * add an event to the process's event_info queue
+	 */
+	if (VLDS_SVC_IS_EVENT(svc_info)) {
+		rv = vlds_add_event(svc_info->tgid, svc_info,
+		    VLDS_EVENT_TYPE_UNREG, NULL);
+		if (rv) {
+			/* just give an error if we failed to add the event */
+			pr_err("%s: failed to create unregistration event "
+			    "(%llx)\n", vlds->int_name, hdl);
+		}
+	}
+
+	dprintk("%s: service %s unregistered hdl=%llx\n",
+	    vlds->int_name, svc_info->name, hdl);
+
+	mutex_unlock(&vlds->vlds_mutex);
+
+}
+
+static void
+vlds_ds_data_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl, void *buf, size_t buflen)
+{
+	struct vlds_dev *vlds;
+	struct vlds_service_info *svc_info;
+	int rv;
+
+	dprintk("entered.\n");
+
+	vlds = (struct vlds_dev *)arg;
+
+	mutex_lock(&vlds->vlds_mutex);
+
+	svc_info = vlds_get_svc_info_hdl(vlds, hdl);
+	if (svc_info == NULL) {
+		dprintk("%s: received invalid handle (%llx)\n",
+		    vlds->int_name, hdl);
+		mutex_unlock(&vlds->vlds_mutex);
+		return;
+	}
+
+	/* received data is assumed to be 1 complete message */
+	rv = vlds_add_msg(svc_info, buf, buflen);
+	if (rv) {
+		if (rv == -ENOSPC)
+			dprintk("%s: service %s: message queue overflow!\n",
+			    vlds->int_name, svc_info->name);
+		else if (rv == -EFBIG)
+			dprintk("%s: service %s: message too large "
+			    "(%lu bytes)!\n", vlds->int_name, svc_info->name,
+			    buflen);
+		else
+ dprintk("%s: service %s: failed to add message " + "(err = %d)!\n", vlds->int_name, + svc_info->name, rv); + + mutex_unlock(&vlds->vlds_mutex); + + return; + } + + /* + * if the service requires events, + * add an event to the process's event_info queue + */ + if (VLDS_SVC_IS_EVENT(svc_info)) { + rv = vlds_add_event(svc_info->tgid, svc_info, + VLDS_EVENT_TYPE_DATA, NULL); + if (rv) { + /* just give an error if we failed to add the event */ + pr_err("%s: failed to create data event (%llx)\n", + vlds->int_name, hdl); + } + } + + dprintk("%s: %s service: Received %lu bytes hdl=%llx\n", + vlds->int_name, svc_info->name, buflen, hdl); + + mutex_unlock(&vlds->vlds_mutex); + +} + +static ds_ops_t vlds_ds_ops = { + vlds_ds_reg_cb, /* register */ + vlds_ds_unreg_cb, /* unregister */ + vlds_ds_data_cb, /* data */ + NULL /* optional arg to ops */ +}; + +static int vlds_svc_reg(struct vlds_dev *vlds, const void __user *uarg) +{ + + vlds_svc_reg_arg_t svc_reg; + vlds_cap_t cap; + char *svc_str; + bool is_client_reg; + ds_capability_t dscap; + u32 flags; + ds_svc_hdl_t ds_hdl; + int rv; + struct vlds_service_info *svc_info; + + dprintk("entered.\n"); + + svc_str = NULL; + svc_info = NULL; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&svc_reg, uarg, + sizeof(vlds_svc_reg_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate svc_reg.vlds_hdlp is present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)svc_reg.vlds_hdlp, + sizeof(u64))) { + rv = -EFAULT; + goto error_out1; + } + + if (copy_from_user(&cap, (const void __user *)svc_reg.vlds_capp, + sizeof(vlds_cap_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + /* make sure the service strlen is sane */ + if (cap.vlds_service.vlds_strlen == 0 || + cap.vlds_service.vlds_strlen > VLDS_MAX_NAMELEN) { + rv = -EINVAL; + goto error_out1; + } + + /* get the service string from userland */ + svc_str = kzalloc(cap.vlds_service.vlds_strlen + 1, GFP_KERNEL); + if (unlikely(svc_str == NULL)) { + rv = -ENOMEM; + goto error_out1; + } + + if (copy_from_user(svc_str, + (const void __user *)cap.vlds_service.vlds_strp, + cap.vlds_service.vlds_strlen) != 0) { + rv = -EFAULT; + goto error_out1; + } + + is_client_reg = (svc_reg.vlds_reg_flags & VLDS_REG_CLIENT); + + mutex_lock(&vlds->vlds_mutex); + + /* Check if the service is already being used */ + svc_info = vlds_get_svc_info(vlds, svc_str, is_client_reg); + if (svc_info != NULL) { + /* This service is already in use */ + rv = -EBUSY; + svc_info = NULL; + goto error_out2; + } + + /* init the ds capability structure */ + dscap.svc_id = svc_str; + dscap.vers.major = (u64)cap.vlds_vers.vlds_major; + dscap.vers.minor = (u64)cap.vlds_vers.vlds_minor; + + /* The svc_info will be passed back as an arg to the cb */ + vlds_ds_ops.cb_arg = (void *)vlds; + + flags = 0x0; + if (is_client_reg) + flags |= DS_CAP_IS_CLIENT; + else + flags |= DS_CAP_IS_PROVIDER; + + if (vlds != sp_vlds) + flags |= DS_TARGET_IS_DOMAIN; + + ds_hdl = 0; + rv = ds_cap_init(&dscap, &vlds_ds_ops, flags, vlds->domain_handle, + &ds_hdl); + if (rv || ds_hdl == 0) { + dprintk("%s: ds_cap_init failed for %s service\n", + vlds->int_name, svc_str); + goto error_out2; + } + + if (copy_to_user((void __user *)(svc_reg.vlds_hdlp), (u64 *)&ds_hdl, + sizeof(u64)) != 0) { + (void) ds_cap_fini(ds_hdl); + rv = -EFAULT; + goto error_out2; + } + + /* create a service info for the new service */ + svc_info = kzalloc(sizeof(struct vlds_service_info), GFP_KERNEL); + if (unlikely(svc_str == NULL)) { + (void) 
ds_cap_fini(ds_hdl); + rv = -ENOMEM; + goto error_out2; + } + + svc_info->name = svc_str; + svc_info->state = VLDS_HDL_STATE_NOT_YET_CONNECTED; + svc_info->flags = svc_reg.vlds_reg_flags; + svc_info->tgid = task_tgid_vnr(current); + svc_info->handle = (u64)ds_hdl; + INIT_LIST_HEAD(&svc_info->msg_queue); + svc_info->msg_queue_size = 0; + + /* add the service_info to the vlds device */ + list_add_tail(&svc_info->list, &vlds->service_info_list); + + dprintk("%s: registered %s service (client = %llu) " + "(hdl = %llx) (tgid = %u) with ds\n", vlds->int_name, svc_str, + VLDS_SVC_IS_CLIENT(svc_info), svc_info->handle, svc_info->tgid); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to register service rv = %d\n", vlds->int_name, rv); + + if (svc_info) + kfree(svc_info); + + if (svc_str) + kfree(svc_str); + + return rv; +} + +static int vlds_unreg_hdl(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_unreg_hdl_arg_t unreg; + struct vlds_service_info *svc_info; + int rv; + + dprintk("entered.\n"); + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&unreg, uarg, + sizeof(vlds_unreg_hdl_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + mutex_lock(&vlds->vlds_mutex); + + svc_info = vlds_get_svc_info_hdl(vlds, unreg.vlds_hdl); + if (svc_info == NULL) { + rv = -ENODEV; + goto error_out2; + } + + /* unregister the service from ds */ + rv = ds_cap_fini(unreg.vlds_hdl); + if (rv) { + dprintk("%s: ds_cap_fini failed for %s service ", + vlds->int_name, svc_info->name); + goto error_out2; + } + + dprintk("%s: unregistered %s service (client = %llu) " + "(hdl = %llx) with ds\n", vlds->int_name, svc_info->name, + VLDS_SVC_IS_CLIENT(svc_info), unreg.vlds_hdl); + + list_del(&svc_info->list); + + /* remove any events referencing this svc_info */ + vlds_remove_svc_events(svc_info); + + kfree(svc_info->name); + vlds_free_msg_queue(svc_info); + kfree(svc_info); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to unregister service rv = %d\n", + vlds->int_name, rv); + + return rv; +} + +static int vlds_hdl_lookup(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_hdl_lookup_arg_t hdl_lookup; + struct vlds_service_info *svc_info; + char *svc_str; + u64 num_hdls; + int rv; + + dprintk("entered.\n"); + + svc_str = NULL; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&hdl_lookup, uarg, + sizeof(vlds_hdl_lookup_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + /* we only support 1 return handle */ + if (hdl_lookup.vlds_maxhdls != 1) { + rv = -EINVAL; + goto error_out1; + } + + /* get the service string */ + + /* make sure the service strlen is sane */ + if (hdl_lookup.vlds_service.vlds_strlen == 0 || + hdl_lookup.vlds_service.vlds_strlen > VLDS_MAX_NAMELEN) { + rv = -EINVAL; + goto error_out1; + } + + /* get the service string from userland */ + svc_str = kzalloc(hdl_lookup.vlds_service.vlds_strlen + 1, GFP_KERNEL); + if (unlikely(svc_str == NULL)) { + rv = -ENOMEM; + goto error_out1; + } + + if (copy_from_user(svc_str, + (const void __user *)hdl_lookup.vlds_service.vlds_strp, + hdl_lookup.vlds_service.vlds_strlen) != 0) { + rv = -EFAULT; + goto error_out1; + } + + mutex_lock(&vlds->vlds_mutex); + + svc_info = vlds_get_svc_info(vlds, svc_str, hdl_lookup.vlds_isclient); + if (svc_info == NULL) { + rv = -ENODEV; + goto error_out2; + } + + if 
(copy_to_user((void __user *)(hdl_lookup.vlds_hdlsp), + &svc_info->handle, sizeof(u64)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + num_hdls = 1; + if (put_user(num_hdls, (u64 __user *)(hdl_lookup.vlds_nhdlsp)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + dprintk("%s: handle lookup for %s service (client = %llu) " + "returned (hdl = %llx)\n", vlds->int_name, svc_str, + hdl_lookup.vlds_isclient, svc_info->handle); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to lookup handle rv = %d\n", vlds->int_name, rv); + + if (svc_str) + kfree(svc_str); + + return rv; + +} + +static int vlds_dmn_lookup(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_dmn_lookup_arg_t dmn_lookup; + int rv; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&dmn_lookup, uarg, + sizeof(vlds_dmn_lookup_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + /* make sure the string buffer size is sane */ + if (dmn_lookup.vlds_dname.vlds_strlen < (strlen(vlds->int_name) + 1)) { + rv = -EINVAL; + goto error_out1; + } + + if (put_user(vlds->domain_handle, + (u64 __user *)(dmn_lookup.vlds_dhdlp)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + if (copy_to_user((void __user *)(dmn_lookup.vlds_dname.vlds_strp), + vlds->int_name, (strlen(vlds->int_name) + 1)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + return 0; + +error_out1: + + dprintk("%s: failed to lookup domain info. rv = %d\n", + vlds->int_name, rv); + + return rv; +} + +static int vlds_hdl_get_state(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_hdl_get_state_arg_t hdl_get_state; + struct vlds_service_info *svc_info; + vlds_hdl_state_t hdl_state; + int rv; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&hdl_get_state, uarg, + sizeof(vlds_hdl_get_state_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + mutex_lock(&vlds->vlds_mutex); + + svc_info = vlds_get_svc_info_hdl(vlds, hdl_get_state.vlds_hdl); + if (svc_info == NULL) { + rv = -ENODEV; + goto error_out2; + } + + memset(&hdl_state, 0, sizeof(hdl_state)); + hdl_state.state = svc_info->state; + /* if the state is connected, return the negotiated version */ + if (svc_info->state == VLDS_HDL_STATE_CONNECTED) { + hdl_state.vlds_vers.vlds_major = svc_info->neg_vers.vlds_major; + hdl_state.vlds_vers.vlds_minor = svc_info->neg_vers.vlds_minor; + } + + if (copy_to_user((void __user *)(hdl_get_state.vlds_statep), + &hdl_state, sizeof(vlds_hdl_state_t)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to get handle state rv = %d\n", vlds->int_name, rv); + + return rv; + +} + +static int vlds_send_msg(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_send_msg_arg_t send_msg; + struct vlds_service_info *svc_info; + u8 *send_buf; + int rv; + + dprintk("entered.\n"); + + send_buf = NULL; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&send_msg, uarg, + sizeof(vlds_send_msg_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + if (send_msg.vlds_buflen == 0 || + send_msg.vlds_buflen > VLDS_MAX_SENDBUF_LEN) { + rv = -EINVAL; + goto error_out1; + } + + mutex_lock(&vlds->vlds_mutex); + + svc_info = vlds_get_svc_info_hdl(vlds, send_msg.vlds_hdl); + if (svc_info == NULL) { + rv = -ENODEV; + goto error_out2; + } + + /* make sure we are in connected 
state before sending the data */ + if (svc_info->state != VLDS_HDL_STATE_CONNECTED) { + rv = -EIO; + goto error_out2; + } + + send_buf = kzalloc(send_msg.vlds_buflen, GFP_KERNEL); + if (unlikely(send_buf == NULL)) { + rv = -ENOMEM; + goto error_out2; + } + + if (copy_from_user(send_buf, (const void __user *)send_msg.vlds_bufp, + send_msg.vlds_buflen) != 0) { + rv = -EFAULT; + goto error_out2; + } + + rv = ds_cap_send(send_msg.vlds_hdl, send_buf, send_msg.vlds_buflen); + if (rv) { + + /* + * TBD: If rv == -EAGAIN, block here trying again in loop + * (unless NONBLOCK flag specified). + */ + dprintk("%s: ds_cap_send failed for %s service (rv=%d)\n", + vlds->int_name, svc_info->name, rv); + goto error_out2; + } + + kfree(send_buf); + + dprintk("%s: send msg hdl = %llx (buflen=%llu) SUCCESS\n", + vlds->int_name, send_msg.vlds_hdl, send_msg.vlds_buflen); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to send msg rv = %d\n", vlds->int_name, rv); + + if (send_buf != NULL) + kfree(send_buf); + + return rv; + +} + +static int vlds_recv_msg(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_recv_msg_arg_t recv_msg; + struct vlds_service_info *svc_info; + u8 *msg; + size_t msglen; + int rv; + struct vlds_msg_data *msg_data; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&recv_msg, uarg, + sizeof(vlds_recv_msg_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + if (recv_msg.vlds_buflen > VLDS_MAX_SENDBUF_LEN) { + rv = -EINVAL; + goto error_out1; + } + + mutex_lock(&vlds->vlds_mutex); + + svc_info = vlds_get_svc_info_hdl(vlds, recv_msg.vlds_hdl); + if (svc_info == NULL) { + rv = -ENODEV; + goto error_out2; + } + + msg_data = vlds_get_msg(svc_info); + if (msg_data == NULL) { + msg = NULL; + msglen = 0; + } else { + msg = msg_data->data; + msglen = msg_data->size; + } + + if (put_user(msglen, (u64 __user *)(recv_msg.vlds_msglenp)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + /* + * Special handling for a buflen of 0: if buflen is 0, we return + * the number of bytes for the next message in the queue. + * + * This is a mechanism for the caller to use to poll the queue + * to detect if a msg is ready to be received and to get the + * size of the next message so the appropriate sized buffer can + * be allocated to receive the msg. + */ + if (recv_msg.vlds_buflen == 0) { + + if (msglen > 0) + dprintk("%s: service %s: buflen==0 poll " + "returned %zu bytes\n", + vlds->int_name, svc_info->name, msglen); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + } + + /* + * We do not return truncated data. Return EFBIG error if + * supplied buffer is too small to hold the next message. 
+ */ + if (msglen > 0 && recv_msg.vlds_buflen < msglen) { + dprintk("%s: service %s: recv buffer too small for " + "next message (supplied buffer = %llu bytes, " + "next message = %lu bytes)\n", + vlds->int_name, svc_info->name, recv_msg.vlds_buflen, + msglen); + + rv = -EFBIG; + goto error_out2; + } + + if (msglen > 0) { + + if (copy_to_user((void __user *)(recv_msg.vlds_bufp), + msg, msglen) != 0) { + rv = -EFAULT; + goto error_out2; + } + + /* + * We successfully copied the data to user, + * so dequeue the message + */ + vlds_dequeue_msg(svc_info, msg_data); + + dprintk("%s: recv msg hdl = %llx (len=%lu) SUCCESS\n", + vlds->int_name, recv_msg.vlds_hdl, msglen); + } + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + dprintk("%s: failed to recv msg rv = %d\n", + vlds->int_name, rv); + + return rv; +} + +static int vlds_set_event_fd(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_set_event_fd_arg_t set_event_fd; + int rv; + pid_t tgid; + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&set_event_fd, uarg, + sizeof(vlds_set_event_fd_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + tgid = task_tgid_vnr(current); + + mutex_lock(&vlds_event_info_list_mutex); + + /* + * If there is already an event fd + * registered for this process, remove it. + */ + vlds_remove_event_info(tgid); + + rv = vlds_add_event_info(tgid, set_event_fd.fd); + + mutex_unlock(&vlds_event_info_list_mutex); + + if (rv) + goto error_out1; + + dprintk("%s: vlds_set_event_fd: SUCCESS\n", vlds->int_name); + + return 0; + + +error_out1: + + dprintk("%s: failed to set event fd: rv = %d\n", + vlds->int_name, rv); + + return rv; +} + +static int vlds_unset_event_fd(struct vlds_dev *vlds, const void __user *uarg) +{ + pid_t tgid; + + tgid = task_tgid_vnr(current); + + mutex_lock(&vlds_event_info_list_mutex); + + vlds_remove_event_info(tgid); + + mutex_unlock(&vlds_event_info_list_mutex); + + dprintk("%s: vlds_unset_event_fd: SUCCESS\n", vlds->int_name); + + return 0; + +} + +static int vlds_get_next_event(struct vlds_dev *vlds, const void __user *uarg) +{ + vlds_get_next_event_arg_t next_event; + struct vlds_event_info *event_info; + struct vlds_event *event; + struct vlds_msg_data *msg_data; + u8 *msg; + size_t msglen; + int rv; + + dprintk("called\n"); + + /* Get (and validate) userland args */ + if (uarg == NULL || copy_from_user(&next_event, uarg, + sizeof(vlds_get_next_event_arg_t)) != 0) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate next_event.vlds_hdlp is present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)next_event.vlds_hdlp, + sizeof(u64))) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate next_event.vlds_event_typep is present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)next_event.vlds_event_typep, + sizeof(u64))) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate next_event.neg_versp is present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)next_event.neg_versp, + sizeof(u64))) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate next_event.vlds_buflen is valid */ + if (next_event.vlds_buflen == 0 || + next_event.vlds_buflen > VLDS_MAX_SENDBUF_LEN) { + rv = -EINVAL; + goto error_out1; + } + + /* Validate next_event.vlds_bufp is present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)next_event.vlds_bufp, + next_event.vlds_buflen)) { + rv = -EFAULT; + goto error_out1; + } + + /* Validate next_event.vlds_msglenp is 
present/accessible */ + if (!access_ok(VERIFY_WRITE, (void __user *)next_event.vlds_msglenp, + sizeof(u64))) { + rv = -EFAULT; + goto error_out1; + } + + /* user arg is valid, get the next event */ + + mutex_lock(&vlds->vlds_mutex); + + mutex_lock(&vlds_event_info_list_mutex); + + + event_info = NULL; + rv = vlds_get_event_info(task_tgid_vnr(current), &event_info); + if (rv || event_info == NULL) { + /* + * Process didn't register an event fd! + * This is required to start receiving events. + */ + rv = -EIO; + goto error_out2; + } + + event = vlds_get_event(event_info); + if (event == NULL) { + /* + * No events left outstanding. Return -ENOENT (-2) + * to indicate no more events to process. + */ + rv = -ENOENT; + goto error_out2; + } + + /* populate the return event handle */ + if (put_user(event->svc_info->handle, + (u64 __user *)(next_event.vlds_hdlp)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + /* populate the return event type */ + if (put_user(event->type, (u64 __user *)(next_event.vlds_event_typep)) != 0) { + rv = -EFAULT; + goto error_out2; + } + + /* if it's a reg type event, populate the negotiated version */ + if (event->type == VLDS_EVENT_TYPE_REG) { + if (copy_to_user((void __user *)(next_event.neg_versp), + &event->neg_vers, sizeof(vlds_ver_t)) != 0) { + rv = -EFAULT; + goto error_out2; + } + } + + /* + * if it's a data type event, populate the data buffer + * with next message from the service + */ + if (event->type == VLDS_EVENT_TYPE_DATA) { + msg_data = vlds_get_msg(event->svc_info); + if (msg_data == NULL || msg_data->size == 0) { + rv = -EIO; + goto error_out2; + } + + msg = msg_data->data; + msglen = msg_data->size; + + if (next_event.vlds_buflen < msglen) { + dprintk("%s: service %s: recv buffer too small for " + "next message (supplied buffer = %llu bytes, " + "next message = %lu bytes)\n", + vlds->int_name, event->svc_info->name, + next_event.vlds_buflen, msglen); + + rv = -EFBIG; + goto error_out2; + } + + if (put_user(msglen, (u64 __user *)(next_event.vlds_msglenp)) + != 0) { + rv = -EFAULT; + goto error_out2; + } + + if (copy_to_user((void __user *)(next_event.vlds_bufp), + msg, msglen) != 0) { + rv = -EFAULT; + goto error_out2; + } + + /* we copied the data to user, so dequeue the message */ + vlds_dequeue_msg(event->svc_info, msg_data); + } + + /* We successfully transferred the event, remove it from the list */ + vlds_remove_event(event_info, event); + + mutex_unlock(&vlds_event_info_list_mutex); + + mutex_unlock(&vlds->vlds_mutex); + + return 0; + +error_out2: + + mutex_unlock(&vlds_event_info_list_mutex); + + mutex_unlock(&vlds->vlds_mutex); + +error_out1: + + if (rv != -ENOENT) + dprintk("%s: failed to get next event: rv = %d\n", + vlds->int_name, rv); + + return rv; +} + +static int vlds_fops_open(struct inode *inode, struct file *filp) +{ + + struct vlds_dev *vlds; + + dprintk("entered.\n"); + + /* + * We allow all opens on the device. We just need to + * tuck away the vlds device for subsequent fops. 
+ */ + vlds = container_of(inode->i_cdev, struct vlds_dev, cdev); + + filp->private_data = vlds; + + return 0; +} + +static void vlds_unreg_all(struct vlds_dev *vlds) +{ + + struct vlds_service_info *svc_info; + struct vlds_service_info *next; + + if (vlds == NULL) + return; + + mutex_lock(&vlds->vlds_mutex); + + list_for_each_entry_safe(svc_info, next, &vlds->service_info_list, + list) { + + (void) ds_cap_fini(svc_info->handle); + + dprintk("%s: unregistered %s service (client = %llu) " + "(hdl = %llx) with ds\n", vlds->int_name, + svc_info->name, VLDS_SVC_IS_CLIENT(svc_info), + svc_info->handle); + + list_del(&svc_info->list); + vlds_remove_svc_events(svc_info); + kfree(svc_info->name); + vlds_free_msg_queue(svc_info); + kfree(svc_info); + + } + + mutex_unlock(&vlds->vlds_mutex); + +} + +static void vlds_unreg_all_tgid(struct vlds_dev *vlds, pid_t tgid) +{ + + struct vlds_service_info *svc_info; + struct vlds_service_info *next; + + mutex_lock(&vlds->vlds_mutex); + + list_for_each_entry_safe(svc_info, next, &vlds->service_info_list, + list) { + + if (svc_info->tgid == tgid) { + + (void) ds_cap_fini(svc_info->handle); + + dprintk("%s: unregistered %s service " + "(client = %llu) (hdl = %llx) with ds\n", + vlds->int_name, svc_info->name, + VLDS_SVC_IS_CLIENT(svc_info), svc_info->handle); + + list_del(&svc_info->list); + + kfree(svc_info->name); + vlds_free_msg_queue(svc_info); + kfree(svc_info); + } + + } + + mutex_unlock(&vlds->vlds_mutex); + +} + +static int vlds_fops_release(struct inode *inode, struct file *filp) +{ + struct vlds_dev *vlds; + pid_t tgid; + + dprintk("entered.\n"); + + if (filp == NULL) + return -EINVAL; + + vlds = filp->private_data; + + if (vlds == NULL) { + /* This should not happen, but... */ + pr_err("vlds_fops_release: ERROR- failed to get " + "associated vlds_dev\n"); + return 0; + } + + tgid = task_tgid_vnr(current); + + dprintk("%s: unregistering all events and services for tgid = %u\n", + vlds->int_name, tgid); + + /* Remove all events queued for this tgid */ + mutex_lock(&vlds_event_info_list_mutex); + + vlds_remove_event_info(tgid); + + mutex_unlock(&vlds_event_info_list_mutex); + + /* Close all services used by this process */ + vlds_unreg_all_tgid(vlds, tgid); + + return 0; +} + +static long vlds_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct vlds_dev *vlds; + int rv; + + rv = 0; + + vlds = filp->private_data; + + switch (cmd) { + + case VLDS_IOCTL_SVC_REG: + + rv = vlds_svc_reg(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_UNREG_HDL: + + rv = vlds_unreg_hdl(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_HDL_LOOKUP: + + rv = vlds_hdl_lookup(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_DMN_LOOKUP: + + rv = vlds_dmn_lookup(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_SEND_MSG: + + rv = vlds_send_msg(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_RECV_MSG: + + rv = vlds_recv_msg(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_HDL_GET_STATE: + + rv = vlds_hdl_get_state(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_SET_EVENT_FD: + + rv = vlds_set_event_fd(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_UNSET_EVENT_FD: + + rv = vlds_unset_event_fd(vlds, (const void __user *)arg); + + break; + + case VLDS_IOCTL_GET_NEXT_EVENT: + + rv = vlds_get_next_event(vlds, (const void __user *)arg); + + break; + + default: + + return -EINVAL; + } + + return rv; +} + +static const struct file_operations 
vlds_fops = {
+	.owner = THIS_MODULE,
+	.open = vlds_fops_open,
+	.release = vlds_fops_release,
+	.unlocked_ioctl = vlds_fops_ioctl,
+};
+
+static int vlds_get_next_avail_minor(void)
+{
+	struct vlds_dev *vlds;
+	bool found;
+	int i;
+
+	/*
+	 * walk the vlds_dev_list list to find the next
+	 * lowest available minor.
+	 */
+	mutex_lock(&vlds_data_mutex);
+	for (i = VLDS_MINOR_BASE; i < VLDS_MAX_DEVS; i++) {
+		found = false;
+		list_for_each_entry(vlds, &vlds_data.vlds_dev_list, list) {
+			if (i == MINOR(vlds->devt)) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			/* found a free minor, use it */
+			break;
+		}
+	}
+	mutex_unlock(&vlds_data_mutex);
+
+	if (i == VLDS_MAX_DEVS) {
+		dprintk("no more minors left for allocation!\n");
+		return -1;
+	}
+
+	return i;
+}
+
+static int vlds_alloc_vlds_dev(char *int_name, char *dev_name,
+	struct device *vdev_dev, const u64 domain_handle,
+	struct vlds_dev **vldsp)
+{
+	struct vlds_dev *vlds;
+	int rv;
+	dev_t devt;
+	struct device *device;
+	int next_minor;
+	unsigned char devt_buf[32];
+
+	dprintk("entered.\n");
+
+	devt = 0;
+	device = NULL;
+
+	vlds = kzalloc(sizeof(struct vlds_dev), GFP_KERNEL);
+	if (unlikely(vlds == NULL)) {
+		dprintk("failed to allocate vlds_dev\n");
+		rv = -ENOMEM;
+		goto error;
+	}
+
+	vlds->domain_handle = domain_handle;
+
+	mutex_init(&vlds->vlds_mutex);
+
+	INIT_LIST_HEAD(&vlds->service_info_list);
+
+	vlds->int_name = kmemdup(int_name, (strlen(int_name) + 1), GFP_KERNEL);
+	if (unlikely(vlds->int_name == NULL)) {
+		dprintk("failed to alloc vlds int name.\n");
+		rv = -ENOMEM;
+		goto error;
+	}
+
+	/* create the devt for this device */
+	next_minor = vlds_get_next_avail_minor();
+	if (next_minor == -1) {
+		dprintk("vlds_get_next_avail_minor() failed.\n");
+		rv = -ENXIO;
+		goto error;
+	}
+	devt = MKDEV(MAJOR(vlds_data.devt), next_minor);
+	vlds->devt = devt;
+
+	dprintk("%s: dev_t=%s\n", vlds->int_name, format_dev_t(devt_buf,
+	    vlds->devt));
+	dprintk("%s: domain_handle = %llu\n", vlds->int_name, domain_handle);
+
+	/* create/add the associated cdev */
+	cdev_init(&vlds->cdev, &vlds_fops);
+	vlds->cdev.owner = THIS_MODULE;
+	rv = cdev_add(&vlds->cdev, devt, 1);
+	if (rv != 0) {
+		dprintk("cdev_add() failed.\n");
+		devt = 0;
+		goto error;
+	}
+
+	/* create the associated /sys and /dev entries */
+	device = device_create(vlds_data.chrdev_class, vdev_dev, devt,
+	    vlds, "%s", dev_name);
+	if (IS_ERR(device)) {
+		dprintk("device_create() failed.\n");
+		rv = PTR_ERR(device);
+		device = NULL;
+		goto error;
+	}
+
+	vlds->device = device;
+
+	/* add the vlds to the global vlds_data device list */
+	mutex_lock(&vlds_data_mutex);
+	list_add_tail(&vlds->list, &vlds_data.vlds_dev_list);
+	vlds_data.num_vlds_dev_list++;
+	mutex_unlock(&vlds_data_mutex);
+
+	if (vldsp != NULL)
+		*vldsp = vlds;
+
+	return 0;
+
+error:
+
+	if (device)
+		device_destroy(vlds_data.chrdev_class, devt);
+
+	if (devt)
+		cdev_del(&vlds->cdev);
+
+	if (vlds != NULL) {
+		/* kfree() is NULL-safe, but vlds itself may not be */
+		kfree(vlds->int_name);
+		mutex_destroy(&vlds->vlds_mutex);
+		kfree(vlds);
+	}
+
+	dprintk("dev alloc failed (rv=%d)\n", rv);
+
+	return rv;
+}
+
+static int vlds_probe(struct vio_dev *vdev, const struct vio_device_id *vio_did)
+{
+	struct vlds_dev *vlds;
+	struct mdesc_handle *hp;
+	const u64 *id;
+	const char *name;
+	const u64 *dom_handle;
+	int name_len;
+	char int_name_buf[DS_MAX_DOM_NAME_LEN + 1];
+	char dev_name_buf[VLDS_PATH_MAX];
+	u64 node;
+	int rv;
+
+	dprintk("entered.\n");
+
+	rv = 0;
+
+	hp = mdesc_grab();
+
+	node = vio_vdev_node(hp, vdev);
+	if (node == MDESC_NODE_NULL) {
+		dprintk("Failed to get vdev MD node.\n");
+		mdesc_release(hp);
+		rv = -ENXIO;
+		goto error;
+	}
+
+	id = mdesc_get_property(hp, node, "id", NULL);
+	if (id == NULL) {
+		dprintk("failed to get id property.\n");
+		mdesc_release(hp);
+		rv = -ENXIO;
+		goto error;
+	}
+
+	dom_handle = mdesc_get_property(hp, node,
+	    "vlds-remote-domain-handle", NULL);
+	if (dom_handle == NULL) {
+		dprintk("failed to get vlds-remote-domain-handle property.\n");
+		mdesc_release(hp);
+		rv = -ENXIO;
+		goto error;
+	}
+
+	/* get the name of the ldom this vlds-port refers to */
+	name = mdesc_get_property(hp, node, "vlds-remote-domain-name",
+	    &name_len);
+	if (name == NULL) {
+		dprintk("failed to get vlds-remote-domain-name property.\n");
+		mdesc_release(hp);
+		rv = -ENXIO;
+		goto error;
+	}
+
+	mdesc_release(hp);
+
+	/* sanity check - should never happen */
+	if (name_len > DS_MAX_DOM_NAME_LEN) {
+		rv = -ENXIO;
+		goto error;
+	}
+
+	/* create the (NULL-terminated) internal name */
+	memcpy(int_name_buf, name, name_len);
+	int_name_buf[name_len] = '\0';
+
+	/* create the /dev name */
+	(void) scnprintf(dev_name_buf, VLDS_PATH_MAX, "%s%llu",
+	    VLDS_DEV_DOMAIN_FILENAME_TAG, *dom_handle);
+
+	rv = vlds_alloc_vlds_dev(int_name_buf, dev_name_buf, &vdev->dev,
+	    *dom_handle, &vlds);
+	if (rv != 0)
+		goto error;
+
+	dev_set_drvdata(&vdev->dev, vlds);
+
+	dprintk("%s: Probe successful: cfg_handle=%llu, id=%llu\n",
+	    vlds->int_name, vdev->dev_no, *id);
+
+	return 0;
+
+error:
+
+	dprintk("probe failed (rv=%d)\n", rv);
+
+	return rv;
+}
+
+static int vlds_free_vlds_dev(struct vlds_dev *vlds)
+{
+
+	if (vlds == NULL)
+		return -ENXIO;
+
+	dprintk("entered. (%s)\n", vlds->int_name);
+
+	/* Unregister all the services associated with this vlds. */
+	vlds_unreg_all(vlds);
+
+	mutex_lock(&vlds_data_mutex);
+	list_del(&vlds->list);
+	vlds_data.num_vlds_dev_list--;
+	mutex_unlock(&vlds_data_mutex);
+
+	device_destroy(vlds_data.chrdev_class, vlds->devt);
+	cdev_del(&vlds->cdev);
+	kfree(vlds->int_name);
+	mutex_destroy(&vlds->vlds_mutex);
+	kfree(vlds);
+
+	return 0;
+}
+
+static int vlds_remove(struct vio_dev *vdev)
+{
+	int rv;
+	struct vlds_dev *vlds;
+
+	dprintk("entered.\n");
+
+	vlds = dev_get_drvdata(&vdev->dev);
+
+	if (vlds == NULL) {
+		dprintk("failed to get vlds_dev from vio_dev.\n");
+		rv = -ENXIO;
+	} else {
+		dprintk("removing (%s)\n", vlds->int_name);
+		rv = vlds_free_vlds_dev(vlds);
+	}
+
+	return rv;
+}
+
+static const struct vio_device_id vlds_match[] = {
+	{
+		.type = "vlds-port",
+	},
+	{},
+};
+
+static char *vlds_devnode(struct device *dev, umode_t *mode)
+{
+	if (mode != NULL)
+		*mode = 0600;
+
+	return kasprintf(GFP_KERNEL, "vlds/%s", dev_name(dev));
+}
+
+static struct vio_driver vlds_driver = {
+	.id_table = vlds_match,
+	.probe = vlds_probe,
+	.remove = vlds_remove,
+	.name = VLDS_DEVICE_NAME,
+	.no_irq = true,
+};
+
+static int __init vlds_init(void)
+{
+	int rv;
+	unsigned char devt_buf[32];
+
+	/* set the default ldoms debug level */
+	vldsdbg_level = ldoms_debug_level;
+
+	dprintk("entered.
(DEBUG enabled)\n"); + + dprintk("%s", driver_version); + + INIT_LIST_HEAD(&vlds_data.vlds_dev_list); + vlds_data.num_vlds_dev_list = 0; + + INIT_LIST_HEAD(&vlds_event_info_list); + + rv = alloc_chrdev_region(&vlds_data.devt, VLDS_MINOR_BASE, + VLDS_MAX_DEVS, VLDS_DEVICE_NAME); + if (rv < 0) { + dprintk("alloc_chrdev_region failed: %d\n", rv); + return rv; + } + + if (vlds_data.devt == (dev_t)0) { + dprintk("alloc_chrdev_region failed: (vlds_data.devt == 0)\n"); + rv = -ENXIO; + return rv; + } + + dprintk("dev_t allocated = %s\n", + format_dev_t(devt_buf, vlds_data.devt)); + + vlds_data.chrdev_class = class_create(THIS_MODULE, VLDS_DEVICE_NAME); + if (IS_ERR(vlds_data.chrdev_class)) { + rv = PTR_ERR(vlds_data.chrdev_class); + dprintk("class_create() failed: %d\n", rv); + vlds_data.chrdev_class = NULL; + goto error; + } + + /* set callback to create devices under /dev/ds directory */ + vlds_data.chrdev_class->devnode = vlds_devnode; + + /* + * Add a device for the SP directly since there is no + * vlds-port MD node for the SP and we need one to provide + * access to SP domain services. + */ + rv = vlds_alloc_vlds_dev(VLDS_SP_INT_NAME, VLDS_SP_DEV_NAME, + NULL, VLDS_INVALID_HANDLE, &sp_vlds); + if (rv != 0) + dprintk("Failed to create SP vlds device (%d)\n", rv); + + rv = vio_register_driver(&vlds_driver); + if (rv != 0) { + dprintk("vio_register_driver() failed: %d\n", rv); + goto error; + } + + return 0; + +error: + if (vlds_data.chrdev_class) + class_destroy(vlds_data.chrdev_class); + + if (vlds_data.devt) + unregister_chrdev_region(vlds_data.devt, VLDS_MAX_DEVS); + + return rv; +} + +static void __exit vlds_exit(void) +{ + + dprintk("entered.\n"); + + /* remove the SP vlds */ + vlds_free_vlds_dev(sp_vlds); + + /* + * Note - vio_unregister_driver() will invoke a call to + * vlds_remove() for every successfully probed device. 
+ */ + vio_unregister_driver(&vlds_driver); + + if (vlds_data.chrdev_class) + class_destroy(vlds_data.chrdev_class); + + if (vlds_data.devt) + unregister_chrdev_region(vlds_data.devt, VLDS_MAX_DEVS); +} + +module_init(vlds_init); +module_exit(vlds_exit); + +MODULE_AUTHOR("Oracle"); +MODULE_DESCRIPTION("Sun4v LDOMs Virtual Domain Services Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 53fe200e0b79..0a8156db81cc 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1918,19 +1918,27 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) struct vnet *vp; const u64 *rmac; int len, i, err, switch_port; + u64 node; print_version(); hp = mdesc_grab(); - vp = vnet_find_parent(hp, vdev->mp); + node = vio_vdev_node(hp, vdev); + if (node == MDESC_NODE_NULL) { + pr_err("Failed to get vdev MD node.\n"); + err = -ENXIO; + goto err_out_put_mdesc; + } + + vp = vnet_find_parent(hp, node); if (IS_ERR(vp)) { pr_err("Cannot find port parent vnet\n"); err = PTR_ERR(vp); goto err_out_put_mdesc; } - rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); + rmac = mdesc_get_property(hp, node, remote_macaddr_prop, &len); err = -ENODEV; if (!rmac) { pr_err("Port lacks %s property\n", remote_macaddr_prop); @@ -1939,8 +1947,10 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) port = kzalloc(sizeof(*port), GFP_KERNEL); err = -ENOMEM; - if (!port) + if (!port) { + pr_err("Cannot allocate vnet_port\n"); goto err_out_put_mdesc; + } for (i = 0; i < ETH_ALEN; i++) port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff; @@ -1963,7 +1973,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) INIT_LIST_HEAD(&port->list); switch_port = 0; - if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL) + if (mdesc_get_property(hp, node, "switch-port", NULL) != NULL) switch_port = 1; port->switch_port = switch_port; port->tso = true; diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index c01f45095877..1f2eeb5ffe97 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -465,5 +465,10 @@ config MIPS_EJTAG_FDC_KGDB_CHAN default 3 help FDC channel number to use for KGDB. +config VCC + tristate "Sun Virtual Console Concentrator" + depends on SUN_LDOMS + help + Support for Sun logical domain consoles. endif # TTY diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 5817e2397463..6712e9248f68 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -30,5 +30,6 @@ obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o obj-$(CONFIG_GOLDFISH_TTY) += goldfish.o obj-$(CONFIG_DA_TTY) += metag_da.o obj-$(CONFIG_MIPS_EJTAG_FDC_TTY) += mips_ejtag_fdc.o +obj-$(CONFIG_VCC) += vcc.o obj-y += ipwireless/ diff --git a/drivers/tty/vcc.c b/drivers/tty/vcc.c new file mode 100644 index 000000000000..e1c90ea5ee60 --- /dev/null +++ b/drivers/tty/vcc.c @@ -0,0 +1,879 @@ +/* + * vcc.c: sun4v virtual channel concentrator + * + * Copyright (C) 2014 Oracle. All rights reserved. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define DRV_MODULE_NAME		"vcc"
+#define DRV_MODULE_VERSION	"1.0"
+#define DRV_MODULE_RELDATE	"July 20, 2014"
+
+static char version[] =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_DESCRIPTION("Sun LDOM virtual console concentrator driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+struct vcc {
+	struct tty_port port;	/* must be first element */
+	spinlock_t lock;
+	char *domain;
+
+	/*
+	 * This buffer is required to support the tty write_room interface
+	 * and guarantee that any characters that the driver accepts will
+	 * be eventually sent, either immediately or later.
+	 */
+	int chars_in_buffer;
+	struct vio_vcc buffer;
+
+	struct timer_list rx_timer;
+	struct timer_list tx_timer;
+	struct vio_driver_state vio;
+};
+
+#define VCC_MAX_PORTS	256
+#define VCC_MINOR_START	0
+#define VCC_BUFF_LEN	VIO_VCC_MTU_SIZE
+
+#define VCC_CTL_BREAK	-1
+#define VCC_CTL_HUP	-2
+
+#define TIMER_SET(v, x, t)	((v)->x##_timer.expires = (t))
+#define TIMER_CLEAR(v, x)	((v)->x##_timer.expires = 0)
+#define TIMER_ACTIVE(v, x)	((v)->x##_timer.expires)
+
+static const char vcc_driver_name[] = "vcc";
+static const char vcc_device_node[] = "vcc";
+static struct tty_driver *vcc_tty_driver;
+
+unsigned int vcc_dbg;
+unsigned int vcc_dbg_ldc;
+unsigned int vcc_dbg_vio;
+
+module_param(vcc_dbg, uint, 0664);
+module_param(vcc_dbg_ldc, uint, 0664);
+module_param(vcc_dbg_vio, uint, 0664);
+
+#define VCC_DBG_DRV	0x1
+#define VCC_DBG_LDC	0x2
+#define VCC_DBG_PKT	0x4
+
+#define vccdbg(f, a...) \
+	do { \
+		if (vcc_dbg & VCC_DBG_DRV) \
+			pr_info(f, ## a); \
+	} while (0) \
+
+#define vccdbgl(l) \
+	do { \
+		if (vcc_dbg & VCC_DBG_LDC) \
+			ldc_print(l); \
+	} while (0) \
+
+#define vccdbgp(pkt) \
+	do { \
+		if (vcc_dbg & VCC_DBG_PKT) { \
+			int i; \
+			for (i = 0; i < pkt.tag.stype; i++) \
+				pr_info("[%c]", pkt.data[i]); \
+		} \
+	} while (0) \
+
+/*
+ * xxx Be careful when adding flags to this line discipline. Don't add anything
+ * that will cause echoing or we'll go into recursive loop echoing chars back
+ * and forth with the console drivers.
+ */
+static struct ktermios vcc_tty_termios = {
+	.c_iflag = IGNBRK | IGNPAR,
+	.c_oflag = OPOST,
+	.c_cflag = B38400 | CS8 | CREAD | HUPCL,
+	.c_cc = INIT_C_CC,
+	.c_ispeed = 38400,
+	.c_ospeed = 38400
+};
+
+static void vcc_kick_rx(struct vcc *vcc)
+{
+	struct vio_driver_state *vio = &vcc->vio;
+
+	vccdbg("%s\n", __func__);
+
+	assert_spin_locked(&vcc->lock);
+
+	if (TIMER_ACTIVE(vcc, rx))
+		return;
+
+	/*
+	 * Disable interrupts until we can read the data again.
+	 */
+	ldc_disable_hv_intr(vio->lp);
+
+	TIMER_SET(vcc, rx, jiffies + 1);
+	add_timer(&vcc->rx_timer);
+}
+
+static void vcc_kick_tx(struct vcc *vcc)
+{
+	vccdbg("%s\n", __func__);
+
+	assert_spin_locked(&vcc->lock);
+
+	if (TIMER_ACTIVE(vcc, tx))
+		return;
+
+	TIMER_SET(vcc, tx, jiffies + 1);
+	add_timer(&vcc->tx_timer);
+}
+
+static int vcc_rx_check(struct tty_struct *tty, int size)
+{
+	BUG_ON(!tty);
+
+	/*
+	 * tty_buffer_request_room eventually calls kmalloc with GFP_ATOMIC
+	 * so it won't sleep.
+	 */
+	if (test_bit(TTY_THROTTLED, &tty->flags) ||
+	    tty_buffer_request_room(tty->port, VCC_BUFF_LEN) < VCC_BUFF_LEN)
+		return 0;
+
+	return 1;
+}
+
+static int vcc_rx(struct tty_struct *tty, char *buf, int size)
+{
+	int len;
+
+	BUG_ON(!tty);
+
+	/*
+	 * tty_insert_flip_string... calls __tty_buffer_request_room.
+ */ + len = tty_insert_flip_string(tty->port, buf, size); + + /* This is synch because tty->low_latency == 1 */ + if (len) + tty_flip_buffer_push(tty->port); + + vccdbg("%s: rv=%d\n", __func__, len); + + return len; +} + +static int vcc_ldc_read(struct vcc *vcc) +{ + struct vio_driver_state *vio = &vcc->vio; + struct tty_struct *tty; + struct vio_vcc pkt; + int rv = 0; + vccdbg("%s\n", __func__); + + tty = vcc->port.tty; + if (!tty) { + rv = ldc_rx_reset(vio->lp); + vccdbg("%s: reset rx q: rv=%d\n", __func__, rv); + goto done; + } + + /* + * Read as long as the LDC has incoming data. + * xxx Since we read in interrupt context, should we defer to + * a lower IRQ level? + */ + while (1) { + if (!vcc_rx_check(tty, VIO_VCC_MTU_SIZE)) { + vcc_kick_rx(vcc); + break; + } + vccdbgl(vio->lp); + rv = ldc_read(vio->lp, &pkt, sizeof(pkt)); + if (rv <= 0) + break; + + vccdbg("%s: ldc_read()=%d\n", __func__, rv); + vccdbg("TAG [%02x:%02x:%04x:%08x]\n", + pkt.tag.type, + pkt.tag.stype, + pkt.tag.stype_env, + pkt.tag.sid); + + if (pkt.tag.type == VIO_TYPE_DATA) { + /* + * We called vcc_rx_check before which should allocate + * space so this should not fail. + */ + vccdbgp(pkt); + vcc_rx(tty, pkt.data, pkt.tag.stype); + } else { + pr_err("%s: unknown msg [%02x:%02x:%04x:%08x]\n", + __func__, pkt.tag.type, pkt.tag.stype, + pkt.tag.stype_env, pkt.tag.sid); + + rv = -ECONNRESET; + break; + } + BUG_ON(rv != LDC_PACKET_SIZE); + } +done: + vccdbg("%s: rv=%d\n", __func__, rv); + return rv; +} + +static void vcc_rx_timer(unsigned long arg) +{ + struct vcc *vcc = (struct vcc *)arg; + struct vio_driver_state *vio = &vcc->vio; + unsigned long flags; + int rv; + + vccdbg("%s\n", __func__); + spin_lock_irqsave(&vcc->lock, flags); + TIMER_CLEAR(vcc, rx); + + /* + * Re-enable interrupts. + */ + ldc_enable_hv_intr(vio->lp); + + rv = vcc_ldc_read(vcc); + if (rv < 0) { + struct vio_driver_state *vio = &vcc->vio; + + if (rv == -ECONNRESET) + vio_conn_reset(vio); /* xxx noop */ + } + spin_unlock_irqrestore(&vcc->lock, flags); + vccdbg("%s done\n", __func__); +} + +static void vcc_tx_timer(unsigned long arg) +{ + struct vcc *vcc = (struct vcc *)arg; + struct vio_vcc *pkt; + unsigned long flags; + int tosend = 0; + int rv; + + vccdbg("%s\n", __func__); + if (!vcc) { + pr_err("%s: vcc not found\n", __func__); + return; + } + + spin_lock_irqsave(&vcc->lock, flags); + TIMER_CLEAR(vcc, tx); + + tosend = min(VCC_BUFF_LEN, vcc->chars_in_buffer); + if (!tosend) + goto done; + + pkt = &vcc->buffer; + pkt->tag.type = VIO_TYPE_DATA; + pkt->tag.stype = tosend; + vccdbgl(vcc->vio.lp); + + /* won't send partial writes */ + rv = ldc_write(vcc->vio.lp, pkt, VIO_TAG_SIZE + tosend); + BUG_ON(!rv); + + if (rv < 0) { + vccdbg("%s: ldc_write()=%d\n", __func__, rv); + vcc_kick_tx(vcc); + } else { + struct tty_struct *tty = vcc->port.tty; + + vcc->chars_in_buffer = 0; + + /* + * We are still obligated to deliver the data to the + * hypervisor even if the tty has been closed because + * we committed to delivering it. But don't try to wake + * a non-existent tty. 
+	 */
+		if (tty)
+			tty_wakeup(tty);
+	}
+done:
+	spin_unlock_irqrestore(&vcc->lock, flags);
+	vccdbg("%s done\n", __func__);
+}
+
+static void vcc_event(void *arg, int event)
+{
+	struct vcc *vcc = arg;
+	struct vio_driver_state *vio = &vcc->vio;
+	unsigned long flags;
+	int rv;
+
+	vccdbg("%s(%d)\n", __func__, event);
+	spin_lock_irqsave(&vcc->lock, flags);
+
+	if (event == LDC_EVENT_RESET || event == LDC_EVENT_UP) {
+		vio_link_state_change(vio, event);
+		spin_unlock_irqrestore(&vcc->lock, flags);
+		return;
+	}
+
+	if (event != LDC_EVENT_DATA_READY) {
+		pr_err("%s: unexpected LDC event %d\n", __func__, event);
+		spin_unlock_irqrestore(&vcc->lock, flags);
+		return;
+	}
+
+	rv = vcc_ldc_read(vcc);
+	if (rv < 0) {
+		if (rv == -ECONNRESET)
+			vio_conn_reset(vio);	/* xxx noop */
+	}
+	spin_unlock_irqrestore(&vcc->lock, flags);
+}
+
+static struct ldc_channel_config vcc_ldc_cfg = {
+	.event = vcc_event,
+	.mtu = VIO_VCC_MTU_SIZE,
+	.mode = LDC_MODE_RAW,
+	.debug = 0,
+};
+
+/* Ordered from largest major to lowest */
+static struct vio_version vcc_versions[] = {
+	{ .major = 1, .minor = 0 },
+};
+
+static struct tty_port_operations vcc_port_ops = { 0 };
+
+static ssize_t vcc_sysfs_domain_show(struct device *device,
+	struct device_attribute *attr, char *buf)
+{
+	int rv;
+	unsigned long flags;
+	struct vcc *vcc = dev_get_drvdata(device);
+
+	spin_lock_irqsave(&vcc->lock, flags);
+	rv = scnprintf(buf, PAGE_SIZE, "%s\n", vcc->domain);
+	spin_unlock_irqrestore(&vcc->lock, flags);
+
+	return rv;
+}
+
+static int vcc_send_ctl(struct vcc *vcc, int ctl)
+{
+	int rv;
+	struct vio_vcc pkt;
+
+	pkt.tag.type = VIO_TYPE_CTRL;
+	pkt.tag.sid = ctl;	/* ctrl_msg */
+	pkt.tag.stype = 0;	/* size */
+
+	rv = ldc_write(vcc->vio.lp, &pkt, sizeof(pkt.tag));
+	BUG_ON(!rv);
+	vccdbg("%s: ldc_write(%ld)=%d\n", __func__, sizeof(pkt.tag), rv);
+
+	return rv;
+}
+
+static ssize_t vcc_sysfs_break_store(struct device *device,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int rv = count;
+	int brk;
+	unsigned long flags;
+	struct vcc *vcc = dev_get_drvdata(device);
+
+	spin_lock_irqsave(&vcc->lock, flags);
+
+	if (sscanf(buf, "%ud", &brk) != 1 || brk != 1)
+		rv = -EINVAL;
+	else if (vcc_send_ctl(vcc, VCC_CTL_BREAK) < 0)
+		vcc_kick_tx(vcc);
+
+	spin_unlock_irqrestore(&vcc->lock, flags);
+
+	return rv;
+}
+
+static DEVICE_ATTR(domain, S_IRUSR, vcc_sysfs_domain_show, NULL);
+static DEVICE_ATTR(break, S_IWUSR, NULL, vcc_sysfs_break_store);
+
+static struct attribute *vcc_sysfs_entries[] = {
+	&dev_attr_domain.attr,
+	&dev_attr_break.attr,
+	NULL
+};
+
+static struct attribute_group vcc_attribute_group = {
+	.name = NULL,	/* put in device directory */
+	.attrs = vcc_sysfs_entries,
+};
+
+static void print_version(void)
+{
+	printk_once(KERN_INFO "%s", version);
+}
+
+static int vcc_probe(struct vio_dev *vdev,
+		     const struct vio_device_id *id)
+{
+	int rv;
+	char *name;
+	const char *domain;
+	struct vcc *vcc;
+	struct device *dev;
+	struct mdesc_handle *hp;
+	u64 node;
+
+	print_version();
+
+	vccdbg("%s: name=%s port=%ld\n", __func__, dev_name(&vdev->dev),
+	    vdev->port_id);
+
+	if (vdev->port_id >= VCC_MAX_PORTS)
+		return -ENXIO;
+
+	if (!vcc_tty_driver) {
+		pr_err("%s: vcc tty driver not registered\n", __func__);
+		return -ENODEV;
+	}
+
+	vcc = kzalloc(sizeof(*vcc), GFP_KERNEL);
+	if (!vcc) {
+		pr_err("%s: cannot allocate vcc\n", __func__);
+		return -ENOMEM;
+	}
+
+	name = kstrdup(dev_name(&vdev->dev), GFP_KERNEL);
+	if (!name) {
+		rv = -ENOMEM;
+		goto free_port;
+	}
+
+	rv = vio_driver_init(&vcc->vio, vdev, VDEV_CONSOLE_CON,
+			     vcc_versions, ARRAY_SIZE(vcc_versions),
+			     NULL, name);
+	if (rv)
+		goto free_port;
+
+	vcc->vio.debug = vcc_dbg_vio;
+	vcc_ldc_cfg.debug = vcc_dbg_ldc;
+
+	rv = vio_ldc_alloc(&vcc->vio, &vcc_ldc_cfg, vcc);
+	if (rv)
+		goto free_port;
+
+	tty_port_init(&vcc->port);
+	spin_lock_init(&vcc->lock);
+	vcc->port.ops = &vcc_port_ops;
+
+	dev = tty_port_register_device(&vcc->port, vcc_tty_driver,
+	    vdev->port_id, &vdev->dev);
+	if (IS_ERR(dev)) {
+		rv = PTR_ERR(dev);
+		goto free_ldc;
+	}
+
+	hp = mdesc_grab();
+
+	node = vio_vdev_node(hp, vdev);
+	if (node == MDESC_NODE_NULL) {
+		rv = -ENXIO;
+		mdesc_release(hp);
+		goto unreg_tty;
+	}
+
+	domain = mdesc_get_property(hp, node, "vcc-domain-name", NULL);
+	if (!domain) {
+		rv = -ENXIO;
+		mdesc_release(hp);
+		goto unreg_tty;
+	}
+	vcc->domain = kstrdup(domain, GFP_KERNEL);
+
+	mdesc_release(hp);
+
+	if (!vcc->domain) {
+		rv = -ENOMEM;
+		goto unreg_tty;
+	}
+
+	rv = sysfs_create_group(&vdev->dev.kobj, &vcc_attribute_group);
+	if (rv)
+		goto free_domain;
+
+	init_timer(&vcc->rx_timer);
+	vcc->rx_timer.function = vcc_rx_timer;
+	vcc->rx_timer.data = (unsigned long)vcc;
+
+	init_timer(&vcc->tx_timer);
+	vcc->tx_timer.function = vcc_tx_timer;
+	vcc->tx_timer.data = (unsigned long)vcc;
+
+	dev_set_drvdata(&vdev->dev, vcc);
+
+	/*
+	 * Disable interrupts before the port is up.
+	 *
+	 * We can get an interrupt during vio_port_up() -> ldc_bind().
+	 * vio_port_up() grabs the vio->lock beforehand so we cannot
+	 * grab it in vcc_event().
+	 *
+	 * Once the port is up and the lock released, we can field
+	 * interrupts.
+	 */
+	ldc_disable_hv_intr(vcc->vio.lp);
+	vio_port_up(&vcc->vio);
+	ldc_enable_hv_intr(vcc->vio.lp);
+
+	return 0;
+
+free_domain:
+	kfree(vcc->domain);
+
+unreg_tty:
+	tty_unregister_device(vcc_tty_driver, vdev->port_id);
+
+free_ldc:
+	vio_ldc_free(&vcc->vio);
+
+free_port:
+	kfree(name);
+	kfree(vcc);
+
+	return rv;
+}
+
+static int vcc_remove(struct vio_dev *vdev)
+{
+	struct vcc *vcc = dev_get_drvdata(&vdev->dev);
+	struct tty_struct *tty;
+	unsigned long flags;
+
+	vccdbg("%s\n", __func__);
+
+	if (!vcc)
+		return -ENODEV;
+
+	del_timer_sync(&vcc->rx_timer);
+	del_timer_sync(&vcc->tx_timer);
+
+	spin_lock_irqsave(&vcc->lock, flags);
+	tty = vcc->port.tty;
+	spin_unlock_irqrestore(&vcc->lock, flags);
+
+	if (tty)
+		tty_hangup(tty);
+
+	tty_unregister_device(vcc_tty_driver, vdev->port_id);
+
+	del_timer_sync(&vcc->vio.timer);
+	vio_ldc_free(&vcc->vio);
+	sysfs_remove_group(&vdev->dev.kobj, &vcc_attribute_group);
+	dev_set_drvdata(&vdev->dev, NULL);
+
+	kfree(vcc->vio.name);
+	kfree(vcc->domain);
+	kfree(vcc);
+
+	return 0;
+}
+
+static const struct vio_device_id vcc_match[] = {
+	{
+		.type = "vcc-port",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vcc_match);
+
+static struct vio_driver vcc_driver = {
+	.id_table = vcc_match,
+	.probe = vcc_probe,
+	.remove = vcc_remove,
+	.name = "vcc",
+};
+
+static int vcc_open(struct tty_struct *tty, struct file *filp)
+{
+	struct vcc *vcc;
+	int rv, count;
+
+	vccdbg("%s\n", __func__);
+	if (!tty) {
+		pr_err("%s: NULL tty\n", __func__);
+		return -ENXIO;
+	}
+
+	if (!tty->port) {
+		pr_err("%s: NULL tty port\n", __func__);
+		return -ENXIO;
+	}
+	if (!tty->port->ops) {
+		pr_err("%s: NULL tty port ops\n", __func__);
+		return -ENXIO;
+	}
+
+	vcc = container_of(tty->port, struct vcc, port);
+
+	if (!vcc->vio.lp) {
+		pr_err("%s: NULL lp\n", __func__);
+		return -ENXIO;
+	}
+	vccdbgl(vcc->vio.lp);
+
+	/*
+	 * vcc_close is called even if vcc_open fails so call
+	 * tty_port_open() regardless in case of -EBUSY.
+ */ + count = tty->port->count; + if (count) + pr_err("%s: tty port busy\n", __func__); + rv = tty_port_open(tty->port, tty, filp); + if (rv == 0 && count != 0) + rv = -EBUSY; + + return rv; + +} + +static void vcc_close(struct tty_struct *tty, struct file *filp) +{ + vccdbg("%s\n", __func__); + if (!tty) { + pr_err("%s: NULL tty\n", __func__); + return; + } + if (!tty->port) { + pr_err("%s: NULL tty port\n", __func__); + return; + } + tty_port_close(tty->port, tty, filp); +} + +static void vcc_ldc_hup(struct vcc *vcc) +{ + unsigned long flags; + + vccdbg("%s\n", __func__); + + spin_lock_irqsave(&vcc->lock, flags); + + if (vcc_send_ctl(vcc, VCC_CTL_HUP) < 0) + vcc_kick_tx(vcc); + + spin_unlock_irqrestore(&vcc->lock, flags); +} + +static void vcc_hangup(struct tty_struct *tty) +{ + struct vcc *vcc = container_of(tty->port, struct vcc, port); + + vcc_ldc_hup(vcc); + tty_port_hangup(tty->port); +} + +static int vcc_write(struct tty_struct *tty, + const unsigned char *buf, int count) +{ + struct vcc *vcc = container_of(tty->port, struct vcc, port); + struct vio_vcc *pkt; + unsigned long flags; + int total_sent = 0; + int tosend = 0; + int rv = -EINVAL; + + vccdbg("%s\n", __func__); + + spin_lock_irqsave(&vcc->lock, flags); + + pkt = &vcc->buffer; + pkt->tag.type = VIO_TYPE_DATA; + + while (count > 0) { + tosend = min(count, (VCC_BUFF_LEN - vcc->chars_in_buffer)); + /* + * No more space, this probably means that the last call to + * vcc_write() didn't succeed and the buffer was filled up. + */ + if (!tosend) + break; + + memcpy(&pkt->data[vcc->chars_in_buffer], + &buf[total_sent], + tosend); + + vcc->chars_in_buffer += tosend; + + pkt->tag.stype = tosend; + vccdbg("TAG [%02x:%02x:%04x:%08x]\n", + pkt->tag.type, + pkt->tag.stype, + pkt->tag.stype_env, + pkt->tag.sid); + vccdbg("DATA [%s]\n", pkt->data); + vccdbgl(vcc->vio.lp); + + /* won't send partial writes */ + rv = ldc_write(vcc->vio.lp, pkt, VIO_TAG_SIZE + tosend); + vccdbg("%s: ldc_write(%ld)=%d\n", __func__, + VIO_TAG_SIZE + tosend, rv); + + /* + * Since we know we have enough room in vcc->buffer for + * tosend we record that it was sent regardless of whether the + * hypervisor actually took it because we have it buffered. + */ + total_sent += tosend; + count -= tosend; + if (rv < 0) { + vcc_kick_tx(vcc); + break; + } + + vcc->chars_in_buffer = 0; + } + + spin_unlock_irqrestore(&vcc->lock, flags); + + vccdbg("%s: total=%d rv=%d\n", __func__, total_sent, rv); + + return total_sent ? 
total_sent : rv;
+}
+
+static int vcc_write_room(struct tty_struct *tty)
+{
+	struct vcc *vcc = container_of(tty->port, struct vcc, port);
+
+	return VCC_BUFF_LEN - vcc->chars_in_buffer;
+}
+
+static int vcc_chars_in_buffer(struct tty_struct *tty)
+{
+	struct vcc *vcc = container_of(tty->port, struct vcc, port);
+
+	return vcc->chars_in_buffer;
+}
+
+static int vcc_break_ctl(struct tty_struct *tty, int state)
+{
+	struct vcc *vcc = container_of(tty->port, struct vcc, port);
+	unsigned long flags;
+
+	vccdbg("%s(%d)\n", __func__, state);
+
+	if (state == 0)		/* turn off break */
+		return 0;
+
+	spin_lock_irqsave(&vcc->lock, flags);
+
+	if (vcc_send_ctl(vcc, VCC_CTL_BREAK) < 0)
+		vcc_kick_tx(vcc);
+
+	spin_unlock_irqrestore(&vcc->lock, flags);
+
+	return 0;
+}
+
+static const struct tty_operations vcc_ops = {
+	.open = vcc_open,
+	.close = vcc_close,
+	.hangup = vcc_hangup,
+	.write = vcc_write,
+	.write_room = vcc_write_room,
+	.chars_in_buffer = vcc_chars_in_buffer,
+	.break_ctl = vcc_break_ctl
+};
+
+/*
+ * We want to dynamically manage our ports through the tty_port_*
+ * interfaces so we allocate and register/unregister on our own.
+ */
+#define VCC_TTY_FLAGS	(TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_REAL_RAW)
+
+static int vcc_tty_init(void)
+{
+	int rv;
+
+	vcc_tty_driver = tty_alloc_driver(VCC_MAX_PORTS, VCC_TTY_FLAGS);
+
+	if (IS_ERR(vcc_tty_driver)) {
+		rv = PTR_ERR(vcc_tty_driver);
+		vcc_tty_driver = NULL;
+		pr_err("%s: tty driver alloc failed\n", __func__);
+		return rv;
+	}
+
+	vcc_tty_driver->driver_name = vcc_driver_name;
+	vcc_tty_driver->name = vcc_device_node;
+
+	/*
+	 * We'll let the system assign us a major number, indicated by leaving
+	 * it blank.
+	 */
+	vcc_tty_driver->minor_start = VCC_MINOR_START;
+	vcc_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM;
+	vcc_tty_driver->init_termios = vcc_tty_termios;
+
+	tty_set_operations(vcc_tty_driver, &vcc_ops);
+
+	/*
+	 * The following call will result in sysfs entries that denote the
+	 * dynamically assigned major and minor numbers for our devices.
+	 */
+	rv = tty_register_driver(vcc_tty_driver);
+	if (!rv) {
+		vccdbg("%s: tty driver registered\n", __func__);
+		return 0;
+	}
+
+	pr_err("%s: tty driver register failed\n", __func__);
+
+	/* registration failed, so just drop our allocation reference */
+	put_tty_driver(vcc_tty_driver);
+	vcc_tty_driver = NULL;
+
+	return rv;
+}
+
+static void vcc_tty_exit(void)
+{
+	vccdbg("%s\n", __func__);
+
+	tty_unregister_driver(vcc_tty_driver);
+	put_tty_driver(vcc_tty_driver);
+	vccdbg("%s: tty driver unregistered\n", __func__);
+
+	vcc_tty_driver = NULL;
+}
+
+static int __init vcc_init(void)
+{
+	int rv;
+
+	vccdbg("%s\n", __func__);
+
+	rv = vcc_tty_init();
+	if (rv) {
+		pr_err("%s: vcc_tty_init failed (%d)\n", __func__, rv);
+		return rv;
+	}
+
+	rv = vio_register_driver(&vcc_driver);
+	if (rv) {
+		pr_err("%s: vcc driver register failed (%d)\n", __func__, rv);
+		vcc_tty_exit();
+	} else {
+		vccdbg("%s: vcc driver registered\n", __func__);
+	}
+
+	return rv;
+}
+
+static void __exit vcc_exit(void)
+{
+	vccdbg("%s\n", __func__);
+	vio_unregister_driver(&vcc_driver);
+	vccdbg("%s: vcc vio driver unregistered\n", __func__);
+	vcc_tty_exit();
+	vccdbg("%s: vcc tty driver unregistered\n", __func__);
+}
+
+module_init(vcc_init);
+module_exit(vcc_exit);
diff --git a/include/linux/ds.h b/include/linux/ds.h
new file mode 100644
index 000000000000..021acb3fae3f
--- /dev/null
+++ b/include/linux/ds.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 Oracle Corporation
+ */
+
+#ifndef _DS_H
+#define _DS_H
+
+#include
+
+typedef u64 ds_svc_hdl_t;
+typedef void *ds_cb_arg_t;
+
+typedef struct ds_ver {
+	u64 major;
+	u64 minor;
+} ds_ver_t;
+
+/*
+ * Domain Services Capability
+ *
+ * A DS capability is exported by a provider using a unique service
+ * identifier string. Along with this identifier, the registrant
+ * specifies the highest version of the capability that it supports.
+ * It is assumed that the capability supports this specified version
+ * or any lower version (down to 1.0). The service may be negotiated
+ * to register at this specified version or at a lower version.
+ */
+typedef struct ds_capability {
+	char *svc_id;		/* service identifier */
+	ds_ver_t vers;		/* supported version */
+} ds_capability_t;
+
+/*
+ * Domain Services Client Event Callbacks
+ *
+ * A client implementing a DS capability provides a set of callbacks
+ * when it registers with the DS framework. The use of these callbacks
+ * is described below:
+ *
+ * ds_reg_cb()
+ *
+ *	The ds_reg_cb() callback is invoked when the DS framework
+ *	has successfully completed version negotiation with the
+ *	remote endpoint for the capability. The cb also passes the
+ *	negotiated version of the service.
+ *
+ * ds_unreg_cb()
+ *
+ *	The ds_unreg_cb() callback is invoked when the DS framework
+ *	detects an event that causes the registered capability to
+ *	become unavailable. This includes an explicit unregister
+ *	message, a failure in the underlying communication transport,
+ *	etc. Any such event invalidates the service handle that was
+ *	received from the register callback. Once this callback has
+ *	been made, the client must re-register (unreg+reg) the service.
+ *
+ * ds_data_cb()
+ *
+ *	The ds_data_cb() callback is invoked whenever there is an
+ *	incoming data message for the client to process. It provides
+ *	the contents of the message along with the message length.
+ */ +typedef struct ds_ops { + void (*ds_reg_cb)(ds_cb_arg_t arg, ds_svc_hdl_t hdl, ds_ver_t *ver); + void (*ds_unreg_cb)(ds_cb_arg_t arg, ds_svc_hdl_t hdl); + void (*ds_data_cb)(ds_cb_arg_t arg, ds_svc_hdl_t hdl, + void *buf, size_t buflen); + ds_cb_arg_t cb_arg; /* optional arg to ops - can be NULL */ +} ds_ops_t; + +/* + * Domain Services Capability Interface + */ +extern int ds_cap_init(ds_capability_t *cap, ds_ops_t *ops, u32 flags, + u64 domain_handle, ds_svc_hdl_t *hdlp); +extern int ds_cap_fini(ds_svc_hdl_t hdl); +extern int ds_cap_send(ds_svc_hdl_t hdl, void *buf, size_t buflen); + +#define DS_CAP_IS_CLIENT 0x0001 /* client service */ +#define DS_CAP_IS_PROVIDER 0x0002 /* provider service */ +#define DS_TARGET_IS_DOMAIN 0x0004 /* domain target */ + +#endif /* _DS_H */ diff --git a/include/linux/vldc.h b/include/linux/vldc.h new file mode 100644 index 000000000000..cf269f79f974 --- /dev/null +++ b/include/linux/vldc.h @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2014 Oracle Corporation + */ + +#ifndef _VLDC_H +#define _VLDC_H + +#include + +#endif /* _VLDC_H */ diff --git a/include/linux/vlds.h b/include/linux/vlds.h new file mode 100644 index 000000000000..e4c242050c26 --- /dev/null +++ b/include/linux/vlds.h @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2015 Oracle Corporation + */ + +#ifndef _VLDS_H +#define _VLDS_H + +#include + +#endif /* _VLDS_H */ diff --git a/include/uapi/linux/ds.h b/include/uapi/linux/ds.h new file mode 100644 index 000000000000..c73709808f72 --- /dev/null +++ b/include/uapi/linux/ds.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015 Oracle Corporation + */ + +#ifndef _UAPI_DS_H +#define _UAPI_DS_H + +#include +#include + +#define DS_MAJOR_VERSION 1 +#define DS_MINOR_VERSION 0 + +#define DS_SPTOK_TOKEN_LEN 20 /* SP token length */ + +#define DS_MAX_DOM_NAME_LEN 256 /* Max length of DS domain name */ +#define DS_MAX_SVC_NAME_LEN 256 /* Max length of DS service name */ + +#define DS_SP_NAME "sp" /* name assigned to the SP DS dev */ + +typedef struct ds_sptok { + __u32 ds_sptok_ipaddr; /* IP address on SP */ + __u32 ds_sptok_portid; /* Port number on SP */ + __u8 ds_sptok_token[DS_SPTOK_TOKEN_LEN]; +} ds_sptok_t; + +typedef struct ds_ioctl_sptok_data { + __u32 major_version; + __u32 minor_version; + char service_name[DS_MAX_SVC_NAME_LEN]; + ds_sptok_t sp_tok; +} ds_ioctl_sptok_data_t ; + +#define DS_IOCTL_BASE 'D' + +#define DS_SPTOK_GET _IOR(DS_IOCTL_BASE, 1, ds_ioctl_sptok_data_t) + +#endif /* _UAPI_DS_H */ diff --git a/include/uapi/linux/vldc.h b/include/uapi/linux/vldc.h new file mode 100644 index 000000000000..2d821dabc9fb --- /dev/null +++ b/include/uapi/linux/vldc.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 Oracle Corporation + */ + +#ifndef _UAPI_VLDC_H +#define _UAPI_VLDC_H + +#include +#include + +struct vldc_data_t { + u64 src_addr; + u64 dst_addr; + u64 length; +}; + +#define VLDC_IOCTL_BASE 'V' + +#define VLDC_IOCTL_READ_COOKIE _IOR(VLDC_IOCTL_BASE, 1, struct vldc_data_t) +#define VLDC_IOCTL_WRITE_COOKIE _IOW(VLDC_IOCTL_BASE, 2, struct vldc_data_t) + +#endif /* _UAPI_VLDC_H */ diff --git a/include/uapi/linux/vlds.h b/include/uapi/linux/vlds.h new file mode 100644 index 000000000000..5b8d12bf3382 --- /dev/null +++ b/include/uapi/linux/vlds.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2015 Oracle Corporation + */ + +#ifndef _UAPI_VLDS_H +#define _UAPI_VLDS_H + +#include +#include + +#define VLDS_DEV_DIR "/dev/vlds" + +#define VLDS_DEV_DOMAIN_FILENAME_TAG "host:" + +/* String arguments to ioctl */ +typedef struct vlds_string_arg { + u64 vlds_strp; + u64 vlds_strlen; 
+} vlds_string_t; +#define VLDS_MAX_NAMELEN 256 + +/* Version (used by VLDS_IOCTL_SVC_REG) */ +typedef struct vlds_ver { + u16 vlds_major; + u16 vlds_minor; +} vlds_ver_t; + +/* Capability structure (used by VLDS_IOCTL_SVC_REG) */ +typedef struct vlds_cap { + vlds_string_t vlds_service; + vlds_ver_t vlds_vers; /* max supported version */ +} vlds_cap_t; + +typedef struct vlds_svc_reg_arg { + u64 vlds_hdlp; /* DS Service Handle ptr. (returned) */ + u64 vlds_capp; /* DS Capability Structure ptr. */ + u64 vlds_reg_flags; /* DS reg flags */ +} vlds_svc_reg_arg_t; + +/* vlds_reg_flags */ +#define VLDS_REG_CLIENT 0x01 /* Register as client */ +#define VLDS_REG_EVENT 0x02 /* Event driven service - not polled */ + +typedef struct vlds_unreg_hdl_arg { + u64 vlds_hdl; /* DS Service Handle */ +} vlds_unreg_hdl_arg_t; + +typedef struct vlds_hdl_lookup_arg { + vlds_string_t vlds_service; /* DS Service Name */ + u64 vlds_isclient; /* DS Client flag */ + u64 vlds_hdlsp; /* DS Handle array ptr */ + u64 vlds_maxhdls; /* DS Max no. of hdls to return */ + u64 vlds_nhdlsp; /* DS No. of hdls returned */ +} vlds_hdl_lookup_arg_t; + +typedef struct vlds_dmn_lookup_arg { + u64 vlds_dhdlp; /* DS Domain hdl ptr. (returned) */ + vlds_string_t vlds_dname; /* DS Domain name (returned) */ +} vlds_dmn_lookup_arg_t; + +typedef struct vlds_send_msg_arg { + u64 vlds_hdl; /* DS Service Handle */ + u64 vlds_bufp; /* buffer */ + u64 vlds_buflen; /* message length/buffer size */ +} vlds_send_msg_arg_t; +#define VLDS_MAX_SENDBUF_LEN 65535 /* 64k max buf size */ + +typedef struct vlds_recv_msg_arg { + u64 vlds_hdl; /* DS Service Handle */ + u64 vlds_bufp; /* buffer */ + u64 vlds_buflen; /* message length/buffer size */ + u64 vlds_msglenp; /* ptr to returned message length */ +} vlds_recv_msg_arg_t; + +typedef struct vlds_hdl_state { + u64 state; + vlds_ver_t vlds_vers; /* negotiated version */ +} vlds_hdl_state_t; + +typedef struct vlds_hdl_get_state_arg { + u64 vlds_hdl; /* DS Service Handle */ + u64 vlds_statep; /* Ptr to vlds_hdl_state */ +} vlds_hdl_get_state_arg_t; +#define VLDS_HDL_STATE_NOT_YET_CONNECTED 0x0 +#define VLDS_HDL_STATE_CONNECTED 0x1 +#define VLDS_HDL_STATE_DISCONNECTED 0x2 + +typedef struct vlds_set_event_fd_arg { + int fd; /* eventfd() fd used by process */ +} vlds_set_event_fd_arg_t; + +typedef struct vlds_get_next_event_arg { + u64 vlds_hdlp; /* Event Service Handle (returned) */ + u64 vlds_event_typep; /* Reg, Unreg or Data event? 
(returned) */ + u64 neg_versp; /* reg event negotiated version (returned) */ + u64 vlds_bufp; /* data event msg buffer (returned) */ + u64 vlds_buflen; /* data event msg buffer size */ + u64 vlds_msglenp; /* data event returned msg length (returned) */ +} vlds_get_next_event_arg_t; +/* event types returned in event_typep field */ +#define VLDS_EVENT_TYPE_REG 0x0 +#define VLDS_EVENT_TYPE_UNREG 0x1 +#define VLDS_EVENT_TYPE_DATA 0x2 + +#define VLDS_IOCTL_BASE 'D' + +#define VLDS_IOCTL_SVC_REG _IOWR(VLDS_IOCTL_BASE, 1, \ + struct vlds_svc_reg_arg) +#define VLDS_IOCTL_UNREG_HDL _IOW(VLDS_IOCTL_BASE, 2, \ + struct vlds_unreg_hdl_arg) +#define VLDS_IOCTL_HDL_LOOKUP _IOR(VLDS_IOCTL_BASE, 3, \ + struct vlds_hdl_lookup_arg) +#define VLDS_IOCTL_DMN_LOOKUP _IOR(VLDS_IOCTL_BASE, 4, \ + struct vlds_dmn_lookup_arg) +#define VLDS_IOCTL_SEND_MSG _IOW(VLDS_IOCTL_BASE, 5, \ + struct vlds_send_msg_arg) +#define VLDS_IOCTL_RECV_MSG _IOR(VLDS_IOCTL_BASE, 6, \ + struct vlds_recv_msg_arg) +#define VLDS_IOCTL_HDL_GET_STATE _IOR(VLDS_IOCTL_BASE, 7, \ + struct vlds_hdl_get_state_arg) + +/* start Linux specific ioctls at 32 */ +#define VLDS_IOCTL_SET_EVENT_FD _IOW(VLDS_IOCTL_BASE, 32, \ + struct vlds_set_event_fd_arg) +#define VLDS_IOCTL_UNSET_EVENT_FD _IO(VLDS_IOCTL_BASE, 33) +#define VLDS_IOCTL_GET_NEXT_EVENT _IOR(VLDS_IOCTL_BASE, 34, \ + struct vlds_get_next_event_arg) + +#endif /* _UAPI_VLDS_H */ + +
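
For context, here is a minimal sketch of how an in-kernel consumer might drive
the ds_cap_* interface declared in include/linux/ds.h above. It is illustrative
only and not part of this patch: the "example-svc" service name, the example_*
identifiers, and the callback bodies are invented; only the types, flags, and
entry points come from the header.

#include <linux/kernel.h>
#include <linux/ds.h>

static void example_reg_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl, ds_ver_t *ver)
{
	/* version negotiation finished; hdl is now valid for ds_cap_send() */
	pr_info("example-svc: registered, hdl=%llx, ver=%llu.%llu\n",
		(unsigned long long)hdl, ver->major, ver->minor);
}

static void example_unreg_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl)
{
	/* hdl is invalid from this point on; a fresh registration is needed */
	pr_info("example-svc: unregistered\n");
}

static void example_data_cb(ds_cb_arg_t arg, ds_svc_hdl_t hdl,
			    void *buf, size_t buflen)
{
	pr_info("example-svc: received %zu bytes\n", buflen);
}

static ds_ops_t example_ops = {
	.ds_reg_cb   = example_reg_cb,
	.ds_unreg_cb = example_unreg_cb,
	.ds_data_cb  = example_data_cb,
	.cb_arg      = NULL,
};

static int example_register(u64 domain_handle, ds_svc_hdl_t *hdlp)
{
	ds_capability_t cap = {
		.svc_id = "example-svc",
		.vers	= { .major = 1, .minor = 0 },
	};

	/* provider registration targeting a guest domain */
	return ds_cap_init(&cap, &example_ops,
			   DS_CAP_IS_PROVIDER | DS_TARGET_IS_DOMAIN,
			   domain_handle, hdlp);
}

On teardown such a consumer would call ds_cap_fini() with the returned handle,
which is what vlds_unreg_hdl() and vlds_unreg_all() do on behalf of userland
in the vlds driver above.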
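On the userland side, the flow that vlds_svc_reg() implements can be exercised
through the ioctls in include/uapi/linux/vlds.h. The sketch below registers a
client service against a guest-domain vlds device. Hedges: the
"/dev/vlds/host:1" path is an assumption pieced together from vlds_devnode()
and the VLDS_DEV_DOMAIN_FILENAME_TAG naming in vlds_probe(); "example-svc" is
invented; and the header's kernel-style u64/u16 typedefs must resolve in the
userland build environment for this to compile unmodified.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vlds.h>

int main(void)
{
	const char svc[] = "example-svc";	/* hypothetical service name */
	uint64_t hdl = 0;
	vlds_svc_reg_arg_t reg;
	vlds_cap_t cap;
	int fd;

	/* assumed path: one "host:<domain-handle>" node per guest domain */
	fd = open("/dev/vlds/host:1", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&cap, 0, sizeof(cap));
	cap.vlds_service.vlds_strp = (uintptr_t)svc;
	cap.vlds_service.vlds_strlen = strlen(svc);
	cap.vlds_vers.vlds_major = 1;
	cap.vlds_vers.vlds_minor = 0;

	memset(&reg, 0, sizeof(reg));
	reg.vlds_hdlp = (uintptr_t)&hdl;
	reg.vlds_capp = (uintptr_t)&cap;
	reg.vlds_reg_flags = VLDS_REG_CLIENT;

	if (ioctl(fd, VLDS_IOCTL_SVC_REG, &reg) < 0) {
		perror("VLDS_IOCTL_SVC_REG");
		close(fd);
		return 1;
	}

	printf("registered %s, hdl=0x%llx\n", svc, (unsigned long long)hdl);

	/* closing the fd unregisters this process's services and events */
	close(fd);
	return 0;
}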
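For event-driven services (VLDS_REG_EVENT), the intended loop per
vlds_set_event_fd() and vlds_get_next_event() above is: hand the driver an
eventfd, block reading it, then drain queued events until the driver reports
ENOENT. A hedged sketch under the same assumptions as the previous example,
with invented helper names:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vlds.h>

/* drain all pending events on 'fd'; returns 0 when the queue is empty */
static int drain_events(int fd)
{
	unsigned char buf[VLDS_MAX_SENDBUF_LEN];
	uint64_t hdl, type, msglen;
	vlds_ver_t neg_vers;
	vlds_get_next_event_arg_t ev;

	for (;;) {
		memset(&ev, 0, sizeof(ev));
		ev.vlds_hdlp = (uintptr_t)&hdl;
		ev.vlds_event_typep = (uintptr_t)&type;
		ev.neg_versp = (uintptr_t)&neg_vers;
		ev.vlds_bufp = (uintptr_t)buf;
		ev.vlds_buflen = sizeof(buf);
		ev.vlds_msglenp = (uintptr_t)&msglen;

		if (ioctl(fd, VLDS_IOCTL_GET_NEXT_EVENT, &ev) < 0) {
			if (errno == ENOENT)	/* no more events queued */
				return 0;
			return -1;
		}

		if (type == VLDS_EVENT_TYPE_DATA)
			printf("hdl 0x%llx: %llu byte message\n",
			       (unsigned long long)hdl,
			       (unsigned long long)msglen);
		else if (type == VLDS_EVENT_TYPE_REG)
			printf("hdl 0x%llx: registered v%u.%u\n",
			       (unsigned long long)hdl,
			       neg_vers.vlds_major, neg_vers.vlds_minor);
	}
}

int event_loop(int fd)	/* fd: an open vlds domain device */
{
	vlds_set_event_fd_arg_t efd_arg;
	uint64_t count;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	efd_arg.fd = efd;
	if (ioctl(fd, VLDS_IOCTL_SET_EVENT_FD, &efd_arg) < 0)
		return -1;

	/* each read blocks until the driver signals the eventfd */
	while (read(efd, &count, sizeof(count)) == sizeof(count)) {
		if (drain_events(fd) < 0)
			break;
	}

	return 0;
}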