linux-zen-server/drivers/infiniband/ulp/rtrs/rtrs-pri.h

407 lines
11 KiB
C
Raw Normal View History

2023-08-30 17:53:23 +02:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* RDMA Transport Layer
*
* Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
* Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
* Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
*/
#ifndef RTRS_PRI_H
#define RTRS_PRI_H
#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>
#include "rtrs.h"
/*
* RTRS protocol version. RTRS_PROTO_VER_STRING is the compile-time string
* "<major>.<minor>" built from the two numeric macros.
*/
#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0
#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
__stringify(RTRS_PROTO_VER_MINOR)
/*
* Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
* and the minimum chunk size is 4096 (2^12).
* So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
* since queue_depth in rtrs_msg_conn_rsp is defined as le16.
* Therefore the practical max value of sess_queue_depth is
* somewhere between 1 and 65535 and it depends on the system.
*
* NOTE(review): MIN_CHUNK_SIZE in this header is 8192 (2^13), not 4096 -
* confirm which minimum chunk size this derivation is meant to use.
*/
#define MAX_SESS_QUEUE_DEPTH 65535
/*
 * Layout of the 32-bit immediate value: the type occupies the top
 * MAX_IMM_TYPE_BITS bits and the payload the low MAX_IMM_PAYL_BITS bits.
 * Each *_MASK selects a field of that width once it has been shifted
 * down to bit 0.
 */
enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS	= 4,
	MAX_IMM_PAYL_BITS	= 28,

	MAX_IMM_TYPE_MASK	= (1 << MAX_IMM_TYPE_BITS) - 1,
	MAX_IMM_PAYL_MASK	= (1 << MAX_IMM_PAYL_BITS) - 1,
};
/**
 * enum rtrs_imm_type - values of the type field of the immediate data
 * @RTRS_IO_REQ_IMM:       client to server
 * @RTRS_IO_RSP_IMM:       server to client
 * @RTRS_IO_RSP_W_INV_IMM: server to client
 * @RTRS_HB_MSG_IMM:       heartbeat (HB) message
 * @RTRS_HB_ACK_IMM:       heartbeat (HB) ack
 * @RTRS_LAST_IMM:         one past the highest value defined above
 */
enum rtrs_imm_type {
	RTRS_IO_REQ_IMM		= 0,
	RTRS_IO_RSP_IMM		= 1,
	RTRS_IO_RSP_W_INV_IMM	= 2,

	RTRS_HB_MSG_IMM		= 8,
	RTRS_HB_ACK_IMM		= 9,

	RTRS_LAST_IMM,
};
/*
* Miscellaneous transport constants.
*
* RTRS_PROTO_VER packs RTRS_PROTO_VER_MAJOR into bits 15..8 and
* RTRS_PROTO_VER_MINOR into bits 7..0, i.e. 0x0200 for version 2.0.
* NOTE(review): RTRS_MAGIC and RTRS_PROTO_VER are presumably the values
* carried in the magic/version fields of rtrs_msg_conn_req and
* rtrs_msg_conn_rsp, and the RTRS_HB_* values presumably the defaults
* passed to rtrs_init_hb() - confirm against the callers.
*/
enum {
SERVICE_CON_QUEUE_DEPTH = 512,
MAX_PATHS_NUM = 128,
MIN_CHUNK_SIZE = 8192,
RTRS_HB_INTERVAL_MS = 5000,
RTRS_HB_MISSED_MAX = 5,
RTRS_MAGIC = 0x1BBD,
RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};
/* Forward declaration: the ops below take a pointer to this struct. */
struct rtrs_ib_dev;
/**
* struct rtrs_rdma_dev_pd_ops - callbacks attached to a struct rtrs_rdma_dev_pd
* @init: receives a struct rtrs_ib_dev and returns an int status.
*        NOTE(review): presumably invoked when a device is added to the
*        pool - confirm against rtrs_ib_dev_find_or_add() in rtrs.c
*/
struct rtrs_rdma_dev_pd_ops {
int (*init)(struct rtrs_ib_dev *dev);
};
/**
* struct rtrs_rdma_dev_pd - pool object taken by rtrs_rdma_dev_pd_init(),
*                           rtrs_rdma_dev_pd_deinit() and
*                           rtrs_ib_dev_find_or_add()
* @mutex: NOTE(review): presumably serializes access to @list - confirm
* @list: list head. NOTE(review): presumably links struct rtrs_ib_dev
*        objects through their @entry member - confirm
* @pd_flags: ib_pd flags; rtrs_rdma_dev_pd_init() takes a parameter of the
*            same name and type
* @ops: callbacks, see struct rtrs_rdma_dev_pd_ops
*/
struct rtrs_rdma_dev_pd {
struct mutex mutex;
struct list_head list;
enum ib_pd_flags pd_flags;
const struct rtrs_rdma_dev_pd_ops *ops;
};
/**
* struct rtrs_ib_dev - an ib_device paired with an ib_pd
* @ib_dev: the IB device
* @ib_pd: the IB protection domain
* @ref: kref reference counter. NOTE(review): presumably dropped through
*       rtrs_ib_dev_put() - confirm
* @entry: list linkage. NOTE(review): presumably on rtrs_rdma_dev_pd.list -
*         confirm
* @pool: pointer to a struct rtrs_rdma_dev_pd
*/
struct rtrs_ib_dev {
struct ib_device *ib_dev;
struct ib_pd *ib_pd;
struct kref ref;
struct list_head entry;
struct rtrs_rdma_dev_pd *pool;
};
/**
* struct rtrs_con - one RTRS connection
* @path: the struct rtrs_path this connection refers to
* @qp: IB queue pair
* @cq: IB completion queue
* @cm_id: RDMA CM id
* @cid: connection id. NOTE(review): presumably the value sent as
*       rtrs_msg_conn_req.cid - confirm
* @nr_cqe: rtrs_cq_qp_create() takes a parameter of the same name
* @wr_cnt: atomic counter. NOTE(review): presumably counts posted work
*          requests - confirm
* @sq_wr_avail: atomic counter. NOTE(review): presumably the number of free
*               send-queue work request slots - confirm
*/
struct rtrs_con {
struct rtrs_path *path;
struct ib_qp *qp;
struct ib_cq *cq;
struct rdma_cm_id *cm_id;
unsigned int cid;
int nr_cqe;
atomic_t wr_cnt;
atomic_t sq_wr_avail;
};
/**
* struct rtrs_path - one RTRS path (session)
* @entry: list linkage; the owning list is not visible in this header
* @dst_addr: destination socket address
* @src_addr: source socket address
* @sessname: name buffer of NAME_MAX bytes
* @uuid: UUID. NOTE(review): presumably what is sent as
*        rtrs_msg_conn_req.sess_uuid - confirm
* @con: pointer to pointers to struct rtrs_con. NOTE(review): presumably an
*       array of @con_num entries - confirm
* @con_num: NOTE(review): presumably the number of connections - confirm
* @irq_con_num: NOTE(review): meaning not visible in this header - confirm
* @recon_cnt: NOTE(review): presumably the reconnections counter also sent
*             in rtrs_msg_conn_req.recon_cnt - confirm
* @signal_interval: NOTE(review): meaning not visible in this header -
*                   confirm
* @dev: pointer to a struct rtrs_ib_dev
* @dev_ref: NOTE(review): meaning not visible in this header - confirm
* @hb_cqe: heartbeat cqe; same type as the cqe parameter of rtrs_init_hb()
* @hb_err_handler: heartbeat error callback; same signature as the
*                  err_handler parameter of rtrs_init_hb()
* @hb_wq: heartbeat workqueue; same type as the wq parameter of
*         rtrs_init_hb()
* @hb_dwork: heartbeat delayed work item
* @hb_interval_ms: heartbeat interval in milliseconds
* @hb_missed_cnt: NOTE(review): presumably the current count of missed
*                 heartbeats - confirm
* @hb_missed_max: NOTE(review): presumably the limit for @hb_missed_cnt -
*                 confirm
* @hb_last_sent: ktime_t. NOTE(review): presumably the time the last
*                heartbeat was sent - confirm
* @hb_cur_latency: ktime_t. NOTE(review): presumably the latest measured
*                  heartbeat latency - confirm
*/
struct rtrs_path {
struct list_head entry;
struct sockaddr_storage dst_addr;
struct sockaddr_storage src_addr;
char sessname[NAME_MAX];
uuid_t uuid;
struct rtrs_con **con;
unsigned int con_num;
unsigned int irq_con_num;
unsigned int recon_cnt;
unsigned int signal_interval;
struct rtrs_ib_dev *dev;
int dev_ref;
struct ib_cqe *hb_cqe;
void (*hb_err_handler)(struct rtrs_con *con);
struct workqueue_struct *hb_wq;
struct delayed_work hb_dwork;
unsigned int hb_interval_ms;
unsigned int hb_missed_cnt;
unsigned int hb_missed_max;
ktime_t hb_last_sent;
ktime_t hb_cur_latency;
};
/**
* struct rtrs_iu - rtrs information unit
* @cqe: embedded struct ib_cqe
* @dma_addr: DMA address. NOTE(review): presumably the DMA mapping of @buf -
*            confirm
* @buf: buffer pointer
* @size: NOTE(review): presumably the size of @buf in bytes - confirm
* @direction: DMA data direction
*/
struct rtrs_iu {
struct ib_cqe cqe;
dma_addr_t dma_addr;
void *buf;
size_t size;
enum dma_data_direction direction;
};
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:	Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:	Server additional info response to the client
 * @RTRS_MSG_WRITE:	Client writes data per RDMA to server
 * @RTRS_MSG_READ:	Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:	Server refreshed rkey for rbuf
 *
 * The numeric values are spelled out so that the numbering does not
 * depend on the order of the enumerators.
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ	= 0,
	RTRS_MSG_INFO_RSP	= 1,
	RTRS_MSG_WRITE		= 2,
	RTRS_MSG_READ		= 3,
	RTRS_MSG_RKEY_RSP	= 4,
};
/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 *
 * Each flag is a distinct single bit, so the flags can be OR-ed together.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F	= (1 << 0),
	RTRS_MSG_NEW_RKEY_F	= (1 << 1),
};
/**
* struct rtrs_sg_desc - RDMA-Buffer entry description
* @addr: Address of RDMA destination buffer
* @key: Authorization rkey to write to the buffer
* @len: Size of the buffer
*
* All members are little-endian; the members add up to 16 bytes.
*/
struct rtrs_sg_desc {
__le64 addr;
__le32 key;
__le32 len;
};
/**
* struct rtrs_msg_conn_req - Client connection request to the server
* @__cma_version: Owned by the RDMA CM, see the comment on the member
* @__ip_version: Must be 0 on the sender side, see the comment on the member
* @magic: RTRS magic
* @version: RTRS protocol version
* @cid: Current connection id
* @cid_num: Number of connections per session
* @recon_cnt: Reconnections counter
* @sess_uuid: UUID of a session (path)
* @paths_uuid: UUID of a group of sessions (paths)
* @first_conn: One-bit flag. NOTE(review): presumably set on the first
*              connection of a session - confirm against the client code
* @reserved_bits: Remaining 7 bits of the byte shared with @first_conn
* @reserved: 11 reserved bytes; with them the members add up to 56 bytes
*
* NOTE: max size 56 bytes, see man rdma_connect().
*/
struct rtrs_msg_conn_req {
/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
* see https://www.spinics.net/lists/linux-rdma/msg22397.html
*/
u8 __cma_version;
/* On the sender side this must be set to 0, otherwise
* cma_save_ip_info() extracts garbage and fails.
*/
u8 __ip_version;
__le16 magic;
__le16 version;
__le16 cid;
__le16 cid_num;
__le16 recon_cnt;
uuid_t sess_uuid;
uuid_t paths_uuid;
u8 first_conn : 1;
u8 reserved_bits : 7;
u8 reserved[11];
};
/**
* struct rtrs_msg_conn_rsp - Server connection response to the client
* @magic: RTRS magic
* @version: RTRS protocol version
* @errno: If rdma_accept() then 0, if rdma_reject() indicates error
* @queue_depth: max inflight messages (queue-depth) in this session
* @max_io_size: max io size server supports
* @max_hdr_size: max msg header size server supports
* @flags: Flag bits. NOTE(review): the bit meanings are not defined next to
*         this struct; presumably enum rtrs_msg_flags values - confirm
* @reserved: 36 reserved bytes; with them the members add up to 56 bytes
*
* NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
*/
struct rtrs_msg_conn_rsp {
__le16 magic;
__le16 version;
__le16 errno;
__le16 queue_depth;
__le32 max_io_size;
__le32 max_hdr_size;
__le32 flags;
u8 reserved[36];
};
/**
* struct rtrs_msg_info_req - Client additional info request to the server
* @type: @RTRS_MSG_INFO_REQ
* @pathname: Path name chosen by client
* @reserved: 15 reserved bytes
*/
struct rtrs_msg_info_req {
__le16 type;
u8 pathname[NAME_MAX];
u8 reserved[15];
};
/**
* struct rtrs_msg_info_rsp - Server additional info response to the client
* @type: @RTRS_MSG_INFO_RSP
* @sg_cnt: Number of @desc entries
* @reserved: 4 reserved bytes; they place @desc at byte offset 8
* @desc: RDMA buffers where the client can write to server
*        (flexible array member, sized by @sg_cnt)
*/
struct rtrs_msg_info_rsp {
__le16 type;
__le16 sg_cnt;
u8 reserved[4];
struct rtrs_sg_desc desc[];
};
/**
* struct rtrs_msg_rkey_rsp - Server message carrying a refreshed rkey
* @type: @RTRS_MSG_RKEY_RSP
* @buf_id: RDMA buf_id of the new rkey
* @rkey: new remote key for RDMA buffers id from server
*/
struct rtrs_msg_rkey_rsp {
__le16 type;
__le16 buf_id;
__le32 rkey;
};
/**
* struct rtrs_msg_rdma_read - RDMA data transfer request from client
* @type: always @RTRS_MSG_READ
* @usr_len: length of user payload
* @flags: Flag bits. NOTE(review): presumably enum rtrs_msg_flags values -
*         confirm
* @sg_cnt: number of @desc entries
* @desc: RDMA buffers where the server can write the result to
*        (flexible array member, sized by @sg_cnt)
*/
struct rtrs_msg_rdma_read {
__le16 type;
__le16 usr_len;
__le16 flags;
__le16 sg_cnt;
struct rtrs_sg_desc desc[];
};
/**
* struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
* @type: always @RTRS_MSG_WRITE
* @usr_len: length of user payload
*/
struct rtrs_msg_rdma_write {
__le16 type;
__le16 usr_len;
};
/**
* struct rtrs_msg_rdma_hdr - header for read or write request
* @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
*
* Both struct rtrs_msg_rdma_read and struct rtrs_msg_rdma_write start with
* the same __le16 type member.
*/
struct rtrs_msg_rdma_hdr {
__le16 type;
};
/* rtrs.c */

/*
 * struct rtrs_iu helpers. Every parameter carries a name, including the
 * DMA direction of rtrs_iu_alloc(), so the prototypes are self-describing.
 */
struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
			      struct ib_device *dev,
			      enum dma_data_direction dir,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head,
				struct ib_send_wr *tail);
int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);

/* CQ/QP creation and destruction for one connection */
int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int nr_cqe,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

/* Heartbeat (hb) */
void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);

/* struct rtrs_rdma_dev_pd pool and struct rtrs_ib_dev handling */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);
struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
/**
 * rtrs_to_imm() - pack a type and a payload into one 32-bit immediate value
 * @type:	value for the type field; bits above MAX_IMM_TYPE_MASK are dropped
 * @payload:	value for the payload field; bits above MAX_IMM_PAYL_MASK
 *		are dropped
 *
 * The type is placed above the payload, i.e. shifted left by
 * MAX_IMM_PAYL_BITS. The build fails if the two field widths do not add up
 * to 32 bits or if RTRS_LAST_IMM exceeds 1 << MAX_IMM_TYPE_BITS.
 *
 * Return: the combined immediate value.
 */
static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	u32 type_field, payl_field;

	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));

	type_field = (type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS;
	payl_field = payload & MAX_IMM_PAYL_MASK;

	return type_field | payl_field;
}
/**
 * rtrs_from_imm() - split a 32-bit immediate value into type and payload
 * @imm:	immediate value to split
 * @type:	output, receives the bits above MAX_IMM_PAYL_BITS
 * @payload:	output, receives the low MAX_IMM_PAYL_BITS bits
 *
 * @payload is stored before @type.
 */
static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	const u32 payl_field = imm & MAX_IMM_PAYL_MASK;
	const u32 type_field = imm >> MAX_IMM_PAYL_BITS;

	*payload = payl_field;
	*type = type_field;
}
/**
 * rtrs_to_io_req_imm() - build an RTRS_IO_REQ_IMM immediate value
 * @addr:	value passed to rtrs_to_imm() as the payload
 *
 * Return: the result of rtrs_to_imm() for type RTRS_IO_REQ_IMM.
 */
static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	const u32 imm = rtrs_to_imm(RTRS_IO_REQ_IMM, addr);

	return imm;
}
/**
 * rtrs_to_io_rsp_imm() - build the immediate value of an IO response
 * @msg_id:	message id; only the low 19 bits are kept
 * @errno:	error code; its absolute value is used, only the low 9 bits
 *		are kept
 * @w_inval:	selects RTRS_IO_RSP_W_INV_IMM when true, RTRS_IO_RSP_IMM
 *		otherwise
 *
 * Payload layout: errno in bits 27..19, msg_id in bits 18..0.
 *
 * Return: the result of rtrs_to_imm() for the selected type and payload.
 */
static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	/* 9 bits for errno, 19 bits for msg_id */
	const u32 err_field = (abs(errno) & 0x1ff) << 19;
	const u32 id_field = msg_id & 0x7ffff;
	enum rtrs_imm_type type = RTRS_IO_RSP_IMM;

	if (w_inval)
		type = RTRS_IO_RSP_W_INV_IMM;

	return rtrs_to_imm(type, err_field | id_field);
}
/**
 * rtrs_from_io_rsp_imm() - unpack the payload of an IO response
 * @payload:	payload to unpack
 * @msg_id:	output, receives bits 18..0 of @payload
 * @errno:	output, receives bits 27..19 of @payload, negated
 *
 * @msg_id is stored before @errno.
 */
static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	const u32 id_field = payload & 0x7ffff;
	const int err_field = (int)((payload >> 19) & 0x1ff);

	*msg_id = id_field;
	*errno = -err_field;
}
/*
 * STAT_STORE_FUNC - define the sysfs store handler <set_value>_store()
 * @type:	struct type that embeds a kobject member named kobj_stats
 * @set_value:	prefix of the generated function name
 * @reset:	called as reset(stats, true) when the written value is "1"
 *		and as reset(stats, false) when it is "0"
 *
 * The handler returns -EINVAL for any other input, the non-zero result of
 * reset() if there is one, and count otherwise.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
	int ret;							\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	else								\
		return -EINVAL;						\
									\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
/*
* STAT_SHOW_FUNC - define the sysfs show handler <get_value>_show()
* @type: struct type that embeds a kobject member named kobj_stats
* @get_value: prefix of the generated function name
* @print: called as print(stats, page); its result is returned unchanged
*/
#define STAT_SHOW_FUNC(type, get_value, print) \
static ssize_t get_value##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
type *stats = container_of(kobj, type, kobj_stats); \
\
return print(stats, page); \
}
/*
* STAT_ATTR - define <stat>_store(), <stat>_show() and the kobj_attribute
* <stat>_attr, which is initialized with __ATTR_RW(stat)
* @type: struct type that embeds a kobject member named kobj_stats
* @stat: attribute name, used as prefix of all generated identifiers
* @print: forwarded to STAT_SHOW_FUNC
* @reset: forwarded to STAT_STORE_FUNC
*
* The expansion does not end with a semicolon; the user supplies it.
*/
#define STAT_ATTR(type, stat, print, reset) \
STAT_STORE_FUNC(type, stat, reset) \
STAT_SHOW_FUNC(type, stat, print) \
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
#endif /* RTRS_PRI_H */