drivers/infiniband/sw/rxe/rxe_req.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
                       u32 opcode);

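/* For a partially acknowledged send or rdma write being retried, step
 * over the npsn packets that were already acknowledged and advance the
 * wqe's dma state so transmission resumes at the first unacknowledged
 * packet.
 */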
static inline void retry_first_write_send(struct rxe_qp *qp,
                                          struct rxe_send_wqe *wqe, int npsn)
{
        int i;

        for (i = 0; i < npsn; i++) {
                int to_send = (wqe->dma.resid > qp->mtu) ?
                                qp->mtu : wqe->dma.resid;

                qp->req.opcode = next_opcode(qp, wqe,
                                             wqe->wr.opcode);

                if (wqe->wr.send_flags & IB_SEND_INLINE) {
                        wqe->dma.resid -= to_send;
                        wqe->dma.sge_offset += to_send;
                } else {
                        advance_dma_data(&wqe->dma, to_send);
                }
        }
}

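/* Rewind the send queue for a retry: restart from the consumer index
 * at the PSN the completer has reached, reset the iova and dma state
 * of each wqe that has not completed, and for the first incomplete
 * wqe skip the portion that was already acknowledged.
 */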
static void req_retry(struct rxe_qp *qp)
{
        struct rxe_send_wqe *wqe;
        unsigned int wqe_index;
        unsigned int mask;
        int npsn;
        int first = 1;
        struct rxe_queue *q = qp->sq.queue;
        unsigned int cons;
        unsigned int prod;

        cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
        prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);

        qp->req.wqe_index       = cons;
        qp->req.psn             = qp->comp.psn;
        qp->req.opcode          = -1;

        for (wqe_index = cons; wqe_index != prod;
                        wqe_index = queue_next_index(q, wqe_index)) {
                wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
                mask = wr_opcode_mask(wqe->wr.opcode, qp);

                if (wqe->state == wqe_state_posted)
                        break;

                if (wqe->state == wqe_state_done)
                        continue;

                wqe->iova = (mask & WR_ATOMIC_MASK) ?
                             wqe->wr.wr.atomic.remote_addr :
                             (mask & WR_READ_OR_WRITE_MASK) ?
                             wqe->wr.wr.rdma.remote_addr :
                             0;

                if (!first || (mask & WR_READ_MASK) == 0) {
                        wqe->dma.resid = wqe->dma.length;
                        wqe->dma.cur_sge = 0;
                        wqe->dma.sge_offset = 0;
                }

                if (first) {
                        first = 0;

                        if (mask & WR_WRITE_OR_SEND_MASK) {
                                npsn = (qp->comp.psn - wqe->first_psn) &
                                        BTH_PSN_MASK;
                                retry_first_write_send(qp, wqe, npsn);
                        }

                        if (mask & WR_READ_MASK) {
                                npsn = (wqe->dma.length - wqe->dma.resid) /
                                        qp->mtu;
                                wqe->iova += npsn * qp->mtu;
                        }
                }

                wqe->state = wqe_state_posted;
        }
}

void rnr_nak_timer(struct timer_list *t)
{
        struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);

        rxe_dbg_qp(qp, "nak timer fired\n");

        /* request a send queue retry */
        qp->req.need_retry = 1;
        qp->req.wait_for_rnr_timer = 0;
        rxe_sched_task(&qp->req.task);
}

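/* Return the next wqe to process from the send queue or NULL if there
 * is nothing to do. Also handles the transition from the drain state
 * to drained, raising IB_EVENT_SQ_DRAINED once the completer has
 * caught up.
 */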
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
        struct rxe_send_wqe *wqe;
        struct rxe_queue *q = qp->sq.queue;
        unsigned int index = qp->req.wqe_index;
        unsigned int cons;
        unsigned int prod;

        wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
        cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
        prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);

        if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
                /* check to see if we are drained;
                 * state_lock used by requester and completer
                 */
                spin_lock_bh(&qp->state_lock);
                do {
                        if (qp->req.state != QP_STATE_DRAIN) {
                                /* comp just finished */
                                spin_unlock_bh(&qp->state_lock);
                                break;
                        }

                        if (wqe && ((index != cons) ||
                                (wqe->state != wqe_state_posted))) {
                                /* comp not done yet */
                                spin_unlock_bh(&qp->state_lock);
                                break;
                        }

                        qp->req.state = QP_STATE_DRAINED;
                        spin_unlock_bh(&qp->state_lock);

                        if (qp->ibqp.event_handler) {
                                struct ib_event ev;

                                ev.device = qp->ibqp.device;
                                ev.element.qp = &qp->ibqp;
                                ev.event = IB_EVENT_SQ_DRAINED;
                                qp->ibqp.event_handler(&ev,
                                        qp->ibqp.qp_context);
                        }
                } while (0);
        }

        if (index == prod)
                return NULL;

        wqe = queue_addr_from_index(q, index);

        if (unlikely((qp->req.state == QP_STATE_DRAIN ||
                      qp->req.state == QP_STATE_DRAINED) &&
                     (wqe->state != wqe_state_processing)))
                return NULL;

        wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
        return wqe;
}

/**
 * rxe_wqe_is_fenced - check if next wqe is fenced
 * @qp: the queue pair
 * @wqe: the next wqe
 *
 * Returns: 1 if wqe needs to wait
 *          0 if wqe is ready to go
 */
static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        /* Local invalidate fence (LIF) see IBA 10.6.5.1
         * Requires ALL previous operations on the send queue
         * are complete. Make mandatory for the rxe driver.
         */
        if (wqe->wr.opcode == IB_WR_LOCAL_INV)
                return qp->req.wqe_index != queue_get_consumer(qp->sq.queue,
                                                QUEUE_TYPE_FROM_CLIENT);

        /* Fence see IBA 10.8.3.3
         * Requires that all previous read and atomic operations
         * are complete.
         */
        return (wqe->wr.send_flags & IB_SEND_FENCE) &&
                atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic;
}

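/* Select the wire opcode for the next packet of an RC work request,
 * based on the work request opcode, whether the previous packet was a
 * first/middle fragment of the same message, and whether the remaining
 * payload fits in a single MTU.
 */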
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
        switch (opcode) {
        case IB_WR_RDMA_WRITE:
                if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
                    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
                        return fits ?
                                IB_OPCODE_RC_RDMA_WRITE_LAST :
                                IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_RC_RDMA_WRITE_ONLY :
                                IB_OPCODE_RC_RDMA_WRITE_FIRST;

        case IB_WR_RDMA_WRITE_WITH_IMM:
                if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
                    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
                        return fits ?
                                IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
                                IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
                                IB_OPCODE_RC_RDMA_WRITE_FIRST;

        case IB_WR_SEND:
                if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
                    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
                        return fits ?
                                IB_OPCODE_RC_SEND_LAST :
                                IB_OPCODE_RC_SEND_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_RC_SEND_ONLY :
                                IB_OPCODE_RC_SEND_FIRST;

        case IB_WR_SEND_WITH_IMM:
                if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
                    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
                        return fits ?
                                IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
                                IB_OPCODE_RC_SEND_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
                                IB_OPCODE_RC_SEND_FIRST;

        case IB_WR_RDMA_READ:
                return IB_OPCODE_RC_RDMA_READ_REQUEST;

        case IB_WR_ATOMIC_CMP_AND_SWP:
                return IB_OPCODE_RC_COMPARE_SWAP;

        case IB_WR_ATOMIC_FETCH_AND_ADD:
                return IB_OPCODE_RC_FETCH_ADD;

        case IB_WR_SEND_WITH_INV:
                if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
                    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
                        return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
                                IB_OPCODE_RC_SEND_MIDDLE;
                else
                        return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
                                IB_OPCODE_RC_SEND_FIRST;

        case IB_WR_ATOMIC_WRITE:
                return IB_OPCODE_RC_ATOMIC_WRITE;

        case IB_WR_REG_MR:
        case IB_WR_LOCAL_INV:
                return opcode;
        }

        return -EINVAL;
}

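/* Same as next_opcode_rc() but for UC QPs, which only handle send and
 * RDMA write work requests here.
 */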
static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
        switch (opcode) {
        case IB_WR_RDMA_WRITE:
                if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
                    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
                        return fits ?
                                IB_OPCODE_UC_RDMA_WRITE_LAST :
                                IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_UC_RDMA_WRITE_ONLY :
                                IB_OPCODE_UC_RDMA_WRITE_FIRST;

        case IB_WR_RDMA_WRITE_WITH_IMM:
                if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
                    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
                        return fits ?
                                IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
                                IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
                                IB_OPCODE_UC_RDMA_WRITE_FIRST;

        case IB_WR_SEND:
                if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
                    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
                        return fits ?
                                IB_OPCODE_UC_SEND_LAST :
                                IB_OPCODE_UC_SEND_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_UC_SEND_ONLY :
                                IB_OPCODE_UC_SEND_FIRST;

        case IB_WR_SEND_WITH_IMM:
                if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
                    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
                        return fits ?
                                IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
                                IB_OPCODE_UC_SEND_MIDDLE;
                else
                        return fits ?
                                IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
                                IB_OPCODE_UC_SEND_FIRST;
        }

        return -EINVAL;
}

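/* Choose the wire opcode for the next packet of a wqe according to the
 * QP type. Returns -EINVAL for unsupported combinations.
 */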
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
                       u32 opcode)
{
        int fits = (wqe->dma.resid <= qp->mtu);

        switch (qp_type(qp)) {
        case IB_QPT_RC:
                return next_opcode_rc(qp, opcode, fits);

        case IB_QPT_UC:
                return next_opcode_uc(qp, opcode, fits);

        case IB_QPT_UD:
        case IB_QPT_GSI:
                switch (opcode) {
                case IB_WR_SEND:
                        return IB_OPCODE_UD_SEND_ONLY;

                case IB_WR_SEND_WITH_IMM:
                        return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                }
                break;

        default:
                break;
        }

        return -EINVAL;
}

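/* Reserve one of the QP's rd_atomic credits for a read or atomic wqe.
 * Returns -EAGAIN if max_rd_atomic requests are already outstanding,
 * in which case the wqe must wait.
 */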
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        int depth;

        if (wqe->has_rd_atomic)
                return 0;

        qp->req.need_rd_atomic = 1;
        depth = atomic_dec_return(&qp->req.rd_atomic);

        if (depth >= 0) {
                qp->req.need_rd_atomic = 0;
                wqe->has_rd_atomic = 1;
                return 0;
        }

        atomic_inc(&qp->req.rd_atomic);
        return -EAGAIN;
}

static inline int get_mtu(struct rxe_qp *qp)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

        if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
                return qp->mtu;

        return rxe->port.mtu_cap;
}

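/* Allocate an skb for the next request packet and build its transport
 * headers (BTH plus whichever of RETH/IMMDT/IETH/ATMETH/DETH the
 * opcode requires) from the wqe and address vector.
 */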
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
                                       struct rxe_av *av,
                                       struct rxe_send_wqe *wqe,
                                       int opcode, u32 payload,
                                       struct rxe_pkt_info *pkt)
{
        struct rxe_dev          *rxe = to_rdev(qp->ibqp.device);
        struct sk_buff          *skb;
        struct rxe_send_wr      *ibwr = &wqe->wr;
        int                     pad = (-payload) & 0x3;
        int                     paylen;
        int                     solicited;
        u32                     qp_num;
        int                     ack_req;

        /* length from start of bth to end of icrc */
        paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
        pkt->paylen = paylen;

        /* init skb */
        skb = rxe_init_packet(rxe, av, paylen, pkt);
        if (unlikely(!skb))
                return NULL;

        /* init bth */
        solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
                        (pkt->mask & RXE_END_MASK) &&
                        ((pkt->mask & (RXE_SEND_MASK)) ||
                        (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
                        (RXE_WRITE_MASK | RXE_IMMDT_MASK));

        qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
                                         qp->attr.dest_qp_num;

        ack_req = ((pkt->mask & RXE_END_MASK) ||
                (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
        if (ack_req)
                qp->req.noack_pkts = 0;

        bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num,
                 ack_req, pkt->psn);

        /* init optional headers */
        if (pkt->mask & RXE_RETH_MASK) {
                reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
                reth_set_va(pkt, wqe->iova);
                reth_set_len(pkt, wqe->dma.resid);
        }

        if (pkt->mask & RXE_IMMDT_MASK)
                immdt_set_imm(pkt, ibwr->ex.imm_data);

        if (pkt->mask & RXE_IETH_MASK)
                ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

        if (pkt->mask & RXE_ATMETH_MASK) {
                atmeth_set_va(pkt, wqe->iova);
                if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
                        atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
                        atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
                } else {
                        atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
                }
                atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
        }

        if (pkt->mask & RXE_DETH_MASK) {
                if (qp->ibqp.qp_num == 1)
                        deth_set_qkey(pkt, GSI_QKEY);
                else
                        deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
                deth_set_sqp(pkt, qp->ibqp.qp_num);
        }

        return skb;
}

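/* Finish the request packet: prepare it for the network layer and copy
 * the payload (inline data, the wqe's sg list via copy_data(), or the
 * atomic write value) into the skb, zeroing any pad bytes.
 */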
static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
                         struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
                         struct sk_buff *skb, u32 payload)
{
        int err;

        err = rxe_prepare(av, pkt, skb);
        if (err)
                return err;

        if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
                if (wqe->wr.send_flags & IB_SEND_INLINE) {
                        u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

                        memcpy(payload_addr(pkt), tmp, payload);

                        wqe->dma.resid -= payload;
                        wqe->dma.sge_offset += payload;
                } else {
                        err = copy_data(qp->pd, 0, &wqe->dma,
                                        payload_addr(pkt), payload,
                                        RXE_FROM_MR_OBJ);
                        if (err)
                                return err;
                }
                if (bth_pad(pkt)) {
                        u8 *pad = payload_addr(pkt) + payload;

                        memset(pad, 0, bth_pad(pkt));
                }
        }

        if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
                memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
                wqe->dma.resid -= payload;
        }

        return 0;
}

static void update_wqe_state(struct rxe_qp *qp,
                struct rxe_send_wqe *wqe,
                struct rxe_pkt_info *pkt)
{
        if (pkt->mask & RXE_END_MASK) {
                if (qp_type(qp) == IB_QPT_RC)
                        wqe->state = wqe_state_pending;
        } else {
                wqe->state = wqe_state_processing;
        }
}

static void update_wqe_psn(struct rxe_qp *qp,
                           struct rxe_send_wqe *wqe,
                           struct rxe_pkt_info *pkt,
                           u32 payload)
{
        /* number of packets left to send including current one */
        int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

        /* handle zero length packet case */
        if (num_pkt == 0)
                num_pkt = 1;

        if (pkt->mask & RXE_START_MASK) {
                wqe->first_psn = qp->req.psn;
                wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
        }

        if (pkt->mask & RXE_READ_MASK)
                qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
        else
                qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}

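/* save_state()/rollback_state() snapshot and restore the wqe state and
 * the request psn around rxe_xmit_packet() so a failed transmit can be
 * retried without corrupting the requester state.
 */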
static void save_state(struct rxe_send_wqe *wqe,
                       struct rxe_qp *qp,
                       struct rxe_send_wqe *rollback_wqe,
                       u32 *rollback_psn)
{
        rollback_wqe->state     = wqe->state;
        rollback_wqe->first_psn = wqe->first_psn;
        rollback_wqe->last_psn  = wqe->last_psn;
        *rollback_psn           = qp->req.psn;
}

static void rollback_state(struct rxe_send_wqe *wqe,
                           struct rxe_qp *qp,
                           struct rxe_send_wqe *rollback_wqe,
                           u32 rollback_psn)
{
        wqe->state     = rollback_wqe->state;
        wqe->first_psn = rollback_wqe->first_psn;
        wqe->last_psn  = rollback_wqe->last_psn;
        qp->req.psn    = rollback_psn;
}

static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
        qp->req.opcode = pkt->opcode;

        if (pkt->mask & RXE_END_MASK)
                qp->req.wqe_index = queue_next_index(qp->sq.queue,
                                                     qp->req.wqe_index);

        qp->need_req_skb = 0;

        if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
                mod_timer(&qp->retrans_timer,
                          jiffies + qp->qp_timeout_jiffies);
}

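/* Execute a local work request (local invalidate, fast register or
 * memory window bind) directly, without generating a packet, and hand
 * the wqe to the completer.
 */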
static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        u8 opcode = wqe->wr.opcode;
        u32 rkey;
        int ret;

        switch (opcode) {
        case IB_WR_LOCAL_INV:
                rkey = wqe->wr.ex.invalidate_rkey;
                if (rkey_is_mw(rkey))
                        ret = rxe_invalidate_mw(qp, rkey);
                else
                        ret = rxe_invalidate_mr(qp, rkey);

                if (unlikely(ret)) {
                        wqe->status = IB_WC_LOC_QP_OP_ERR;
                        return ret;
                }
                break;
        case IB_WR_REG_MR:
                ret = rxe_reg_fast_mr(qp, wqe);
                if (unlikely(ret)) {
                        wqe->status = IB_WC_LOC_QP_OP_ERR;
                        return ret;
                }
                break;
        case IB_WR_BIND_MW:
                ret = rxe_bind_mw(qp, wqe);
                if (unlikely(ret)) {
                        wqe->status = IB_WC_MW_BIND_ERR;
                        return ret;
                }
                break;
        default:
                rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
                wqe->status = IB_WC_LOC_QP_OP_ERR;
                return -EINVAL;
        }

        wqe->state = wqe_state_done;
        wqe->status = IB_WC_SUCCESS;
        qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);

        /* There is no ack coming for local work requests
         * which can lead to a deadlock. So go ahead and complete
         * it now.
         */
        rxe_sched_task(&qp->comp.task);

        return 0;
}

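/* Requester task: take the next wqe from the send queue, build one
 * request packet for it and transmit it. Returns 0 to be called again
 * immediately or -EAGAIN to stop until rescheduled.
 */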
int rxe_requester(void *arg)
{
        struct rxe_qp *qp = (struct rxe_qp *)arg;
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_pkt_info pkt;
        struct sk_buff *skb;
        struct rxe_send_wqe *wqe;
        enum rxe_hdr_mask mask;
        u32 payload;
        int mtu;
        int opcode;
        int err;
        int ret;
        struct rxe_send_wqe rollback_wqe;
        u32 rollback_psn;
        struct rxe_queue *q = qp->sq.queue;
        struct rxe_ah *ah;
        struct rxe_av *av;

        if (!rxe_get(qp))
                return -EAGAIN;

        if (unlikely(!qp->valid))
                goto exit;

        if (unlikely(qp->req.state == QP_STATE_ERROR)) {
                wqe = req_next_wqe(qp);
                if (wqe)
                        /*
                         * Generate an error completion for error qp state
                         */
                        goto err;
                else
                        goto exit;
        }

        if (unlikely(qp->req.state == QP_STATE_RESET)) {
                qp->req.wqe_index = queue_get_consumer(q,
                                                QUEUE_TYPE_FROM_CLIENT);
                qp->req.opcode = -1;
                qp->req.need_rd_atomic = 0;
                qp->req.wait_psn = 0;
                qp->req.need_retry = 0;
                qp->req.wait_for_rnr_timer = 0;
                goto exit;
        }

        /* we come here if the retransmit timer has fired
         * or if the rnr timer has fired. If the retransmit
         * timer fires while we are processing an RNR NAK wait
         * until the rnr timer has fired before starting the
         * retry flow
         */
        if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
                req_retry(qp);
                qp->req.need_retry = 0;
        }

        wqe = req_next_wqe(qp);
        if (unlikely(!wqe))
                goto exit;

        if (rxe_wqe_is_fenced(qp, wqe)) {
                qp->req.wait_fence = 1;
                goto exit;
        }

        if (wqe->mask & WR_LOCAL_OP_MASK) {
                err = rxe_do_local_ops(qp, wqe);
                if (unlikely(err))
                        goto err;
                else
                        goto done;
        }

        if (unlikely(qp_type(qp) == IB_QPT_RC &&
                psn_compare(qp->req.psn, (qp->comp.psn +
                                RXE_MAX_UNACKED_PSNS)) > 0)) {
                qp->req.wait_psn = 1;
                goto exit;
        }

        /* Limit the number of inflight SKBs per QP */
        if (unlikely(atomic_read(&qp->skb_out) >
                     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
                qp->need_req_skb = 1;
                goto exit;
        }

        opcode = next_opcode(qp, wqe, wqe->wr.opcode);
        if (unlikely(opcode < 0)) {
                wqe->status = IB_WC_LOC_QP_OP_ERR;
                goto err;
        }

        mask = rxe_opcode[opcode].mask;
        if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
                        RXE_ATOMIC_WRITE_MASK))) {
                if (check_init_depth(qp, wqe))
                        goto exit;
        }

        mtu = get_mtu(qp);
        payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
                        wqe->dma.resid : 0;
        if (payload > mtu) {
                if (qp_type(qp) == IB_QPT_UD) {
                        /* C10-93.1.1: If the total sum of all the buffer lengths specified for a
                         * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
                         * shall not emit any packets for this message. Further, the CI shall not
                         * generate an error due to this condition.
                         */

                        /* fake a successful UD send */
                        wqe->first_psn = qp->req.psn;
                        wqe->last_psn = qp->req.psn;
                        qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
                        qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
                        qp->req.wqe_index = queue_next_index(qp->sq.queue,
                                                       qp->req.wqe_index);
                        wqe->state = wqe_state_done;
                        wqe->status = IB_WC_SUCCESS;
                        rxe_run_task(&qp->comp.task);
                        goto done;
                }
                payload = mtu;
        }

        pkt.rxe = rxe;
        pkt.opcode = opcode;
        pkt.qp = qp;
        pkt.psn = qp->req.psn;
        pkt.mask = rxe_opcode[opcode].mask;
        pkt.wqe = wqe;

        av = rxe_get_av(&pkt, &ah);
        if (unlikely(!av)) {
                rxe_dbg_qp(qp, "Failed no address vector\n");
                wqe->status = IB_WC_LOC_QP_OP_ERR;
                goto err;
        }

        skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
        if (unlikely(!skb)) {
                rxe_dbg_qp(qp, "Failed allocating skb\n");
                wqe->status = IB_WC_LOC_QP_OP_ERR;
                if (ah)
                        rxe_put(ah);
                goto err;
        }

        err = finish_packet(qp, av, wqe, &pkt, skb, payload);
        if (unlikely(err)) {
                rxe_dbg_qp(qp, "Error during finish packet\n");
                if (err == -EFAULT)
                        wqe->status = IB_WC_LOC_PROT_ERR;
                else
                        wqe->status = IB_WC_LOC_QP_OP_ERR;
                kfree_skb(skb);
                if (ah)
                        rxe_put(ah);
                goto err;
        }

        if (ah)
                rxe_put(ah);

        /*
         * To prevent a race on wqe access between requester and completer,
         * wqe members state and psn need to be set before calling
         * rxe_xmit_packet().
         * Otherwise, completer might initiate an unjustified retry flow.
         */
        save_state(wqe, qp, &rollback_wqe, &rollback_psn);
        update_wqe_state(qp, wqe, &pkt);
        update_wqe_psn(qp, wqe, &pkt, payload);

        err = rxe_xmit_packet(qp, &pkt, skb);
        if (err) {
                qp->need_req_skb = 1;

                rollback_state(wqe, qp, &rollback_wqe, rollback_psn);

                if (err == -EAGAIN) {
                        rxe_sched_task(&qp->req.task);
                        goto exit;
                }

                wqe->status = IB_WC_LOC_QP_OP_ERR;
                goto err;
        }

        update_state(qp, &pkt);

        /* A non-zero return value will cause rxe_do_task to
         * exit its loop and end the tasklet. A zero return
         * will continue looping and return to rxe_requester
         */
done:
        ret = 0;
        goto out;
err:
        /* update wqe_index for each wqe completion */
        qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
        wqe->state = wqe_state_error;
        qp->req.state = QP_STATE_ERROR;
        rxe_run_task(&qp->comp.task);
exit:
        ret = -EAGAIN;
out:
        rxe_put(qp);

        return ret;
}