/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/dma_remapping.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

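/*
 * When true, user-mode registrations made with SCIF_MAP_ULIMIT are
 * charged against RLIMIT_MEMLOCK via mm->pinned_vm (see
 * __scif_check_inc_pinned_vm() below).
 */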
bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
        struct scif_endpt_rma_info *rma = &ep->rma_info;

        mutex_init(&rma->rma_lock);
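        /* The IOVA domain hands out registration offsets for this endpoint */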
        init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
                         SCIF_DMA_64BIT_PFN);
        spin_lock_init(&rma->tc_lock);
        mutex_init(&rma->mmn_lock);
        INIT_LIST_HEAD(&rma->reg_list);
        INIT_LIST_HEAD(&rma->remote_reg_list);
        atomic_set(&rma->tw_refcount, 0);
        atomic_set(&rma->tcw_refcount, 0);
        atomic_set(&rma->tcw_total_pages, 0);
        atomic_set(&rma->fence_refcount, 0);

        rma->async_list_del = 0;
        rma->dma_chan = NULL;
        INIT_LIST_HEAD(&rma->mmn_list);
        INIT_LIST_HEAD(&rma->vma_list);
        init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
        int ret = 0;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Destroy RMA info only if all lists are empty and no references remain */
        if (list_empty(&ep->rma_info.reg_list) &&
            list_empty(&ep->rma_info.remote_reg_list) &&
            list_empty(&ep->rma_info.mmn_list) &&
            !atomic_read(&ep->rma_info.tw_refcount) &&
            !atomic_read(&ep->rma_info.tcw_refcount) &&
            !atomic_read(&ep->rma_info.fence_refcount))
                ret = 1;
        mutex_unlock(&ep->rma_info.rma_lock);
        return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
        struct scif_pinned_pages *pin;

        might_sleep();
        pin = scif_zalloc(sizeof(*pin));
        if (!pin)
                goto error;

        pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
        if (!pin->pages)
                goto error_free_pinned_pages;

        pin->prot = prot;
        pin->magic = SCIFEP_MAGIC;
        return pin;

error_free_pinned_pages:
        scif_free(pin, sizeof(*pin));
error:
        return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
        int j;
        int writeable = pin->prot & SCIF_PROT_WRITE;
        int kernel = SCIF_MAP_KERNEL & pin->map_flags;

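        /*
         * Only user pages were taken with get_user_pages() and need a
         * put_page(); SCIF_MAP_KERNEL pages were never reference counted
         * by this driver.
         */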
        for (j = 0; j < pin->nr_pages; j++) {
                if (pin->pages[j] && !kernel) {
                        if (writeable)
                                SetPageDirty(pin->pages[j]);
                        put_page(pin->pages[j]);
                }
        }

        scif_free(pin->pages,
                  pin->nr_pages * sizeof(*pin->pages));
        scif_free(pin, sizeof(*pin));
        return 0;
}

/*
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
                                       s64 offset, bool temp)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_free_window;

        window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_free_window;

        window->offset = offset;
        window->ep = (u64)ep;
        window->magic = SCIFEP_MAGIC;
        window->reg_state = OP_IDLE;
        init_waitqueue_head(&window->regwq);
        window->unreg_state = OP_IDLE;
        init_waitqueue_head(&window->unregwq);
        INIT_LIST_HEAD(&window->list);
        window->type = SCIF_WINDOW_SELF;
        window->temp = temp;
        return window;

error_free_window:
        scif_free(window->dma_addr,
                  nr_pages * sizeof(*window->dma_addr));
        scif_free(window, sizeof(*window));
error:
        return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
                                           struct scif_window *window)
{
        int err;
        int nr_pages = window->nr_pages;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        struct scifmsg msg;

retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err && scifdev_alive(ep))
                goto retry;

        mutex_lock(&ep->rma_info.rma_lock);
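        /* The peer granted the allocation; tell it to free the remote window */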
        if (alloc->state == OP_COMPLETED) {
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                _scif_nodeqp_send(ep->remote_dev, &msg);
        }
        mutex_unlock(&ep->rma_info.rma_lock);

        scif_free_window_offset(ep, window, window->offset);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int j;

        if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
                if (window->st) {
                        dma_unmap_sg(&remote_dev->sdev->dev,
                                     window->st->sgl, window->st->nents,
                                     DMA_BIDIRECTIONAL);
                        sg_free_table(window->st);
                        kfree(window->st);
                        window->st = NULL;
                }
        } else {
                for (j = 0; j < window->nr_contig_chunks; j++) {
                        if (window->dma_addr[j]) {
                                scif_unmap_single(window->dma_addr[j],
                                                  remote_dev,
                                                  window->num_pages[j] <<
                                                  PAGE_SHIFT);
                                window->dma_addr[j] = 0x0;
                        }
                }
        }
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
        if (scif_ulimit_check)
                return get_task_mm(current);
        return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
        if (mm)
                mmput(mm);
}

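/*
 * Drop nr_pages from mm->pinned_vm under mmap_sem. With try_lock set the
 * caller cannot block on mmap_sem, so -1 is returned on contention and the
 * accounting is left for a later, blocking call.
 */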
static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
                          int nr_pages, bool try_lock)
{
        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;
        if (try_lock) {
                if (!down_write_trylock(&mm->mmap_sem)) {
                        dev_err(scif_info.mdev.this_device,
                                "%s %d err\n", __func__, __LINE__);
                        return -1;
                }
        } else {
                down_write(&mm->mmap_sem);
        }
        mm->pinned_vm -= nr_pages;
        up_write(&mm->mmap_sem);
        return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                                             int nr_pages)
{
        unsigned long locked, lock_limit;

        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;

        locked = nr_pages;
        locked += mm->pinned_vm;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                dev_err(scif_info.mdev.this_device,
                        "locked(%lu) > lock_limit(%lu)\n",
                        locked, lock_limit);
                return -ENOMEM;
        }
        mm->pinned_vm = locked;
        return 0;
}

/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
        int j;
        struct scif_pinned_pages *pinned_pages = window->pinned_pages;
        int nr_pages = window->nr_pages;

        might_sleep();
        if (!window->temp && window->mm) {
                __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
                __scif_release_mm(window->mm);
                window->mm = NULL;
        }

        scif_free_window_offset(ep, window, window->offset);
        scif_unmap_window(ep->remote_dev, window);
        /*
         * Decrement references for this set of pinned pages from
         * this window.
         */
        j = atomic_sub_return(1, &pinned_pages->ref_count);
        if (j < 0)
                dev_err(scif_info.mdev.this_device,
                        "%s %d incorrect ref count %d\n",
                        __func__, __LINE__, j);
        /*
         * If the ref count for pinned_pages is zero then someone
         * has already called scif_unpin_pages() for it and we should
         * destroy the page cache.
         */
        if (!j)
                scif_destroy_pinned_pages(window->pinned_pages);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
        return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
                                     struct scif_window *window)
{
        int i, j, err = 0;
        int nr_pages = window->nr_pages;
        bool vmalloc_dma_phys, vmalloc_num_pages;

        might_sleep();
        /* Map window */
        err = scif_map_single(&window->mapped_offset,
                              window, remote_dev, sizeof(*window));
        if (err)
                goto error_window;

        /* Compute the number of lookup entries. 21 == 2MB Shift */
        window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
                                  (2 * 1024 * 1024)) >> 21;

        window->dma_addr_lookup.lookup =
                scif_alloc_coherent(&window->dma_addr_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->dma_addr_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->dma_addr_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        window->num_pages_lookup.lookup =
                scif_alloc_coherent(&window->num_pages_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->num_pages_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->num_pages_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
        vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

        /* Now map each of the pages containing physical addresses */
        for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
                err = scif_map_page(&window->dma_addr_lookup.lookup[j],
                                    vmalloc_dma_phys ?
                                    vmalloc_to_page(&window->dma_addr[i]) :
                                    virt_to_page(&window->dma_addr[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
                err = scif_map_page(&window->num_pages_lookup.lookup[j],
                                    vmalloc_num_pages ?
                                    vmalloc_to_page(&window->num_pages[i]) :
                                    virt_to_page(&window->num_pages[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
        }
        return 0;
error_window:
        return err;
}

/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
                                       struct scif_window *window)
{
        int i, j;

        if (window->nr_lookup) {
                struct scif_rma_lookup *lup = &window->dma_addr_lookup;
                struct scif_rma_lookup *npup = &window->num_pages_lookup;

                for (i = 0, j = 0; i < window->nr_pages;
                        i += SCIF_NR_ADDR_IN_PAGE, j++) {
                        if (lup->lookup && lup->lookup[j])
                                scif_unmap_single(lup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                        if (npup->lookup && npup->lookup[j])
                                scif_unmap_single(npup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                }
                if (lup->lookup)
                        scif_free_coherent(lup->lookup, lup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*lup->lookup));
                if (npup->lookup)
                        scif_free_coherent(npup->lookup, npup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*npup->lookup));
                if (window->mapped_offset)
                        scif_unmap_single(window->mapped_offset,
                                          remote_dev, sizeof(*window));
                window->nr_lookup = 0;
        }
}

/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error_ret;

        window->magic = SCIFEP_MAGIC;
        window->nr_pages = nr_pages;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_window;

        window->num_pages = scif_zalloc(nr_pages *
                                        sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_window;

        if (scif_create_remote_lookup(scifdev, window))
                goto error_window;

        window->type = SCIF_WINDOW_PEER;
        window->unreg_state = OP_IDLE;
        INIT_LIST_HEAD(&window->list);
        return window;
error_window:
        scif_destroy_remote_window(window);
error_ret:
        return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
        scif_free(window->dma_addr, window->nr_pages *
                  sizeof(*window->dma_addr));
        scif_free(window->num_pages, window->nr_pages *
                  sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
                          struct scif_window *window)
{
        struct scatterlist *sg;
        int i, err;
        scif_pinned_pages_t pin = window->pinned_pages;

        window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
        if (!window->st)
                return -ENOMEM;

        err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
        if (err)
                return err;

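        /* One scatterlist entry per pinned page */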
        for_each_sg(window->st->sgl, sg, window->st->nents, i)
                sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

        err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
                         window->st->nents, DMA_BIDIRECTIONAL);
        if (!err)
                return -ENOMEM;
        /* Detect contiguous ranges of DMA mappings */
        sg = window->st->sgl;
        for (i = 0; sg; i++) {
                dma_addr_t last_da;

                window->dma_addr[i] = sg_dma_address(sg);
                window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
                last_da = sg_dma_address(sg) + sg_dma_len(sg);
                while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
                        window->num_pages[i] +=
                                (sg_dma_len(sg) >> PAGE_SHIFT);
                        last_da = window->dma_addr[i] +
                                sg_dma_len(sg);
                }
                window->nr_contig_chunks++;
        }
        return 0;
}

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int i, j, k, err = 0, nr_contig_pages;
        scif_pinned_pages_t pin;
        phys_addr_t phys_prev, phys_curr;

        might_sleep();

        pin = window->pinned_pages;

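        /* With an IOMMU active, build the DMA mapping via dma_map_sg() */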
        if (intel_iommu_enabled && !scifdev_self(remote_dev))
                return scif_iommu_map(remote_dev, window);

        for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
                phys_prev = page_to_phys(pin->pages[i]);
                nr_contig_pages = 1;

                /* Detect physically contiguous chunks */
                for (k = i + 1; k < window->nr_pages; k++) {
                        phys_curr = page_to_phys(pin->pages[k]);
                        if (phys_curr != (phys_prev + PAGE_SIZE))
                                break;
                        phys_prev = phys_curr;
                        nr_contig_pages++;
                }
                window->num_pages[j] = nr_contig_pages;
                window->nr_contig_chunks++;
                if (scif_is_mgmt_node()) {
                        /*
                         * Management node has to deal with SMPT on X100 and
                         * hence the DMA mapping is required
                         */
                        err = scif_map_single(&window->dma_addr[j],
                                              phys_to_virt(page_to_phys(
                                                           pin->pages[i])),
                                              remote_dev,
                                              nr_contig_pages << PAGE_SHIFT);
                        if (err)
                                return err;
                } else {
                        window->dma_addr[j] = page_to_phys(pin->pages[i]);
                }
        }
        return err;
}

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
                                     struct scif_window *window)
{
        struct scifmsg msg;

        msg.uop = SCIF_UNREGISTER;
        msg.src = ep->port;
        msg.payload[0] = window->alloc_handle.vaddr;
        msg.payload[1] = (u64)window;
        return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
        int err = 0;
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
        bool send_msg = false;

        might_sleep();
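        /*
         * Unregistration state machine: an OP_IDLE window sends the
         * SCIF_UNREGISTER message and then, like an OP_IN_PROGRESS one,
         * waits for the peer's (N)ACK before dropping its references.
         */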
        switch (window->unreg_state) {
        case OP_IDLE:
        {
                window->unreg_state = OP_IN_PROGRESS;
                send_msg = true;
                /* fall through */
        }
        case OP_IN_PROGRESS:
        {
                scif_get_window(window, 1);
                mutex_unlock(&ep->rma_info.rma_lock);
                if (send_msg) {
                        err = scif_send_scif_unregister(ep, window);
                        if (err) {
                                window->unreg_state = OP_COMPLETED;
                                goto done;
                        }
                } else {
                        /* Return ENXIO since unregistration is in progress */
                        mutex_lock(&ep->rma_info.rma_lock);
                        return -ENXIO;
                }
retry:
                /* Wait for a SCIF_UNREGISTER_(N)ACK message */
                err = wait_event_timeout(window->unregwq,
                                         window->unreg_state != OP_IN_PROGRESS,
                                         SCIF_NODE_ALIVE_TIMEOUT);
                if (!err && scifdev_alive(ep))
                        goto retry;
                if (!err) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                        dev_err(scif_info.mdev.this_device,
                                "%s %d err %d\n", __func__, __LINE__, err);
                }
                if (err > 0)
                        err = 0;
done:
                mutex_lock(&ep->rma_info.rma_lock);
                scif_put_window(window, 1);
                break;
        }
        case OP_FAILED:
        {
                if (!scifdev_alive(ep)) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                }
                break;
        }
        case OP_COMPLETED:
                break;
        default:
                err = -ENODEV;
        }

        if (window->unreg_state == OP_COMPLETED && window->ref_count)
                scif_put_window(window, window->nr_pages);

        if (!window->ref_count) {
                atomic_inc(&ep->rma_info.tw_refcount);
                list_del_init(&window->list);
                scif_free_window_offset(ep, window, window->offset);
                mutex_unlock(&ep->rma_info.rma_lock);
                if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
                    scifdev_alive(ep)) {
                        scif_drain_dma_intr(ep->remote_dev->sdev,
                                            ep->rma_info.dma_chan);
                } else {
                        if (!__scif_dec_pinned_vm_lock(window->mm,
                                                       window->nr_pages, 1)) {
                                __scif_release_mm(window->mm);
                                window->mm = NULL;
                        }
                }
                scif_queue_for_cleanup(window, &scif_info.rma);
                mutex_lock(&ep->rma_info.rma_lock);
        }
        return err;
}

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_allocmsg *alloc = &window->alloc_handle;

        /* Set up the Alloc Handle */
        alloc->state = OP_IN_PROGRESS;
        init_waitqueue_head(&alloc->allocwq);

        /* Send out an allocation request */
        msg.uop = SCIF_ALLOC_REQ;
        msg.payload[1] = window->nr_pages;
        msg.payload[2] = (u64)&window->alloc_handle;
        return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_window *remote_window;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
        int i = 0, j = 0;
        int nr_contig_chunks, loop_nr_contig_chunks;
        int remaining_nr_contig_chunks, nr_lookup;
        int err, map_err;

        map_err = scif_map_window(ep->remote_dev, window);
        if (map_err)
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d map_err %d\n", __func__, __LINE__, map_err);
        remaining_nr_contig_chunks = window->nr_contig_chunks;
        nr_contig_chunks = window->nr_contig_chunks;
retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        mutex_lock(&ep->rma_info.rma_lock);
        /* Synchronize with the thread waking up allocwq */
        mutex_unlock(&ep->rma_info.rma_lock);
        if (!err && scifdev_alive(ep))
                goto retry;

        if (!err)
                err = -ENODEV;

        if (err > 0)
                err = 0;
        else
                return err;

        /* Bail out. The remote end rejected this request */
        if (alloc->state == OP_FAILED)
                return -ENOMEM;

        if (map_err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, map_err);
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                spin_lock(&ep->lock);
                if (ep->state == SCIFEP_CONNECTED)
                        err = _scif_nodeqp_send(ep->remote_dev, &msg);
                else
                        err = -ENOTCONN;
                spin_unlock(&ep->lock);
                return err;
        }

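        /* Map the peer's window descriptor so it can be filled in locally */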
        remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
                                     ep->remote_dev);

        /*
         * Compute the number of lookup entries; one lookup page holds
         * SCIF_NR_ADDR_IN_PAGE addresses
         */
        nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
                          >> ilog2(SCIF_NR_ADDR_IN_PAGE);

        dma_phys_lookup =
                scif_ioremap(remote_window->dma_addr_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->dma_addr_lookup.lookup),
                             ep->remote_dev);
        num_pages_lookup =
                scif_ioremap(remote_window->num_pages_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->num_pages_lookup.lookup),
                             ep->remote_dev);

        while (remaining_nr_contig_chunks) {
                loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
                                              (int)SCIF_NR_ADDR_IN_PAGE);
                /* #1/2 - Copy physical addresses over to the remote side */

                /* #2/2 - Copy DMA addresses (addresses that are fed into the
                 * DMA engine) We transfer bus addresses which are then
                 * converted into a MIC physical address on the remote
                 * side if it is a MIC, if the remote node is a mgmt node we
                 * transfer the MIC physical address
                 */
                tmp = scif_ioremap(dma_phys_lookup[j],
                                   loop_nr_contig_chunks *
                                   sizeof(*window->dma_addr),
                                   ep->remote_dev);
                tmp1 = scif_ioremap(num_pages_lookup[j],
                                    loop_nr_contig_chunks *
                                    sizeof(*window->num_pages),
                                    ep->remote_dev);
                if (scif_is_mgmt_node()) {
                        memcpy_toio((void __force __iomem *)tmp,
                                    &window->dma_addr[i], loop_nr_contig_chunks
                                    * sizeof(*window->dma_addr));
                        memcpy_toio((void __force __iomem *)tmp1,
                                    &window->num_pages[i], loop_nr_contig_chunks
                                    * sizeof(*window->num_pages));
                } else {
                        if (scifdev_is_p2p(ep->remote_dev)) {
                                /*
                                 * add remote node's base address for this node
                                 * to convert it into a MIC address
                                 */
                                int m;
                                dma_addr_t dma_addr;

                                for (m = 0; m < loop_nr_contig_chunks; m++) {
                                        dma_addr = window->dma_addr[i + m] +
                                                ep->remote_dev->base_addr;
                                        writeq(dma_addr,
                                               (void __force __iomem *)&tmp[m]);
                                }
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks
                                            * sizeof(*window->num_pages));
                        } else {
                                /* Mgmt node or loopback - transfer DMA
                                 * addresses as is, this is the same as a
                                 * MIC physical address (we use the dma_addr
                                 * and not the phys_addr array since the
                                 * phys_addr is only setup if there is a mmap()
                                 * request from the mgmt node)
                                 */
                                memcpy_toio((void __force __iomem *)tmp,
                                            &window->dma_addr[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->dma_addr));
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->num_pages));
                        }
                }
                remaining_nr_contig_chunks -= loop_nr_contig_chunks;
                i += loop_nr_contig_chunks;
                j++;
                scif_iounmap(tmp, loop_nr_contig_chunks *
                             sizeof(*window->dma_addr), ep->remote_dev);
                scif_iounmap(tmp1, loop_nr_contig_chunks *
                             sizeof(*window->num_pages), ep->remote_dev);
        }

        /* Prepare the remote window for the peer */
        remote_window->peer_window = (u64)window;
        remote_window->offset = window->offset;
        remote_window->prot = window->prot;
        remote_window->nr_contig_chunks = nr_contig_chunks;
        remote_window->ep = ep->remote_ep;
        scif_iounmap(num_pages_lookup,
                     nr_lookup *
                     sizeof(*remote_window->num_pages_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(dma_phys_lookup,
                     nr_lookup *
                     sizeof(*remote_window->dma_addr_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
        window->peer_window = alloc->vaddr;
        return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        int err = 0;
        struct scifmsg msg;

        msg.src = ep->port;
        msg.payload[0] = ep->remote_ep;
        msg.payload[1] = window->alloc_handle.vaddr;
        msg.payload[2] = (u64)window;
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg.uop = SCIF_REGISTER;
                window->reg_state = OP_IN_PROGRESS;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err) {
retry:
                        /* Wait for a SCIF_REGISTER_(N)ACK message */
                        err = wait_event_timeout(window->regwq,
                                                 window->reg_state !=
                                                 OP_IN_PROGRESS,
                                                 SCIF_NODE_ALIVE_TIMEOUT);
                        if (!err && scifdev_alive(ep))
                                goto retry;
                        err = !err ? -ENODEV : 0;
                        if (window->reg_state == OP_FAILED)
                                err = -ENOTCONN;
                }
        } else {
                msg.uop = SCIF_FREE_VIRT;
                msg.payload[3] = SCIF_REGISTER;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err)
                        err = -ENOTCONN;
        }
        return err;
}

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
                           int num_pages, s64 *out_offset)
{
        s64 page_index;
        struct iova *iova_ptr;
        int err = 0;

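        /*
         * SCIF_MAP_FIXED: reserve the exact range requested; otherwise
         * allocate any free IOVA range below SCIF_DMA_63BIT_PFN
         */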
        if (flags & SCIF_MAP_FIXED) {
                page_index = SCIF_IOVA_PFN(offset);
                iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
                                        page_index + num_pages - 1);
                if (!iova_ptr)
                        err = -EADDRINUSE;
        } else {
                iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
                                      SCIF_DMA_63BIT_PFN - 1, 0);
                if (!iova_ptr)
                        err = -ENOMEM;
        }
        if (!err)
                *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
        return err;
}

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is expected to hold
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
                             struct scif_window *window, s64 offset)
{
        if ((window && !window->offset_freed) || !window) {
                free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
                if (window)
                        window->offset_freed = true;
        }
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @msg:        Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
        int err;
        struct scif_window *window = NULL;
        int nr_pages = msg->payload[1];

        window = scif_create_remote_window(scifdev, nr_pages);
        if (!window) {
                err = -ENOMEM;
                goto error;
        }

        /* The peer's allocation request is granted */
        msg->uop = SCIF_ALLOC_GNT;
        msg->payload[0] = (u64)window;
        msg->payload[1] = window->mapped_offset;
        err = scif_nodeqp_send(scifdev, msg);
        if (err)
                scif_destroy_remote_window(window);
        return;
error:
        /* The peer's allocation request is rejected */
        dev_err(&scifdev->sdev->dev,
                "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
                __func__, __LINE__, err, window, nr_pages);
        msg->uop = SCIF_ALLOC_REJ;
        scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @msg:        Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
        struct scif_window *window = container_of(handle, struct scif_window,
                                                  alloc_handle);
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        handle->vaddr = msg->payload[0];
        handle->phys_addr = msg->payload[1];
        if (msg->uop == SCIF_ALLOC_GNT)
                handle->state = OP_COMPLETED;
        else
                handle->state = OP_FAILED;
        wake_up(&handle->allocwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @msg:        Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window = (struct scif_window *)msg->payload[1];

        scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
        int j;
        struct scif_hw_dev *sdev = dev->sdev;
        phys_addr_t apt_base = 0;

        /*
         * Add the aperture base if the DMA address is not card relative
         * since the DMA addresses need to be an offset into the bar
         */
        if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
            sdev->aper && !sdev->card_rel_da)
                apt_base = sdev->aper->pa;
        else
                return;

        for (j = 0; j < window->nr_contig_chunks; j++) {
                if (window->num_pages[j])
                        window->dma_addr[j] += apt_base;
                else
                        break;
        }
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @msg:        Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];

        mutex_lock(&ep->rma_info.rma_lock);
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg->uop = SCIF_REGISTER_ACK;
                scif_nodeqp_send(ep->remote_dev, msg);
                scif_fixup_aper_base(ep->remote_dev, window);
                /* No further failures expected. Insert new window */
                scif_insert_window(window, &ep->rma_info.remote_reg_list);
        } else {
                msg->uop = SCIF_REGISTER_NACK;
                scif_nodeqp_send(ep->remote_dev, msg);
        }
        spin_unlock(&ep->lock);
        mutex_unlock(&ep->rma_info.rma_lock);
        /* free up any lookup resources now that page lists are transferred */
        scif_destroy_remote_lookup(ep->remote_dev, window);
        /*
         * The window was never inserted into the remote reg list if it
         * was NACKed, so destroy it here.
         */
        if (msg->uop == SCIF_REGISTER_NACK)
                scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @msg:        Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_rma_req req;
        struct scif_window *window = NULL;
        struct scif_window *recv_window =
                (struct scif_window *)msg->payload[0];
        struct scif_endpt *ep;
        int del_window = 0;

        ep = (struct scif_endpt *)recv_window->ep;
        req.out_window = &window;
        req.offset = recv_window->offset;
        req.prot = 0;
        req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
        req.type = SCIF_WINDOW_FULL;
        req.head = &ep->rma_info.remote_reg_list;
        msg->payload[0] = ep->remote_ep;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Does a valid window exist? */
        if (scif_query_window(&req)) {
                dev_err(&scifdev->sdev->dev,
                        "%s %d -ENXIO\n", __func__, __LINE__);
                msg->uop = SCIF_UNREGISTER_ACK;
                goto error;
        }
        if (window) {
                if (window->ref_count)
                        scif_put_window(window, window->nr_pages);
                else
                        dev_err(&scifdev->sdev->dev,
                                "%s %d ref count should be positive\n",
                                __func__, __LINE__);
                window->unreg_state = OP_COMPLETED;
                if (!window->ref_count) {
                        msg->uop = SCIF_UNREGISTER_ACK;
                        atomic_inc(&ep->rma_info.tw_refcount);
                        ep->rma_info.async_list_del = 1;
                        list_del_init(&window->list);
                        del_window = 1;
                } else {
                        /* NACK! There are valid references to this window */
                        msg->uop = SCIF_UNREGISTER_NACK;
                }
        } else {
                /* The window did not make its way to the list at all. ACK */
                msg->uop = SCIF_UNREGISTER_ACK;
                scif_destroy_remote_window(recv_window);
        }
error:
        mutex_unlock(&ep->rma_info.rma_lock);
        if (del_window)
                scif_drain_dma_intr(ep->remote_dev->sdev,
                                    ep->rma_info.dma_chan);
        scif_nodeqp_send(ep->remote_dev, msg);
        if (del_window)
                scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_COMPLETED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_FAILED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_COMPLETED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_FAILED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

int __scif_pin_pages(void *addr, size_t len, int *out_prot,
                     int map_flags, scif_pinned_pages_t *pages)
{
        struct scif_pinned_pages *pinned_pages;
        int nr_pages, err = 0, i;
        bool vmalloc_addr = false;
        bool try_upgrade = false;
        int prot = *out_prot;
        int ulimit = 0;
        struct mm_struct *mm = NULL;

        /* Unsupported flags */
        if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
                return -EINVAL;
        ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

        /* Unsupported protection requested */
        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
                return -EINVAL;

        /* addr/len must be page aligned. len should be non-zero */
        if (!len ||
            (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
            (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
                return -EINVAL;

        might_sleep();

        nr_pages = len >> PAGE_SHIFT;

        /* Allocate a set of pinned pages */
        pinned_pages = scif_create_pinned_pages(nr_pages, prot);
        if (!pinned_pages)
                return -ENOMEM;

        if (map_flags & SCIF_MAP_KERNEL) {
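                /* Kernel memory is already resident; just look up its pages */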
1362                 if (is_vmalloc_addr(addr))
1363                         vmalloc_addr = true;
1364
1365                 for (i = 0; i < nr_pages; i++) {
1366                         if (vmalloc_addr)
1367                                 pinned_pages->pages[i] =
1368                                         vmalloc_to_page(addr + (i * PAGE_SIZE));
1369                         else
1370                                 pinned_pages->pages[i] =
1371                                         virt_to_page(addr + (i * PAGE_SIZE));
1372                 }
1373                 pinned_pages->nr_pages = nr_pages;
1374                 pinned_pages->map_flags = SCIF_MAP_KERNEL;
1375         } else {
1376                 /*
1377                  * SCIF supports registration caching. If a registration has
1378                  * been requested with read only permissions, then we try
1379                  * to pin the pages with RW permissions so that a subsequent
1380                  * transfer with RW permission can hit the cache instead of
1381                  * invalidating it. If the upgrade fails with RW then we
1382                  * revert back to R permission and retry
1383                  */
1384                 if (prot == SCIF_PROT_READ)
1385                         try_upgrade = true;
1386                 prot |= SCIF_PROT_WRITE;
1387 retry:
1388                 mm = current->mm;
1389                 down_write(&mm->mmap_sem);
1390                 if (ulimit) {
1391                         err = __scif_check_inc_pinned_vm(mm, nr_pages);
1392                         if (err) {
1393                                 up_write(&mm->mmap_sem);
1394                                 pinned_pages->nr_pages = 0;
1395                                 goto error_unmap;
1396                         }
1397                 }
1398
1399                 pinned_pages->nr_pages = get_user_pages(
1400                                 (u64)addr,
1401                                 nr_pages,
1402                                 (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
1403                                 pinned_pages->pages,
1404                                 NULL);
1405                 up_write(&mm->mmap_sem);
1406                 if (nr_pages != pinned_pages->nr_pages) {
1407                         if (try_upgrade) {
1408                                 if (ulimit)
1409                                         __scif_dec_pinned_vm_lock(mm,
1410                                                                   nr_pages, 0);
1411                                 /* Roll back any pinned pages */
1412                                 for (i = 0; i < pinned_pages->nr_pages; i++) {
1413                                         if (pinned_pages->pages[i])
1414                                                 put_page(
1415                                                 pinned_pages->pages[i]);
1416                                 }
1417                                 prot &= ~SCIF_PROT_WRITE;
1418                                 try_upgrade = false;
1419                                 goto retry;
1420                         }
1421                 }
1422                 pinned_pages->map_flags = 0;
1423         }
1424
1425         if (pinned_pages->nr_pages < nr_pages) {
1426                 err = -EFAULT;
1427                 pinned_pages->nr_pages = nr_pages;
1428                 goto dec_pinned;
1429         }
1430
1431         *out_prot = prot;
1432         atomic_set(&pinned_pages->ref_count, 1);
1433         *pages = pinned_pages;
1434         return err;
1435 dec_pinned:
1436         if (ulimit)
1437                 __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
1438         /* Something went wrong! Roll back. */
1439 error_unmap:
1440         pinned_pages->nr_pages = nr_pages;
1441         scif_destroy_pinned_pages(pinned_pages);
1442         *pages = NULL;
1443         dev_dbg(scif_info.mdev.this_device,
1444                 "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1445         return err;
1446 }
1447
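/**
 * scif_pin_pages:
 * @addr: starting page-aligned virtual address
 * @len: page-aligned length in bytes, must be non-zero
 * @prot: SCIF_PROT_READ and/or SCIF_PROT_WRITE
 * @map_flags: SCIF_MAP_KERNEL for kernel virtual addresses, else 0
 * @pages: returns the handle for the set of pinned pages
 *
 * Exported wrapper around __scif_pin_pages(). Callers of the public
 * API never pass SCIF_MAP_ULIMIT; that flag is only set internally on
 * the scif_register() path.
 */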
1448 int scif_pin_pages(void *addr, size_t len, int prot,
1449                    int map_flags, scif_pinned_pages_t *pages)
1450 {
1451         return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1452 }
1453 EXPORT_SYMBOL_GPL(scif_pin_pages);
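/*
 * Example usage (a minimal sketch, not part of this driver; the buffer
 * and its size are hypothetical): a kernel client pins a vmalloc'ed
 * buffer before registering it with a peer.
 *
 *	scif_pinned_pages_t pin;
 *	void *buf = vmalloc(4 * PAGE_SIZE);
 *	int err;
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	err = scif_pin_pages(buf, 4 * PAGE_SIZE,
 *			     SCIF_PROT_READ | SCIF_PROT_WRITE,
 *			     SCIF_MAP_KERNEL, &pin);
 *	if (err)
 *		vfree(buf);
 */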
1454
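/**
 * scif_unpin_pages:
 * @pinned_pages: handle returned by scif_pin_pages()
 *
 * Drop the caller's reference on a set of pinned pages. The pages are
 * released only when the last reference goes away; each window created
 * by scif_register_pinned_pages() holds its own reference until it is
 * unregistered.
 */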
1455 int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1456 {
1457         int err = 0, ret;
1458
1459         if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
1460                 return -EINVAL;
1461
1462         ret = atomic_sub_return(1, &pinned_pages->ref_count);
1463         if (ret < 0) {
1464                 dev_err(scif_info.mdev.this_device,
1465                         "%s %d scif_unpin_pages called without pinning? rc %d\n",
1466                         __func__, __LINE__, ret);
1467                 return -EINVAL;
1468         }
1469         /*
1470          * Destroy the set of pinned pages if its ref count has dropped
1471          * to zero. If the count is still positive then a registered
1472          * window is backed by these pages; they will be destroyed once
1473          * all such windows have been unregistered.
1474          */
1475         if (!ret)
1476                 err = scif_destroy_pinned_pages(pinned_pages);
1477
1478         return err;
1479 }
1480 EXPORT_SYMBOL_GPL(scif_unpin_pages);
1481
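/*
 * Insert a new window into the endpoint's self registration list under
 * rma_info.rma_lock to serialize against lookups and unregistration.
 */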
1482 static inline void
1483 scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1484 {
1485         mutex_lock(&ep->rma_info.rma_lock);
1486         scif_insert_window(window, &ep->rma_info.reg_list);
1487         mutex_unlock(&ep->rma_info.rma_lock);
1488 }
1489
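/**
 * scif_register_pinned_pages:
 * @epd: endpoint descriptor
 * @pinned_pages: handle to an existing set of pinned pages
 * @offset: registered address space offset, honored only with
 *	    SCIF_MAP_FIXED
 * @map_flags: SCIF_MAP_FIXED or 0
 *
 * Create a local registration window backed by @pinned_pages and set
 * up the matching remote window on the peer. Returns the offset at
 * which the window was registered on success, a negative errno on
 * failure.
 */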
1490 off_t scif_register_pinned_pages(scif_epd_t epd,
1491                                  scif_pinned_pages_t pinned_pages,
1492                                  off_t offset, int map_flags)
1493 {
1494         struct scif_endpt *ep = (struct scif_endpt *)epd;
1495         s64 computed_offset;
1496         struct scif_window *window;
1497         int err;
1498         size_t len;
1499         struct device *spdev;
1500
1501         /* Unsupported flags */
1502         if (map_flags & ~SCIF_MAP_FIXED)
1503                 return -EINVAL;
1504
1505         len = pinned_pages->nr_pages << PAGE_SHIFT;
1506
1507         /*
1508          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1509          * is negative, or if offset + len would wrap around.
1510          */
1511         if ((map_flags & SCIF_MAP_FIXED) &&
1512             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1513             (offset < 0) ||
1514             (len > LONG_MAX - offset)))
1515                 return -EINVAL;
1516
1517         might_sleep();
1518
1519         err = scif_verify_epd(ep);
1520         if (err)
1521                 return err;
1522         /*
1523          * It is an error to pass pinned_pages to scif_register_pinned_pages()
1524          * after calling scif_unpin_pages().
1525          */
1526         if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1527                 return -EINVAL;
1528
1529         /* Compute the offset for this registration */
1530         err = scif_get_window_offset(ep, map_flags, offset,
1531                                      len, &computed_offset);
1532         if (err) {
1533                 atomic_sub(1, &pinned_pages->ref_count);
1534                 return err;
1535         }
1536
1537         /* Allocate and prepare self registration window */
1538         window = scif_create_window(ep, pinned_pages->nr_pages,
1539                                     computed_offset, false);
1540         if (!window) {
1541                 atomic_sub(1, &pinned_pages->ref_count);
1542                 scif_free_window_offset(ep, NULL, computed_offset);
1543                 return -ENOMEM;
1544         }
1545
1546         window->pinned_pages = pinned_pages;
1547         window->nr_pages = pinned_pages->nr_pages;
1548         window->prot = pinned_pages->prot;
1549
1550         spdev = scif_get_peer_dev(ep->remote_dev);
1551         if (IS_ERR(spdev)) {
1552                 err = PTR_ERR(spdev);
1553                 scif_destroy_window(ep, window);
1554                 return err;
1555         }
1556         err = scif_send_alloc_request(ep, window);
1557         if (err) {
1558                 dev_err(&ep->remote_dev->sdev->dev,
1559                         "%s %d err %d\n", __func__, __LINE__, err);
1560                 goto error_unmap;
1561         }
1562
1563         /* Prepare the remote registration window */
1564         err = scif_prep_remote_window(ep, window);
1565         if (err) {
1566                 dev_err(&ep->remote_dev->sdev->dev,
1567                         "%s %d err %d\n", __func__, __LINE__, err);
1568                 goto error_unmap;
1569         }
1570
1571         /* Tell the peer about the new window */
1572         err = scif_send_scif_register(ep, window);
1573         if (err) {
1574                 dev_err(&ep->remote_dev->sdev->dev,
1575                         "%s %d err %d\n", __func__, __LINE__, err);
1576                 goto error_unmap;
1577         }
1578
1579         scif_put_peer_dev(spdev);
1580         /* No further failures expected. Insert new window */
1581         scif_insert_local_window(window, ep);
1582         return computed_offset;
1583 error_unmap:
1584         scif_destroy_window(ep, window);
1585         scif_put_peer_dev(spdev);
1586         dev_err(&ep->remote_dev->sdev->dev,
1587                 "%s %d err %d\n", __func__, __LINE__, err);
1588         return err;
1589 }
1590 EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
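/*
 * Continuing the earlier sketch (epd is a hypothetical connected
 * endpoint): register the pinned pages and let SCIF choose the offset.
 *
 *	off_t off = scif_register_pinned_pages(epd, pin, 0, 0);
 *
 *	if (off < 0) {
 *		scif_unpin_pages(pin);
 *		return off;
 *	}
 */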
1591
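/**
 * scif_register:
 * @epd: endpoint descriptor
 * @addr: page-aligned starting virtual address
 * @len: page-aligned length in bytes, must be non-zero
 * @offset: registered address space offset, honored only with
 *	    SCIF_MAP_FIXED
 * @prot: SCIF_PROT_READ and/or SCIF_PROT_WRITE
 * @map_flags: bitwise OR of SCIF_MAP_FIXED and SCIF_MAP_KERNEL
 *
 * Pin the pages backing [@addr, @addr + @len) and expose them to the
 * peer through a new registration window, combining scif_pin_pages()
 * and scif_register_pinned_pages() in a single call. Returns the
 * registered offset on success, a negative errno on failure.
 */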
1592 off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1593                     int prot, int map_flags)
1594 {
1595         scif_pinned_pages_t pinned_pages;
1596         off_t err;
1597         struct scif_endpt *ep = (struct scif_endpt *)epd;
1598         s64 computed_offset;
1599         struct scif_window *window;
1600         struct mm_struct *mm = NULL;
1601         struct device *spdev;
1602
1603         dev_dbg(scif_info.mdev.this_device,
1604                 "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1605                 epd, addr, len, offset, prot, map_flags);
1606         /* Unsupported flags */
1607         if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1608                 return -EINVAL;
1609
1610         /*
1611          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1612          * is negative, or if offset + len would wrap around.
1613          */
1614         if ((map_flags & SCIF_MAP_FIXED) &&
1615             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1616             (offset < 0) ||
1617             (len > LONG_MAX - offset)))
1618                 return -EINVAL;
1619
1620         /* Unsupported protection requested */
1621         if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1622                 return -EINVAL;
1623
1624         /* addr and len must be page aligned; len must be non-zero */
1625         if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1626             (ALIGN(len, PAGE_SIZE) != len))
1627                 return -EINVAL;
1628
1629         might_sleep();
1630
1631         err = scif_verify_epd(ep);
1632         if (err)
1633                 return err;
1634
1635         /* Compute the offset for this registration */
1636         err = scif_get_window_offset(ep, map_flags, offset,
1637                                      len >> PAGE_SHIFT, &computed_offset);
1638         if (err)
1639                 return err;
1640
1641         spdev = scif_get_peer_dev(ep->remote_dev);
1642         if (IS_ERR(spdev)) {
1643                 err = PTR_ERR(spdev);
1644                 scif_free_window_offset(ep, NULL, computed_offset);
1645                 return err;
1646         }
1647         /* Allocate and prepare self registration window */
1648         window = scif_create_window(ep, len >> PAGE_SHIFT,
1649                                     computed_offset, false);
1650         if (!window) {
1651                 scif_free_window_offset(ep, NULL, computed_offset);
1652                 scif_put_peer_dev(spdev);
1653                 return -ENOMEM;
1654         }
1655
1656         window->nr_pages = len >> PAGE_SHIFT;
1657
1658         err = scif_send_alloc_request(ep, window);
1659         if (err) {
1660                 scif_destroy_incomplete_window(ep, window);
1661                 scif_put_peer_dev(spdev);
1662                 return err;
1663         }
1664
1665         if (!(map_flags & SCIF_MAP_KERNEL)) {
1666                 mm = __scif_acquire_mm();
1667                 map_flags |= SCIF_MAP_ULIMIT;
1668         }
1669         /* Pin down the pages */
1670         err = __scif_pin_pages(addr, len, &prot,
1671                                map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1672                                &pinned_pages);
1673         if (err) {
1674                 scif_destroy_incomplete_window(ep, window);
1675                 __scif_release_mm(mm);
1676                 goto error;
1677         }
1678
1679         window->pinned_pages = pinned_pages;
1680         window->prot = pinned_pages->prot;
1681         window->mm = mm;
1682
1683         /* Prepare the remote registration window */
1684         err = scif_prep_remote_window(ep, window);
1685         if (err) {
1686                 dev_err(&ep->remote_dev->sdev->dev,
1687                         "%s %d err %ld\n", __func__, __LINE__, err);
1688                 goto error_unmap;
1689         }
1690
1691         /* Tell the peer about the new window */
1692         err = scif_send_scif_register(ep, window);
1693         if (err) {
1694                 dev_err(&ep->remote_dev->sdev->dev,
1695                         "%s %d err %ld\n", __func__, __LINE__, err);
1696                 goto error_unmap;
1697         }
1698
1699         scif_put_peer_dev(spdev);
1700         /* No further failures expected. Insert new window */
1701         scif_insert_local_window(window, ep);
1702         dev_dbg(&ep->remote_dev->sdev->dev,
1703                 "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1704                 epd, addr, len, computed_offset);
1705         return computed_offset;
1706 error_unmap:
1707         scif_destroy_window(ep, window);
1708 error:
1709         scif_put_peer_dev(spdev);
1710         dev_err(&ep->remote_dev->sdev->dev,
1711                 "%s %d err %ld\n", __func__, __LINE__, err);
1712         return err;
1713 }
1714 EXPORT_SYMBOL_GPL(scif_register);
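/*
 * Example usage (a minimal sketch; epd and buf are hypothetical): a
 * one-shot registration of a kernel buffer, later torn down with
 * scif_unregister().
 *
 *	off_t off = scif_register(epd, buf, 4 * PAGE_SIZE, 0,
 *				  SCIF_PROT_READ | SCIF_PROT_WRITE,
 *				  SCIF_MAP_KERNEL);
 *
 *	if (off < 0)
 *		return off;
 *	...
 *	scif_unregister(epd, off, 4 * PAGE_SIZE);
 */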
1715
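/**
 * scif_unregister:
 * @epd: endpoint descriptor
 * @offset: page-aligned start of the registered range
 * @len: page-aligned length in bytes, must be non-zero
 *
 * Unregister all windows in [@offset, @offset + @len) on the self
 * registration list. The backing pages are unpinned only after every
 * window referencing them has been destroyed.
 */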
1716 int
1717 scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1718 {
1719         struct scif_endpt *ep = (struct scif_endpt *)epd;
1720         struct scif_window *window = NULL;
1721         struct scif_rma_req req;
1722         int nr_pages, err;
1723         struct device *spdev;
1724
1725         dev_dbg(scif_info.mdev.this_device,
1726                 "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1727                 ep, offset, len);
1728         /* len must be page aligned and non-zero */
1729         if (!len ||
1730             (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1731                 return -EINVAL;
1732
1733         /* Fail if offset is misaligned or negative, or offset + len wraps */
1734         if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1735             (offset < 0) ||
1736             (len > LONG_MAX - offset))
1737                 return -EINVAL;
1738
1739         err = scif_verify_epd(ep);
1740         if (err)
1741                 return err;
1742
1743         might_sleep();
1744         nr_pages = len >> PAGE_SHIFT;
1745
1746         req.out_window = &window;
1747         req.offset = offset;
1748         req.prot = 0;
1749         req.nr_bytes = len;
1750         req.type = SCIF_WINDOW_FULL;
1751         req.head = &ep->rma_info.reg_list;
1752
1753         spdev = scif_get_peer_dev(ep->remote_dev);
1754         if (IS_ERR(spdev)) {
1755                 err = PTR_ERR(spdev);
1756                 return err;
1757         }
1758         mutex_lock(&ep->rma_info.rma_lock);
1759         /* Does a valid window exist? */
1760         err = scif_query_window(&req);
1761         if (err) {
1762                 dev_err(&ep->remote_dev->sdev->dev,
1763                         "%s %d err %d\n", __func__, __LINE__, err);
1764                 goto error;
1765         }
1766         /* Unregister all the windows in this range */
1767         err = scif_rma_list_unregister(window, offset, nr_pages);
1768         if (err)
1769                 dev_err(&ep->remote_dev->sdev->dev,
1770                         "%s %d err %d\n", __func__, __LINE__, err);
1771 error:
1772         mutex_unlock(&ep->rma_info.rma_lock);
1773         scif_put_peer_dev(spdev);
1774         return err;
1775 }
1776 EXPORT_SYMBOL_GPL(scif_unregister);