target/linux/layerscape/patches-5.4/809-jailhouse-0001-ivshmem-net-virtual-network-device-for-Jailhouse.patch

From 7f48bab7c7b468961cf70efa1d86a75173e3987a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 26 May 2016 16:04:02 +0100
Subject: [PATCH] ivshmem-net: virtual network device for Jailhouse

Work in progress.

(cherry picked from commit ed818547b45e652db57d5966efe336ed646feb45)
---
 drivers/net/Kconfig       |   4 +
 drivers/net/Makefile      |   2 +
 drivers/net/ivshmem-net.c | 923 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 929 insertions(+)
 create mode 100644 drivers/net/ivshmem-net.c

--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -528,4 +528,8 @@ config NET_FAILOVER
          a VM with direct attached VF by failing over to the paravirtual
          datapath when the VF is unplugged.
 
+config IVSHMEM_NET
+       tristate "IVSHMEM virtual network device"
+       depends on PCI
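+       help
+         Virtual Ethernet device backed by a shared memory region that
+         the Jailhouse hypervisor's ivshmem PCI device provides to two
+         guest cells.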
+
 endif # NETDEVICES
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -79,3 +79,5 @@ thunderbolt-net-y += thunderbolt.o
 obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o
 obj-$(CONFIG_NETDEVSIM) += netdevsim/
 obj-$(CONFIG_NET_FAILOVER) += net_failover.o
+
+obj-$(CONFIG_IVSHMEM_NET) += ivshmem-net.o
--- /dev/null
+++ b/drivers/net/ivshmem-net.c
@@ -0,0 +1,923 @@
+/*
+ * Copyright 2016 Mans Rullgard <mans@mansr.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/virtio_ring.h>
+
+#define DRV_NAME "ivshmem-net"
+
+#define JAILHOUSE_CFG_SHMEM_PTR        0x40
+#define JAILHOUSE_CFG_SHMEM_SZ 0x48
+
+#define IVSHM_NET_STATE_RESET  0
+#define IVSHM_NET_STATE_INIT   1
+#define IVSHM_NET_STATE_READY  2
+#define IVSHM_NET_STATE_RUN    3
+
+#define IVSHM_NET_MTU_MIN 256
+#define IVSHM_NET_MTU_MAX 65535
+#define IVSHM_NET_MTU_DEF 16384
+
+#define IVSHM_NET_FRAME_SIZE(s) ALIGN(18 + (s), SMP_CACHE_BYTES)
+
+#define IVSHM_NET_VQ_ALIGN 64
+
+struct ivshmem_regs {
+       u32 imask;
+       u32 istat;
+       u32 ivpos;
+       u32 doorbell;
+       u32 lstate;
+       u32 rstate;
+};
+
+struct ivshm_net_queue {
+       struct vring vr;
+       u32 free_head;
+       u32 num_free;
+       u32 num_added;
+       u16 last_avail_idx;
+       u16 last_used_idx;
+
+       void *data;
+       void *end;
+       u32 size;
+       u32 head;
+       u32 tail;
+};
+
+struct ivshm_net_stats {
+       u32 interrupts;
+       u32 tx_packets;
+       u32 tx_notify;
+       u32 tx_pause;
+       u32 rx_packets;
+       u32 rx_notify;
+       u32 napi_poll;
+       u32 napi_complete;
+       u32 napi_poll_n[10];
+};
+
+struct ivshm_net {
+       struct ivshm_net_queue rx;
+       struct ivshm_net_queue tx;
+
+       u32 vrsize;
+       u32 qlen;
+       u32 qsize;
+
+       spinlock_t tx_free_lock;
+       spinlock_t tx_clean_lock;
+
+       struct napi_struct napi;
+
+       u32 lstate;
+       u32 rstate;
+
+       struct workqueue_struct *state_wq;
+       struct work_struct state_work;
+
+       struct ivshm_net_stats stats;
+
+       struct ivshmem_regs __iomem *ivshm_regs;
+       void *shm;
+       phys_addr_t shmaddr;
+       resource_size_t shmlen;
+       u32 peer_id;
+
+       struct pci_dev *pdev;
+       struct msix_entry msix;
+       bool using_msix;
+};
+
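+/*
+ * Translate a descriptor's address into a pointer within the mapped
+ * shared memory region, checking that the buffer lies entirely inside
+ * this queue's data area.  Returns NULL for descriptors that point
+ * outside the area the peer is allowed to use.
+ */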
+static void *ivshm_net_desc_data(struct ivshm_net *in,
+                                struct ivshm_net_queue *q,
+                                struct vring_desc *desc,
+                                u32 *len)
+{
+       u64 addr = READ_ONCE(desc->addr);
+       u32 dlen = READ_ONCE(desc->len);
+       void *data;
+
+       if (addr < in->shmaddr || addr > in->shmaddr + in->shmlen)
+               return NULL;
+
+       data = in->shm + (addr - in->shmaddr);
+
+       if (data < q->data || data >= q->end)
+               return NULL;
+
+       if (dlen > q->end - data)
+               return NULL;
+
+       *len = dlen;
+
+       return data;
+}
+
+static void ivshm_net_init_queue(struct ivshm_net *in,
+                                struct ivshm_net_queue *q,
+                                void *mem, unsigned int len)
+{
+       memset(q, 0, sizeof(*q));
+
+       vring_init(&q->vr, len, mem, IVSHM_NET_VQ_ALIGN);
+       q->data = mem + in->vrsize;
+       q->end = q->data + in->qsize;
+       q->size = in->qsize;
+}
+
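+/*
+ * Each side owns one half of the shared memory region: the TX half is
+ * selected by our IVPosition, the RX half is the peer's TX half.  The
+ * used rings are swapped so that each side only ever writes into its
+ * own half of the region.
+ */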
+static void ivshm_net_init_queues(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       int ivpos = readl(&in->ivshm_regs->ivpos);
+       void *tx;
+       void *rx;
+       int i;
+
+       tx = in->shm +  ivpos * in->shmlen / 2;
+       rx = in->shm + !ivpos * in->shmlen / 2;
+
+       memset(tx, 0, in->shmlen / 2);
+
+       ivshm_net_init_queue(in, &in->rx, rx, in->qlen);
+       ivshm_net_init_queue(in, &in->tx, tx, in->qlen);
+
+       swap(in->rx.vr.used, in->tx.vr.used);
+
+       in->tx.num_free = in->tx.vr.num;
+
+       for (i = 0; i < in->tx.vr.num - 1; i++)
+               in->tx.vr.desc[i].next = i + 1;
+}
+
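+/*
+ * Choose the largest power-of-two queue length (at most 4096) for
+ * which the vring occupies less than 1/16 of the shared memory
+ * region; the rest of each half is used as the frame data area.
+ */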
+static int ivshm_net_calc_qsize(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       unsigned int vrsize;
+       unsigned int qsize;
+       unsigned int qlen;
+
+       for (qlen = 4096; qlen > 32; qlen >>= 1) {
+               vrsize = vring_size(qlen, IVSHM_NET_VQ_ALIGN);
+               vrsize = ALIGN(vrsize, IVSHM_NET_VQ_ALIGN);
+               if (vrsize < in->shmlen / 16)
+                       break;
+       }
+
+       if (vrsize > in->shmlen / 2)
+               return -EINVAL;
+
+       qsize = in->shmlen / 2 - vrsize;
+
+       if (qsize < 4 * IVSHM_NET_MTU_MIN)
+               return -EINVAL;
+
+       in->vrsize = vrsize;
+       in->qlen = qlen;
+       in->qsize = qsize;
+
+       return 0;
+}
+
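+/*
+ * Doorbell suppression using the virtio event index mechanism: the
+ * peer is only interrupted if it asked for a notification covering
+ * the indices we just published (vring_need_event).
+ */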
+static void ivshm_net_notify_tx(struct ivshm_net *in, unsigned int num)
+{
+       u16 evt, old, new;
+
+       virt_mb();
+
+       evt = READ_ONCE(vring_avail_event(&in->tx.vr));
+       old = in->tx.last_avail_idx - num;
+       new = in->tx.last_avail_idx;
+
+       if (vring_need_event(evt, new, old)) {
+               writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+               in->stats.tx_notify++;
+       }
+}
+
+static void ivshm_net_enable_rx_irq(struct ivshm_net *in)
+{
+       vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
+       virt_wmb();
+}
+
+static void ivshm_net_notify_rx(struct ivshm_net *in, unsigned int num)
+{
+       u16 evt, old, new;
+
+       virt_mb();
+
+       evt = vring_used_event(&in->rx.vr);
+       old = in->rx.last_used_idx - num;
+       new = in->rx.last_used_idx;
+
+       if (vring_need_event(evt, new, old)) {
+               writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+               in->stats.rx_notify++;
+       }
+}
+
+static void ivshm_net_enable_tx_irq(struct ivshm_net *in)
+{
+       vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
+       virt_wmb();
+}
+
+static bool ivshm_net_rx_avail(struct ivshm_net *in)
+{
+       virt_mb();
+       return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
+}
+
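+/*
+ * The TX data area is used as a ring buffer of variable-sized frames.
+ * A frame is always stored contiguously, so report the larger of the
+ * two free chunks (before the wrap point and after it).
+ */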
+static size_t ivshm_net_tx_space(struct ivshm_net *in)
+{
+       struct ivshm_net_queue *tx = &in->tx;
+       u32 tail = tx->tail;
+       u32 head = tx->head;
+       u32 space;
+
+       if (head < tail)
+               space = tail - head;
+       else
+               space = max(tx->size - head, tail);
+
+       return space;
+}
+
+static bool ivshm_net_tx_ok(struct ivshm_net *in, unsigned int mtu)
+{
+       return in->tx.num_free >= 2 &&
+               ivshm_net_tx_space(in) >= 2 * IVSHM_NET_FRAME_SIZE(mtu);
+}
+
+static u32 ivshm_net_tx_advance(struct ivshm_net_queue *q, u32 *pos, u32 len)
+{
+       u32 p = *pos;
+
+       len = IVSHM_NET_FRAME_SIZE(len);
+
+       if (q->size - p < len)
+               p = 0;
+       *pos = p + len;
+
+       return p;
+}
+
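+/*
+ * Transmit one frame: take a descriptor off the local free list, copy
+ * the frame (computing the checksum if needed) into the TX data ring,
+ * and publish the descriptor through the avail ring.  The avail index
+ * is only made visible, and the peer notified, once xmit_more signals
+ * that no further frames are pending.
+ */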
+static int ivshm_net_tx_frame(struct net_device *ndev, struct sk_buff *skb)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *tx = &in->tx;
+       struct vring *vr = &tx->vr;
+       struct vring_desc *desc;
+       unsigned int desc_idx;
+       unsigned int avail;
+       u32 head;
+       void *buf;
+
+       BUG_ON(tx->num_free < 1);
+
+       spin_lock(&in->tx_free_lock);
+       desc_idx = tx->free_head;
+       desc = &vr->desc[desc_idx];
+       tx->free_head = desc->next;
+       tx->num_free--;
+       spin_unlock(&in->tx_free_lock);
+
+       head = ivshm_net_tx_advance(tx, &tx->head, skb->len);
+
+       buf = tx->data + head;
+       skb_copy_and_csum_dev(skb, buf);
+
+       desc->addr = in->shmaddr + (buf - in->shm);
+       desc->len = skb->len;
+
+       avail = tx->last_avail_idx++ & (vr->num - 1);
+       vr->avail->ring[avail] = desc_idx;
+       tx->num_added++;
+
+       if (!skb->xmit_more) {
+               virt_store_release(&vr->avail->idx, tx->last_avail_idx);
+               ivshm_net_notify_tx(in, tx->num_added);
+               tx->num_added = 0;
+       }
+
+       return 0;
+}
+
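+/*
+ * Reclaim descriptors the peer has marked as used: validate each one,
+ * chain them together locally, and splice the whole chain back onto
+ * the free list under tx_free_lock in a single operation.
+ */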
+static void ivshm_net_tx_clean(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *tx = &in->tx;
+       struct vring *vr = &tx->vr;
+       struct vring_desc *desc;
+       struct vring_desc *fdesc;
+       unsigned int used;
+       unsigned int num;
+       u16 used_idx;
+       u16 last;
+       u32 fhead;
+
+       if (!spin_trylock(&in->tx_clean_lock))
+               return;
+
+       used_idx = virt_load_acquire(&vr->used->idx);
+       last = tx->last_used_idx;
+
+       fdesc = NULL;
+       num = 0;
+
+       while (last != used_idx) {
+               void *data;
+               u32 len;
+               u32 tail;
+
+               used = vr->used->ring[last & (vr->num - 1)].id;
+               if (used >= vr->num) {
+                       netdev_err(ndev, "invalid tx used %d\n", used);
+                       break;
+               }
+
+               desc = &vr->desc[used];
+
+               data = ivshm_net_desc_data(in, &in->tx, desc, &len);
+               if (!data) {
+                       netdev_err(ndev, "bad tx descriptor\n");
+                       break;
+               }
+
+               tail = ivshm_net_tx_advance(tx, &tx->tail, len);
+               if (data != tx->data + tail) {
+                       netdev_err(ndev, "bad tx descriptor\n");
+                       break;
+               }
+
+               if (!num)
+                       fdesc = desc;
+               else
+                       desc->next = fhead;
+
+               fhead = used;
+               last++;
+               num++;
+       }
+
+       tx->last_used_idx = last;
+
+       spin_unlock(&in->tx_clean_lock);
+
+       if (num) {
+               spin_lock(&in->tx_free_lock);
+               fdesc->next = tx->free_head;
+               tx->free_head = fhead;
+               tx->num_free += num;
+               BUG_ON(tx->num_free > vr->num);
+               spin_unlock(&in->tx_free_lock);
+       }
+}
+
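+/*
+ * Fetch the next descriptor the peer has made available on the RX
+ * ring, or NULL if the ring is empty.
+ */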
+static struct vring_desc *ivshm_net_rx_desc(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *rx = &in->rx;
+       struct vring *vr = &rx->vr;
+       unsigned int avail;
+       u16 avail_idx;
+
+       avail_idx = virt_load_acquire(&vr->avail->idx);
+
+       if (avail_idx == rx->last_avail_idx)
+               return NULL;
+
+       avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
+       if (avail >= vr->num) {
+               netdev_err(ndev, "invalid rx avail %d\n", avail);
+               return NULL;
+       }
+
+       return &vr->desc[avail];
+}
+
+static void ivshm_net_rx_finish(struct ivshm_net *in, struct vring_desc *desc)
+{
+       struct ivshm_net_queue *rx = &in->rx;
+       struct vring *vr = &rx->vr;
+       unsigned int desc_id = desc - vr->desc;
+       unsigned int used;
+
+       used = rx->last_used_idx++ & (vr->num - 1);
+       vr->used->ring[used].id = desc_id;
+
+       virt_store_release(&vr->used->idx, rx->last_used_idx);
+}
+
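+/*
+ * NAPI poll: reclaim completed TX descriptors, then copy up to budget
+ * received frames out of shared memory into skbs.  When the budget is
+ * not exhausted, the RX interrupt is re-enabled and the ring checked
+ * once more to close the race with the peer.
+ */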
+static int ivshm_net_poll(struct napi_struct *napi, int budget)
+{
+       struct net_device *ndev = napi->dev;
+       struct ivshm_net *in = container_of(napi, struct ivshm_net, napi);
+       int received = 0;
+
+       in->stats.napi_poll++;
+
+       ivshm_net_tx_clean(ndev);
+
+       while (received < budget) {
+               struct vring_desc *desc;
+               struct sk_buff *skb;
+               void *data;
+               u32 len;
+
+               desc = ivshm_net_rx_desc(ndev);
+               if (!desc)
+                       break;
+
+               data = ivshm_net_desc_data(in, &in->rx, desc, &len);
+               if (!data) {
+                       netdev_err(ndev, "bad rx descriptor\n");
+                       break;
+               }
+
+               skb = napi_alloc_skb(napi, len);
+
+               if (skb) {
+                       memcpy(skb_put(skb, len), data, len);
+                       skb->protocol = eth_type_trans(skb, ndev);
+                       napi_gro_receive(napi, skb);
+               }
+
+               ndev->stats.rx_packets++;
+               ndev->stats.rx_bytes += len;
+
+               ivshm_net_rx_finish(in, desc);
+               received++;
+       }
+
+       if (received < budget) {
+               in->stats.napi_complete++;
+               napi_complete_done(napi, received);
+               ivshm_net_enable_rx_irq(in);
+               if (ivshm_net_rx_avail(in))
+                       napi_schedule(napi);
+       }
+
+       if (received)
+               ivshm_net_notify_rx(in, received);
+
+       in->stats.rx_packets += received;
+       in->stats.napi_poll_n[received ? 1 + min(ilog2(received), 8) : 0]++;
+
+       if (ivshm_net_tx_ok(in, ndev->mtu))
+               netif_wake_queue(ndev);
+
+       return received;
+}
+
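+/*
+ * Transmit path: when there is no longer room for at least two more
+ * frames, the TX completion interrupt is armed and the queue stopped;
+ * the current frame is still sent, and the NAPI poll wakes the queue
+ * again once space has been reclaimed.
+ */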
+static netdev_tx_t ivshm_net_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       ivshm_net_tx_clean(ndev);
+
+       if (!ivshm_net_tx_ok(in, ndev->mtu)) {
+               ivshm_net_enable_tx_irq(in);
+               netif_stop_queue(ndev);
+               skb->xmit_more = 0;
+               in->stats.tx_pause++;
+       }
+
+       ivshm_net_tx_frame(ndev, skb);
+
+       in->stats.tx_packets++;
+       ndev->stats.tx_packets++;
+       ndev->stats.tx_bytes += skb->len;
+
+       dev_consume_skb_any(skb);
+
+       return NETDEV_TX_OK;
+}
+
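+/*
+ * Publish our state: keep a local copy and write it to the lstate
+ * register, where the peer can read it.
+ */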
+static void ivshm_net_set_state(struct ivshm_net *in, u32 state)
+{
+       virt_wmb();
+       WRITE_ONCE(in->lstate, state);
+       writel(state, &in->ivshm_regs->lstate);
+}
+
+static void ivshm_net_run(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       netif_start_queue(ndev);
+       napi_enable(&in->napi);
+       napi_schedule(&in->napi);
+       ivshm_net_set_state(in, IVSHM_NET_STATE_RUN);
+}
+
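+/*
+ * State machine advancing the link through RESET -> INIT -> READY ->
+ * RUN in lockstep with the remote side: the queues are (re)initialised
+ * in INIT, and the carrier follows the peer reaching READY.
+ */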
+static void ivshm_net_state_change(struct work_struct *work)
+{
+       struct ivshm_net *in = container_of(work, struct ivshm_net, state_work);
+       struct net_device *ndev = in->napi.dev;
+       u32 rstate = readl(&in->ivshm_regs->rstate);
+
+       switch (in->lstate) {
+       case IVSHM_NET_STATE_RESET:
+               if (rstate < IVSHM_NET_STATE_READY)
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_INIT);
+               break;
+
+       case IVSHM_NET_STATE_INIT:
+               if (rstate > IVSHM_NET_STATE_RESET) {
+                       ivshm_net_init_queues(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+
+                       rtnl_lock();
+                       call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
+                       rtnl_unlock();
+               }
+               break;
+
+       case IVSHM_NET_STATE_READY:
+               if (rstate >= IVSHM_NET_STATE_READY) {
+                       netif_carrier_on(ndev);
+                       if (ndev->flags & IFF_UP)
+                               ivshm_net_run(ndev);
+               } else {
+                       netif_carrier_off(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+               }
+               break;
+
+       case IVSHM_NET_STATE_RUN:
+               if (rstate < IVSHM_NET_STATE_READY) {
+                       netif_stop_queue(ndev);
+                       napi_disable(&in->napi);
+                       netif_carrier_off(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+               }
+               break;
+       }
+
+       virt_wmb();
+       WRITE_ONCE(in->rstate, rstate);
+}
+
+static bool ivshm_net_check_state(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       u32 rstate = readl(&in->ivshm_regs->rstate);
+
+       if (rstate != READ_ONCE(in->rstate) ||
+           in->lstate != IVSHM_NET_STATE_RUN) {
+               queue_work(in->state_wq, &in->state_work);
+               return false;
+       }
+
+       return true;
+}
+
+static irqreturn_t ivshm_net_int(int irq, void *data)
+{
+       struct net_device *ndev = data;
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       in->stats.interrupts++;
+
+       ivshm_net_check_state(ndev);
+       napi_schedule_irqoff(&in->napi);
+
+       return IRQ_HANDLED;
+}
+
+static int ivshm_net_open(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       netdev_reset_queue(ndev);
+       ndev->operstate = IF_OPER_UP;
+
+       if (in->lstate == IVSHM_NET_STATE_READY)
+               ivshm_net_run(ndev);
+
+       return 0;
+}
+
+static int ivshm_net_stop(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       ndev->operstate = IF_OPER_DOWN;
+
+       if (in->lstate == IVSHM_NET_STATE_RUN) {
+               napi_disable(&in->napi);
+               netif_stop_queue(ndev);
+               ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+       }
+
+       return 0;
+}
+
+static int ivshm_net_change_mtu(struct net_device *ndev, int mtu)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *tx = &in->tx;
+
+       if (mtu < IVSHM_NET_MTU_MIN || mtu > IVSHM_NET_MTU_MAX)
+               return -EINVAL;
+
+       if (in->tx.size / mtu < 4)
+               return -EINVAL;
+
+       if (ivshm_net_tx_space(in) < 2 * IVSHM_NET_FRAME_SIZE(mtu))
+               return -EBUSY;
+
+       if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu) &&
+           tx->head < tx->tail)
+               return -EBUSY;
+
+       netif_tx_lock_bh(ndev);
+       if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu))
+               tx->head = 0;
+       netif_tx_unlock_bh(ndev);
+
+       ndev->mtu = mtu;
+
+       return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void ivshm_net_poll_controller(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       napi_schedule(&in->napi);
+}
+#endif
+
+static const struct net_device_ops ivshm_net_ops = {
+       .ndo_open       = ivshm_net_open,
+       .ndo_stop       = ivshm_net_stop,
+       .ndo_start_xmit = ivshm_net_xmit,
+       .ndo_change_mtu = ivshm_net_change_mtu,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller = ivshm_net_poll_controller,
+#endif
+};
+
+static const char ivshm_net_stats[][ETH_GSTRING_LEN] = {
+       "interrupts",
+       "tx_packets",
+       "tx_notify",
+       "tx_pause",
+       "rx_packets",
+       "rx_notify",
+       "napi_poll",
+       "napi_complete",
+       "napi_poll_0",
+       "napi_poll_1",
+       "napi_poll_2",
+       "napi_poll_4",
+       "napi_poll_8",
+       "napi_poll_16",
+       "napi_poll_32",
+       "napi_poll_64",
+       "napi_poll_128",
+       "napi_poll_256",
+};
+
+#define NUM_STATS ARRAY_SIZE(ivshm_net_stats)
+
+static int ivshm_net_get_sset_count(struct net_device *ndev, int sset)
+{
+       if (sset == ETH_SS_STATS)
+               return NUM_STATS;
+
+       return -EOPNOTSUPP;
+}
+
+static void ivshm_net_get_strings(struct net_device *ndev, u32 sset, u8 *buf)
+{
+       if (sset == ETH_SS_STATS)
+               memcpy(buf, &ivshm_net_stats, sizeof(ivshm_net_stats));
+}
+
+static void ivshm_net_get_ethtool_stats(struct net_device *ndev,
+                                       struct ethtool_stats *estats, u64 *st)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       unsigned int n = 0;
+       unsigned int i;
+
+       st[n++] = in->stats.interrupts;
+       st[n++] = in->stats.tx_packets;
+       st[n++] = in->stats.tx_notify;
+       st[n++] = in->stats.tx_pause;
+       st[n++] = in->stats.rx_packets;
+       st[n++] = in->stats.rx_notify;
+       st[n++] = in->stats.napi_poll;
+       st[n++] = in->stats.napi_complete;
+
+       for (i = 0; i < ARRAY_SIZE(in->stats.napi_poll_n); i++)
+               st[n++] = in->stats.napi_poll_n[i];
+
+       memset(&in->stats, 0, sizeof(in->stats));
+}
+
+static const struct ethtool_ops ivshm_net_ethtool_ops = {
+       .get_sset_count         = ivshm_net_get_sset_count,
+       .get_strings            = ivshm_net_get_strings,
+       .get_ethtool_stats      = ivshm_net_get_ethtool_stats,
+};
+
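+/*
+ * Probe: BAR 0 holds the ivshmem registers.  The shared memory region
+ * is taken from BAR 2 if that BAR is present; on Jailhouse it is
+ * instead described by the vendor-specific config space words at 0x40
+ * (address) and 0x48 (size).
+ */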
+static int ivshm_net_probe(struct pci_dev *pdev,
+                          const struct pci_device_id *id)
+{
+       struct net_device *ndev;
+       struct ivshm_net *in;
+       struct ivshmem_regs __iomem *regs;
+       resource_size_t shmaddr;
+       resource_size_t shmlen;
+       int interrupt;
+       void *shm;
+       u32 ivpos;
+       int err;
+
+       err = pcim_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "pcim_enable_device: %d\n", err);
+               return err;
+       }
+
+       err = pcim_iomap_regions(pdev, BIT(0), DRV_NAME);
+       if (err) {
+               dev_err(&pdev->dev, "pcim_iomap_regions: %d\n", err);
+               return err;
+       }
+
+       regs = pcim_iomap_table(pdev)[0];
+
+       shmlen = pci_resource_len(pdev, 2);
+
+       if (shmlen) {
+               shmaddr = pci_resource_start(pdev, 2);
+       } else {
+               union { u64 v; u32 hl[2]; } val;
+
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR,
+                                     &val.hl[0]);
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR + 4,
+                                     &val.hl[1]);
+               shmaddr = val.v;
+
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ,
+                                     &val.hl[0]);
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ + 4,
+                                     &val.hl[1]);
+               shmlen = val.v;
+       }
+
+       if (!devm_request_mem_region(&pdev->dev, shmaddr, shmlen, DRV_NAME))
+               return -EBUSY;
+
+       shm = devm_memremap(&pdev->dev, shmaddr, shmlen, MEMREMAP_WC);
+       if (!shm)
+               return -ENOMEM;
+
+       ivpos = readl(&regs->ivpos);
+       if (ivpos > 1) {
+               dev_err(&pdev->dev, "invalid IVPosition %d\n", ivpos);
+               return -EINVAL;
+       }
+
+       dev_info(&pdev->dev, "shared memory size %pa\n", &shmlen);
+
+       ndev = alloc_etherdev(sizeof(*in));
+       if (!ndev)
+               return -ENOMEM;
+
+       pci_set_drvdata(pdev, ndev);
+       SET_NETDEV_DEV(ndev, &pdev->dev);
+
+       in = netdev_priv(ndev);
+       in->ivshm_regs = regs;
+       in->shm = shm;
+       in->shmaddr = shmaddr;
+       in->shmlen = shmlen;
+       in->peer_id = !ivpos;
+       in->pdev = pdev;
+       spin_lock_init(&in->tx_free_lock);
+       spin_lock_init(&in->tx_clean_lock);
+
+       err = ivshm_net_calc_qsize(ndev);
+       if (err)
+               goto err_free;
+
+       in->state_wq = alloc_ordered_workqueue(DRV_NAME, 0);
+       if (!in->state_wq) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       INIT_WORK(&in->state_work, ivshm_net_state_change);
+
+       eth_random_addr(ndev->dev_addr);
+       ndev->netdev_ops = &ivshm_net_ops;
+       ndev->ethtool_ops = &ivshm_net_ethtool_ops;
+       ndev->mtu = min_t(u32, IVSHM_NET_MTU_DEF, in->qsize / 16);
+       ndev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
+       ndev->features = ndev->hw_features;
+
+       netif_carrier_off(ndev);
+       netif_napi_add(ndev, &in->napi, ivshm_net_poll, NAPI_POLL_WEIGHT);
+
+       err = register_netdev(ndev);
+       if (err)
+               goto err_wq;
+
+       err = pci_enable_msix(pdev, &in->msix, 1);
+       if (!err) {
+               interrupt = in->msix.vector;
+               in->using_msix = true;
+       } else {
+               interrupt = pdev->irq;
+               in->using_msix = false;
+       }
+
+       err = request_irq(interrupt, ivshm_net_int, 0, DRV_NAME, ndev);
+       if (err)
+               goto err_int;
+
+       pci_set_master(pdev);
+
+       writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->lstate);
+
+       return 0;
+
+err_int:
+       if (in->using_msix)
+               pci_disable_msix(pdev);
+       unregister_netdev(ndev);
+err_wq:
+       destroy_workqueue(in->state_wq);
+err_free:
+       free_netdev(ndev);
+
+       return err;
+}
+
+static void ivshm_net_remove(struct pci_dev *pdev)
+{
+       struct net_device *ndev = pci_get_drvdata(pdev);
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       if (in->using_msix) {
+               free_irq(in->msix.vector, ndev);
+               pci_disable_msix(pdev);
+       } else {
+               free_irq(pdev->irq, ndev);
+       }
+
+       unregister_netdev(ndev);
+       cancel_work_sync(&in->state_work);
+       destroy_workqueue(in->state_wq);
+       free_netdev(ndev);
+}
+
+static const struct pci_device_id ivshm_net_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1110),
+               (PCI_CLASS_OTHERS << 16) | (0x01 << 8), 0xffff00 },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ivshm_net_id_table);
+
+static struct pci_driver ivshm_net_driver = {
+       .name           = DRV_NAME,
+       .id_table       = ivshm_net_id_table,
+       .probe          = ivshm_net_probe,
+       .remove         = ivshm_net_remove,
+};
+module_pci_driver(ivshm_net_driver);
+
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");