Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / drivers / platform / x86 / intel_ips.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2009-2010 Intel Corporation
4  *
5  * Authors:
6  *      Jesse Barnes <jbarnes@virtuousgeek.org>
7  */
8
9 /*
10  * Some Intel Ibex Peak based platforms support so-called "intelligent
11  * power sharing", which allows the CPU and GPU to cooperate to maximize
12  * performance within a given TDP (thermal design point).  This driver
13  * performs the coordination between the CPU and GPU, monitors thermal and
14  * power statistics in the platform, and initializes power monitoring
15  * hardware.  It also provides a few tunables to control behavior.  Its
16  * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
17  * by tracking power and thermal budget; secondarily it can boost turbo
18  * performance by allocating more power or thermal budget to the CPU or GPU
19  * based on available headroom and activity.
20  *
21  * The basic algorithm is driven by a 5s moving average of temperature.  If
22  * thermal headroom is available, the CPU and/or GPU power clamps may be
23  * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
24  * we scale back the clamp.  Aside from trigger events (when we're critically
25  * close or over our TDP) we don't adjust the clamps more than once every
26  * five seconds.
27  *
28  * The thermal device (device 31, function 6) has a set of registers that
29  * are updated by the ME firmware.  The ME should also take the clamp values
30  * written to those registers and write them to the CPU, but we currently
31  * bypass that functionality and write the CPU MSR directly.
32  *
33  * UNSUPPORTED:
34  *   - dual MCP configs
35  *
36  * TODO:
37  *   - handle CPU hotplug
38  *   - provide turbo enable/disable api
39  *
40  * Related documents:
41  *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
42  *   - CDI 401376 - Ibex Peak EDS
43  *   - ref 26037, 26641 - IPS BIOS spec
44  *   - ref 26489 - Nehalem BIOS writer's guide
45  *   - ref 26921 - Ibex Peak BIOS Specification
46  */
47
48 #include <linux/debugfs.h>
49 #include <linux/delay.h>
50 #include <linux/interrupt.h>
51 #include <linux/kernel.h>
52 #include <linux/kthread.h>
53 #include <linux/module.h>
54 #include <linux/pci.h>
55 #include <linux/sched.h>
56 #include <linux/sched/loadavg.h>
57 #include <linux/seq_file.h>
58 #include <linux/string.h>
59 #include <linux/tick.h>
60 #include <linux/timer.h>
61 #include <linux/dmi.h>
62 #include <drm/i915_drm.h>
63 #include <asm/msr.h>
64 #include <asm/processor.h>
65 #include "intel_ips.h"
66
67 #include <linux/io-64-nonatomic-lo-hi.h>
68
/* PCI device ID of the Intel thermal sensor device */
#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR	0x3b32

/*
 * Package-level MSRs used for monitoring and control.
 * Indented names are bit fields of the MSR listed above them.
 */
#define PLATFORM_INFO			0xce
#define   PLATFORM_TDP			(1 << 29)
#define   PLATFORM_RATIO		(1 << 28)

#define IA32_MISC_ENABLE		0x1a0
#define   IA32_MISC_TURBO_EN		(1ULL << 38)

#define TURBO_POWER_CURRENT_LIMIT	0x1ac
#define   TURBO_TDC_OVR_EN		(1UL << 31)
#define   TURBO_TDC_MASK		0x7fff0000UL
#define   TURBO_TDC_SHIFT		16
#define   TURBO_TDP_OVR_EN		(1UL << 15)
#define   TURBO_TDP_MASK		0x3fffUL

/*
 * Core/thread MSRs used for monitoring
 */
#define IA32_PERF_CTL			0x199
#define   IA32_PERF_TURBO_DIS		(1ULL << 32)
93
/*
 * Thermal PCI device registers.
 *
 * THM_CFG_* are PCI config space offsets; the remaining THM_* values are
 * offsets used with the thm_read/thm_write accessors.  Indented names are
 * bit fields or field values of the register listed above them.
 */
#define THM_CFG_TBAR		0x10
#define THM_CFG_TBAR_HI		0x14

#define THM_TSIU		0x00
#define THM_TSE			0x01
#define   TSE_EN		0xb8
#define THM_TSS			0x02
#define THM_TSTR		0x03
#define THM_TSTTP		0x04
#define THM_TSCO		0x08
#define THM_TSES		0x0c
#define THM_TSGPEN		0x0d
#define   TSGPEN_HOT_LOHI	(1 << 1)
#define   TSGPEN_CRIT_LOHI	(1 << 2)
#define THM_TSPC		0x0e
#define THM_PPEC		0x10
#define THM_CTA			0x12
#define THM_PTA			0x14
#define   PTA_SLOPE_MASK	0xff00
#define   PTA_SLOPE_SHIFT	8
#define   PTA_OFFSET_MASK	0x00ff
#define THM_MGTA		0x16
#define   MGTA_SLOPE_MASK	0xff00
#define   MGTA_SLOPE_SHIFT	8
#define   MGTA_OFFSET_MASK	0x00ff
#define THM_TRC			0x1a
#define   TRC_CORE2_EN		(1 << 15)
#define   TRC_THM_EN		(1 << 12)
#define   TRC_C6_WAR		(1 << 8)
#define   TRC_CORE1_EN		(1 << 7)
#define   TRC_CORE_PWR		(1 << 6)
#define   TRC_PCH_EN		(1 << 5)
#define   TRC_MCH_EN		(1 << 4)
#define   TRC_DIMM4		(1 << 3)
#define   TRC_DIMM3		(1 << 2)
#define   TRC_DIMM2		(1 << 1)
#define   TRC_DIMM1		(1 << 0)
#define THM_TES			0x20
#define THM_TEN			0x21
#define   TEN_UPDATE_EN		1
#define THM_PSC			0x24
#define   PSC_NTG		(1 << 0) /* No GFX turbo support */
#define   PSC_NTPC		(1 << 1) /* No CPU turbo support */
#define   PSC_PP_DEF		(0 << 2) /* Perf policy up to driver */
#define   PSP_PP_PC		(1 << 2) /* BIOS prefers CPU perf */
#define   PSP_PP_BAL		(2 << 2) /* BIOS wants balanced perf */
#define   PSP_PP_GFX		(3 << 2) /* BIOS prefers GFX perf */
#define   PSP_PBRT		(1 << 4) /* BIOS run time support */
#define THM_CTV1		0x30
#define   CTV_TEMP_ERROR	(1 << 15)
#define   CTV_TEMP_MASK		0x3f
/* NOTE(review): empty, apparently unfinished definition; kept as found */
#define   CTV_
#define THM_CTV2		0x32
#define THM_CEC			0x34 /* undocumented power accumulator in joules */
#define THM_AE			0x3f
#define THM_HTS			0x50 /* 32 bits */
#define   HTS_PCPL_MASK		0x7fe00000
#define   HTS_PCPL_SHIFT	21
#define   HTS_GPL_MASK		0x001ff000
#define   HTS_GPL_SHIFT		12
#define   HTS_PP_MASK		0x00000c00
#define   HTS_PP_SHIFT		10
#define   HTS_PP_DEF		0
#define   HTS_PP_PROC		1
#define   HTS_PP_BAL		2
#define   HTS_PP_GFX		3
#define   HTS_PCTD_DIS		(1 << 9)
#define   HTS_GTD_DIS		(1 << 8)
#define   HTS_PTL_MASK		0x000000fe
#define   HTS_PTL_SHIFT		1
#define   HTS_NVV		(1 << 0)
#define THM_HTSHI		0x54 /* 16 bits */
#define   HTS2_PPL_MASK		0x03ff
#define   HTS2_PRST_MASK	0x3c00
#define   HTS2_PRST_SHIFT	10
#define   HTS2_PRST_UNLOADED	0
#define   HTS2_PRST_RUNNING	1
#define   HTS2_PRST_TDISOP	2 /* turbo disabled due to power */
#define   HTS2_PRST_TDISHT	3 /* turbo disabled due to high temp */
#define   HTS2_PRST_TDISUSR	4 /* user disabled turbo */
#define   HTS2_PRST_TDISPLAT	5 /* platform disabled turbo */
#define   HTS2_PRST_TDISPM	6 /* power management disabled turbo */
#define   HTS2_PRST_TDISERR	7 /* some kind of error disabled turbo */
#define THM_PTL			0x56
#define THM_MGTV		0x58
#define   TV_MASK		0x000000000000ff00
#define   TV_SHIFT		8
#define THM_PTV			0x60
#define   PTV_MASK		0x00ff
#define THM_MMGPC		0x64
#define THM_MPPC		0x66
#define THM_MPCPC		0x68
#define THM_TSPIEN		0x82
#define   TSPIEN_AUX_LOHI	(1 << 0)
#define   TSPIEN_HOT_LOHI	(1 << 1)
#define   TSPIEN_CRIT_LOHI	(1 << 2)
#define   TSPIEN_AUX2_LOHI	(1 << 3)
#define THM_TSLOCK		0x83
#define THM_ATR			0x84
#define THM_TOF			0x87
#define THM_STS			0x98
#define   STS_PCPL_MASK		0x7fe00000
#define   STS_PCPL_SHIFT	21
#define   STS_GPL_MASK		0x001ff000
#define   STS_GPL_SHIFT		12
#define   STS_PP_MASK		0x00000c00
#define   STS_PP_SHIFT		10
#define   STS_PP_DEF		0
#define   STS_PP_PROC		1
#define   STS_PP_BAL		2
#define   STS_PP_GFX		3
#define   STS_PCTD_DIS		(1 << 9)
#define   STS_GTD_DIS		(1 << 8)
#define   STS_PTL_MASK		0x000000fe
#define   STS_PTL_SHIFT		1
#define   STS_NVV		(1 << 0)
#define THM_SEC			0x9c
#define   SEC_ACK		(1 << 0)
#define THM_TC3			0xa4
#define THM_TC1			0xa8
/*
 * NOTE(review): the mask below covers bits 8-17 while the shift is 16;
 * one of the two looks wrong -- confirm against the datasheet.
 */
#define   STS_PPL_MASK		0x0003ff00
#define   STS_PPL_SHIFT		16
#define THM_TC2			0xac
#define THM_DTV			0xb0
#define THM_ITV			0xd8
#define   ITV_ME_SEQNO_MASK	0x00ff0000 /* ME should update every ~200ms */
#define   ITV_ME_SEQNO_SHIFT	16
#define   ITV_MCH_TEMP_MASK	0x0000ff00
#define   ITV_MCH_TEMP_SHIFT	8
#define   ITV_PCH_TEMP_MASK	0x000000ff
227
/*
 * MMIO accessors for the thermal register block.
 *
 * Each macro expands to a read/write relative to ips->regmap, so a
 * variable named "ips" must be in scope wherever one is used.
 */
#define thm_readb(off)		readb(ips->regmap + (off))
#define thm_readw(off)		readw(ips->regmap + (off))
#define thm_readl(off)		readl(ips->regmap + (off))
#define thm_readq(off)		readq(ips->regmap + (off))

#define thm_writeb(off, val)	writeb((val), ips->regmap + (off))
#define thm_writew(off, val)	writew((val), ips->regmap + (off))
#define thm_writel(off, val)	writel((val), ips->regmap + (off))
236
/* Sleep time between two passes of the adjust thread, in ms */
static const int IPS_ADJUST_PERIOD = 5000;
static bool late_i915_load;

/*
 * Sampling parameters for the averages: one sample every
 * IPS_SAMPLE_PERIOD ms over an IPS_SAMPLE_WINDOW ms (5s) moving window.
 */
static const int IPS_SAMPLE_PERIOD = 200;
static const int IPS_SAMPLE_WINDOW = 5000;
#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
244
/**
 * struct ips_mcp_limits - per-SKU limits
 * @mcp_power_limit: MCP power limit, in mW
 * @core_power_limit: core power limit (presumably mW as well)
 * @mch_power_limit: MCH power limit (presumably mW as well)
 * @core_temp_limit: core temperature limit, in degrees C
 * @mch_temp_limit: MCH temperature limit (presumably degrees C as well)
 */
struct ips_mcp_limits {
	int mcp_power_limit;
	int core_power_limit;
	int mch_power_limit;
	int core_temp_limit;
	int mch_temp_limit;
};
253
254 /* Max temps are -10 degrees C to avoid PROCHOT# */
255
256 static struct ips_mcp_limits ips_sv_limits = {
257         .mcp_power_limit = 35000,
258         .core_power_limit = 29000,
259         .mch_power_limit = 20000,
260         .core_temp_limit = 95,
261         .mch_temp_limit = 90
262 };
263
264 static struct ips_mcp_limits ips_lv_limits = {
265         .mcp_power_limit = 25000,
266         .core_power_limit = 21000,
267         .mch_power_limit = 13000,
268         .core_temp_limit = 95,
269         .mch_temp_limit = 90
270 };
271
272 static struct ips_mcp_limits ips_ulv_limits = {
273         .mcp_power_limit = 18000,
274         .core_power_limit = 14000,
275         .mch_power_limit = 11000,
276         .core_temp_limit = 95,
277         .mch_temp_limit = 90
278 };
279
280 struct ips_driver {
281         struct device *dev;
282         void __iomem *regmap;
283         int irq;
284
285         struct task_struct *monitor;
286         struct task_struct *adjust;
287         struct dentry *debug_root;
288         struct timer_list timer;
289
290         /* Average CPU core temps (all averages in .01 degrees C for precision) */
291         u16 ctv1_avg_temp;
292         u16 ctv2_avg_temp;
293         /* GMCH average */
294         u16 mch_avg_temp;
295         /* Average for the CPU (both cores?) */
296         u16 mcp_avg_temp;
297         /* Average power consumption (in mW) */
298         u32 cpu_avg_power;
299         u32 mch_avg_power;
300
301         /* Offset values */
302         u16 cta_val;
303         u16 pta_val;
304         u16 mgta_val;
305
306         /* Maximums & prefs, protected by turbo status lock */
307         spinlock_t turbo_status_lock;
308         u16 mcp_temp_limit;
309         u16 mcp_power_limit;
310         u16 core_power_limit;
311         u16 mch_power_limit;
312         bool cpu_turbo_enabled;
313         bool __cpu_turbo_on;
314         bool gpu_turbo_enabled;
315         bool __gpu_turbo_on;
316         bool gpu_preferred;
317         bool poll_turbo_status;
318         bool second_cpu;
319         bool turbo_toggle_allowed;
320         struct ips_mcp_limits *limits;
321
322         /* Optional MCH interfaces for if i915 is in use */
323         unsigned long (*read_mch_val)(void);
324         bool (*gpu_raise)(void);
325         bool (*gpu_lower)(void);
326         bool (*gpu_busy)(void);
327         bool (*gpu_turbo_disable)(void);
328
329         /* For restoration at unload */
330         u64 orig_turbo_limit;
331         u64 orig_turbo_ratios;
332 };
333
/* Forward declaration; the GPU helpers below call this before it is defined */
static bool ips_gpu_turbo_enabled(struct ips_driver *ips);
336
337 /**
338  * ips_cpu_busy - is CPU busy?
339  * @ips: IPS driver struct
340  *
341  * Check CPU for load to see whether we should increase its thermal budget.
342  *
343  * RETURNS:
344  * True if the CPU could use more power, false otherwise.
345  */
346 static bool ips_cpu_busy(struct ips_driver *ips)
347 {
348         if ((avenrun[0] >> FSHIFT) > 1)
349                 return true;
350
351         return false;
352 }
353
354 /**
355  * ips_cpu_raise - raise CPU power clamp
356  * @ips: IPS driver struct
357  *
358  * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
359  * this platform.
360  *
361  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
362  * long as we haven't hit the TDP limit for the SKU).
363  */
364 static void ips_cpu_raise(struct ips_driver *ips)
365 {
366         u64 turbo_override;
367         u16 cur_tdp_limit, new_tdp_limit;
368
369         if (!ips->cpu_turbo_enabled)
370                 return;
371
372         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
373
374         cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
375         new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
376
377         /* Clamp to SKU TDP limit */
378         if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
379                 new_tdp_limit = cur_tdp_limit;
380
381         thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
382
383         turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
384         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
385
386         turbo_override &= ~TURBO_TDP_MASK;
387         turbo_override |= new_tdp_limit;
388
389         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
390 }
391
392 /**
393  * ips_cpu_lower - lower CPU power clamp
394  * @ips: IPS driver struct
395  *
396  * Lower CPU power clamp b %IPS_CPU_STEP if possible.
397  *
398  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
399  * as low as the platform limits will allow (though we could go lower there
400  * wouldn't be much point).
401  */
402 static void ips_cpu_lower(struct ips_driver *ips)
403 {
404         u64 turbo_override;
405         u16 cur_limit, new_limit;
406
407         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
408
409         cur_limit = turbo_override & TURBO_TDP_MASK;
410         new_limit = cur_limit - 8; /* 1W decrease */
411
412         /* Clamp to SKU TDP limit */
413         if (new_limit  < (ips->orig_turbo_limit & TURBO_TDP_MASK))
414                 new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
415
416         thm_writew(THM_MPCPC, (new_limit * 10) / 8);
417
418         turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
419         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
420
421         turbo_override &= ~TURBO_TDP_MASK;
422         turbo_override |= new_limit;
423
424         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
425 }
426
427 /**
428  * do_enable_cpu_turbo - internal turbo enable function
429  * @data: unused
430  *
431  * Internal function for actually updating MSRs.  When we enable/disable
432  * turbo, we need to do it on each CPU; this function is the one called
433  * by on_each_cpu() when needed.
434  */
435 static void do_enable_cpu_turbo(void *data)
436 {
437         u64 perf_ctl;
438
439         rdmsrl(IA32_PERF_CTL, perf_ctl);
440         if (perf_ctl & IA32_PERF_TURBO_DIS) {
441                 perf_ctl &= ~IA32_PERF_TURBO_DIS;
442                 wrmsrl(IA32_PERF_CTL, perf_ctl);
443         }
444 }
445
446 /**
447  * ips_enable_cpu_turbo - enable turbo mode on all CPUs
448  * @ips: IPS driver struct
449  *
450  * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
451  * all logical threads.
452  */
453 static void ips_enable_cpu_turbo(struct ips_driver *ips)
454 {
455         /* Already on, no need to mess with MSRs */
456         if (ips->__cpu_turbo_on)
457                 return;
458
459         if (ips->turbo_toggle_allowed)
460                 on_each_cpu(do_enable_cpu_turbo, ips, 1);
461
462         ips->__cpu_turbo_on = true;
463 }
464
465 /**
466  * do_disable_cpu_turbo - internal turbo disable function
467  * @data: unused
468  *
469  * Internal function for actually updating MSRs.  When we enable/disable
470  * turbo, we need to do it on each CPU; this function is the one called
471  * by on_each_cpu() when needed.
472  */
473 static void do_disable_cpu_turbo(void *data)
474 {
475         u64 perf_ctl;
476
477         rdmsrl(IA32_PERF_CTL, perf_ctl);
478         if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
479                 perf_ctl |= IA32_PERF_TURBO_DIS;
480                 wrmsrl(IA32_PERF_CTL, perf_ctl);
481         }
482 }
483
484 /**
485  * ips_disable_cpu_turbo - disable turbo mode on all CPUs
486  * @ips: IPS driver struct
487  *
488  * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
489  * all logical threads.
490  */
491 static void ips_disable_cpu_turbo(struct ips_driver *ips)
492 {
493         /* Already off, leave it */
494         if (!ips->__cpu_turbo_on)
495                 return;
496
497         if (ips->turbo_toggle_allowed)
498                 on_each_cpu(do_disable_cpu_turbo, ips, 1);
499
500         ips->__cpu_turbo_on = false;
501 }
502
503 /**
504  * ips_gpu_busy - is GPU busy?
505  * @ips: IPS driver struct
506  *
507  * Check GPU for load to see whether we should increase its thermal budget.
508  * We need to call into the i915 driver in this case.
509  *
510  * RETURNS:
511  * True if the GPU could use more power, false otherwise.
512  */
513 static bool ips_gpu_busy(struct ips_driver *ips)
514 {
515         if (!ips_gpu_turbo_enabled(ips))
516                 return false;
517
518         return ips->gpu_busy();
519 }
520
521 /**
522  * ips_gpu_raise - raise GPU power clamp
523  * @ips: IPS driver struct
524  *
525  * Raise the GPU frequency/power if possible.  We need to call into the
526  * i915 driver in this case.
527  */
528 static void ips_gpu_raise(struct ips_driver *ips)
529 {
530         if (!ips_gpu_turbo_enabled(ips))
531                 return;
532
533         if (!ips->gpu_raise())
534                 ips->gpu_turbo_enabled = false;
535
536         return;
537 }
538
539 /**
540  * ips_gpu_lower - lower GPU power clamp
541  * @ips: IPS driver struct
542  *
543  * Lower GPU frequency/power if possible.  Need to call i915.
544  */
545 static void ips_gpu_lower(struct ips_driver *ips)
546 {
547         if (!ips_gpu_turbo_enabled(ips))
548                 return;
549
550         if (!ips->gpu_lower())
551                 ips->gpu_turbo_enabled = false;
552
553         return;
554 }
555
556 /**
557  * ips_enable_gpu_turbo - notify the gfx driver turbo is available
558  * @ips: IPS driver struct
559  *
560  * Call into the graphics driver indicating that it can safely use
561  * turbo mode.
562  */
563 static void ips_enable_gpu_turbo(struct ips_driver *ips)
564 {
565         if (ips->__gpu_turbo_on)
566                 return;
567         ips->__gpu_turbo_on = true;
568 }
569
570 /**
571  * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
572  * @ips: IPS driver struct
573  *
574  * Request that the graphics driver disable turbo mode.
575  */
576 static void ips_disable_gpu_turbo(struct ips_driver *ips)
577 {
578         /* Avoid calling i915 if turbo is already disabled */
579         if (!ips->__gpu_turbo_on)
580                 return;
581
582         if (!ips->gpu_turbo_disable())
583                 dev_err(ips->dev, "failed to disable graphics turbo\n");
584         else
585                 ips->__gpu_turbo_on = false;
586 }
587
588 /**
589  * mcp_exceeded - check whether we're outside our thermal & power limits
590  * @ips: IPS driver struct
591  *
592  * Check whether the MCP is over its thermal or power budget.
593  */
594 static bool mcp_exceeded(struct ips_driver *ips)
595 {
596         unsigned long flags;
597         bool ret = false;
598         u32 temp_limit;
599         u32 avg_power;
600
601         spin_lock_irqsave(&ips->turbo_status_lock, flags);
602
603         temp_limit = ips->mcp_temp_limit * 100;
604         if (ips->mcp_avg_temp > temp_limit)
605                 ret = true;
606
607         avg_power = ips->cpu_avg_power + ips->mch_avg_power;
608         if (avg_power > ips->mcp_power_limit)
609                 ret = true;
610
611         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
612
613         return ret;
614 }
615
616 /**
617  * cpu_exceeded - check whether a CPU core is outside its limits
618  * @ips: IPS driver struct
619  * @cpu: CPU number to check
620  *
621  * Check a given CPU's average temp or power is over its limit.
622  */
623 static bool cpu_exceeded(struct ips_driver *ips, int cpu)
624 {
625         unsigned long flags;
626         int avg;
627         bool ret = false;
628
629         spin_lock_irqsave(&ips->turbo_status_lock, flags);
630         avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
631         if (avg > (ips->limits->core_temp_limit * 100))
632                 ret = true;
633         if (ips->cpu_avg_power > ips->core_power_limit * 100)
634                 ret = true;
635         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
636
637         if (ret)
638                 dev_info(ips->dev, "CPU power or thermal limit exceeded\n");
639
640         return ret;
641 }
642
643 /**
644  * mch_exceeded - check whether the GPU is over budget
645  * @ips: IPS driver struct
646  *
647  * Check the MCH temp & power against their maximums.
648  */
649 static bool mch_exceeded(struct ips_driver *ips)
650 {
651         unsigned long flags;
652         bool ret = false;
653
654         spin_lock_irqsave(&ips->turbo_status_lock, flags);
655         if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
656                 ret = true;
657         if (ips->mch_avg_power > ips->mch_power_limit)
658                 ret = true;
659         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
660
661         return ret;
662 }
663
664 /**
665  * verify_limits - verify BIOS provided limits
666  * @ips: IPS structure
667  *
668  * BIOS can optionally provide non-default limits for power and temp.  Check
669  * them here and use the defaults if the BIOS values are not provided or
670  * are otherwise unusable.
671  */
672 static void verify_limits(struct ips_driver *ips)
673 {
674         if (ips->mcp_power_limit < ips->limits->mcp_power_limit ||
675             ips->mcp_power_limit > 35000)
676                 ips->mcp_power_limit = ips->limits->mcp_power_limit;
677
678         if (ips->mcp_temp_limit < ips->limits->core_temp_limit ||
679             ips->mcp_temp_limit < ips->limits->mch_temp_limit ||
680             ips->mcp_temp_limit > 150)
681                 ips->mcp_temp_limit = min(ips->limits->core_temp_limit,
682                                           ips->limits->mch_temp_limit);
683 }
684
685 /**
686  * update_turbo_limits - get various limits & settings from regs
687  * @ips: IPS driver struct
688  *
689  * Update the IPS power & temp limits, along with turbo enable flags,
690  * based on latest register contents.
691  *
692  * Used at init time and for runtime BIOS support, which requires polling
693  * the regs for updates (as a result of AC->DC transition for example).
694  *
695  * LOCKING:
696  * Caller must hold turbo_status_lock (outside of init)
697  */
698 static void update_turbo_limits(struct ips_driver *ips)
699 {
700         u32 hts = thm_readl(THM_HTS);
701
702         ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
703         /* 
704          * Disable turbo for now, until we can figure out why the power figures
705          * are wrong
706          */
707         ips->cpu_turbo_enabled = false;
708
709         if (ips->gpu_busy)
710                 ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
711
712         ips->core_power_limit = thm_readw(THM_MPCPC);
713         ips->mch_power_limit = thm_readw(THM_MMGPC);
714         ips->mcp_temp_limit = thm_readw(THM_PTL);
715         ips->mcp_power_limit = thm_readw(THM_MPPC);
716
717         verify_limits(ips);
718         /* Ignore BIOS CPU vs GPU pref */
719 }
720
721 /**
722  * ips_adjust - adjust power clamp based on thermal state
723  * @data: ips driver structure
724  *
725  * Wake up every 5s or so and check whether we should adjust the power clamp.
726  * Check CPU and GPU load to determine which needs adjustment.  There are
727  * several things to consider here:
728  *   - do we need to adjust up or down?
729  *   - is CPU busy?
730  *   - is GPU busy?
731  *   - is CPU in turbo?
732  *   - is GPU in turbo?
733  *   - is CPU or GPU preferred? (CPU is default)
734  *
735  * So, given the above, we do the following:
736  *   - up (TDP available)
737  *     - CPU not busy, GPU not busy - nothing
738  *     - CPU busy, GPU not busy - adjust CPU up
739  *     - CPU not busy, GPU busy - adjust GPU up
740  *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
741  *       non-preferred unit if necessary
742  *   - down (at TDP limit)
743  *     - adjust both CPU and GPU down if possible
744  *
745                 cpu+ gpu+       cpu+gpu-        cpu-gpu+        cpu-gpu-
746 cpu < gpu <     cpu+gpu+        cpu+            gpu+            nothing
747 cpu < gpu >=    cpu+gpu-(mcp<)  cpu+gpu-(mcp<)  gpu-            gpu-
748 cpu >= gpu <    cpu-gpu+(mcp<)  cpu-            cpu-gpu+(mcp<)  cpu-
749 cpu >= gpu >=   cpu-gpu-        cpu-gpu-        cpu-gpu-        cpu-gpu-
750  *
751  */
752 static int ips_adjust(void *data)
753 {
754         struct ips_driver *ips = data;
755         unsigned long flags;
756
757         dev_dbg(ips->dev, "starting ips-adjust thread\n");
758
759         /*
760          * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
761          * often isn't recommended due to ME interaction.
762          */
763         do {
764                 bool cpu_busy = ips_cpu_busy(ips);
765                 bool gpu_busy = ips_gpu_busy(ips);
766
767                 spin_lock_irqsave(&ips->turbo_status_lock, flags);
768                 if (ips->poll_turbo_status)
769                         update_turbo_limits(ips);
770                 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
771
772                 /* Update turbo status if necessary */
773                 if (ips->cpu_turbo_enabled)
774                         ips_enable_cpu_turbo(ips);
775                 else
776                         ips_disable_cpu_turbo(ips);
777
778                 if (ips->gpu_turbo_enabled)
779                         ips_enable_gpu_turbo(ips);
780                 else
781                         ips_disable_gpu_turbo(ips);
782
783                 /* We're outside our comfort zone, crank them down */
784                 if (mcp_exceeded(ips)) {
785                         ips_cpu_lower(ips);
786                         ips_gpu_lower(ips);
787                         goto sleep;
788                 }
789
790                 if (!cpu_exceeded(ips, 0) && cpu_busy)
791                         ips_cpu_raise(ips);
792                 else
793                         ips_cpu_lower(ips);
794
795                 if (!mch_exceeded(ips) && gpu_busy)
796                         ips_gpu_raise(ips);
797                 else
798                         ips_gpu_lower(ips);
799
800 sleep:
801                 schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
802         } while (!kthread_should_stop());
803
804         dev_dbg(ips->dev, "ips-adjust thread stopped\n");
805
806         return 0;
807 }
808
809 /*
810  * Helpers for reading out temp/power values and calculating their
811  * averages for the decision making and monitoring functions.
812  */
813
814 static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
815 {
816         u64 total = 0;
817         int i;
818         u16 avg;
819
820         for (i = 0; i < IPS_SAMPLE_COUNT; i++)
821                 total += (u64)(array[i] * 100);
822
823         do_div(total, IPS_SAMPLE_COUNT);
824
825         avg = (u16)total;
826
827         return avg;
828 }
829
830 static u16 read_mgtv(struct ips_driver *ips)
831 {
832         u16 ret;
833         u64 slope, offset;
834         u64 val;
835
836         val = thm_readq(THM_MGTV);
837         val = (val & TV_MASK) >> TV_SHIFT;
838
839         slope = offset = thm_readw(THM_MGTA);
840         slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
841         offset = offset & MGTA_OFFSET_MASK;
842
843         ret = ((val * slope + 0x40) >> 7) + offset;
844
845         return 0; /* MCH temp reporting buggy */
846 }
847
848 static u16 read_ptv(struct ips_driver *ips)
849 {
850         u16 val;
851
852         val = thm_readw(THM_PTV) & PTV_MASK;
853
854         return val;
855 }
856
857 static u16 read_ctv(struct ips_driver *ips, int cpu)
858 {
859         int reg = cpu ? THM_CTV2 : THM_CTV1;
860         u16 val;
861
862         val = thm_readw(reg);
863         if (!(val & CTV_TEMP_ERROR))
864                 val = (val) >> 6; /* discard fractional component */
865         else
866                 val = 0;
867
868         return val;
869 }
870
871 static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
872 {
873         u32 val;
874         u32 ret;
875
876         /*
877          * CEC is in joules/65535.  Take difference over time to
878          * get watts.
879          */
880         val = thm_readl(THM_CEC);
881
882         /* period is in ms and we want mW */
883         ret = (((val - *last) * 1000) / period);
884         ret = (ret * 1000) / 65535;
885         *last = val;
886
887         return 0;
888 }
889
890 static const u16 temp_decay_factor = 2;
891 static u16 update_average_temp(u16 avg, u16 val)
892 {
893         u16 ret;
894
895         /* Multiply by 100 for extra precision */
896         ret = (val * 100 / temp_decay_factor) +
897                 (((temp_decay_factor - 1) * avg) / temp_decay_factor);
898         return ret;
899 }
900
/* Weight divisor for the power average below */
static const u16 power_decay_factor = 2;

/*
 * Fold a new power sample @val into the running average @avg.
 *
 * Returns val / power_decay_factor plus the remaining share of @avg.
 * The result is returned as u32, matching the width of the inputs, so
 * averages above 65535 are no longer truncated.
 */
static u32 update_average_power(u32 avg, u32 val)
{
	u32 ret;

	ret = (val / power_decay_factor) +
		(((power_decay_factor - 1) * avg) / power_decay_factor);

	return ret;
}
911
912 static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
913 {
914         u64 total = 0;
915         u32 avg;
916         int i;
917
918         for (i = 0; i < IPS_SAMPLE_COUNT; i++)
919                 total += array[i];
920
921         do_div(total, IPS_SAMPLE_COUNT);
922         avg = (u32)total;
923
924         return avg;
925 }
926
927 static void monitor_timeout(struct timer_list *t)
928 {
929         struct ips_driver *ips = from_timer(ips, t, timer);
930         wake_up_process(ips->monitor);
931 }
932
/**
 * ips_monitor - temp/power monitoring thread
 * @data: ips driver structure
 *
 * This is the main function for the IPS driver.  It monitors power and
 * temperature in the MCP and adjusts CPU and GPU power clamps accordingly.
 *
 * We keep a 5s moving average of power consumption and temperature.  Using
 * that data, along with CPU vs GPU preference, we adjust the power clamps
 * up or down.
 */
944 static int ips_monitor(void *data)
945 {
946         struct ips_driver *ips = data;
947         unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
948         int i;
949         u32 *cpu_samples, *mchp_samples, old_cpu_power;
950         u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
951         u8 cur_seqno, last_seqno;
952
953         mcp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
954         ctv1_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
955         ctv2_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
956         mch_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
957         cpu_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
958         mchp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
959         if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
960                         !cpu_samples || !mchp_samples) {
961                 dev_err(ips->dev,
962                         "failed to allocate sample array, ips disabled\n");
963                 kfree(mcp_samples);
964                 kfree(ctv1_samples);
965                 kfree(ctv2_samples);
966                 kfree(mch_samples);
967                 kfree(cpu_samples);
968                 kfree(mchp_samples);
969                 return -ENOMEM;
970         }
971
972         last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
973                 ITV_ME_SEQNO_SHIFT;
974         seqno_timestamp = get_jiffies_64();
975
976         old_cpu_power = thm_readl(THM_CEC);
977         schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
978
979         /* Collect an initial average */
980         for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
981                 u32 mchp, cpu_power;
982                 u16 val;
983
984                 mcp_samples[i] = read_ptv(ips);
985
986                 val = read_ctv(ips, 0);
987                 ctv1_samples[i] = val;
988
989                 val = read_ctv(ips, 1);
990                 ctv2_samples[i] = val;
991
992                 val = read_mgtv(ips);
993                 mch_samples[i] = val;
994
995                 cpu_power = get_cpu_power(ips, &old_cpu_power,
996                                           IPS_SAMPLE_PERIOD);
997                 cpu_samples[i] = cpu_power;
998
999                 if (ips->read_mch_val) {
1000                         mchp = ips->read_mch_val();
1001                         mchp_samples[i] = mchp;
1002                 }
1003
1004                 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1005                 if (kthread_should_stop())
1006                         break;
1007         }
1008
1009         ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
1010         ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
1011         ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
1012         ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
1013         ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
1014         ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
1015         kfree(mcp_samples);
1016         kfree(ctv1_samples);
1017         kfree(ctv2_samples);
1018         kfree(mch_samples);
1019         kfree(cpu_samples);
1020         kfree(mchp_samples);
1021
1022         /* Start the adjustment thread now that we have data */
1023         wake_up_process(ips->adjust);
1024
1025         /*
1026          * Ok, now we have an initial avg.  From here on out, we track the
1027          * running avg using a decaying average calculation.  This allows
1028          * us to reduce the sample frequency if the CPU and GPU are idle.
1029          */
1030         old_cpu_power = thm_readl(THM_CEC);
1031         schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1032         last_sample_period = IPS_SAMPLE_PERIOD;
1033
1034         timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE);
1035         do {
1036                 u32 cpu_val, mch_val;
1037                 u16 val;
1038
1039                 /* MCP itself */
1040                 val = read_ptv(ips);
1041                 ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1042
1043                 /* Processor 0 */
1044                 val = read_ctv(ips, 0);
1045                 ips->ctv1_avg_temp =
1046                         update_average_temp(ips->ctv1_avg_temp, val);
1047                 /* Power */
1048                 cpu_val = get_cpu_power(ips, &old_cpu_power,
1049                                         last_sample_period);
1050                 ips->cpu_avg_power =
1051                         update_average_power(ips->cpu_avg_power, cpu_val);
1052
1053                 if (ips->second_cpu) {
1054                         /* Processor 1 */
1055                         val = read_ctv(ips, 1);
1056                         ips->ctv2_avg_temp =
1057                                 update_average_temp(ips->ctv2_avg_temp, val);
1058                 }
1059
1060                 /* MCH */
1061                 val = read_mgtv(ips);
1062                 ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1063                 /* Power */
1064                 if (ips->read_mch_val) {
1065                         mch_val = ips->read_mch_val();
1066                         ips->mch_avg_power =
1067                                 update_average_power(ips->mch_avg_power,
1068                                                      mch_val);
1069                 }
1070
1071                 /*
1072                  * Make sure ME is updating thermal regs.
1073                  * Note:
1074                  * If it's been more than a second since the last update,
1075                  * the ME is probably hung.
1076                  */
1077                 cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1078                         ITV_ME_SEQNO_SHIFT;
1079                 if (cur_seqno == last_seqno &&
1080                     time_after(jiffies, seqno_timestamp + HZ)) {
1081                         dev_warn(ips->dev,
1082                                  "ME failed to update for more than 1s, likely hung\n");
1083                 } else {
1084                         seqno_timestamp = get_jiffies_64();
1085                         last_seqno = cur_seqno;
1086                 }
1087
1088                 last_msecs = jiffies_to_msecs(jiffies);
1089                 expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1090
1091                 __set_current_state(TASK_INTERRUPTIBLE);
1092                 mod_timer(&ips->timer, expire);
1093                 schedule();
1094
1095                 /* Calculate actual sample period for power averaging */
1096                 last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1097                 if (!last_sample_period)
1098                         last_sample_period = 1;
1099         } while (!kthread_should_stop());
1100
1101         del_timer_sync(&ips->timer);
1102
1103         dev_dbg(ips->dev, "ips-monitor thread stopped\n");
1104
1105         return 0;
1106 }
1107
#if 0
/*
 * Debug-only register dump helpers (compiled out).  Each macro reads one
 * thermal register of the given width and prints it with dev_dbg(); they
 * rely on a local 'ips' being in scope.  The do { } while (0) wrapper
 * keeps them safe inside unbraced if/else bodies.
 */
#define THM_DUMPW(reg) \
	do { \
		u16 val = thm_readw(reg); \
		dev_dbg(ips->dev, #reg ": 0x%04x\n", val); \
	} while (0)
#define THM_DUMPL(reg) \
	do { \
		u32 val = thm_readl(reg); \
		dev_dbg(ips->dev, #reg ": 0x%08x\n", val); \
	} while (0)
/* A u64 needs the 'll' length modifier; plain %x would be a format mismatch */
#define THM_DUMPQ(reg) \
	do { \
		u64 val = thm_readq(reg); \
		dev_dbg(ips->dev, #reg ": 0x%016llx\n", val); \
	} while (0)

/* Print the processor temp limit and a selection of thermal registers. */
static void dump_thermal_info(struct ips_driver *ips)
{
	u16 ptl;

	ptl = thm_readw(THM_PTL);
	dev_dbg(ips->dev, "Processor temp limit: %d\n", ptl);

	THM_DUMPW(THM_CTA);
	THM_DUMPW(THM_TRC);
	THM_DUMPW(THM_CTV1);
	THM_DUMPL(THM_STS);
	THM_DUMPW(THM_PTV);
	THM_DUMPQ(THM_MGTV);
}
#endif
1140
1141 /**
1142  * ips_irq_handler - handle temperature triggers and other IPS events
1143  * @irq: irq number
1144  * @arg: unused
1145  *
1146  * Handle temperature limit trigger events, generally by lowering the clamps.
1147  * If we're at a critical limit, we clamp back to the lowest possible value
1148  * to prevent emergency shutdown.
1149  */
1150 static irqreturn_t ips_irq_handler(int irq, void *arg)
1151 {
1152         struct ips_driver *ips = arg;
1153         u8 tses = thm_readb(THM_TSES);
1154         u8 tes = thm_readb(THM_TES);
1155
1156         if (!tses && !tes)
1157                 return IRQ_NONE;
1158
1159         dev_info(ips->dev, "TSES: 0x%02x\n", tses);
1160         dev_info(ips->dev, "TES: 0x%02x\n", tes);
1161
1162         /* STS update from EC? */
1163         if (tes & 1) {
1164                 u32 sts, tc1;
1165
1166                 sts = thm_readl(THM_STS);
1167                 tc1 = thm_readl(THM_TC1);
1168
1169                 if (sts & STS_NVV) {
1170                         spin_lock(&ips->turbo_status_lock);
1171                         ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1172                                 STS_PCPL_SHIFT;
1173                         ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1174                                 STS_GPL_SHIFT;
1175                         /* ignore EC CPU vs GPU pref */
1176                         ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1177                         /* 
1178                          * Disable turbo for now, until we can figure
1179                          * out why the power figures are wrong
1180                          */
1181                         ips->cpu_turbo_enabled = false;
1182                         if (ips->gpu_busy)
1183                                 ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1184                         ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1185                                 STS_PTL_SHIFT;
1186                         ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1187                                 STS_PPL_SHIFT;
1188                         verify_limits(ips);
1189                         spin_unlock(&ips->turbo_status_lock);
1190
1191                         thm_writeb(THM_SEC, SEC_ACK);
1192                 }
1193                 thm_writeb(THM_TES, tes);
1194         }
1195
1196         /* Thermal trip */
1197         if (tses) {
1198                 dev_warn(ips->dev, "thermal trip occurred, tses: 0x%04x\n",
1199                          tses);
1200                 thm_writeb(THM_TSES, tses);
1201         }
1202
1203         return IRQ_HANDLED;
1204 }
1205
1206 #ifndef CONFIG_DEBUG_FS
/* No-op stand-ins for the debugfs helpers */
static void ips_debugfs_init(struct ips_driver *ips) { }
static void ips_debugfs_cleanup(struct ips_driver *ips) { }
1209 #else
1210
1211 /* Expose current state and limits in debugfs if possible */
1212
1213 static int cpu_temp_show(struct seq_file *m, void *data)
1214 {
1215         struct ips_driver *ips = m->private;
1216
1217         seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1218                    ips->ctv1_avg_temp % 100);
1219
1220         return 0;
1221 }
1222 DEFINE_SHOW_ATTRIBUTE(cpu_temp);
1223
1224 static int cpu_power_show(struct seq_file *m, void *data)
1225 {
1226         struct ips_driver *ips = m->private;
1227
1228         seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1229
1230         return 0;
1231 }
1232 DEFINE_SHOW_ATTRIBUTE(cpu_power);
1233
1234 static int cpu_clamp_show(struct seq_file *m, void *data)
1235 {
1236         u64 turbo_override;
1237         int tdp, tdc;
1238
1239         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1240
1241         tdp = (int)(turbo_override & TURBO_TDP_MASK);
1242         tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1243
1244         /* Convert to .1W/A units */
1245         tdp = tdp * 10 / 8;
1246         tdc = tdc * 10 / 8;
1247
1248         /* Watts Amperes */
1249         seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1250                    tdc / 10, tdc % 10);
1251
1252         return 0;
1253 }
1254 DEFINE_SHOW_ATTRIBUTE(cpu_clamp);
1255
1256 static int mch_temp_show(struct seq_file *m, void *data)
1257 {
1258         struct ips_driver *ips = m->private;
1259
1260         seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1261                    ips->mch_avg_temp % 100);
1262
1263         return 0;
1264 }
1265 DEFINE_SHOW_ATTRIBUTE(mch_temp);
1266
1267 static int mch_power_show(struct seq_file *m, void *data)
1268 {
1269         struct ips_driver *ips = m->private;
1270
1271         seq_printf(m, "%dmW\n", ips->mch_avg_power);
1272
1273         return 0;
1274 }
1275 DEFINE_SHOW_ATTRIBUTE(mch_power);
1276
1277 static void ips_debugfs_cleanup(struct ips_driver *ips)
1278 {
1279         debugfs_remove_recursive(ips->debug_root);
1280 }
1281
1282 static void ips_debugfs_init(struct ips_driver *ips)
1283 {
1284         ips->debug_root = debugfs_create_dir("ips", NULL);
1285
1286         debugfs_create_file("cpu_temp", 0444, ips->debug_root, ips, &cpu_temp_fops);
1287         debugfs_create_file("cpu_power", 0444, ips->debug_root, ips, &cpu_power_fops);
1288         debugfs_create_file("cpu_clamp", 0444, ips->debug_root, ips, &cpu_clamp_fops);
1289         debugfs_create_file("mch_temp", 0444, ips->debug_root, ips, &mch_temp_fops);
1290         debugfs_create_file("mch_power", 0444, ips->debug_root, ips, &mch_power_fops);
1291 }
1292 #endif /* CONFIG_DEBUG_FS */
1293
1294 /**
1295  * ips_detect_cpu - detect whether CPU supports IPS
1296  *
1297  * Walk our list and see if we're on a supported CPU.  If we find one,
1298  * return the limits for it.
1299  */
1300 static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1301 {
1302         u64 turbo_power, misc_en;
1303         struct ips_mcp_limits *limits = NULL;
1304         u16 tdp;
1305
1306         if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1307                 dev_info(ips->dev, "Non-IPS CPU detected.\n");
1308                 return NULL;
1309         }
1310
1311         rdmsrl(IA32_MISC_ENABLE, misc_en);
1312         /*
1313          * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1314          * turbo manually or we'll get an illegal MSR access, even though
1315          * turbo will still be available.
1316          */
1317         if (misc_en & IA32_MISC_TURBO_EN)
1318                 ips->turbo_toggle_allowed = true;
1319         else
1320                 ips->turbo_toggle_allowed = false;
1321
1322         if (strstr(boot_cpu_data.x86_model_id, "CPU       M"))
1323                 limits = &ips_sv_limits;
1324         else if (strstr(boot_cpu_data.x86_model_id, "CPU       L"))
1325                 limits = &ips_lv_limits;
1326         else if (strstr(boot_cpu_data.x86_model_id, "CPU       U"))
1327                 limits = &ips_ulv_limits;
1328         else {
1329                 dev_info(ips->dev, "No CPUID match found.\n");
1330                 return NULL;
1331         }
1332
1333         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1334         tdp = turbo_power & TURBO_TDP_MASK;
1335
1336         /* Sanity check TDP against CPU */
1337         if (limits->core_power_limit != (tdp / 8) * 1000) {
1338                 dev_info(ips->dev,
1339                          "CPU TDP doesn't match expected value (found %d, expected %d)\n",
1340                          tdp / 8, limits->core_power_limit / 1000);
1341                 limits->core_power_limit = (tdp / 8) * 1000;
1342         }
1343
1344         return limits;
1345 }
1346
1347 /**
1348  * ips_get_i915_syms - try to get GPU control methods from i915 driver
1349  * @ips: IPS driver
1350  *
1351  * The i915 driver exports several interfaces to allow the IPS driver to
1352  * monitor and control graphics turbo mode.  If we can find them, we can
1353  * enable graphics turbo, otherwise we must disable it to avoid exceeding
1354  * thermal and power limits in the MCP.
1355  */
1356 static bool ips_get_i915_syms(struct ips_driver *ips)
1357 {
1358         ips->read_mch_val = symbol_get(i915_read_mch_val);
1359         if (!ips->read_mch_val)
1360                 goto out_err;
1361         ips->gpu_raise = symbol_get(i915_gpu_raise);
1362         if (!ips->gpu_raise)
1363                 goto out_put_mch;
1364         ips->gpu_lower = symbol_get(i915_gpu_lower);
1365         if (!ips->gpu_lower)
1366                 goto out_put_raise;
1367         ips->gpu_busy = symbol_get(i915_gpu_busy);
1368         if (!ips->gpu_busy)
1369                 goto out_put_lower;
1370         ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1371         if (!ips->gpu_turbo_disable)
1372                 goto out_put_busy;
1373
1374         return true;
1375
1376 out_put_busy:
1377         symbol_put(i915_gpu_busy);
1378 out_put_lower:
1379         symbol_put(i915_gpu_lower);
1380 out_put_raise:
1381         symbol_put(i915_gpu_raise);
1382 out_put_mch:
1383         symbol_put(i915_read_mch_val);
1384 out_err:
1385         return false;
1386 }
1387
1388 static bool
1389 ips_gpu_turbo_enabled(struct ips_driver *ips)
1390 {
1391         if (!ips->gpu_busy && late_i915_load) {
1392                 if (ips_get_i915_syms(ips)) {
1393                         dev_info(ips->dev,
1394                                  "i915 driver attached, reenabling gpu turbo\n");
1395                         ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1396                 }
1397         }
1398
1399         return ips->gpu_turbo_enabled;
1400 }
1401
1402 void
1403 ips_link_to_i915_driver(void)
1404 {
1405         /* We can't cleanly get at the various ips_driver structs from
1406          * this caller (the i915 driver), so just set a flag saying
1407          * that it's time to try getting the symbols again.
1408          */
1409         late_i915_load = true;
1410 }
1411 EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1412
1413 static const struct pci_device_id ips_id_table[] = {
1414         { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
1415         { 0, }
1416 };
1417
1418 MODULE_DEVICE_TABLE(pci, ips_id_table);
1419
1420 static int ips_blacklist_callback(const struct dmi_system_id *id)
1421 {
1422         pr_info("Blacklisted intel_ips for %s\n", id->ident);
1423         return 1;
1424 }
1425
1426 static const struct dmi_system_id ips_blacklist[] = {
1427         {
1428                 .callback = ips_blacklist_callback,
1429                 .ident = "HP ProBook",
1430                 .matches = {
1431                         DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1432                         DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
1433                 },
1434         },
1435         { }     /* terminating entry */
1436 };
1437
1438 static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1439 {
1440         u64 platform_info;
1441         struct ips_driver *ips;
1442         u32 hts;
1443         int ret = 0;
1444         u16 htshi, trc, trc_required_mask;
1445         u8 tse;
1446
1447         if (dmi_check_system(ips_blacklist))
1448                 return -ENODEV;
1449
1450         ips = devm_kzalloc(&dev->dev, sizeof(*ips), GFP_KERNEL);
1451         if (!ips)
1452                 return -ENOMEM;
1453
1454         spin_lock_init(&ips->turbo_status_lock);
1455         ips->dev = &dev->dev;
1456
1457         ips->limits = ips_detect_cpu(ips);
1458         if (!ips->limits) {
1459                 dev_info(&dev->dev, "IPS not supported on this CPU\n");
1460                 return -ENXIO;
1461         }
1462
1463         ret = pcim_enable_device(dev);
1464         if (ret) {
1465                 dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1466                 return ret;
1467         }
1468
1469         ret = pcim_iomap_regions(dev, 1 << 0, pci_name(dev));
1470         if (ret) {
1471                 dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1472                 return ret;
1473         }
1474         ips->regmap = pcim_iomap_table(dev)[0];
1475
1476         pci_set_drvdata(dev, ips);
1477
1478         tse = thm_readb(THM_TSE);
1479         if (tse != TSE_EN) {
1480                 dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1481                 return -ENXIO;
1482         }
1483
1484         trc = thm_readw(THM_TRC);
1485         trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1486         if ((trc & trc_required_mask) != trc_required_mask) {
1487                 dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1488                 return -ENXIO;
1489         }
1490
1491         if (trc & TRC_CORE2_EN)
1492                 ips->second_cpu = true;
1493
1494         update_turbo_limits(ips);
1495         dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1496                 ips->mcp_power_limit / 10);
1497         dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1498                 ips->core_power_limit / 10);
1499         /* BIOS may update limits at runtime */
1500         if (thm_readl(THM_PSC) & PSP_PBRT)
1501                 ips->poll_turbo_status = true;
1502
1503         if (!ips_get_i915_syms(ips)) {
1504                 dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n");
1505                 ips->gpu_turbo_enabled = false;
1506         } else {
1507                 dev_dbg(&dev->dev, "graphics turbo enabled\n");
1508                 ips->gpu_turbo_enabled = true;
1509         }
1510
1511         /*
1512          * Check PLATFORM_INFO MSR to make sure this chip is
1513          * turbo capable.
1514          */
1515         rdmsrl(PLATFORM_INFO, platform_info);
1516         if (!(platform_info & PLATFORM_TDP)) {
1517                 dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1518                 return -ENODEV;
1519         }
1520
1521         /*
1522          * IRQ handler for ME interaction
1523          * Note: don't use MSI here as the PCH has bugs.
1524          */
1525         ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
1526         if (ret < 0)
1527                 return ret;
1528
1529         ips->irq = pci_irq_vector(dev, 0);
1530
1531         ret = request_irq(ips->irq, ips_irq_handler, IRQF_SHARED, "ips", ips);
1532         if (ret) {
1533                 dev_err(&dev->dev, "request irq failed, aborting\n");
1534                 return ret;
1535         }
1536
1537         /* Enable aux, hot & critical interrupts */
1538         thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1539                    TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1540         thm_writeb(THM_TEN, TEN_UPDATE_EN);
1541
1542         /* Collect adjustment values */
1543         ips->cta_val = thm_readw(THM_CTA);
1544         ips->pta_val = thm_readw(THM_PTA);
1545         ips->mgta_val = thm_readw(THM_MGTA);
1546
1547         /* Save turbo limits & ratios */
1548         rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1549
1550         ips_disable_cpu_turbo(ips);
1551         ips->cpu_turbo_enabled = false;
1552
1553         /* Create thermal adjust thread */
1554         ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1555         if (IS_ERR(ips->adjust)) {
1556                 dev_err(&dev->dev,
1557                         "failed to create thermal adjust thread, aborting\n");
1558                 ret = -ENOMEM;
1559                 goto error_free_irq;
1560
1561         }
1562
1563         /*
1564          * Set up the work queue and monitor thread. The monitor thread
1565          * will wake up ips_adjust thread.
1566          */
1567         ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1568         if (IS_ERR(ips->monitor)) {
1569                 dev_err(&dev->dev,
1570                         "failed to create thermal monitor thread, aborting\n");
1571                 ret = -ENOMEM;
1572                 goto error_thread_cleanup;
1573         }
1574
1575         hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1576                 (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1577         htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1578
1579         thm_writew(THM_HTSHI, htshi);
1580         thm_writel(THM_HTS, hts);
1581
1582         ips_debugfs_init(ips);
1583
1584         dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1585                  ips->mcp_temp_limit);
1586         return ret;
1587
1588 error_thread_cleanup:
1589         kthread_stop(ips->adjust);
1590 error_free_irq:
1591         free_irq(ips->irq, ips);
1592         pci_free_irq_vectors(dev);
1593         return ret;
1594 }
1595
1596 static void ips_remove(struct pci_dev *dev)
1597 {
1598         struct ips_driver *ips = pci_get_drvdata(dev);
1599         u64 turbo_override;
1600
1601         ips_debugfs_cleanup(ips);
1602
1603         /* Release i915 driver */
1604         if (ips->read_mch_val)
1605                 symbol_put(i915_read_mch_val);
1606         if (ips->gpu_raise)
1607                 symbol_put(i915_gpu_raise);
1608         if (ips->gpu_lower)
1609                 symbol_put(i915_gpu_lower);
1610         if (ips->gpu_busy)
1611                 symbol_put(i915_gpu_busy);
1612         if (ips->gpu_turbo_disable)
1613                 symbol_put(i915_gpu_turbo_disable);
1614
1615         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1616         turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
1617         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1618         wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1619
1620         free_irq(ips->irq, ips);
1621         pci_free_irq_vectors(dev);
1622         if (ips->adjust)
1623                 kthread_stop(ips->adjust);
1624         if (ips->monitor)
1625                 kthread_stop(ips->monitor);
1626         dev_dbg(&dev->dev, "IPS driver removed\n");
1627 }
1628
1629 static struct pci_driver ips_pci_driver = {
1630         .name = "intel ips",
1631         .id_table = ips_id_table,
1632         .probe = ips_probe,
1633         .remove = ips_remove,
1634 };
1635
1636 module_pci_driver(ips_pci_driver);
1637
1638 MODULE_LICENSE("GPL v2");
1639 MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1640 MODULE_DESCRIPTION("Intelligent Power Sharing Driver");