/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "mmsch_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

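/*
 * The common VCN DPG register helpers reference the legacy mm-prefixed
 * names, so alias them to the reg-prefixed names used by the VCN 4.0.3
 * register headers.
 */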
#define mmUVD_DPG_LMA_CTL               regUVD_DPG_LMA_CTL
#define mmUVD_DPG_LMA_CTL_BASE_IDX      regUVD_DPG_LMA_CTL_BASE_IDX
#define mmUVD_DPG_LMA_DATA              regUVD_DPG_LMA_DATA
#define mmUVD_DPG_LMA_DATA_BASE_IDX     regUVD_DPG_LMA_DATA_BASE_IDX

#define VCN_VID_SOC_ADDRESS_2_0         0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0        0x48300

static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_3_set_powergating_state(void *handle,
                enum amd_powergating_state state);
static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
                int inst_idx, struct dpg_pause_state *new_state);
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
                                  int inst_idx, bool indirect);

/**
 * vcn_v4_0_3_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int vcn_v4_0_3_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* re-use enc ring as unified ring */
        adev->vcn.num_enc_rings = 1;

        vcn_v4_0_3_set_unified_ring_funcs(adev);
        vcn_v4_0_3_set_irq_funcs(adev);
        vcn_v4_0_3_set_ras_funcs(adev);

        return amdgpu_vcn_early_init(adev);
}

/**
 * vcn_v4_0_3_sw_init - sw init for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int vcn_v4_0_3_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int i, r, vcn_inst;

        r = amdgpu_vcn_sw_init(adev);
        if (r)
                return r;

        amdgpu_vcn_setup_ucode(adev);

        r = amdgpu_vcn_resume(adev);
        if (r)
                return r;

        /* VCN DEC TRAP */
        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
                VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
        if (r)
                return r;

        for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
                volatile struct amdgpu_vcn4_fw_shared *fw_shared;

                vcn_inst = GET_INST(VCN, i);

                ring = &adev->vcn.inst[i].ring_enc[0];
                ring->use_doorbell = true;

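                /*
                 * vcn_ring0_1 is a 64-bit doorbell index; shifting left by one
                 * converts it to a 32-bit doorbell slot. Bare metal spaces
                 * instances 9 doorbells apart, while SR-IOV uses a 32-doorbell
                 * stride per instance (presumably to match the layout the host
                 * programs for each VF).
                 */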
                if (!amdgpu_sriov_vf(adev))
                        ring->doorbell_index =
                                (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
                                9 * vcn_inst;
                else
                        ring->doorbell_index =
                                (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
                                32 * vcn_inst;

                ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
                sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
                r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
                                     AMDGPU_RING_PRIO_DEFAULT,
                                     &adev->vcn.inst[i].sched_score);
                if (r)
                        return r;

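                /*
                 * fw_shared is a buffer visible to both driver and firmware;
                 * setting the unified-queue flag tells the firmware that a
                 * single combined queue is used instead of separate dec/enc
                 * rings.
                 */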
                fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
                fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
                fw_shared->sq.is_enabled = true;

                if (amdgpu_vcnfw_log)
                        amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
        }

        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_virt_alloc_mm_table(adev);
                if (r)
                        return r;
        }

        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
                adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;

        if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
                r = amdgpu_vcn_ras_sw_init(adev);
                if (r) {
                        dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
                        return r;
                }
        }

        return 0;
}

/**
 * vcn_v4_0_3_sw_fini - sw fini for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * VCN suspend and free up sw allocation
 */
static int vcn_v4_0_3_sw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i, r, idx;

        if (drm_dev_enter(&adev->ddev, &idx)) {
                for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
                        volatile struct amdgpu_vcn4_fw_shared *fw_shared;

                        fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
                        fw_shared->present_flag_0 = 0;
                        fw_shared->sq.is_enabled = cpu_to_le32(false);
                }
                drm_dev_exit(idx);
        }

        if (amdgpu_sriov_vf(adev))
                amdgpu_virt_free_mm_table(adev);

        r = amdgpu_vcn_suspend(adev);
        if (r)
                return r;

        r = amdgpu_vcn_sw_fini(adev);

        return r;
}

/**
 * vcn_v4_0_3_hw_init - start and test VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vcn_v4_0_3_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int i, r, vcn_inst;

        if (amdgpu_sriov_vf(adev)) {
                r = vcn_v4_0_3_start_sriov(adev);
                if (r)
                        goto done;

                for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                        ring = &adev->vcn.inst[i].ring_enc[0];
                        ring->wptr = 0;
                        ring->wptr_old = 0;
                        vcn_v4_0_3_unified_ring_set_wptr(ring);
                        ring->sched.ready = true;
                }
        } else {
                for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                        vcn_inst = GET_INST(VCN, i);
                        ring = &adev->vcn.inst[i].ring_enc[0];

                        if (ring->use_doorbell) {
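                                /*
                                 * Route the ring's doorbell range through NBIO
                                 * to the owning AID, then point VCN_RB1_DB_CTRL
                                 * at the ring's doorbell offset and enable it.
                                 */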
                                adev->nbio.funcs->vcn_doorbell_range(
                                        adev, ring->use_doorbell,
                                        (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
                                                9 * vcn_inst,
                                        adev->vcn.inst[i].aid_id);

                                WREG32_SOC15(
                                        VCN, GET_INST(VCN, ring->me),
                                        regVCN_RB1_DB_CTRL,
                                        ring->doorbell_index
                                                        << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
                                                VCN_RB1_DB_CTRL__EN_MASK);

                                /* Read DB_CTRL to flush the write DB_CTRL command. */
                                RREG32_SOC15(
                                        VCN, GET_INST(VCN, ring->me),
                                        regVCN_RB1_DB_CTRL);
                        }

                        r = amdgpu_ring_test_helper(ring);
                        if (r)
                                goto done;
                }
        }

done:
        if (!r)
                DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully (under %s).\n",
                        (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");

        return r;
}

/**
 * vcn_v4_0_3_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCN block, mark ring as not ready anymore
 */
static int vcn_v4_0_3_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        cancel_delayed_work_sync(&adev->vcn.idle_work);

        if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
                vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);

        return 0;
}

/**
 * vcn_v4_0_3_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend VCN block
 */
static int vcn_v4_0_3_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = vcn_v4_0_3_hw_fini(adev);
        if (r)
                return r;

        r = amdgpu_vcn_suspend(adev);

        return r;
}

/**
 * vcn_v4_0_3_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init VCN block
 */
static int vcn_v4_0_3_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_vcn_resume(adev);
        if (r)
                return r;

        r = vcn_v4_0_3_hw_init(adev);

        return r;
}

/**
 * vcn_v4_0_3_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Let the VCN memory controller know its offsets
 */
static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
{
        uint32_t offset, size, vcn_inst;
        const struct common_firmware_header *hdr;

        hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
        size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

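        /*
         * The VCPU address space is programmed as three cached windows plus
         * one non-cached window: window 0 holds the firmware image (from the
         * PSP TMR region when PSP loads the ucode, otherwise from the driver
         * BO), window 1 the stack, window 2 the context, and the non-cached
         * window the fw_shared buffer.
         */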
        vcn_inst = GET_INST(VCN, inst_idx);
        /* cache window 0: fw */
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32_SOC15(
                        VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
                                 .tmr_mc_addr_lo));
                WREG32_SOC15(
                        VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
                                 .tmr_mc_addr_hi));
                WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
                offset = 0;
        } else {
                WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
                             lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
                WREG32_SOC15(VCN, vcn_inst,
                             regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
                             upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
                offset = size;
                WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
                             AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
        }
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);

        /* cache window 1: stack */
        WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
                     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
        WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
                     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
                     AMDGPU_VCN_STACK_SIZE);

        /* cache window 2: context */
        WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
                     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
                                   AMDGPU_VCN_STACK_SIZE));
        WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
                     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
                                   AMDGPU_VCN_STACK_SIZE));
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
                     AMDGPU_VCN_CONTEXT_SIZE);

        /* non-cache window */
        WREG32_SOC15(
                VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
                lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
        WREG32_SOC15(
                VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
                upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
        WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
        WREG32_SOC15(
                VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
                AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
}

/**
 * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Let the VCN memory controller know its offsets with dpg mode
 */
static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
        uint32_t offset, size;
        const struct common_firmware_header *hdr;

        hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
        size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

        /* cache window 0: fw */
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                if (!indirect) {
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
                                        inst_idx].tmr_mc_addr_lo), 0, indirect);
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
                                        inst_idx].tmr_mc_addr_hi), 0, indirect);
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
                } else {
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
                        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                                VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
                }
                offset = 0;
        } else {
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
                        lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
                        upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
                offset = size;
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
                        AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
        }

        if (!indirect)
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
        else
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);

        /* cache window 1: stack */
        if (!indirect) {
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
                        lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
                        upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
        } else {
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
                WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
        }
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);

        /* cache window 2: context */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
                        lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
                                AMDGPU_VCN_STACK_SIZE), 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
                        upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
                                AMDGPU_VCN_STACK_SIZE), 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);

        /* non-cache window */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
                        lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
                        upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                        VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
                        AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);

        /* VCN global tiling registers */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}

/**
 * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Disable clock gating for VCN block
 */
static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
        uint32_t data;
        int vcn_inst;

        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
                return;

        vcn_inst = GET_INST(VCN, inst_idx);

        /* VCN disable CGC */
        data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
        data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
        data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
        data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
        WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
        data &= ~(UVD_CGC_GATE__SYS_MASK
                | UVD_CGC_GATE__MPEG2_MASK
                | UVD_CGC_GATE__REGS_MASK
                | UVD_CGC_GATE__RBC_MASK
                | UVD_CGC_GATE__LMI_MC_MASK
                | UVD_CGC_GATE__LMI_UMC_MASK
                | UVD_CGC_GATE__MPC_MASK
                | UVD_CGC_GATE__LBSI_MASK
                | UVD_CGC_GATE__LRBBM_MASK
                | UVD_CGC_GATE__WCB_MASK
                | UVD_CGC_GATE__VCPU_MASK
                | UVD_CGC_GATE__MMSCH_MASK);

        WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
        SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
        data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
                | UVD_CGC_CTRL__MPEG2_MODE_MASK
                | UVD_CGC_CTRL__REGS_MODE_MASK
                | UVD_CGC_CTRL__RBC_MODE_MASK
                | UVD_CGC_CTRL__LMI_MC_MODE_MASK
                | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
                | UVD_CGC_CTRL__MPC_MODE_MASK
                | UVD_CGC_CTRL__LBSI_MODE_MASK
                | UVD_CGC_CTRL__LRBBM_MODE_MASK
                | UVD_CGC_CTRL__WCB_MODE_MASK
                | UVD_CGC_CTRL__VCPU_MODE_MASK
                | UVD_CGC_CTRL__MMSCH_MODE_MASK);
        WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
        data |= (UVD_SUVD_CGC_GATE__SRE_MASK
                | UVD_SUVD_CGC_GATE__SIT_MASK
                | UVD_SUVD_CGC_GATE__SMP_MASK
                | UVD_SUVD_CGC_GATE__SCM_MASK
                | UVD_SUVD_CGC_GATE__SDB_MASK
                | UVD_SUVD_CGC_GATE__SRE_H264_MASK
                | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
                | UVD_SUVD_CGC_GATE__SIT_H264_MASK
                | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
                | UVD_SUVD_CGC_GATE__SCM_H264_MASK
                | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
                | UVD_SUVD_CGC_GATE__SDB_H264_MASK
                | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
                | UVD_SUVD_CGC_GATE__ENT_MASK
                | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
                | UVD_SUVD_CGC_GATE__SITE_MASK
                | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
                | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
                | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
                | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
                | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
        WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
        data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
                | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
                | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
        WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
}

/**
 * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
 *
 * @adev: amdgpu_device pointer
 * @sram_sel: sram select
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Disable clock gating for VCN block with dpg mode
 */
static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
                                int inst_idx, uint8_t indirect)
{
        uint32_t reg_data = 0;

        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
                return;

        /* enable sw clock gating control */
        reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
        reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
        reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
        reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
                 UVD_CGC_CTRL__MPEG2_MODE_MASK |
                 UVD_CGC_CTRL__REGS_MODE_MASK |
                 UVD_CGC_CTRL__RBC_MODE_MASK |
                 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
                 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
                 UVD_CGC_CTRL__IDCT_MODE_MASK |
                 UVD_CGC_CTRL__MPRD_MODE_MASK |
                 UVD_CGC_CTRL__MPC_MODE_MASK |
                 UVD_CGC_CTRL__LBSI_MODE_MASK |
                 UVD_CGC_CTRL__LRBBM_MODE_MASK |
                 UVD_CGC_CTRL__WCB_MODE_MASK |
                 UVD_CGC_CTRL__VCPU_MODE_MASK);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);

        /* turn off clock gating */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);

        /* turn on SUVD clock gating */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);

        /* turn on sw mode in UVD_SUVD_CGC_CTRL */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}

/**
 * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Enable clock gating for VCN block
 */
static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
        uint32_t data;
        int vcn_inst;

        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
                return;

        vcn_inst = GET_INST(VCN, inst_idx);

        /* enable VCN CGC */
        data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
        data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
        data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
        data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
        WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
        data |= (UVD_CGC_CTRL__SYS_MODE_MASK
                | UVD_CGC_CTRL__MPEG2_MODE_MASK
                | UVD_CGC_CTRL__REGS_MODE_MASK
                | UVD_CGC_CTRL__RBC_MODE_MASK
                | UVD_CGC_CTRL__LMI_MC_MODE_MASK
                | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
                | UVD_CGC_CTRL__MPC_MODE_MASK
                | UVD_CGC_CTRL__LBSI_MODE_MASK
                | UVD_CGC_CTRL__LRBBM_MODE_MASK
                | UVD_CGC_CTRL__WCB_MODE_MASK
                | UVD_CGC_CTRL__VCPU_MODE_MASK);
        WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

        data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
        data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
                | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
                | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
                | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
        WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
}

/**
 * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Start VCN block with dpg mode
 */
static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
        volatile struct amdgpu_vcn4_fw_shared *fw_shared =
                                                adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
        struct amdgpu_ring *ring;
        int vcn_inst;
        uint32_t tmp;

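        /*
         * In DPG mode, when @indirect is true most of the register programming
         * below is staged into the DPG scratch SRAM and later handed to PSP
         * via amdgpu_vcn_psp_update_sram() instead of being written directly.
         */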
        vcn_inst = GET_INST(VCN, inst_idx);
        /* disable register anti-hang mechanism */
        WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
                 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
        /* enable dynamic power gating mode */
        tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
        tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
        tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
        WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);

        if (indirect) {
                DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
                        inst_idx, adev->vcn.inst[inst_idx].aid_id);
                adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
                                (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
                /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
                WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
                        adev->vcn.inst[inst_idx].aid_id, 0, true);
        }

        /* disable clock gating */
        vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

        /* enable VCPU clock */
        tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
        tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
        tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;

        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);

        /* disable master interrupt */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);

        /* setup regUVD_LMI_CTRL */
        tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
                UVD_LMI_CTRL__REQ_MODE_MASK |
                UVD_LMI_CTRL__CRC_RESET_MASK |
                UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
                UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
                UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
                (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
                0x00100000L);
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);

        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MPC_CNTL),
                0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MPC_SET_MUXA0),
                ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
                 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
                 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MPC_SET_MUXB0),
                 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
                 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
                 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MPC_SET_MUX),
                ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
                 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

        vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);

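        /* Rewrite VCPU_CNTL without BLK_RST to release the VCPU block reset. */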
        tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
        tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);

        /* enable LMI MC and UMC channels */
        tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);

        vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);

        /* enable master interrupt */
        WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
                VCN, 0, regUVD_MASTINT_EN),
                UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

        if (indirect)
                amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);

        ring = &adev->vcn.inst[inst_idx].ring_enc[0];

        /* program the RB_BASE for ring buffer */
        WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
                     lower_32_bits(ring->gpu_addr));
        WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
                     upper_32_bits(ring->gpu_addr));

        WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
                     ring->ring_size / sizeof(uint32_t));

        /* resetting ring, fw should not check RB ring */
        tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
        tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
        WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
        fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;

        /* Initialize the ring buffer's read and write pointers */
        WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
        WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
        ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);

        tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
        tmp |= VCN_RB_ENABLE__RB_EN_MASK;
        WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
        fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);

        /* resetting done, fw can check RB ring */
        fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

        return 0;
}

static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
        int i, vcn_inst;
        struct amdgpu_ring *ring_enc;
        uint64_t cache_addr;
        uint64_t rb_enc_addr;
        uint64_t ctx_addr;
        uint32_t param, resp, expected;
        uint32_t offset, cache_size;
        uint32_t tmp, timeout;

        struct amdgpu_mm_table *table = &adev->virt.mm_table;
        uint32_t *table_loc;
        uint32_t table_size;
        uint32_t size, size_dw;
        uint32_t init_status;
        uint32_t enabled_vcn;

        struct mmsch_v4_0_cmd_direct_write
                direct_wt = { {0} };
        struct mmsch_v4_0_cmd_direct_read_modify_write
                direct_rd_mod_wt = { {0} };
        struct mmsch_v4_0_cmd_end end = { {0} };
        struct mmsch_v4_0_3_init_header header;

        volatile struct amdgpu_vcn4_fw_shared *fw_shared;
        volatile struct amdgpu_fw_shared_rb_setup *rb_setup;

        direct_wt.cmd_header.command_type =
                MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type =
                MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

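        /*
         * For each instance: build an init table of direct register writes in
         * the mm_table buffer, hand its GPU address to the MMSCH through the
         * VF context registers, then poll the mailbox until the MMSCH reports
         * it has consumed the table.
         */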
        for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
                vcn_inst = GET_INST(VCN, i);

                memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
                header.version = MMSCH_VERSION;
                header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

                table_loc = (uint32_t *)table->cpu_addr;
                table_loc += header.total_size;

                table_size = 0;

                MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
                        ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

                cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);

                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);

                        offset = 0;
                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_VCPU_CACHE_OFFSET0), 0);
                } else {
                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
                                lower_32_bits(adev->vcn.inst[i].gpu_addr));
                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
                                upper_32_bits(adev->vcn.inst[i].gpu_addr));
                        offset = cache_size;
                        MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                                regUVD_VCPU_CACHE_OFFSET0),
                                AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
                }

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_CACHE_SIZE0),
                        cache_size);

                cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_CACHE_OFFSET1), 0);
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);

                cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
                        AMDGPU_VCN_STACK_SIZE;

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_CACHE_OFFSET2), 0);

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);

                fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
                rb_setup = &fw_shared->rb_setup;

                ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
                ring_enc->wptr = 0;
                rb_enc_addr = ring_enc->gpu_addr;

                rb_setup->is_rb_enabled_flags |= RB_ENABLED;
                rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
                rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
                rb_setup->rb_size = ring_enc->ring_size / 4;
                fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
                        lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
                        upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
                MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
                        regUVD_VCPU_NONCACHE_SIZE0),
                        AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
                MMSCH_V4_0_INSERT_END();

                header.vcn0.init_status = 0;
                header.vcn0.table_offset = header.total_size;
                header.vcn0.table_size = table_size;
                header.total_size += table_size;

                /* Send init table to mmsch */
                size = sizeof(struct mmsch_v4_0_3_init_header);
                table_loc = (uint32_t *)table->cpu_addr;
                memcpy((void *)table_loc, &header, size);

                ctx_addr = table->gpu_addr;
                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

                tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
                tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
                tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);

                size = header.total_size;
                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);

                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);

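                /*
                 * Kick the MMSCH by writing the host mailbox, then poll the
                 * response register in 10 us steps for up to ~1 ms.
                 */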
                param = 0x00000001;
                WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
                tmp = 0;
                timeout = 1000;
                resp = 0;
                expected = MMSCH_VF_MAILBOX_RESP__OK;
                while (resp != expected) {
                        resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
                        if (resp != 0)
                                break;

                        udelay(10);
                        tmp = tmp + 10;
                        if (tmp >= timeout) {
                                DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
                                        " waiting for regMMSCH_VF_MAILBOX_RESP "\
                                        "(expected=0x%08x, readback=0x%08x)\n",
                                        tmp, expected, resp);
                                return -EBUSY;
                        }
                }

                enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
                init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
                if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
                                        && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
                        DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
                                "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
                }
        }

        return 0;
}

/**
 * vcn_v4_0_3_start - VCN start
 *
 * @adev: amdgpu_device pointer
 *
 * Start VCN block
 */
static int vcn_v4_0_3_start(struct amdgpu_device *adev)
{
        volatile struct amdgpu_vcn4_fw_shared *fw_shared;
        struct amdgpu_ring *ring;
        int i, j, k, r, vcn_inst;
        uint32_t tmp;

        if (adev->pm.dpm_enabled)
                amdgpu_dpm_enable_uvd(adev, true);

        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
                        r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
                        continue;
                }

                vcn_inst = GET_INST(VCN, i);
                /* set VCN status busy */
                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
                      UVD_STATUS__UVD_BUSY;
                WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);

                /* SW clock gating */
                vcn_v4_0_3_disable_clock_gating(adev, i);

                /* enable VCPU clock */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
                         UVD_VCPU_CNTL__CLK_EN_MASK,
                         ~UVD_VCPU_CNTL__CLK_EN_MASK);

                /* disable master interrupt */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
                         ~UVD_MASTINT_EN__VCPU_EN_MASK);

                /* enable LMI MC and UMC channels */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
                         ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
                tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
                tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
                WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

                /* setup regUVD_LMI_CTRL */
                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
                WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
                             tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
                                     UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
                                     UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
                                     UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

                /* setup regUVD_MPC_CNTL */
                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
                tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
                tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
                WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);

                /* setup UVD_MPC_SET_MUXA0 */
                WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
                             ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
                              (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
                              (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
                              (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

                /* setup UVD_MPC_SET_MUXB0 */
                WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
                             ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
                              (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
                              (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
                              (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

                /* setup UVD_MPC_SET_MUX */
                WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
                             ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
                              (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
                              (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));

                vcn_v4_0_3_mc_resume(adev, i);

                /* VCN global tiling registers */
                WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
                             adev->gfx.config.gb_addr_config);
                WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
                             adev->gfx.config.gb_addr_config);

                /* unblock VCPU register access */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
                         ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

                /* release VCPU reset to boot */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
                         ~UVD_VCPU_CNTL__BLK_RST_MASK);

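                /*
                 * Wait for the VCPU to report boot completion (bit 1 of
                 * UVD_STATUS), retrying with a VCPU block reset up to 10
                 * times before giving up.
                 */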
1135                 for (j = 0; j < 10; ++j) {
1136                         uint32_t status;
1137
1138                         for (k = 0; k < 100; ++k) {
1139                                 status = RREG32_SOC15(VCN, vcn_inst,
1140                                                       regUVD_STATUS);
1141                                 if (status & 2)
1142                                         break;
1143                                 mdelay(10);
1144                         }
1145                         r = 0;
1146                         if (status & 2)
1147                                 break;
1148
1149                         DRM_DEV_ERROR(adev->dev,
1150                                 "VCN decode not responding, trying to reset the VCPU!!!\n");
1151                         WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1152                                                   regUVD_VCPU_CNTL),
1153                                  UVD_VCPU_CNTL__BLK_RST_MASK,
1154                                  ~UVD_VCPU_CNTL__BLK_RST_MASK);
1155                         mdelay(10);
1156                         WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1157                                                   regUVD_VCPU_CNTL),
1158                                  0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
1159
1160                         mdelay(10);
1161                         r = -1;
1162                 }
1163
1164                 if (r) {
1165                         DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
1166                         return r;
1167                 }
1168
1169                 /* enable master interrupt */
1170                 WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
1171                          UVD_MASTINT_EN__VCPU_EN_MASK,
1172                          ~UVD_MASTINT_EN__VCPU_EN_MASK);
1173
1174                 /* clear the busy bit of VCN_STATUS */
1175                 WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
1176                          ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1177
1178                 ring = &adev->vcn.inst[i].ring_enc[0];
1179                 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1180
1181                 /* program the RB_BASE for ring buffer */
1182                 WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
1183                              lower_32_bits(ring->gpu_addr));
1184                 WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
1185                              upper_32_bits(ring->gpu_addr));
1186
1187                 WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
1188                              ring->ring_size / sizeof(uint32_t));
1189
1190                 /* resetting ring, fw should not check RB ring */
1191                 tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1192                 tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
1193                 WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1194
1195                 /* Initialize the ring buffer's read and write pointers */
1196                 WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
1197                 WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
1198
1199                 tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1200                 tmp |= VCN_RB_ENABLE__RB_EN_MASK;
1201                 WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1202
1203                 ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
                fw_shared->sq.queue_mode &=
                        cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
        }
        return 0;
}

/**
 * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 *
 * Stop VCN block with dpg mode
 */
static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
        uint32_t tmp;
        int vcn_inst;

        vcn_inst = GET_INST(VCN, inst_idx);

        /* Wait for power status to be 1 */
        SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
                           UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

        /* wait for read ptr to be equal to write ptr */
        tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
        SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);

        SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
                           UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

        /* disable dynamic power gating mode */
        WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
                 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
        return 0;
}

/**
 * vcn_v4_0_3_stop - VCN stop
 *
 * @adev: amdgpu_device pointer
 *
 * Stop VCN block
 */
static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
{
        volatile struct amdgpu_vcn4_fw_shared *fw_shared;
        int i, r = 0, vcn_inst;
        uint32_t tmp;

        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                vcn_inst = GET_INST(VCN, i);

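                /* tell the firmware to hold off DPG power-state transitions while stopping */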
                fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
                fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;

                if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
                        vcn_v4_0_3_stop_dpg_mode(adev, i);
                        continue;
                }

                /* wait for vcn idle */
                r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
                                       UVD_STATUS__IDLE, 0x7);
                if (r)
                        goto Done;

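                /* wait for VCPU LMI read/write traffic to drain */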
                tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
                        UVD_LMI_STATUS__READ_CLEAN_MASK |
                        UVD_LMI_STATUS__WRITE_CLEAN_MASK |
                        UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
                r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
                                       tmp);
                if (r)
                        goto Done;

                /* stall UMC channel */
                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
                tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
                WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
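                /* wait for the remaining UMC traffic to drain */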
                tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
                        UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
                r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
                                       tmp);
                if (r)
                        goto Done;

                /* block VCPU register access */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
                         UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
                         ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

                /* put VCPU into reset */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
                         UVD_VCPU_CNTL__BLK_RST_MASK,
                         ~UVD_VCPU_CNTL__BLK_RST_MASK);

                /* disable VCPU clock */
                WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
                         ~(UVD_VCPU_CNTL__CLK_EN_MASK));

                /* reset LMI UMC/LMI/VCPU */
                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
                tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
                WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

                tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
                tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
                WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

                /* clear VCN status */
                WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);

                /* apply HW clock gating */
                vcn_v4_0_3_enable_clock_gating(adev, i);
        }
Done:
        if (adev->pm.dpm_enabled)
                amdgpu_dpm_enable_uvd(adev, false);

        return 0;
}

/**
 * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @new_state: pause state
 *
 * Pause dpg mode for VCN block
 */
static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
                                struct dpg_pause_state *new_state)
{
        return 0;
}

/**
 * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware unified read pointer
 */
static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
                DRM_ERROR("wrong ring used in %s", __func__);

        return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
}

/**
 * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware unified write pointer
 */
static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
                DRM_ERROR("wrong ring used in %s", __func__);

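        /* with doorbells, wptr_cpu_addr holds the last value committed by set_wptr */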
        if (ring->use_doorbell)
                return *ring->wptr_cpu_addr;
        else
                return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
                                    regUVD_RB_WPTR);
}

/**
 * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
                DRM_ERROR("wrong ring used in %s", __func__);

        if (ring->use_doorbell) {
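                /* publish the new wptr to memory before ringing the doorbell */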
                *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
        } else {
                WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
                             lower_32_bits(ring->wptr));
        }
}

static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_ENC,
        .align_mask = 0x3f,
        .nop = VCN_ENC_CMD_NO_OP,
        .get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
        .get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
        .set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
                5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
                1, /* vcn_v2_0_enc_ring_insert_end */
        .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
        .emit_ib = vcn_v2_0_enc_ring_emit_ib,
        .emit_fence = vcn_v2_0_enc_ring_emit_fence,
        .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
        .test_ring = amdgpu_vcn_enc_ring_test_ring,
        .test_ib = amdgpu_vcn_unified_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vcn_v2_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vcn_ring_begin_use,
        .end_use = amdgpu_vcn_ring_end_use,
        .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
        .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set unified ring functions
 */
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
{
        int i, vcn_inst;

        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
                adev->vcn.inst[i].ring_enc[0].me = i;
                vcn_inst = GET_INST(VCN, i);
                adev->vcn.inst[i].aid_id =
                        vcn_inst / adev->vcn.num_inst_per_aid;
        }
        DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
}

/**
 * vcn_v4_0_3_is_idle - check whether VCN block is idle
 *
 * @handle: amdgpu_device pointer
 *
 * Check whether VCN block is idle
 */
static bool vcn_v4_0_3_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i, ret = 1;

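        /* report idle only when every VCN instance reads back UVD_STATUS__IDLE */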
        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
                        UVD_STATUS__IDLE);
        }

        return ret;
}

/**
 * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
 *
 * @handle: amdgpu_device pointer
 *
 * Wait for VCN block idle
 */
static int vcn_v4_0_3_wait_for_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i, ret = 0;

        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
                                         UVD_STATUS__IDLE, UVD_STATUS__IDLE);
                if (ret)
                        return ret;
        }

        return ret;
}

/**
 * vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
 *
 * @handle: amdgpu_device pointer
 * @state: clock gating state
 *
 * Set VCN block clockgating state
 */
static int vcn_v4_0_3_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = state == AMD_CG_STATE_GATE;
        int i;

        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                if (enable) {
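                        /* refuse to gate clocks while the instance is still busy */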
                        if (RREG32_SOC15(VCN, GET_INST(VCN, i),
                                         regUVD_STATUS) != UVD_STATUS__IDLE)
                                return -EBUSY;
                        vcn_v4_0_3_enable_clock_gating(adev, i);
                } else {
                        vcn_v4_0_3_disable_clock_gating(adev, i);
                }
        }
        return 0;
}

/**
 * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
 *
 * @handle: amdgpu_device pointer
 * @state: power gating state
 *
 * Set VCN block powergating state
 */
static int vcn_v4_0_3_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;

        /* Under SR-IOV, the guest should not control VCN power gating;
         * the MMSCH firmware owns both power gating and clock gating,
         * so the guest must avoid touching the CGC and PG registers.
         */
        if (amdgpu_sriov_vf(adev)) {
                adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
                return 0;
        }

        if (state == adev->vcn.cur_state)
                return 0;

        if (state == AMD_PG_STATE_GATE)
                ret = vcn_v4_0_3_stop(adev);
        else
                ret = vcn_v4_0_3_start(adev);

        if (!ret)
                adev->vcn.cur_state = state;

        return ret;
}

/**
 * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt sources
 * @type: interrupt types
 * @state: interrupt states
 *
 * Set VCN block interrupt state
 */
static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned int type,
                                        enum amdgpu_interrupt_state state)
{
        return 0;
}

/**
 * vcn_v4_0_3_process_interrupt - process VCN block interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt sources
 * @entry: interrupt entry from clients and sources
 *
 * Process VCN block interrupt
 */
static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        uint32_t i, inst;

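        /* map the IH node id to its physical AID, then find the VCN instance on that AID */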
        i = node_id_to_phys_map[entry->node_id];

        DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");

        for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
                if (adev->vcn.inst[inst].aid_id == i)
                        break;

        if (inst >= adev->vcn.num_vcn_inst) {
                dev_WARN_ONCE(adev->dev, 1,
                              "Interrupt received for unknown VCN instance (node id %d)",
                              entry->node_id);
                return 0;
        }

        switch (entry->src_id) {
        case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
                amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
                break;
        default:
                DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
                              entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
        .set = vcn_v4_0_3_set_interrupt_state,
        .process = vcn_v4_0_3_process_interrupt,
};

/**
 * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set VCN block interrupt irq functions
 */
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
        int i;

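        /* all instances route through the first instance's irq source; count one interrupt type per instance */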
        for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
                adev->vcn.inst->irq.num_types++;
        }
        adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
}

static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
        .name = "vcn_v4_0_3",
        .early_init = vcn_v4_0_3_early_init,
        .late_init = NULL,
        .sw_init = vcn_v4_0_3_sw_init,
        .sw_fini = vcn_v4_0_3_sw_fini,
        .hw_init = vcn_v4_0_3_hw_init,
        .hw_fini = vcn_v4_0_3_hw_fini,
        .suspend = vcn_v4_0_3_suspend,
        .resume = vcn_v4_0_3_resume,
        .is_idle = vcn_v4_0_3_is_idle,
        .wait_for_idle = vcn_v4_0_3_wait_for_idle,
        .check_soft_reset = NULL,
        .pre_soft_reset = NULL,
        .soft_reset = NULL,
        .post_soft_reset = NULL,
        .set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
        .set_powergating_state = vcn_v4_0_3_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_VCN,
        .major = 4,
        .minor = 0,
        .rev = 3,
        .funcs = &vcn_v4_0_3_ip_funcs,
};
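
/*
 * Note: this IP block version is consumed by the IP discovery / SoC setup
 * code; a typical (illustrative, not from this file) hookup would be
 * amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block).
 */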

static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
        {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
        1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
        {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
        1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
};

static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
                                                  uint32_t vcn_inst,
                                                  void *ras_err_status)
{
        struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;

        /* VCN v4_0_3 only supports querying uncorrectable errors */
        amdgpu_ras_inst_query_ras_error_count(adev,
                        vcn_v4_0_3_ue_reg_list,
                        ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
                        NULL, 0, GET_INST(VCN, vcn_inst),
                        AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
                        &err_data->ue_count);
}

static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
                                             void *ras_err_status)
{
        uint32_t i;

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
                dev_warn(adev->dev, "VCN RAS is not supported\n");
                return;
        }

        for (i = 0; i < adev->vcn.num_vcn_inst; i++)
                vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
}

static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
                                                  uint32_t vcn_inst)
{
        amdgpu_ras_inst_reset_ras_error_count(adev,
                                        vcn_v4_0_3_ue_reg_list,
                                        ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
                                        GET_INST(VCN, vcn_inst));
}

static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
{
        uint32_t i;

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
                dev_warn(adev->dev, "VCN RAS is not supported\n");
                return;
        }

        for (i = 0; i < adev->vcn.num_vcn_inst; i++)
                vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
}

static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
        .query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
        .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
};

static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
        .ras_block = {
                .hw_ops = &vcn_v4_0_3_ras_hw_ops,
        },
};

static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
        adev->vcn.ras = &vcn_v4_0_3_ras;
}

static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
                                  int inst_idx, bool indirect)
{
        uint32_t tmp;

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
                return;

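        /* enable RAS event reporting through IH and PMI, and stall the video codec on error */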
        tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
              VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
              VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
              VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
        WREG32_SOC15_DPG_MODE(inst_idx,
                              SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
                              tmp, 0, indirect);

        tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK;
        WREG32_SOC15_DPG_MODE(inst_idx,
                              SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2),
                              tmp, 0, indirect);

        tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
        WREG32_SOC15_DPG_MODE(inst_idx,
                              SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
                              tmp, 0, indirect);
}