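drm/amdgpu: write the full 64-bit vm pte/pde flags for sdma (v3)

The PTE/PDE flags are 64 bits wide, but the PTE/PDE packet only
carried their lower 32 bits and hard-coded the upper dword to 0.
Emit both the lower and the upper 32 bits of the flags instead.

v2: apply the fix to all sdma engines
v3: squash in the fix for SI/CI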

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index a6862b1..112969f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -417,8 +417,8 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
 		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
 		ib->ptr[ib->length_dw++] = pe; /* dst addr */
 		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-		ib->ptr[ib->length_dw++] = flags; /* mask */
-		ib->ptr[ib->length_dw++] = 0;
+		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
 		ib->ptr[ib->length_dw++] = value; /* value */
 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
 		ib->ptr[ib->length_dw++] = incr; /* increment size */