Fix the test: dead-code elimination (DCE) optimized away the store being checked.

Use a volatile store so that it survives DCE and still appears in the generated PTX.

Patch by Jingyue Wu.

llvm-svn: 206763
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 178dff1..c0d7d1c 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -3,16 +3,16 @@
 
 ; Ensure we access the local stack properly
 
-; PTX32:        mov.u32         %r{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX32:        cvta.local.u32  %SP, %r{{[0-9]+}};
-; PTX32:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
-; PTX32:        st.u32  [%SP+0], %r{{[0-9]+}};
-; PTX64:        mov.u64         %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX64:        cvta.local.u64  %SP, %rl{{[0-9]+}};
-; PTX64:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
-; PTX64:        st.u32  [%SP+0], %r{{[0-9]+}};
+; PTX32:        mov.u32          %r{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX32:        cvta.local.u32   %SP, %r{{[0-9]+}};
+; PTX32:        ld.param.u32     %r{{[0-9]+}}, [foo_param_0];
+; PTX32:        st.volatile.u32  [%SP+0], %r{{[0-9]+}};
+; PTX64:        mov.u64          %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX64:        cvta.local.u64   %SP, %rl{{[0-9]+}};
+; PTX64:        ld.param.u32     %r{{[0-9]+}}, [foo_param_0];
+; PTX64:        st.volatile.u32  [%SP+0], %r{{[0-9]+}};
 define void @foo(i32 %a) {
   %local = alloca i32, align 4
-  store i32 %a, i32* %local
+  store volatile i32 %a, i32* %local
   ret void
 }