/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkJumper_generated.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

// Stages expect these constants to be set to these values.
// It's fine to rearrange and add new ones if you update SkJumper_constants.
static const SkJumper_constants kConstants = {
    1.0f, 0.5f, 255.0f, 1/255.0f, 0x000000ff,
    {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f},
    0.0025f, 0.6975f, 0.3000f, 1/12.92f, 0.055f,      // from_srgb
    12.46f, 0.411192f, 0.689206f, -0.0988f, 0.0043f,  // to_srgb
    0x77800000, 0x07800000,                           // fp16 <-> fp32
};
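// A pointer to this single copy is passed to every stage as its third argument,
// whichever instruction-set flavor ends up running (see the calls to start() in
// run_with_jumper() below).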

using JumperStage = void(size_t, void**, const SkJumper_constants*);

// Jumper stages actually pass around 8 floating-point vectors too.
// They're designed to work when those vectors start uninitialized,
// so we don't need to mention them here.
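
// STAGES is an X-macro: each use below defines M to expand a stage name into a
// declaration or a switch case, then invokes STAGES(M) to stamp that out for every stage.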
#define STAGES(M) \
    M(seed_shader) \
    M(constant_color) \
    M(clear) \
    M(plus_) \
    M(srcover) \
    M(dstover) \
    M(clamp_0) \
    M(clamp_1) \
    M(clamp_a) \
    M(swap) \
    M(move_src_dst) \
    M(move_dst_src) \
    M(premul) \
    M(unpremul) \
    M(from_srgb) \
    M(to_srgb) \
    M(scale_u8) \
    M(load_tables) \
    M(load_8888) \
    M(store_8888) \
    M(load_f16) \
    M(store_f16) \
    M(matrix_2x3) \
    M(matrix_3x4) \
    M(clamp_x) \
    M(clamp_y) \
    M(linear_gradient_2stops)

// Declare the portable, single-pixel stages that are linked into Skia from SkJumper_stages.o.
extern "C" {
    JumperStage sk_just_return;
    #define M(st) JumperStage sk_##st;
    STAGES(M)
    #undef M
}
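// For example, M(load_8888) above declares `JumperStage sk_load_8888;`.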

// Translate SkRasterPipeline's enum to pointers to our portable, single-pixel stages.
static void* portable_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)sk_##st;
        STAGES(M)
        #undef M
    }
}
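// Here M(srcover), for example, expands to
//     case SkRasterPipeline::srcover: return (void*)sk_srcover;
// Stages without a Jumper implementation hit the default case and return nullptr,
// which run_with_jumper() below treats as "can't run this pipeline with Jumper".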

// The non-portable options are pre-compiled static data arrays pulled in from SkJumper_generated.h.
#if defined(__aarch64__)
static void* aarch64_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)aarch64_sk_##st;
        STAGES(M)
        #undef M
    }
}
#elif defined(__ARM_NEON__)
static void* armv7_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)armv7_sk_##st;
        STAGES(M)
        #undef M
    }
}
#elif defined(__x86_64__) || defined(_M_X64)
static void* sse2_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)sse2_sk_##st;
        STAGES(M)
        #undef M
    }
}
static void* sse41_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)sse41_sk_##st;
        STAGES(M)
        #undef M
    }
}
static void* hsw_lookup(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return (void*)hsw_sk_##st;
        STAGES(M)
        #undef M
    }
}
#endif
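// Each flavor above also comes with its own <flavor>_sk_just_return, used below to
// terminate the program.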

bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const {
    // We'll look for the best vector instruction set and stride we can use.
    size_t stride = 0;
    void* (*lookup)(SkRasterPipeline::StockStage) = nullptr;
    void* just_return = nullptr;
#if defined(__aarch64__)
    stride = 4;
    lookup = aarch64_lookup;
    just_return = (void*)aarch64_sk_just_return;
#elif defined(__ARM_NEON__)
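    // The armv7 stages are only used when NEON, NEON FMA, and FP16 conversions are all
    // available; otherwise stride stays 0 and we rely on the portable code below.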
    if (SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
        stride = 2;
        lookup = armv7_lookup;
        just_return = (void*)armv7_sk_just_return;
    }
#elif defined(__x86_64__) || defined(_M_X64)
    stride = 4;
    lookup = sse2_lookup;
    just_return = (void*)sse2_sk_just_return;
    if (SkCpu::Supports(SkCpu::SSE41)) {
        stride = 4;
        lookup = sse41_lookup;
        just_return = (void*)sse41_sk_just_return;
    }
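    // Haswell's 256-bit AVX2 registers hold 8 floats, so the hsw stages cover
    // 8 pixels per call instead of 4.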
    if (SkCpu::Supports(SkCpu::HSW)) {
        stride = 8;
        lookup = hsw_lookup;
        just_return = (void*)hsw_sk_just_return;
    }
#endif
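    // If no vector flavor qualified above, stride is still 0 and everything is handled
    // by the portable stride=1 loop at the bottom.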
    SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1);
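    // Both loops below fill program with interleaved (stage function, context) pointer
    // pairs and terminate it with a just_return pointer: 2*fStages.size() + 1 slots in all.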
    // If possible, build and run a program at full vector stride.
    const size_t limit = x+n;
    if (stride) {
        void** ip = program.get();
        for (auto&& st : fStages) {
            auto fn = lookup(st.stage);
            if (!fn) {
                return false;
            }
            *ip++ = fn;
            *ip++ = st.ctx;
        }
        *ip = (void*)just_return;
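        // Pull off the first stage's function pointer; it's called with ip pointing at its own
        // context slot, and each stage is expected to chain on to the next entry in the program.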
        ip = program.get();
        auto start = (JumperStage*)*ip++;
        while (x + stride <= limit) {
            start(x, ip, &kConstants);
            x += stride;
        }
    }

    // If there's any leftover, build and run stride=1 portable code.
    if (x < limit) {
        stride = 1;
        void** ip = program.get();
        for (auto&& st : fStages) {
            auto fn = portable_lookup(st.stage);
            if (!fn) {
                return false;
            }
            *ip++ = fn;
            *ip++ = st.ctx;
        }
        *ip = (void*)sk_just_return;
        ip = program.get();
        auto start = (JumperStage*)*ip++;
        while (x + stride <= limit) {
            start(x, ip, &kConstants);
            x += stride;
        }
    }
    return true;
}