| /* |
| * Copyright (C) 2021 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H |
| #define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H |
| |
| #include <android-base/thread_annotations.h> |
| |
| #include <atomic> |
| #include <condition_variable> |
| #include <cstddef> |
| #include <mutex> |
| #include <thread> |
| #include <vector> |
| |
| namespace android { |
| namespace renderscript { |
| |
// Forward declaration: this class only stores and accepts pointers to Restriction.
struct Restriction;

/**
 * Description of the data to be processed for one Toolkit method call, e.g. one blur or one
 * blend operation.
 *
 * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned
 * bytes. The most typical configuration is a 2D array of uchar4 used to represent RGBA images.
 *
 * This is a base class. There will be a subclass for each Toolkit op.
 *
 * Typical usage of a derived class would look like:
 *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
 *    processor->doTask(&task);
 *
 * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling
 * processTile(). Other classes should not call setTiling(), setUsesSimd(), or processTile().
 */
class Task {
protected:
    /**
     * Number of cells in the X direction.
     */
    const size_t mSizeX;
    /**
     * Number of cells in the Y direction.
     */
    const size_t mSizeY;
    /**
     * Number of elements in a vector (cell). From 1-4.
     */
    const size_t mVectorSize;
    /**
     * Whether the task prefers the processData call to represent the work to be done as
     * one line rather than a rectangle. This would be the case for work that doesn't involve
     * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the
     * number of SIMD calls to make, i.e. have one call that covers all the rows.
     *
     * This setting will be used only when a tile covers the entire width of the data to be
     * processed.
     */
    const bool mPrefersDataAsOneRow;
    /**
     * Whether the processor we're working on supports SIMD operations.
     */
    bool mUsesSimd = false;

private:
    /**
     * If not null, we'll process a subset of the whole 2D array. This specifies the restriction.
     * Not owned; see the constructor for the lifetime requirement.
     */
    const Restriction* mRestriction;

    // The work is divided into rectangular tiles. The four fields below describe that tiling;
    // see setTiling().

    /**
     * Size of a tile in the X direction, as a number of cells.
     */
    size_t mCellsPerTileX = 0;
    /**
     * Size of a tile in the Y direction, as a number of cells.
     */
    size_t mCellsPerTileY = 0;
    /**
     * Number of tiles per row of the restricted area we're working on.
     */
    size_t mTilesPerRow = 0;
    /**
     * Number of tiles per column of the restricted area we're working on.
     */
    size_t mTilesPerColumn = 0;

public:
    /**
     * Construct a task.
     *
     * The Toolkit validates the arguments, so they are not validated again here.
     *
     * @param sizeX Number of cells in the X direction. Should be greater than 0.
     * @param sizeY Number of cells in the Y direction. Should be greater than 0.
     * @param vectorSize Number of elements per cell. Should be between 1 and 4.
     * @param prefersDataAsOneRow See mPrefersDataAsOneRow.
     * @param restriction Optional sub-area to process; may be null. It should outlive this
     *                    instance.
     */
    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
         const Restriction* restriction)
        : mSizeX{sizeX},
          mSizeY{sizeY},
          mVectorSize{vectorSize},
          mPrefersDataAsOneRow{prefersDataAsOneRow},
          mRestriction{restriction} {}

    // Virtual so that derived tasks are destroyed correctly through a Task pointer.
    virtual ~Task() = default;

    /**
     * Records whether the processor running this task supports SIMD operations.
     */
    void setUsesSimd(bool uses) { mUsesSimd = uses; }

    /**
     * Divide the work into a number of tiles that can be distributed to the various threads.
     * A tile will be a rectangular region. To be robust, we'll want to handle regular cases
     * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1.
     *
     * We have a target size for the tiles, which corresponds roughly to how much data a thread
     * will want to process before checking for more work. If the target is set too low, we'll
     * spend more time in synchronization. If it's too large, some cores may not be used as
     * efficiently.
     *
     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
     * @return The number of tiles.
     */
    int setTiling(unsigned int targetTileSizeInBytes);

    /**
     * This is called by the TaskProcessor to instruct the task to process a tile.
     *
     * @param threadIndex The index of the thread that's processing the tile.
     * @param tileIndex The index of the tile to process.
     */
    void processTile(unsigned int threadIndex, size_t tileIndex);

private:
    /**
     * Call to the derived class to process the data bounded by the rectangle specified
     * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle
     * will be contained within the restriction, if one is provided.
     */
    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
                             size_t endY) = 0;
};
| |
| /** |
| * There's one instance of the task processor for the Toolkit. This class owns the thread pool, |
| * and dispatches the tiles of work to the threads. |
| */ |
| class TaskProcessor { |
| /** |
| * Does this processor support SIMD-like instructions? |
| */ |
| const bool mUsesSimd; |
| /** |
| * The number of separate threads we'll spawn. It's one less than the number of threads that |
| * do the work as the client thread that starts the work will also be used. |
| */ |
| const unsigned int mNumberOfPoolThreads; |
| /** |
| * Ensures that only one task is done at a time. |
| */ |
| std::mutex mTaskMutex; |
| /** |
| * Ensures consistent access to the shared queue state. |
| */ |
| std::mutex mQueueMutex; |
| /** |
| * The thread pool workers. |
| */ |
| std::vector<std::thread> mPoolThreads; |
| /** |
| * The task being processed, if any. We only do one task at a time. We could create a queue |
| * of tasks but using a mTaskMutex is sufficient for now. |
| */ |
| Task* mCurrentTask GUARDED_BY(mTaskMutex) = nullptr; |
| /** |
| * Signals that the mPoolThreads should terminate. |
| */ |
| bool mStopThreads GUARDED_BY(mQueueMutex) = false; |
| /** |
| * Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used |
| * to distinguish between the two. |
| */ |
| std::condition_variable mWorkAvailableOrStop; |
| /** |
| * Signaled when the work for the task is finished. |
| */ |
| std::condition_variable mWorkIsFinished; |
| /** |
| * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts |
| * working on a new tile, it uses this count to identify which tile to work on. The tile |
| * number is sufficient to determine the boundaries of the data to process. |
| * |
| * The number of tiles left to process. |
| */ |
| int mTilesNotYetStarted GUARDED_BY(mQueueMutex) = 0; |
| /** |
| * The number of tiles currently being processed. Must not be greater than |
| * mNumberOfPoolThreads + 1. |
| */ |
| int mTilesInProcess GUARDED_BY(mQueueMutex) = 0; |
| |
| /** |
| * Determines how we'll tile the work and signals the thread pool of available work. |
| * |
| * @param task The task to be performed. |
| */ |
| void startWork(Task* task) REQUIRES(mTaskMutex); |
| |
| /** |
| * Tells the thread to start processing work off the queue. |
| * |
| * The flag is used for prevent the main thread from blocking forever if the work is |
| * so trivial that the worker threads complete the work before the main thread calls this |
| * method. |
| * |
| * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by. |
| * @param returnWhenNoWork If there's no work, return immediately. |
| */ |
| void processTilesOfWork(int threadIndex, bool returnWhenNoWork); |
| |
| /** |
| * Wait for the pool workers to complete the work on the current task. |
| */ |
| void waitForPoolWorkersToComplete(); |
| |
| public: |
| /** |
| * Create the processor. |
| * |
| * @param numThreads The total number of threads to use. If 0, we'll decided based on system |
| * properties. |
| */ |
| explicit TaskProcessor(unsigned int numThreads = 0); |
| |
| ~TaskProcessor(); |
| |
| /** |
| * Do the specified task. Returns only after the task has been completed. |
| */ |
| void doTask(Task* task); |
| |
| /** |
| * Some Tasks need to allocate temporary storage for each worker thread. |
| * This provides the number of threads. |
| */ |
| unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; } |
| }; |
| |
| } // namespace renderscript |
| } // namespace android |
| |
| #endif // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H |