Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 1 | /* |
| 2 | * GPL HEADER START |
| 3 | * |
| 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License version 2 only, |
| 8 | * as published by the Free Software Foundation. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, but |
| 11 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | * General Public License version 2 for more details (a copy is included |
| 14 | * in the LICENSE file that accompanied this code). |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License |
| 17 | * version 2 along with this program; If not, see |
Oleg Drokin | 6a5b99a | 2016-06-14 23:33:40 -0400 | [diff] [blame] | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 19 | * |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 20 | * GPL HEADER END |
| 21 | */ |
| 22 | /* |
| 23 | * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. |
| 24 | * Use is subject to license terms. |
| 25 | * |
Andreas Dilger | 1dc563a | 2015-11-08 18:09:37 -0500 | [diff] [blame] | 26 | * Copyright (c) 2012, 2015, Intel Corporation. |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 27 | */ |
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Internal interfaces of OSC layer.
 *
 * Author: Nikita Danilov <nikita.danilov@sun.com>
 * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
 */
| 41 | |
| 42 | #ifndef OSC_CL_INTERNAL_H |
| 43 | #define OSC_CL_INTERNAL_H |
| 44 | |
Greg Kroah-Hartman | 9fdaf8c | 2014-07-11 20:51:16 -0700 | [diff] [blame] | 45 | #include "../../include/linux/libcfs/libcfs.h" |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 46 | |
Greg Kroah-Hartman | 3ee3001 | 2014-07-11 22:16:18 -0700 | [diff] [blame] | 47 | #include "../include/obd.h" |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 48 | /* osc_build_res_name() */ |
Greg Kroah-Hartman | 3ee3001 | 2014-07-11 22:16:18 -0700 | [diff] [blame] | 49 | #include "../include/cl_object.h" |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 50 | #include "osc_internal.h" |
| 51 | |
| 52 | /** \defgroup osc osc |
| 53 | * @{ |
| 54 | */ |
| 55 | |
| 56 | struct osc_extent; |
| 57 | |
/**
 * State maintained by osc layer for each IO context.
 */
struct osc_io {
	/** super class */
	struct cl_io_slice oi_cl;
	/** true if this io is lockless. */
	unsigned int oi_lockless;
	/** how many LRU pages are reserved for this IO */
	unsigned long oi_lru_reserved;

	/** active extents, we know how many bytes is going to be written,
	 * so having an active extent will prevent it from being fragmented
	 */
	struct osc_extent *oi_active;
	/** partially truncated extent, we need to hold this extent to prevent
	 * page writeback from happening.
	 */
	struct osc_extent *oi_trunc;

	/** write osc_lock for this IO, used by osc_extent_find(). */
	struct osc_lock *oi_write_osclock;
	/* RPC parameter block and on-wire attributes for this IO.
	 * NOTE(review): exact usage lives in osc_io.c -- confirm there.
	 */
	struct obd_info oi_info;
	struct obdo oi_oa;
	/** callback arguments for async operations issued on behalf of this
	 * IO: opc_rc stores the result; opc_sync is presumably completed by
	 * the RPC interpret callback -- confirm in osc_io.c.
	 */
	struct osc_async_cbargs {
		bool opc_rpc_sent;
		int opc_rc;
		struct completion opc_sync;
	} oi_cbarg;
};
| 88 | |
/**
 * State of transfer for osc.
 */
struct osc_req {
	/** super class; the only state osc attaches to a cl_req. */
	struct cl_req_slice or_cl;
};
| 95 | |
/**
 * State maintained by osc layer for the duration of a system call.
 */
struct osc_session {
	/** per-call IO state; returned by osc_env_io(). */
	struct osc_io os_io;
};
| 102 | |
#define OTI_PVEC_SIZE 256
/**
 * Per-thread scratch storage for the osc layer, retrieved via
 * osc_env_info().  Fields appear to be reusable buffers that avoid
 * placing large objects on the stack -- confirm usage in osc_*.c.
 */
struct osc_thread_info {
	struct ldlm_res_id oti_resname;	/* ldlm resource name buffer */
	ldlm_policy_data_t oti_policy;	/* ldlm policy buffer */
	struct cl_lock_descr oti_descr;	/* lock descriptor buffer */
	struct cl_attr oti_attr;	/* attribute buffer */
	struct lustre_handle oti_handle;	/* lock handle buffer */
	struct cl_page_list oti_plist;	/* page list buffer */
	struct cl_io oti_io;		/* spare cl_io */
	void *oti_pvec[OTI_PVEC_SIZE];	/* scratch pointer array */
	/**
	 * Fields used by cl_lock_discard_pages().
	 */
	pgoff_t oti_next_index;
	pgoff_t oti_fn_index; /* first non-overlapped index */
	struct cl_sync_io oti_anchor;	/* sync IO anchor */
};
| 120 | |
struct osc_object {
	/** super class */
	struct cl_object oo_cl;
	/* stripe info; NOTE(review): presumably owned by the LOV layer --
	 * confirm at the call sites that set it.
	 */
	struct lov_oinfo *oo_oinfo;
	/**
	 * True if locking against this stripe got -EUSERS.
	 */
	int oo_contended;
	/* jiffies timestamp paired with oo_contended; see
	 * osc_object_set_contended() -- TODO(review) confirm semantics.
	 */
	unsigned long oo_contention_time;
	/**
	 * List of pages in transfer.
	 */
	struct list_head oo_inflight[CRT_NR];
	/**
	 * Lock, protecting osc_page::ops_inflight, because a seat-belt is
	 * locked during take-off and landing.
	 */
	spinlock_t oo_seatbelt;

	/**
	 * used by the osc to keep track of what objects to build into rpcs.
	 * Protected by client_obd->cli_loi_list_lock.
	 */
	struct list_head oo_ready_item;
	struct list_head oo_hp_ready_item;
	struct list_head oo_write_item;
	struct list_head oo_read_item;

	/**
	 * extent is a red black tree to manage (async) dirty pages.
	 */
	struct rb_root oo_root;
	/**
	 * Manage write(dirty) extents.
	 */
	struct list_head oo_hp_exts; /* list of hp extents */
	struct list_head oo_urgent_exts; /* list of writeback extents */
	struct list_head oo_rpc_exts;

	struct list_head oo_reading_exts;

	atomic_t oo_nr_reads;
	atomic_t oo_nr_writes;

	/** Protect extent tree. Will be used to protect
	 * oo_{read|write}_pages soon.
	 */
	spinlock_t oo_lock;

	/**
	 * Radix tree for caching pages
	 */
	struct radix_tree_root oo_tree;
	/* presumably guards oo_tree and oo_npages -- confirm in osc_page.c */
	spinlock_t oo_tree_lock;
	unsigned long oo_npages;

	/* Protect osc_lock this osc_object has */
	spinlock_t oo_ol_spin;
	struct list_head oo_ol_list;
};
| 180 | |
/** Acquire osc_object::oo_lock (the extent tree lock). */
static inline void osc_object_lock(struct osc_object *obj)
{
	spin_lock(&obj->oo_lock);
}
| 185 | |
/** Try to acquire osc_object::oo_lock; non-zero iff the lock was taken. */
static inline int osc_object_trylock(struct osc_object *obj)
{
	return spin_trylock(&obj->oo_lock);
}
| 190 | |
/** Release osc_object::oo_lock taken by osc_object_lock()/trylock(). */
static inline void osc_object_unlock(struct osc_object *obj)
{
	spin_unlock(&obj->oo_lock);
}
| 195 | |
/**
 * Whether osc_object::oo_lock is currently held.  Only meaningful where
 * spin_is_locked() is (SMP or spinlock debugging); on UP builds without
 * debugging, spinlocks compile away, so conservatively report "locked" --
 * acceptable because the result is used for assertions only.
 */
static inline int osc_object_is_locked(struct osc_object *obj)
{
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	return spin_is_locked(&obj->oo_lock);
#else
	/*
	 * It is not perfect to return true all the time.
	 * But since this function is only used for assertion
	 * and checking, it seems OK.
	 */
	return 1;
#endif
}
| 209 | |
/*
 * Lock "micro-states" for osc layer.
 *
 * Stored in osc_lock::ols_state.  The values appear to progress from
 * OLS_NEW through OLS_GRANTED to OLS_CANCELLED; the transitions live in
 * osc_lock.c and are not visible from this header.
 */
enum osc_lock_state {
	OLS_NEW,
	OLS_ENQUEUED,
	OLS_UPCALL_RECEIVED,
	OLS_GRANTED,
	OLS_CANCELLED
};
| 220 | |
/**
 * osc-private state of cl_lock.
 *
 * Interaction with DLM.
 *
 * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
 * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
 *
 * This pointer is protected through a reference, acquired by
 * osc_lock_upcall0(). Also, an additional reference is acquired by
 * ldlm_lock_addref() call protecting the lock from cancellation, until
 * osc_lock_unuse() releases it.
 *
 * Below is a description of how lock references are acquired and released
 * inside of DLM.
 *
 * - When new lock is created and enqueued to the server (ldlm_cli_enqueue())
 *      - ldlm_lock_create()
 *	  - ldlm_lock_new(): initializes a lock with 2 references. One for
 *	    the caller (released when reply from the server is received, or on
 *	    error), and another for the hash table.
 *	  - ldlm_lock_addref_internal(): protects the lock from cancellation.
 *
 * - When reply is received from the server (osc_enqueue_interpret())
 *	  - ldlm_cli_enqueue_fini()
 *	    - LDLM_LOCK_PUT(): releases caller reference acquired by
 *	      ldlm_lock_new().
 *	    - if (rc != 0)
 *		  ldlm_lock_decref(): error case: matches ldlm_cli_enqueue().
 *	  - ldlm_lock_decref(): for async locks, matches ldlm_cli_enqueue().
 *
 * - When lock is being cancelled (ldlm_lock_cancel())
 *	  - ldlm_lock_destroy()
 *	    - LDLM_LOCK_PUT(): releases hash-table reference acquired by
 *	      ldlm_lock_new().
 *
 * osc_lock is detached from ldlm_lock by osc_lock_detach() that is called
 * either when lock is cancelled (osc_lock_blocking()), or when locks is
 * deleted without cancellation (e.g., from cl_locks_prune()). In the latter
 * case ldlm lock remains in memory, and can be re-attached to osc_lock in
 * the future.
 */
struct osc_lock {
	/** super class */
	struct cl_lock_slice ols_cl;
	/** Internal lock to protect states, etc. */
	spinlock_t ols_lock;
	/** Owner sleeps on this channel for state change */
	struct cl_sync_io *ols_owner;
	/** waiting list for this lock to be cancelled */
	struct list_head ols_waiting_list;
	/** wait entry of ols_waiting_list */
	struct list_head ols_wait_entry;
	/** list entry for osc_object::oo_ol_list */
	struct list_head ols_nextlock_oscobj;

	/** underlying DLM lock */
	struct ldlm_lock *ols_dlmlock;
	/** DLM flags with which osc_lock::ols_lock was enqueued */
	__u64 ols_flags;
	/** osc_lock::ols_lock handle */
	struct lustre_handle ols_handle;
	/* enqueue parameters; NOTE(review): presumably passed to
	 * ldlm_cli_enqueue() -- confirm in osc_lock.c.
	 */
	struct ldlm_enqueue_info ols_einfo;
	/** current micro-state, \see enum osc_lock_state */
	enum osc_lock_state ols_state;
	/** lock value block */
	struct ost_lvb ols_lvb;

	/**
	 * true, if ldlm_lock_addref() was called against
	 * osc_lock::ols_lock. This is used for sanity checking.
	 *
	 * \see osc_lock::ols_has_ref
	 */
	unsigned ols_hold :1,
	/**
	 * this is much like osc_lock::ols_hold, except that this bit is
	 * cleared _after_ reference in released in osc_lock_unuse(). This
	 * fine distinction is needed because:
	 *
	 * - if ldlm lock still has a reference, osc_ast_data_get() needs
	 *   to return associated cl_lock (so that a flag is needed that is
	 *   cleared after ldlm_lock_decref() returned), and
	 *
	 * - ldlm_lock_decref() can invoke blocking ast (for a
	 *   LDLM_FL_CBPENDING lock), and osc_lock functions like
	 *   osc_lock_cancel() called from there need to know whether to
	 *   release lock reference (so that a flag is needed that is
	 *   cleared before ldlm_lock_decref() is called).
	 */
		 ols_has_ref:1,
	/**
	 * inherit the lockless attribute from top level cl_io.
	 * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
	 */
		 ols_locklessable:1,
	/**
	 * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
	 * the EVAVAIL error as tolerable, this will make upper logic happy
	 * to wait all glimpse locks to each OSTs to be completed.
	 * Glimpse lock converts to normal lock if the server lock is
	 * granted.
	 * Glimpse lock should be destroyed immediately after use.
	 */
		 ols_glimpse:1,
	/**
	 * For async glimpse lock.
	 */
		 ols_agl:1;
};
| 329 | |
/**
 * Page state private for osc layer.
 */
struct osc_page {
	/** super class */
	struct cl_page_slice ops_cl;
	/**
	 * Page queues used by osc to detect when RPC can be formed.
	 */
	struct osc_async_page ops_oap;
	/**
	 * An offset within page from which next transfer starts. This is used
	 * by cl_page_clip() to submit partial page transfers.
	 */
	int ops_from;
	/**
	 * An offset within page at which next transfer ends.
	 *
	 * \see osc_page::ops_from.
	 */
	int ops_to;
	/**
	 * Boolean, true iff page is under transfer. Used for sanity checking.
	 */
	unsigned ops_transfer_pinned:1,
	/**
	 * in LRU?
	 */
		 ops_in_lru:1,
	/**
	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
	 */
		 ops_srvlock:1;
	/**
	 * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
	 */
	struct list_head ops_lru;
	/**
	 * Linkage into a per-osc_object list of pages in flight. For
	 * debugging.
	 */
	struct list_head ops_inflight;
	/**
	 * Thread that submitted this page for transfer. For debugging.
	 */
	struct task_struct *ops_submitter;
	/**
	 * Submit time - the time when the page is starting RPC. For
	 * debugging.
	 */
	unsigned long ops_submit_time;
};
| 380 | |
/* Slab caches for osc data structures; defined outside this header. */
extern struct kmem_cache *osc_lock_kmem;
extern struct kmem_cache *osc_object_kmem;
extern struct kmem_cache *osc_thread_kmem;
extern struct kmem_cache *osc_session_kmem;
extern struct kmem_cache *osc_req_kmem;
extern struct kmem_cache *osc_extent_kmem;

/* Device type and lu_context keys used by the accessors below. */
extern struct lu_device_type osc_device_type;
extern struct lu_context_key osc_key;
extern struct lu_context_key osc_session_key;

/* ASYNC_* page flag combination; NOTE(review): confirm users in osc .c
 * files.
 */
#define OSC_FLAGS (ASYNC_URGENT | ASYNC_READY)
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 393 | |
/* Initializers for the osc slices of cl_lock/cl_io/cl_req/cl_object/
 * cl_page.
 */
int osc_lock_init(const struct lu_env *env,
		  struct cl_object *obj, struct cl_lock *lock,
		  const struct cl_io *io);
int osc_io_init(const struct lu_env *env,
		struct cl_object *obj, struct cl_io *io);
int osc_req_init(const struct lu_env *env, struct cl_device *dev,
		 struct cl_req *req);
struct lu_object *osc_object_alloc(const struct lu_env *env,
				   const struct lu_object_header *hdr,
				   struct lu_device *dev);
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
		  struct cl_page *page, pgoff_t ind);

/* ldlm policy / lock value block helpers. */
void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
		      pgoff_t start, pgoff_t end);
int osc_lvb_print(const struct lu_env *env, void *cookie,
		  lu_printer_t p, const struct ost_lvb *lvb);

/* Async page / dirty page cache operations (implemented in osc .c files). */
void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
		     enum cl_req_type crt, int brw_flags);
int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
			u32 async_flags);
int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
			struct page *page, loff_t offset);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
		       struct osc_page *ops);
int osc_page_cache_add(const struct lu_env *env,
		       const struct cl_page_slice *slice, struct cl_io *io);
int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
			    struct osc_page *ops);
int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
			 struct osc_page *ops);
int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
			 struct list_head *list, int cmd, int brw_flags);
int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
			     struct osc_object *obj, __u64 size);
void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
			    struct osc_object *obj);
int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
			      pgoff_t start, pgoff_t end, int hp, int discard);
int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
			 pgoff_t start, pgoff_t end);
void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
		   struct osc_object *osc);
int lru_queue_work(const struct lu_env *env, void *data);

/* Contention tracking, \see osc_object::oo_contended. */
void osc_object_set_contended(struct osc_object *obj);
void osc_object_clear_contended(struct osc_object *obj);
int osc_object_is_contended(struct osc_object *obj);

int osc_lock_is_lockless(const struct osc_lock *olck);
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 447 | |
| 448 | /***************************************************************************** |
| 449 | * |
| 450 | * Accessors. |
| 451 | * |
| 452 | */ |
| 453 | |
| 454 | static inline struct osc_thread_info *osc_env_info(const struct lu_env *env) |
| 455 | { |
| 456 | struct osc_thread_info *info; |
| 457 | |
| 458 | info = lu_context_key_get(&env->le_ctx, &osc_key); |
Oleg Drokin | 7f1ae4c | 2016-02-16 00:46:57 -0500 | [diff] [blame] | 459 | LASSERT(info); |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 460 | return info; |
| 461 | } |
| 462 | |
| 463 | static inline struct osc_session *osc_env_session(const struct lu_env *env) |
| 464 | { |
| 465 | struct osc_session *ses; |
| 466 | |
| 467 | ses = lu_context_key_get(env->le_ses, &osc_session_key); |
Oleg Drokin | 7f1ae4c | 2016-02-16 00:46:57 -0500 | [diff] [blame] | 468 | LASSERT(ses); |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 469 | return ses; |
| 470 | } |
| 471 | |
/** Return the osc_io embedded in the per-call osc_session. */
static inline struct osc_io *osc_env_io(const struct lu_env *env)
{
	return &osc_env_session(env)->os_io;
}
| 476 | |
/** True iff @obj belongs to an osc device. */
static inline int osc_is_object(const struct lu_object *obj)
{
	return obj->lo_dev->ld_type == &osc_device_type;
}
| 481 | |
/** Convert a generic lu_device to the enclosing osc_device. */
static inline struct osc_device *lu2osc_dev(const struct lu_device *d)
{
	LINVRNT(d->ld_type == &osc_device_type);
	return container_of0(d, struct osc_device, od_cl.cd_lu_dev);
}
| 487 | |
/** Return the obd_export of the osc device this object lives on. */
static inline struct obd_export *osc_export(const struct osc_object *obj)
{
	return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp;
}
| 492 | |
/** Return the client_obd embedded in this object's export's obd_device. */
static inline struct client_obd *osc_cli(const struct osc_object *obj)
{
	return &osc_export(obj)->exp_obd->u.cli;
}
| 497 | |
/** Convert a cl_object to the enclosing osc_object. */
static inline struct osc_object *cl2osc(const struct cl_object *obj)
{
	LINVRNT(osc_is_object(&obj->co_lu));
	return container_of0(obj, struct osc_object, oo_cl);
}
| 503 | |
/** Up-cast an osc_object to its embedded cl_object (cast drops const). */
static inline struct cl_object *osc2cl(const struct osc_object *obj)
{
	return (struct cl_object *)&obj->oo_cl;
}
| 508 | |
Oleg Drokin | 52ee0d2 | 2016-02-24 21:59:54 -0500 | [diff] [blame] | 509 | static inline enum ldlm_mode osc_cl_lock2ldlm(enum cl_lock_mode mode) |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 510 | { |
| 511 | LASSERT(mode == CLM_READ || mode == CLM_WRITE || mode == CLM_GROUP); |
| 512 | if (mode == CLM_READ) |
| 513 | return LCK_PR; |
| 514 | else if (mode == CLM_WRITE) |
| 515 | return LCK_PW; |
| 516 | else |
| 517 | return LCK_GROUP; |
| 518 | } |
| 519 | |
Oleg Drokin | 52ee0d2 | 2016-02-24 21:59:54 -0500 | [diff] [blame] | 520 | static inline enum cl_lock_mode osc_ldlm2cl_lock(enum ldlm_mode mode) |
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 521 | { |
| 522 | LASSERT(mode == LCK_PR || mode == LCK_PW || mode == LCK_GROUP); |
| 523 | if (mode == LCK_PR) |
| 524 | return CLM_READ; |
| 525 | else if (mode == LCK_PW) |
| 526 | return CLM_WRITE; |
| 527 | else |
| 528 | return CLM_GROUP; |
| 529 | } |
| 530 | |
/** Convert a cl_page_slice to the enclosing osc_page. */
static inline struct osc_page *cl2osc_page(const struct cl_page_slice *slice)
{
	LINVRNT(osc_is_object(&slice->cpl_obj->co_lu));
	return container_of0(slice, struct osc_page, ops_cl);
}
| 536 | |
/** Convert an osc_async_page to the enclosing osc_page. */
static inline struct osc_page *oap2osc(struct osc_async_page *oap)
{
	return container_of0(oap, struct osc_page, ops_oap);
}
| 541 | |
/** Return the page index of this osc_page within its object. */
static inline pgoff_t osc_index(struct osc_page *opg)
{
	return opg->ops_cl.cpl_index;
}
| 546 | |
/** Return the cl_page associated with an osc_async_page. */
static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
{
	return oap2osc(oap)->ops_cl.cpl_page;
}
| 551 | |
| 552 | static inline struct osc_page *oap2osc_page(struct osc_async_page *oap) |
| 553 | { |
| 554 | return (struct osc_page *)container_of(oap, struct osc_page, ops_oap); |
| 555 | } |
| 556 | |
/** Convert a cl_lock_slice to the enclosing osc_lock. */
static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice)
{
	LINVRNT(osc_is_object(&slice->cls_obj->co_lu));
	return container_of0(slice, struct osc_lock, ols_cl);
}
| 562 | |
/** Return the osc slice of a cl_lock. */
static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock)
{
	return cl2osc_lock(cl_lock_at(lock, &osc_device_type));
}
| 567 | |
| 568 | static inline int osc_io_srvlock(struct osc_io *oio) |
| 569 | { |
| 570 | return (oio->oi_lockless && !oio->oi_cl.cis_io->ci_no_srvlock); |
| 571 | } |
| 572 | |
/*
 * Life cycle states of an osc_extent; stored in osc_extent::oe_state.
 */
enum osc_extent_state {
	OES_INV = 0, /** extent is just initialized or destroyed */
	OES_ACTIVE = 1, /** process is using this extent */
	OES_CACHE = 2, /** extent is ready for IO */
	OES_LOCKING = 3, /** locking page to prepare IO */
	OES_LOCK_DONE = 4, /** locking finished, ready to send */
	OES_RPC = 5, /** in RPC */
	OES_TRUNC = 6, /** being truncated */
	OES_STATE_MAX
};
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 583 | |
/**
 * osc_extent data to manage dirty pages.
 * osc_extent has the following attributes:
 * 1. all pages in the same must be in one RPC in write back;
 * 2. # of pages must be less than max_pages_per_rpc - implied by 1;
 * 3. must be covered by only 1 osc_lock;
 * 4. exclusive. It's impossible to have overlapped osc_extent.
 *
 * The lifetime of an extent is from when the 1st page is dirtied to when
 * all pages inside it are written out.
 *
 * LOCKING ORDER
 * =============
 * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock)
 */
struct osc_extent {
	/** red-black tree node */
	struct rb_node oe_node;
	/** osc_object of this extent */
	struct osc_object *oe_obj;
	/** refcount, removed from red-black tree if reaches zero. */
	atomic_t oe_refc;
	/** busy if non-zero */
	atomic_t oe_users;
	/** link list of osc_object's oo_{hp|urgent|locking}_exts. */
	struct list_head oe_link;
	/** state of this extent, \see enum osc_extent_state */
	enum osc_extent_state oe_state;
	/** flags for this extent. */
	unsigned int oe_intree:1,
	/** 0 is write, 1 is read */
		     oe_rw:1,
	/** sync extent, queued by osc_queue_sync_pages() */
		     oe_sync:1,
	/* server-side lock used for the transfer?
	 * NOTE(review): presumably mirrors OBD_BRW_SRVLOCK -- confirm.
	 */
		     oe_srvlock:1,
	/* NOTE(review): semantics not visible from this header -- confirm
	 * in osc_cache.c.
	 */
		     oe_memalloc:1,
	/** an ACTIVE extent is going to be truncated, so when this extent
	 * is released, it will turn into TRUNC state instead of CACHE.
	 */
		     oe_trunc_pending:1,
	/** this extent should be written asap and someone may wait for the
	 * write to finish. This bit is usually set along with urgent if
	 * the extent was CACHE state.
	 * fsync_wait extent can't be merged because new extent region may
	 * exceed fsync range.
	 */
		     oe_fsync_wait:1,
	/** covering lock is being canceled */
		     oe_hp:1,
	/** this extent should be written back asap. set if one of pages is
	 * called by page WB daemon, or sync write or reading requests.
	 */
		     oe_urgent:1;
	/** how many grants allocated for this extent.
	 * Grant allocated for this extent. There is no grant allocated
	 * for reading extents and sync write extents.
	 */
	unsigned int oe_grants;
	/** # of dirty pages in this extent */
	unsigned int oe_nr_pages;
	/** list of pending oap pages. Pages in this list are NOT sorted. */
	struct list_head oe_pages;
	/** Since an extent has to be written out in atomic, this is used to
	 * remember the next page need to be locked to write this extent out.
	 * Not used right now.
	 */
	struct osc_page *oe_next_page;
	/** start and end index of this extent, include start and end
	 * themselves. Page offset here is the page index of osc_pages.
	 * oe_start is used as keyword for red-black tree.
	 */
	pgoff_t oe_start;
	pgoff_t oe_end;
	/** maximum ending index of this extent, this is limited by
	 * max_pages_per_rpc, lock extent and chunk size.
	 */
	pgoff_t oe_max_end;
	/** waitqueue - for those who want to be notified if this extent's
	 * state has changed.
	 */
	wait_queue_head_t oe_waitq;
	/** lock covering this extent */
	struct ldlm_lock *oe_dlmlock;
	/** terminator of this extent. Must be true if this extent is in IO. */
	struct task_struct *oe_owner;
	/** return value of writeback. If somebody is waiting for this extent,
	 * this value can be known by outside world.
	 */
	int oe_rc;
	/** max pages per rpc when this extent was created */
	unsigned int oe_mppr;
};
| 676 | |
/* Extent lifetime: finish a transferred extent / release a busy one. */
int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
		      int sent, int rc);
void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);

/* Discard cached pages of @osc within [start, end]. */
int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode);

/* Callback type and iterator for walking cached pages of an object. */
typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
				 struct osc_page *, void *);
int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
			 struct osc_object *osc, pgoff_t start, pgoff_t end,
			 osc_page_gang_cbt cb, void *cbdata);
Peng Tao | d7e09d0 | 2013-05-02 16:46:55 +0800 | [diff] [blame] | 689 | /** @} osc */ |
| 690 | |
| 691 | #endif /* OSC_CL_INTERNAL_H */ |