Michael Felt | c5ae169 | 2017-12-19 13:58:49 +0100 | [diff] [blame] | 1 | """ |
| 2 | Lib/ctypes.util.find_library() support for AIX |
| 3 | Similar approach as done for Darwin support by using separate files |
| 4 | but unlike Darwin - no extension such as ctypes.macholib.* |
| 5 | |
| 6 | dlopen() is an interface to AIX initAndLoad() - primary documentation at: |
| 7 | https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm |
| 8 | https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm |
| 9 | |
| 10 | AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix |
| 11 | platforms, but also a BSD style - aka SVR3. |
| 12 | |
| 13 | From AIX 5.3 Difference Addendum (December 2004) |
| 14 | 2.9 SVR4 linking affinity |
| 15 | Nowadays, there are two major object file formats used by the operating systems: |
| 16 | XCOFF: The COFF enhanced by IBM and others. The original COFF (Common |
| 17 | Object File Format) was the base of SVR3 and BSD 4.2 systems. |
| 18 | ELF: Executable and Linking Format that was developed by AT&T and is a |
| 19 | base for SVR4 UNIX. |
| 20 | |
| 21 | While the shared library content is identical on AIX - one is located as a filepath name |
| 22 | (svr4 style) and the other is located as a member of an archive (and the archive |
| 23 | is located as a filepath name). |
| 24 | |
| 25 | The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit). |
| 26 | For svr4 either only one ABI is supported, or there are two directories, or there |
| 27 | are different file names. The most common solution for multiple ABI is multiple |
| 28 | directories. |
| 29 | |
| 30 | For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient |
| 31 | as multiple shared libraries can be in the archive - even sharing the same name. |
| 32 | In documentation the archive is also referred to as the "base" and the shared |
| 33 | library object is referred to as the "member". |
| 34 | |
Leo Arias | c3d9508 | 2018-02-03 18:36:10 -0600 | [diff] [blame] | 35 | For dlopen() on AIX (read initAndLoad()) the calls are similar. |
Michael Felt | c5ae169 | 2017-12-19 13:58:49 +0100 | [diff] [blame] | 36 | Default activity occurs when no path information is provided. When path |
| 37 | information is provided dlopen() does not search any other directories. |
| 38 | |
| 39 | For SVR4 - the shared library name is the name of the file expected: libFOO.so |
| 40 | For AIX - the shared library is expressed as base(member). The search is for the |
| 41 | base (e.g., libFOO.a) and once the base is found the shared library - identified by |
| 42 | member (e.g., libFOO.so, or shr.o) is located and loaded. |
| 43 | |
| 44 | The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3) |
| 45 | naming style. |
| 46 | """ |
| 47 | __author__ = "Michael Felt <aixtools@felt.demon.nl>" |
| 48 | |
| 49 | import re |
| 50 | from os import environ, path |
| 51 | from sys import executable |
| 52 | from ctypes import c_void_p, sizeof |
| 53 | from subprocess import Popen, PIPE, DEVNULL |
| 54 | |
# Bit width of the running executable - 32 or 64 - derived from the
# size of a C pointer.
# Used to filter the search in an archive by size, e.g., -X64
AIX_ABI = 8 * sizeof(c_void_p)
| 58 | |
| 59 | |
| 60 | from sys import maxsize |
| 61 | def _last_version(libnames, sep): |
| 62 | def _num_version(libname): |
| 63 | # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR] |
| 64 | parts = libname.split(sep) |
| 65 | nums = [] |
| 66 | try: |
| 67 | while parts: |
| 68 | nums.insert(0, int(parts.pop())) |
| 69 | except ValueError: |
| 70 | pass |
| 71 | return nums or [maxsize] |
| 72 | return max(reversed(libnames), key=_num_version) |
| 73 | |
def get_ld_header(p):
    """Return the next loader header read from p.stdout, or None.

    Lines are consumed from p.stdout. The most recent line starting with
    "/", "./" or "../" is remembered as the header candidate; when a later
    line contains "INDEX" the candidate is returned with its trailing
    newline removed. An "INDEX" line seen before any candidate is skipped
    rather than dereferencing a missing header. None is returned when the
    stream is exhausted first.
    """
    # "nested" function, but placed at module level
    ld_header = None
    for line in p.stdout:
        if line.startswith(('/', './', '../')):
            ld_header = line
        elif "INDEX" in line and ld_header is not None:
            return ld_header.rstrip('\n')
    return None
| 83 | |
def get_ld_header_info(p):
    """Collect the info lines that follow a loader header on p.stdout.

    Called once an ld_header has been found: the lines listing known paths,
    archives and members start with a digit. Reading stops at the first
    line that does not (e.g. the blank separator); that line is consumed
    from the stream but not returned.
    """
    # "nested" function, but placed at module level
    starts_with_digit = re.compile("[0-9]").match
    collected = []
    for entry in p.stdout:
        if not starts_with_digit(entry):
            # separator reached - it has been consumed, stop here
            break
        collected.append(entry)
    return collected
| 96 | |
def get_ld_headers(file):
    """
    Parse the header of the loader section of executable and archives
    This function calls /usr/bin/dump -H as a subprocess
    and returns a list of (ld_header, ld_header_info) tuples.

    The dump command is restricted to the current ABI via -X{AIX_ABI} and
    its stderr is discarded. The subprocess is run inside a ``with`` block
    so that its stdout pipe is closed and the child is waited for, even
    when parsing raises.
    """
    # get_ld_headers parsing:
    # 1. Find a line that starts with /, ./, or ../ - set as ld_header
    # 2. If "INDEX" occurs in a following line - return ld_header
    # 3. get info (lines starting with [0-9])
    ldr_headers = []
    with Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file],
               universal_newlines=True, stdout=PIPE, stderr=DEVNULL) as p:
        # be sure to read to the end-of-file - getting all entries
        while True:
            ld_header = get_ld_header(p)
            if not ld_header:
                break
            ldr_headers.append((ld_header, get_ld_header_info(p)))
    return ldr_headers
| 120 | |
def get_shared(ld_headers):
    """
    Extract the shareable objects from ld_headers.

    ld_headers is a sequence of (header, info) pairs; only headers that
    contain "[" are potential archive members. For those, everything
    before the first "[" (the path information) is dropped, as is the
    final character (the trailing colon).
    Note: the "[" and "]" characters that are part of dump -H output
    are not removed here.
    """
    return [header[header.index("["):-1]
            for header, _info in ld_headers
            if "[" in header]
| 136 | |
def get_one_match(expr, lines):
    """
    Return the text matched by expr when exactly one of lines matches.

    expr is a regular expression fragment; it is searched for between a
    literal "[" and "]", because member names in the ld_headers output are
    enclosed in square brackets. The returned text excludes the brackets.
    Zero matching lines, or more than one, yield None.
    """
    bracketed = re.compile(rf'\[({expr})\]')
    hits = [found for found in map(bracketed.search, lines) if found]
    return hits[0].group(1) if len(hits) == 1 else None
| 149 | |
| 150 | # additional processing to deal with AIX legacy names for 64-bit members |
def get_legacy(members):
    """
    Return the legacy-named member found in members, or None.

    This covers the historical naming scheme started in AIX4 shared
    library support, e.g., in /usr/lib/libc.a the member name shr.o for
    the 32-bit binary and shr_64.o for the 64-bit binary. Which names are
    tried depends on AIX_ABI; a name only counts when exactly one member
    matches it.
    """
    if AIX_ABI == 64:
        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
        found = get_one_match(r'shr4?_?64\.o', members)
        return found or None

    # 32-bit legacy names - both shr.o and shr4.o exist.
    # shr.o is the preferred name, so it is tried first and
    # shr4.o is returned only when shr.o does not match.
    for legacy_name in ('shr.o', 'shr4.o'):
        found = get_one_match(re.escape(legacy_name), members)
        if found:
            return found
    return None
| 173 | |
def get_version(name, members):
    """
    Return the highest numbered versioned member for name, or None.

    This function is called when an unversioned libFOO.a(libFOO.so) has
    not been found.

    Versioning for the member name is expected to follow
    GNU LIBTOOL conventions: the highest version (X, then X.Y, then X.Y.Z)
     * find [libFoo.so.X]
     * find [libFoo.so.X.Y]
     * find [libFoo.so.X.Y.Z]

    Before the GNU convention became the standard scheme regardless of
    binary size, AIX packagers used the GNU convention "as-is" for 32-bit
    archive members but used a "distinguishing" name for 64-bit members.
    This scheme inserted either 64 or _64 between libFOO and .so
    - generally libFOO_64.so, but occasionally libFOO64.so

    The plain pattern is tried first; the 64/_64 pattern is only consulted
    when the plain one matches no member at all.
    """
    # A version suffix must start as '.so.[0-9]', i.e., .so. followed by at
    # least one digit. Rather than spelling out .so.X, .so.X.Y and
    # .so.X.Y.Z separately, any further mix of digits and dots is accepted
    # after that first digit. Anything beyond
    # libFOO.so.digits.digits.digits should be seen as a member name
    # outside normal expectations.
    patterns = (
        rf'lib{name}\.so\.[0-9]+[0-9.]*',
        rf'lib{name}_?64\.so\.[0-9]+[0-9.]*',
    )
    for pattern in patterns:
        hits = (re.search(pattern, member) for member in members)
        versions = [hit.group(0) for hit in hits if hit]
        if versions:
            return _last_version(versions, '.')
    return None
| 211 | |
def get_member(name, members):
    """
    Return an archive member matching the request in name.

    Name is the library name without any prefix like lib, suffix like .so,
    or version number; it is interpolated into regular expressions.
    Given a list of members, the most appropriate result is chosen in
    this order: the generic libXXX.so (for a 64-bit ABI also libXXX64.so),
    then a versioned libXXX.so.a.b.c, and finally the legacy AIX naming
    scheme.
    """
    # look first for a generic match - prepend lib and append .so
    found = get_one_match(rf'lib{name}\.so', members)
    if found:
        return found
    if AIX_ABI == 64:
        found = get_one_match(rf'lib{name}64\.so', members)
        if found:
            return found
    # No exact match with .so as suffix: try a versioned name, and when
    # that fails too, fall back to the AIX legacy member names.
    return get_version(name, members) or get_legacy(members)
| 239 | |
def get_libpaths():
    """
    Return the list of directories to search for libraries.

    On AIX, the buildtime searchpath is stored in the executable
    as "loader header information".
    The command /usr/bin/dump -H extracts this info.
    The list starts with the colon-separated entries of LD_LIBRARY_PATH
    or, only when that variable is not set, of LIBPATH. The paths recorded
    in the python executable are appended after them.
    This mimics AIX dlopen() behavior.
    """
    env_value = environ.get("LD_LIBRARY_PATH")
    if env_value is None:
        env_value = environ.get("LIBPATH")
    search_dirs = [] if env_value is None else env_value.split(":")

    for _header, info_lines in get_ld_headers(executable):
        for info in info_lines:
            # the second (optional) argument is PATH if it includes a /
            candidate = info.split()[1]
            if "/" in candidate:
                search_dirs.extend(candidate.split(":"))
    return search_dirs
| 265 | |
def find_shared(paths, name):
    """
    Search paths for the archive lib<name>.a and pick a member from it.

    paths is a list of directories to search for an archive.
    name is the abbreviated name given to find_library().

    Returns a (base, member) tuple, where base is the archive file name
    (without directory) and member is the result of get_member().
    Returns (None, None) when no archive exists in paths, or when the
    first archive found has no suitable member.
    """
    # "lib" is prefixed to emulate compiler name resolution,
    # e.g., -lc to libc
    base = f'lib{name}.a'
    for directory in paths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        archive = path.join(directory, base)
        if path.exists(archive):
            members = get_shared(get_ld_headers(archive))
            # name is escaped because get_member() uses it in a regex
            member = get_member(re.escape(name), members)
            if member is not None:
                return (base, member)
            # The first archive found decides the outcome: later
            # directories are not consulted.
            return (None, None)
    return (None, None)
| 290 | |
def find_library(name):
    """AIX implementation of ctypes.util.find_library()
    Find an archive member that will dlopen(). If not available,
    also search for a file (or link) with a .so suffix.

    AIX supports two types of schemes that can be used with dlopen().
    The so-called SystemV Release4 (svr4) format is commonly suffixed
    with .so while the (default) AIX scheme has the library (archive)
    ending with the suffix .a
    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
    the argument passed to dlopen must include both the library and
    the member names in a single string.

    find_library() looks first for an archive (.a) with a suitable member.
    If no archive+member pair is found, look for a .so file.

    Returns "lib<name>.a(<member>)" for an archive member, "lib<name>.so"
    for a plain shared object found in the search paths, or None.
    """

    libpaths = get_libpaths()
    (base, member) = find_shared(libpaths, name)
    if base is not None:
        return f"{base}({member})"

    # To get here, a member in an archive has not been found
    # In other words, either:
    # a) a .a file was not found
    # b) a .a file did not have a suitable member
    # So, look for a .so file
    # Check libpaths for .so file
    # Note, the installation must prepare a link from a .so
    # to a versioned file
    # This is common practice by GNU libtool on other platforms
    soname = f"lib{name}.so"
    for directory in libpaths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        shlib = path.join(directory, soname)
        if path.exists(shlib):
            return soname
    # if we are here, we have not found anything plausible
    return None