blob: 463f60a2849f3669c3fb2ee466c06dd077d9c517 [file] [log] [blame]
Michael Feltc5ae1692017-12-19 13:58:49 +01001"""
2Lib/ctypes.util.find_library() support for AIX
3Similar approach as done for Darwin support by using separate files
4but unlike Darwin - no extension such as ctypes.macholib.*
5
6dlopen() is an interface to AIX initAndLoad() - primary documentation at:
7https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
8https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm
9
10AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix
11platforms, but also a BSD style - aka SVR3.
12
13From AIX 5.3 Difference Addendum (December 2004)
142.9 SVR4 linking affinity
15Nowadays, there are two major object file formats used by the operating systems:
16XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
17Object File Format) was the base of SVR3 and BSD 4.2 systems.
18ELF: Executable and Linking Format that was developed by AT&T and is a
19base for SVR4 UNIX.
20
21While the shared library content is identical on AIX - one is located as a filepath name
22(svr4 style) and the other is located as a member of an archive (and the archive
23is located as a filepath name).
24
25The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
26For svr4 either only one ABI is supported, or there are two directories, or there
27are different file names. The most common solution for multiple ABI is multiple
28directories.
29
30For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
31as multiple shared libraries can be in the archive - even sharing the same name.
32In documentation the archive is also referred to as the "base" and the shared
33library object is referred to as the "member".
34
Leo Ariasc3d95082018-02-03 18:36:10 -060035For dlopen() on AIX (read initAndLoad()) the calls are similar.
Michael Feltc5ae1692017-12-19 13:58:49 +010036Default activity occurs when no path information is provided. When path
37information is provided dlopen() does not search any other directories.
38
39For SVR4 - the shared library name is the name of the file expected: libFOO.so
40For AIX - the shared library is expressed as base(member). The search is for the
41base (e.g., libFOO.a) and once the base is found the shared library - identified by
42member (e.g., libFOO.so, or shr.o) is located and loaded.
43
44The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
45naming style.
46"""
47__author__ = "Michael Felt <aixtools@felt.demon.nl>"
48
49import re
50from os import environ, path
51from sys import executable
52from ctypes import c_void_p, sizeof
53from subprocess import Popen, PIPE, DEVNULL
54
55# Executable bit size - 32 or 64
56# Used to filter the search in an archive by size, e.g., -X64
57AIX_ABI = sizeof(c_void_p) * 8
58
59
60from sys import maxsize
61def _last_version(libnames, sep):
62 def _num_version(libname):
63 # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR]
64 parts = libname.split(sep)
65 nums = []
66 try:
67 while parts:
68 nums.insert(0, int(parts.pop()))
69 except ValueError:
70 pass
71 return nums or [maxsize]
72 return max(reversed(libnames), key=_num_version)
73
def get_ld_header(p):
    """Read p.stdout up to the next "INDEX" line and return its header.

    Helper for get_ld_headers(), kept at module level.  The most recent
    line starting with "/", "./" or "../" is remembered; as soon as a
    later line containing "INDEX" is read, the remembered line is returned
    with trailing newlines removed.  Returns None when the stream is
    exhausted without an "INDEX" line.
    """
    path_prefixes = ('/', './', '../')
    candidate = None
    for text in p.stdout:
        if text.startswith(path_prefixes):
            candidate = text
            continue
        if "INDEX" not in text:
            continue
        return candidate.rstrip('\n')
    return None
83
def get_ld_header_info(p):
    """Collect the consecutive lines of p.stdout that start with a digit.

    Helper for get_ld_headers(), kept at module level.  Called after an
    ld_header was found: the digit-led lines that follow hold the known
    paths, archives and members.  The first line that does not start with
    a digit (e.g. the blank separator) is consumed and ends the scan.
    """
    starts_with_digit = re.compile("[0-9]").match
    collected = []
    for entry in p.stdout:
        if not starts_with_digit(entry):
            # separator line: it has been read, stop here
            break
        collected.append(entry)
    return collected
96
def get_ld_headers(file):
    """
    Parse the header of the loader section of executable and archives.

    This function calls /usr/bin/dump -H as a subprocess (restricted to
    the current ABI with -X32 / -X64) and returns a list of
    (ld_header, ld_header_info) tuples.

    Parsing, repeated until the output is exhausted:
      1. Find a line that starts with /, ./, or ../ - set as ld_header
      2. If "INDEX" occurs in a following line - keep ld_header
      3. Get info (the following lines starting with [0-9])
    """
    ldr_headers = []
    p = Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file],
              universal_newlines=True, stdout=PIPE, stderr=DEVNULL)
    try:
        # be sure to read to the end-of-file - getting all entries
        while True:
            ld_header = get_ld_header(p)
            if not ld_header:
                break
            ldr_headers.append((ld_header, get_ld_header_info(p)))
    finally:
        # Always release the pipe and reap the child; wait() must actually
        # be called, otherwise the dump process is left un-reaped.
        p.stdout.close()
        p.wait()
    return ldr_headers
120
def get_shared(ld_headers):
    """
    Extract the shareable objects from ld_headers.

    ld_headers is an iterable of (header_line, info) pairs; only the
    header line is used.  Headers containing "[" are potential archive
    members: everything before the first "[" (the path information) is
    dropped, as is the final character (the trailing colon).
    Note: the "[" and "]" characters that are part of dump -H output
    are not removed here.
    """
    return [
        header[header.index("["):-1]
        for header, _ in ld_headers
        if "[" in header
    ]
136
def get_one_match(expr, lines):
    """
    Return the single bracketed match of expr among lines, else None.

    Member names in the ld_headers output are between square brackets, so
    expr is searched for as "[expr]" in every line.  Exactly one line must
    match; the result is then the matched text without the leading "[" and
    trailing "]".  Zero matches or more than one match yield None.
    """
    bracketed = rf'\[({expr})\]'
    searches = (re.search(bracketed, text) for text in lines)
    hits = [found for found in searches if found]
    return hits[0].group(1) if len(hits) == 1 else None
149
150# additional processing to deal with AIX legacy names for 64-bit members
def get_legacy(members):
    """
    Return the legacy-named archive member for the running ABI, or None.

    This routine covers the historical (legacy) naming schemes started
    in AIX4 shared library support for library member names,
    e.g., in /usr/lib/libc.a the member name shr.o for the 32-bit binary
    and shr_64.o for the 64-bit binary.
    """
    if AIX_ABI == 64:
        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
        patterns = [r'shr4?_?64\.o']
    else:
        # 32-bit legacy names - both shr.o and shr4.o exist.
        # shr.o is the preferred name, so it is tried first;
        # shr4.o is returned only when shr.o does not exist.
        patterns = [re.escape(legacy) for legacy in ('shr.o', 'shr4.o')]
    for pattern in patterns:
        found = get_one_match(pattern, members)
        if found:
            return found
    return None
173
def get_version(name, members):
    """
    Return the highest numbered versioned member for name, or None.

    This function is called when an unversioned libFOO.a(libFOO.so) has
    not been found.

    Versioning for the member name is expected to follow
    GNU LIBTOOL conventions:
     * libFoo.so.X
     * libFoo.so.X.Y
     * libFoo.so.X.Y.Z

    Before the GNU convention became the standard scheme regardless of
    binary size, AIX packagers used the GNU convention "as-is" for 32-bit
    archive members but a "distinguishing" name for 64-bit members,
    inserting either 64 or _64 between libFOO and .so
    - generally libFOO_64.so, but occasionally libFOO64.so.
    The plain names are tried first; the _64/64 names are only considered
    when no plain versioned name is present.
    """
    # A version suffix must start as '.so.[0-9]', i.e. at least one digit
    # after ".so."; after that any run of digits and dots is accepted
    # rather than spelling out .so.X, .so.X.Y and .so.X.Y.Z separately.
    patterns = (
        rf'lib{name}\.so\.[0-9]+[0-9.]*',
        rf'lib{name}_?64\.so\.[0-9]+[0-9.]*',
    )
    for pattern in patterns:
        hits = (re.search(pattern, entry) for entry in members)
        candidates = [hit.group(0) for hit in hits if hit]
        if candidates:
            return _last_version(candidates, '.')
    return None
211
def get_member(name, members):
    """
    Return an archive member matching the request in name.

    Name is the library name without any prefix like lib, suffix like .so,
    or version number; it is interpolated into regular expressions.
    Given a list of members, the candidates are tried in this order and
    the first hit is returned:
      1. the generic libNAME.so
      2. libNAME64.so (64-bit ABI only)
      3. a versioned name, via get_version()
      4. an AIX legacy member name, via get_legacy()
    """
    # look first for a generic match - prepend lib and append .so
    found = get_one_match(rf'lib{name}\.so', members)
    if not found and AIX_ABI == 64:
        found = get_one_match(rf'lib{name}64\.so', members)
    # no exact match with .so as suffix: look for a versioned name
    if not found:
        found = get_version(name, members)
    # still nothing: fall back to the AIX legacy member name
    if not found:
        found = get_legacy(members)
    return found
239
def get_libpaths():
    """
    Return the list of directories to search for libraries.

    On AIX, the buildtime searchpath is stored in the executable
    as "loader header information".
    The command /usr/bin/dump -H extracts this info.
    The list starts with the entries of LD_LIBRARY_PATH (preferred),
    or of LIBPATH when LD_LIBRARY_PATH is not set; the paths recorded
    in the running python executable are appended after them.
    This mimics AIX dlopen() behavior.
    """
    env_value = environ.get("LD_LIBRARY_PATH")
    if env_value is None:
        env_value = environ.get("LIBPATH")
    libpaths = [] if env_value is None else env_value.split(":")

    for _, info_lines in get_ld_headers(executable):
        for info in info_lines:
            # the second (optional) argument is PATH if it includes a /
            field = info.split()[1]
            if "/" in field:
                libpaths.extend(field.split(":"))
    return libpaths
265
def find_shared(paths, name):
    """
    Locate the archive libNAME.a and a suitable member inside it.

    paths is a list of directories to search for an archive.
    name is the abbreviated name given to find_library().
    Process: search "paths" for the archive; for the first archive that
    exists, return (archive_basename, member) using get_member().
    Returns (None, None) when no archive is found, or when the first
    archive found has no suitable member (later directories are not
    consulted in that case).
    """
    # "lib" is prefixed to emulate compiler name resolution,
    # e.g., -lc to libc
    base = f'lib{name}.a'
    for directory in paths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        archive = path.join(directory, base)
        if not path.exists(archive):
            continue
        members = get_shared(get_ld_headers(archive))
        # name is escaped because get_member() builds regexes from it
        member = get_member(re.escape(name), members)
        if member is not None:
            return (base, member)
        return (None, None)
    return (None, None)
290
def find_library(name):
    """AIX implementation of ctypes.util.find_library()
    Find an archive member that will dlopen(). If not available,
    also search for a file (or link) with a .so suffix.

    AIX supports two types of schemes that can be used with dlopen().
    The so-called SystemV Release4 (svr4) format is commonly suffixed
    with .so while the (default) AIX scheme has the library (archive)
    ending with the suffix .a
    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
    the argument passed to dlopen must include both the library and
    the member names in a single string.

    find_library() looks first for an archive (.a) with a suitable member
    and returns "libNAME.a(member)" when one is found.
    If no archive+member pair is found, it looks for a .so file and
    returns "libNAME.so"; otherwise it returns None.
    """

    libpaths = get_libpaths()
    (base, member) = find_shared(libpaths, name)
    if base is not None:
        return f"{base}({member})"

    # To get here, a member in an archive has not been found
    # In other words, either:
    # a) a .a file was not found
    # b) a .a file did not have a suitable member
    # So, look for a .so file
    # Check libpaths for .so file
    # Note, the installation must prepare a link from a .so
    # to a versioned file
    # This is common practice by GNU libtool on other platforms
    soname = f"lib{name}.so"
    for directory in libpaths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        if path.exists(path.join(directory, soname)):
            # only the file name is returned, not the directory it was found in
            return soname
    # if we are here, we have not found anything plausible
    return None