readers.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. #-----------------------------------------------------------------------------
  2. # Copyright (c) 2013-2022, PyInstaller Development Team.
  3. #
  4. # Distributed under the terms of the GNU General Public License (version 2
  5. # or later) with exception for distributing the bootloader.
  6. #
  7. # The full license is in the file COPYING.txt, distributed with this software.
  8. #
  9. # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
  10. #-----------------------------------------------------------------------------
  11. """
  12. This CArchiveReader is used only by the archive_viewer utility.
  13. """
  14. # TODO clean up this module
  15. import os
  16. import struct
  17. from PyInstaller.loader.pyimod02_archive import ArchiveReader
  18. class NotAnArchiveError(Exception):
  19. pass
  20. class CTOCReader:
  21. """
  22. A class encapsulating the table of contents of a CArchive.
  23. When written to disk, it is easily read from C.
  24. """
  25. # (structlen, dpos, dlen, ulen, flag, typcd) followed by name
  26. ENTRYSTRUCT = '!iIIIBB'
  27. ENTRYLEN = struct.calcsize(ENTRYSTRUCT)
  28. def __init__(self):
  29. self.data = []
  30. def frombinary(self, s):
  31. """
  32. Decode the binary string into an in memory list.
  33. S is a binary string.
  34. """
  35. p = 0
  36. while p < len(s):
  37. slen, dpos, dlen, ulen, flag, typcd = struct.unpack(self.ENTRYSTRUCT, s[p:p + self.ENTRYLEN])
  38. nmlen = slen - self.ENTRYLEN
  39. p = p + self.ENTRYLEN
  40. nm, = struct.unpack('%is' % nmlen, s[p:p + nmlen])
  41. p = p + nmlen
  42. # nm may have up to 15 bytes of padding
  43. nm = nm.rstrip(b'\0')
  44. nm = nm.decode('utf-8')
  45. typcd = chr(typcd)
  46. self.data.append((dpos, dlen, ulen, flag, typcd, nm))
  47. def get(self, ndx):
  48. """
  49. Return the table of contents entry (tuple) at index NDX.
  50. """
  51. return self.data[ndx]
  52. def __getitem__(self, ndx):
  53. return self.data[ndx]
  54. def find(self, name):
  55. """
  56. Return the index of the toc entry with name NAME.
  57. Return -1 for failure.
  58. """
  59. for i, nm in enumerate(self.data):
  60. if nm[-1] == name:
  61. return i
  62. return -1
  63. class CArchiveReader(ArchiveReader):
  64. """
  65. An Archive subclass that can hold arbitrary data.
  66. This class encapsulates all files that are bundled within an executable. It can contain ZlibArchive (Python .pyc
  67. files), dlls, Python C extensions and all other data files that are bundled in --onefile mode.
  68. Easily handled from C or from Python.
  69. """
  70. # MAGIC is useful to verify that conversion of Python data types to C structure and back works properly.
  71. MAGIC = b'MEI\014\013\012\013\016'
  72. HDRLEN = 0
  73. LEVEL = 9
  74. # Cookie - holds some information for the bootloader. C struct format definition. '!' at the beginning means network
  75. # byte order. C struct looks like:
  76. #
  77. # typedef struct _cookie {
  78. # char magic[8]; /* 'MEI\014\013\012\013\016' */
  79. # uint32_t len; /* len of entire package */
  80. # uint32_t TOC; /* pos (rel to start) of TableOfContents */
  81. # int TOClen; /* length of TableOfContents */
  82. # int pyvers; /* new in v4 */
  83. # char pylibname[64]; /* Filename of Python dynamic library. */
  84. # } COOKIE;
  85. #
  86. _cookie_format = '!8sIIii64s'
  87. _cookie_size = struct.calcsize(_cookie_format)
  88. def __init__(self, archive_path=None, start=0, length=0, pylib_name=''):
  89. """
  90. Constructor.
  91. archive_path path name of file (create empty CArchive if path is None).
  92. start is the seekposition within PATH.
  93. len is the length of the CArchive (if 0, then read till EOF).
  94. pylib_name name of Python DLL which bootloader will use.
  95. """
  96. self.length = length
  97. self._pylib_name = pylib_name
  98. # A CArchive created from scratch starts at 0, no leading bootloader.
  99. self.pkg_start = 0
  100. super().__init__(archive_path, start)
  101. def checkmagic(self):
  102. """
  103. Verify that self is a valid CArchive.
  104. Magic signature is at end of the archive.
  105. This function is used by ArchiveViewer.py utility.
  106. """
  107. # Magic is at EOF; if we're embedded, we need to figure where that is.
  108. if self.length:
  109. self.lib.seek(self.start + self.length, 0)
  110. else:
  111. self.lib.seek(0, os.SEEK_END)
  112. end_pos = self.lib.tell()
  113. SEARCH_CHUNK_SIZE = 8192
  114. magic_offset = -1
  115. while end_pos >= len(self.MAGIC):
  116. start_pos = max(end_pos - SEARCH_CHUNK_SIZE, 0)
  117. chunk_size = end_pos - start_pos
  118. # Is the remaining chunk large enough to hold the pattern?
  119. if chunk_size < len(self.MAGIC):
  120. break
  121. # Read and scan the chunk
  122. self.lib.seek(start_pos, os.SEEK_SET)
  123. buf = self.lib.read(chunk_size)
  124. pos = buf.rfind(self.MAGIC)
  125. if pos != -1:
  126. magic_offset = start_pos + pos
  127. break
  128. # Adjust search location for next chunk; ensure proper overlap
  129. end_pos = start_pos + len(self.MAGIC) - 1
  130. if magic_offset == -1:
  131. raise RuntimeError("%s is not a valid %s archive file" % (self.path, self.__class__.__name__))
  132. filelen = magic_offset + self._cookie_size
  133. # Read the whole cookie
  134. self.lib.seek(magic_offset, os.SEEK_SET)
  135. buf = self.lib.read(self._cookie_size)
  136. magic, totallen, tocpos, toclen, pyvers, pylib_name = struct.unpack(self._cookie_format, buf)
  137. if magic != self.MAGIC:
  138. raise RuntimeError("%s is not a valid %s archive file" % (self.path, self.__class__.__name__))
  139. self.pkg_start = filelen - totallen
  140. if self.length:
  141. if totallen != self.length or self.pkg_start != self.start:
  142. raise RuntimeError('Problem with embedded archive in %s' % self.path)
  143. # Verify presence of Python library name.
  144. if not pylib_name:
  145. raise RuntimeError('Python library filename not defined in archive.')
  146. self.tocpos, self.toclen = tocpos, toclen
  147. def loadtoc(self):
  148. """
  149. Load the table of contents into memory.
  150. """
  151. self.toc = CTOCReader()
  152. self.lib.seek(self.pkg_start + self.tocpos)
  153. tocstr = self.lib.read(self.toclen)
  154. self.toc.frombinary(tocstr)
  155. def extract(self, name):
  156. """
  157. Get the contents of an entry.
  158. NAME is an entry name OR the index to the TOC.
  159. Return the tuple (ispkg, contents).
  160. For non-Python resources, ispkg is meaningless (and 0).
  161. Used by the import mechanism.
  162. """
  163. if isinstance(name, str):
  164. ndx = self.toc.find(name)
  165. if ndx == -1:
  166. return None
  167. else:
  168. ndx = name
  169. dpos, dlen, ulen, flag, typcd, nm = self.toc.get(ndx)
  170. with self.lib:
  171. self.lib.seek(self.pkg_start + dpos)
  172. rslt = self.lib.read(dlen)
  173. if flag == 1:
  174. import zlib
  175. rslt = zlib.decompress(rslt)
  176. if typcd == 'M':
  177. return 1, rslt
  178. return typcd == 'M', rslt
  179. def contents(self):
  180. """
  181. Return the names of the entries.
  182. """
  183. rslt = []
  184. for dpos, dlen, ulen, flag, typcd, nm in self.toc:
  185. rslt.append(nm)
  186. return rslt
  187. def openEmbedded(self, name):
  188. """
  189. Open a CArchive of name NAME embedded within this CArchive.
  190. This function is used by ArchiveViewer.py utility.
  191. """
  192. ndx = self.toc.find(name)
  193. if ndx == -1:
  194. raise KeyError("Member '%s' not found in %s" % (name, self.path))
  195. dpos, dlen, ulen, flag, typcd, nm = self.toc.get(ndx)
  196. if typcd not in "zZ":
  197. raise NotAnArchiveError('%s is not an archive' % name)
  198. if flag:
  199. raise ValueError('Cannot open compressed archive %s in place' % name)
  200. return CArchiveReader(self.path, self.pkg_start + dpos, dlen)