PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/timApp/modules/cs/git/gitlib.py

https://gitlab.com/tim-jyu/tim
Python | 438 lines | 430 code | 8 blank | 0 comment | 1 complexity | 718020ac653d723b1347c7bfec6370b5 MD5 | raw file
  1. import json
  2. import os.path
  3. import re
  4. import subprocess
  5. import urllib
  6. from dataclasses import dataclass, field
  7. from datetime import datetime, timedelta
  8. from pathlib import Path
  9. from threading import Lock
  10. from typing import Optional, Union, Any
  11. from urllib.request import urlopen, Request
  12. from file_util import File, copy_files_glob, rm
  13. from git.util import Settings, RemoteInfo, NotInAGitRepo
  14. git_libs = {}
  15. git_lib_urls = {}
  16. @dataclass
  17. class APIResponse:
  18. status: int
  19. data: Any
  20. reason: str | None
  21. class CacheItem:
  22. def __init__(self):
  23. self.lock = Lock()
  24. self.time = datetime.min
  25. @dataclass
  26. class LibResponse:
  27. ok: bool
  28. msg: str = field(default="")
  29. data: Any = field(default=None)
  30. class GitLib:
  31. id: str = "_gitlib"
  32. url: str | None = None
  33. _cache = {}
  34. def __init__(self, settings: Settings, info: RemoteInfo):
  35. self.settings = settings
  36. self.domain = info.host
  37. self.api_path = settings.apiProtocol + "://" + self.domain + "/"
  38. self.remote = info
  39. self.remote.path = (
  40. self.sanitize_repo_path(info.path) if info.path is not None else ""
  41. )
  42. def get_headers(self):
  43. raise NotImplementedError("get_headers not implemented")
  44. def call_api(self, path, data=None, method=None):
  45. request = Request(
  46. self.api_path + path,
  47. json.dumps(data).encode("utf-8"),
  48. self.get_headers(),
  49. method=method,
  50. )
  51. try:
  52. response = urlopen(request)
  53. except urllib.error.HTTPError as e:
  54. try:
  55. data = e.file.read().decode("utf-8")
  56. except:
  57. data = None
  58. return APIResponse(e.code, data, e.reason)
  59. except urllib.error.URLError as e:
  60. return APIResponse(e.reason.errno, None, e.reason.strerror)
  61. try:
  62. data = json.loads(response.read().decode("utf-8"))
  63. except:
  64. data = None
  65. return APIResponse(response.status, data, None)
  66. def post(self, path, data=None):
  67. return self.call_api(path, data, method="POST")
  68. def get(self, path, data=None):
  69. return self.call_api(path, data, method="GET")
  70. def delete(self, path, data=None):
  71. return self.call_api(path, data, method="DELETE")
  72. def put(self, path, data=None):
  73. return self.call_api(path, data, method="PUT")
  74. def patch(self, path, data=None):
  75. return self.call_api(path, data, method="PATCH")
  76. @property
  77. def cache_path(self) -> str:
  78. return f"/tmp/git_files/{self.remote.host.lower()}/{self.remote.path}/{self.remote.branch}"
  79. def check_cache(self, force_update=False):
  80. """Updates cache if needed"""
  81. path = self.cache_path
  82. cached: CacheItem = GitLib._cache.setdefault(path, CacheItem())
  83. with cached.lock:
  84. if not (Path(path) / ".git").exists():
  85. self.clone(path)
  86. elif force_update or datetime.now() - cached.time > timedelta(
  87. seconds=self.settings.cache
  88. ):
  89. self.checkout()
  90. else:
  91. return
  92. GitLib._cache[path].time = datetime.now()
  93. def get_files(self, sub_path: str, glob: str = "", force_update=False):
  94. self.check_cache(force_update)
  95. cpath = self.cache_path
  96. basepath = Path(cpath) / sub_path
  97. baseglob = basepath / glob
  98. if baseglob.is_dir():
  99. matches = baseglob.glob("**/*")
  100. file_paths = [file for file in matches if file.is_file()]
  101. elif baseglob.exists():
  102. file_paths = [baseglob]
  103. else:
  104. matches = list(basepath.glob(glob))
  105. file_paths = [file for file in matches if file.is_file()]
  106. files = []
  107. for path in file_paths:
  108. file = File(str(path.relative_to(basepath)))
  109. try:
  110. file.bcontent = path.read_bytes()
  111. except Exception as e:
  112. raise Exception(f"Failed to read git file {path}: {e}")
  113. try:
  114. file.content = file.bcontent.decode(encoding="utf-8")
  115. except Exception as e:
  116. continue
  117. files.append(file)
  118. return files
  119. def copy_files(self, sub_path: str, destination: str, glob: str = ""):
  120. self.check_cache()
  121. basepath = Path(self.cache_path) / sub_path
  122. dpath = Path(destination)
  123. if basepath.is_file() and dpath.exists():
  124. rm(basepath)
  125. elif basepath.is_dir() and dpath.exists() and not dpath.is_dir():
  126. rm(dpath)
  127. return copy_files_glob(glob, str(basepath), destination)
  128. def sanitize_repo_path(self, repo: str):
  129. repo = re.sub(r"[^A-Za-z0-9\.-_]", "-", repo)
  130. repo = re.sub(r"-+", "-", repo)
  131. return repo[:-1] if repo.endswith("-") else repo
  132. def clone(self, path: str | Path):
  133. clone(
  134. path,
  135. self.remote.host,
  136. self.remote.path,
  137. self.remote.protocol,
  138. self.remote.user,
  139. self.remote.name,
  140. self.remote.branch,
  141. )
  142. def checkout(self, sub_path=".", path: str = None, do_fetch=True, force=True):
  143. if path is None:
  144. path = self.cache_path
  145. checkout(path, sub_path, do_fetch, self.remote.name, self.remote.branch, force)
  146. def pull_or_clone(self, path: str):
  147. try:
  148. lpath = get_repo_root(Path(path))
  149. except:
  150. lpath = None
  151. if lpath is None or os.path.abspath(path) != lpath:
  152. self.clone(path)
  153. else:
  154. self.checkout(path)
  155. @classmethod
  156. def all_subclasses(cls):
  157. subclasses = cls.__subclasses__()
  158. return subclasses + [i for sc in subclasses for i in sc.all_subclasses()]
  159. def check_credentials(self, credentials) -> LibResponse:
  160. raise NotImplementedError("check_credentials is not implemented")
  161. def library_specific(self, credentials, options):
  162. pass
  163. def get_lib_class(host: str, library: str | None = None) -> type(GitLib):
  164. if library is not None:
  165. cls = git_libs.get(library, None)
  166. else:
  167. cls = git_lib_urls.get(host.lower(), None)
  168. return cls
  169. def get_lib(info: RemoteInfo, settings: Settings) -> GitLib:
  170. cls = get_lib_class(info.host, settings.library)
  171. if cls is None:
  172. raise Exception(f"Couldn't get a git library for {info.host}")
  173. return cls(settings, info)
  174. def get_repo_root(opath: Path) -> str:
  175. path = opath
  176. while not path.is_dir():
  177. if path == path.parent:
  178. break
  179. path = path.parent
  180. response = subprocess.run(
  181. ["git", "-C", str(path), "rev-parse", "--absolute-git-dir", "2>", "/dev/null"],
  182. stdout=subprocess.PIPE,
  183. encoding="utf-8",
  184. )
  185. if response.returncode != 0 or len(response.stdout) == 0:
  186. raise NotInAGitRepo(str(opath))
  187. return response.stdout.strip()
  188. def is_in_git_repo(path: Path, must_exist=False):
  189. if must_exist and not path.is_dir():
  190. return False
  191. try:
  192. get_repo_root(path)
  193. except NotInAGitRepo:
  194. return False
  195. return True
  196. def get_remote_and_branch(path: str) -> (str, str):
  197. if not is_in_git_repo(Path(path)):
  198. raise NotInAGitRepo(path)
  199. response = subprocess.run(
  200. ["git", "-C", path, "status", "-sb"], capture_output=True, encoding="utf-8"
  201. )
  202. if response.returncode != 0 or len(response.stdout) == 0:
  203. raise Exception(
  204. f"Git status: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  205. )
  206. tmp: str = response.stdout.strip()
  207. tmp = tmp.split("\n", 1)[0]
  208. tmp = tmp.split(" ", 2)[1]
  209. _, remote = tmp.split("...")
  210. remote, remote_branch = remote.split("/", 1)
  211. return remote, remote_branch # TODO: test
  212. def get_remote_info(path: str) -> RemoteInfo:
  213. remote, remote_branch = get_remote_and_branch(path)
  214. response = subprocess.run(
  215. ["git", "-C", path, "config", "--get", f"remote.{remote}.url"],
  216. capture_output=True,
  217. encoding="utf-8",
  218. )
  219. if response.returncode != 0 or len(response.stdout) == 0:
  220. raise Exception(
  221. f"Git config: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  222. )
  223. remote = RemoteInfo.parse_url(response.stdout.strip())
  224. remote.name = remote
  225. remote.branch = remote_branch
  226. return remote # TODO: test
  227. def clone(
  228. path: str | Path,
  229. host: str,
  230. repo: str,
  231. protocol: str = "ssh",
  232. user: str = None,
  233. remote="origin",
  234. branch="master",
  235. ):
  236. if isinstance(path, str):
  237. path = Path(path)
  238. if path.exists() and not path.is_dir():
  239. raise NotADirectoryError(f"Git clone: path is not a directory")
  240. if protocol != "ssh":
  241. raise ValueError("Git clone: Only ssh protocol is supported")
  242. path.mkdir(parents=True, exist_ok=True)
  243. prefix = ""
  244. if user is not None:
  245. prefix = f"{user}@"
  246. # using ssh:// allows specifying a different port
  247. response = subprocess.run(
  248. [
  249. "git",
  250. "clone",
  251. f"{protocol}://{prefix}{host}/{repo}",
  252. "-o",
  253. remote,
  254. "-b",
  255. branch,
  256. str(path),
  257. ],
  258. capture_output=True,
  259. encoding="utf-8",
  260. )
  261. if response.returncode != 0:
  262. raise Exception(
  263. f"Git clone: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  264. )
  265. def checkout(
  266. path: str, sub_path=".", do_fetch=True, remote="origin", branch="master", force=True
  267. ):
  268. checkout_branch = (
  269. f"{remote}/{branch}" if remote is not None and len(remote) > 0 else branch
  270. )
  271. options = []
  272. if force:
  273. options.append("-f")
  274. if do_fetch:
  275. response = subprocess.run(
  276. ["git", "-C", path, "fetch"], capture_output=True, encoding="utf-8"
  277. )
  278. if response.returncode != 0:
  279. raise Exception(
  280. f"Git fetch: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  281. )
  282. response = subprocess.run(
  283. ["git", "-C", path, "rm", "-rf", sub_path],
  284. capture_output=True,
  285. encoding="utf-8",
  286. )
  287. if response.returncode != 0:
  288. raise Exception(
  289. f"Git rm: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  290. )
  291. response = subprocess.run(
  292. ["git", "-C", path, "checkout", *options, checkout_branch, "--", sub_path],
  293. capture_output=True,
  294. encoding="utf-8",
  295. )
  296. if response.returncode != 0:
  297. raise Exception(
  298. f"Git checkout: returncode: {response.returncode}, stdout: {response.stdout}, stderr: {response.stderr}"
  299. )
  300. def pull_or_clone(path: str, rinfo: RemoteInfo):
  301. try:
  302. lpath = get_repo_root(Path(path))
  303. except:
  304. lpath = None
  305. if lpath is None or os.path.abspath(path) != lpath:
  306. clone(
  307. path,
  308. rinfo.host,
  309. rinfo.path,
  310. rinfo.protocol,
  311. rinfo.user,
  312. rinfo.name,
  313. rinfo.branch,
  314. )
  315. else:
  316. checkout(path, remote=rinfo.name, branch=rinfo.branch)
  317. def populate():
  318. global git_libs, git_lib_urls
  319. classes = [GitLib] + GitLib.all_subclasses()
  320. def add_id(cls, id):
  321. if id in git_libs:
  322. raise Exception(
  323. f"GitLib {cls.__name__} has a duplicate id ({id}) with {git_libs[id].__name__}"
  324. )
  325. git_libs[id] = cls
  326. def add_url(cls, url):
  327. if url in git_lib_urls:
  328. raise Exception(
  329. f"GitLib {cls.__name__} has a duplicate url ({url}) with {git_lib_urls[url].__name__}"
  330. )
  331. git_lib_urls[url] = cls
  332. for cls in classes:
  333. if hasattr(cls, "id") and cls.id is not None:
  334. id = cls.id
  335. add_id(cls, id.lower())
  336. else:
  337. id = None
  338. if (
  339. not hasattr(cls, "url")
  340. or cls.url is None
  341. or (isinstance(cls.url, list) and len(cls.url) == 0)
  342. ):
  343. url = None
  344. else:
  345. url = cls.url
  346. if isinstance(url, list):
  347. for u in url:
  348. add_url(cls, u.lower())
  349. else:
  350. add_url(cls, url.lower())
  351. if not id and not url:
  352. raise Exception(f"GitLib {cls.__name__} hasn't defined id or url")
# Fill the registries as soon as this module is imported, using GitLib and
# whichever subclasses have been defined by this point.
populate()