import os import json import socket import urllib.request from typing import Any from pathlib import Path import subprocess import platform import tempfile from datetime import datetime import psutil import machineid # https://github.com/keygen-sh/py-machineid from rich import print PACKAGE_DIR = Path(__file__).parent DATA_DIR = Path(os.getcwd()).resolve() def get_vm_info(): hw_in_docker = bool(os.getenv("IN_DOCKER", False) in ("1", "true", "True", "TRUE")) hw_in_vm = False try: # check for traces of docker/containerd/podman in cgroup with open("/proc/self/cgroup") as procfile: for line in procfile: cgroup = line.strip() # .split('/', 1)[-1].lower() if "docker" in cgroup or "containerd" in cgroup or "podman" in cgroup: hw_in_docker = True except Exception: pass hw_manufacturer = "Docker" if hw_in_docker else "Unknown" hw_product = "Container" if hw_in_docker else "Unknown" hw_uuid = machineid.id() if platform.system().lower() == "darwin": # Get macOS machine info hw_manufacturer = "Apple" hw_product = "Mac" try: # Hardware: # Hardware Overview: # Model Name: Mac Studio # Model Identifier: Mac13,1 # Model Number: MJMV3LL/A # ... # Serial Number (system): M230YYTD77 # Hardware UUID: 39A12B50-1972-5910-8BEE-235AD20C8EE3 # ... result = subprocess.run(["system_profiler", "SPHardwareDataType"], capture_output=True, text=True, check=True) for line in result.stdout.split("\n"): if "Model Name:" in line: hw_product = line.split(":", 1)[-1].strip() elif "Model Identifier:" in line: hw_product += " " + line.split(":", 1)[-1].strip() elif "Hardware UUID:" in line: hw_uuid = line.split(":", 1)[-1].strip() except Exception: pass else: # get Linux machine info try: # Getting SMBIOS data from sysfs. # SMBIOS 2.8 present. # argo-1 | 2024-10-01T10:40:51Z ERR error="Incoming request ended abruptly: context canceled" connIndex=2 event=1 ingressRule=0 originService=http://archivebox:8000 │ # Handle 0x0100, DMI type 1, 27 bytes # System Information # Manufacturer: DigitalOcean # Product Name: Droplet # Serial Number: 411922099 # UUID: fb65f41c-ec24-4539-beaf-f941903bdb2c # ... # Family: DigitalOcean_Droplet dmidecode = subprocess.run(["dmidecode", "-t", "system"], capture_output=True, text=True, check=True) for line in dmidecode.stdout.split("\n"): if "Manufacturer:" in line: hw_manufacturer = line.split(":", 1)[-1].strip() elif "Product Name:" in line: hw_product = line.split(":", 1)[-1].strip() elif "UUID:" in line: hw_uuid = line.split(":", 1)[-1].strip() except Exception: pass # Check for VM fingerprint in manufacturer/product name if "qemu" in hw_product.lower() or "vbox" in hw_product.lower() or "lxc" in hw_product.lower() or "vm" in hw_product.lower(): hw_in_vm = True # Check for QEMU explicitly in pmap output try: result = subprocess.run(["pmap", "1"], capture_output=True, text=True, check=True) if "qemu" in result.stdout.lower(): hw_in_vm = True except Exception: pass return { "hw_in_docker": hw_in_docker, "hw_in_vm": hw_in_vm, "hw_manufacturer": hw_manufacturer, "hw_product": hw_product, "hw_uuid": hw_uuid, } def get_public_ip() -> str: def fetch_url(url: str) -> str: with urllib.request.urlopen(url, timeout=5) as response: return response.read().decode("utf-8").strip() def fetch_dns(pubip_lookup_host: str) -> str: return socket.gethostbyname(pubip_lookup_host).strip() methods = [ (lambda: fetch_url("https://ipinfo.io/ip"), lambda r: r), (lambda: fetch_url("https://api.ipify.org?format=json"), lambda r: json.loads(r)["ip"]), (lambda: fetch_dns("myip.opendns.com"), lambda r: r), (lambda: fetch_url("http://whatismyip.akamai.com/"), lambda r: r), # try HTTP as final fallback in case of TLS/system time errors ] for fetch, parse in methods: try: result = parse(fetch()) if result: return result except Exception: continue raise Exception("Could not determine public IP address") def get_local_ip(remote_ip: str = "1.1.1.1", remote_port: int = 80) -> str: try: with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: s.connect((remote_ip, remote_port)) return s.getsockname()[0] except Exception: pass return "127.0.0.1" ip_addrs = lambda addrs: (a for a in addrs if a.family == socket.AF_INET) mac_addrs = lambda addrs: (a for a in addrs if a.family == psutil.AF_LINK) def get_isp_info(ip=None): # Get public IP try: ip = ip or urllib.request.urlopen("https://api.ipify.org").read().decode("utf8") except Exception: pass # Get ISP name, city, and country data = {} try: url = f"https://ipapi.co/{ip}/json/" response = urllib.request.urlopen(url) data = json.loads(response.read().decode()) except Exception: pass isp = data.get("org", "Unknown") city = data.get("city", "Unknown") region = data.get("region", "Unknown") country = data.get("country_name", "Unknown") # Get system DNS resolver servers dns_server = None try: result = subprocess.run(["dig", "example.com", "A"], capture_output=True, text=True, check=True).stdout dns_server = result.split(";; SERVER: ", 1)[-1].split("\n")[0].split("#")[0].strip() except Exception: try: dns_server = Path("/etc/resolv.conf").read_text().split("nameserver ", 1)[-1].split("\n")[0].strip() except Exception: dns_server = "127.0.0.1" print(f"[red]:warning: WARNING: Could not determine DNS server, using {dns_server}[/red]") # Get DNS resolver's ISP name # url = f'https://ipapi.co/{dns_server}/json/' # dns_isp = json.loads(urllib.request.urlopen(url).read().decode()).get('org', 'Unknown') return { "isp": isp, "city": city, "region": region, "country": country, "dns_server": dns_server, # 'net_dns_isp': dns_isp, } def get_host_network() -> dict[str, Any]: default_gateway_local_ip = get_local_ip() gateways = psutil.net_if_addrs() for interface, ips in gateways.items(): for local_ip in ip_addrs(ips): if default_gateway_local_ip == local_ip.address: mac_address = next(mac_addrs(ips)).address public_ip = get_public_ip() return { "hostname": max([socket.gethostname(), platform.node()], key=len), "iface": interface, "mac_address": mac_address, "ip_local": local_ip.address, "ip_public": public_ip, # "is_behind_nat": local_ip.address != public_ip, **get_isp_info(public_ip), } raise Exception("Could not determine host network info") def get_os_info() -> dict[str, Any]: os_release = platform.release() if platform.system().lower() == "darwin": os_release = "macOS " + platform.mac_ver()[0] else: try: os_release = subprocess.run(["lsb_release", "-ds"], capture_output=True, text=True, check=True).stdout.strip() except Exception: pass return { "os_arch": platform.machine(), "os_family": platform.system().lower(), "os_platform": platform.platform(), "os_kernel": platform.version(), "os_release": os_release, } def get_host_stats() -> dict[str, Any]: try: with tempfile.TemporaryDirectory() as tmp_dir: tmp_usage = psutil.disk_usage(str(tmp_dir)) app_usage = psutil.disk_usage(str(PACKAGE_DIR)) data_usage = psutil.disk_usage(str(DATA_DIR)) mem_usage = psutil.virtual_memory() try: swap_usage = psutil.swap_memory() swap_used_pct = swap_usage.percent swap_used_gb = round(swap_usage.used / 1024 / 1024 / 1024, 3) swap_free_gb = round(swap_usage.free / 1024 / 1024 / 1024, 3) except OSError: # Some sandboxed environments deny access to swap stats swap_used_pct = 0.0 swap_used_gb = 0.0 swap_free_gb = 0.0 return { "cpu_boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(), "cpu_count": psutil.cpu_count(logical=False), "cpu_load": psutil.getloadavg(), # "cpu_pct": psutil.cpu_percent(interval=1), "mem_virt_used_pct": mem_usage.percent, "mem_virt_used_gb": round(mem_usage.used / 1024 / 1024 / 1024, 3), "mem_virt_free_gb": round(mem_usage.free / 1024 / 1024 / 1024, 3), "mem_swap_used_pct": swap_used_pct, "mem_swap_used_gb": swap_used_gb, "mem_swap_free_gb": swap_free_gb, "disk_tmp_used_pct": tmp_usage.percent, "disk_tmp_used_gb": round(tmp_usage.used / 1024 / 1024 / 1024, 3), "disk_tmp_free_gb": round(tmp_usage.free / 1024 / 1024 / 1024, 3), # in GB "disk_app_used_pct": app_usage.percent, "disk_app_used_gb": round(app_usage.used / 1024 / 1024 / 1024, 3), "disk_app_free_gb": round(app_usage.free / 1024 / 1024 / 1024, 3), "disk_data_used_pct": data_usage.percent, "disk_data_used_gb": round(data_usage.used / 1024 / 1024 / 1024, 3), "disk_data_free_gb": round(data_usage.free / 1024 / 1024 / 1024, 3), } except Exception: return {} def get_host_immutable_info(host_info: dict[str, Any]) -> dict[str, Any]: return {key: value for key, value in host_info.items() if key in ["guid", "net_mac", "os_family", "cpu_arch"]} def get_host_guid() -> str: return machineid.hashed_id("archivebox") # Example usage if __name__ == "__main__": host_info = { "guid": get_host_guid(), "os": get_os_info(), "vm": get_vm_info(), "net": get_host_network(), "stats": get_host_stats(), } print(host_info) # { # 'guid': '1cd2dd279f8a854...6943f2384437991a', # 'os': { # 'os_arch': 'arm64', # 'os_family': 'darwin', # 'os_platform': 'macOS-14.6.1-arm64-arm-64bit', # 'os_kernel': 'Darwin Kernel Version 23.6.0: Mon Jul 29 21:14:30 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T6000', # 'os_release': 'macOS 14.6.1' # }, # 'vm': {'hw_in_docker': False, 'hw_in_vm': False, 'hw_manufacturer': 'Apple', 'hw_product': 'Mac Studio Mac13,1', 'hw_uuid': '39A12B50-...-...-...-...'}, # 'net': { # 'hostname': 'somehost.sub.example.com', # 'iface': 'en0', # 'mac_address': 'ab:cd:ef:12:34:56', # 'ip_local': '192.168.2.18', # 'ip_public': '123.123.123.123', # 'isp': 'AS-SONICTELECOM', # 'city': 'Berkeley', # 'region': 'California', # 'country': 'United States', # 'dns_server': '192.168.1.1' # }, # 'stats': { # 'cpu_boot_time': '2024-09-24T21:20:16', # 'cpu_count': 10, # 'cpu_load': (2.35693359375, 4.013671875, 4.1171875), # 'mem_virt_used_pct': 66.0, # 'mem_virt_used_gb': 15.109, # 'mem_virt_free_gb': 0.065, # 'mem_swap_used_pct': 89.4, # 'mem_swap_used_gb': 8.045, # 'mem_swap_free_gb': 0.955, # 'disk_tmp_used_pct': 26.0, # 'disk_tmp_used_gb': 113.1, # 'disk_tmp_free_gb': 322.028, # 'disk_app_used_pct': 56.1, # 'disk_app_used_gb': 2138.796, # 'disk_app_free_gb': 1675.996, # 'disk_data_used_pct': 56.1, # 'disk_data_used_gb': 2138.796, # 'disk_data_free_gb': 1675.996 # } # }