Source code for xuance.tensorflow.utils.device

import os
import platform
import xuance
import tensorflow as tf


def set_device(expected_device: str):
    """Resolve the computing device to use with TensorFlow 2.

    As a side effect, every call sets the ``TF_USE_LEGACY_KERAS`` environment
    variable to ``"1"``.

    Args:
        expected_device (str): The desired computing device. It is compared
            case-insensitively against "GPU" and "CPU"; any other name is
            reported as invalid and replaced by "CPU".

    Returns:
        str: The assigned computing device. When a GPU is requested and
        TensorFlow lists at least one physical GPU, ``expected_device`` is
        returned unchanged (original casing preserved). In every other case
        the result is "CPU".
    """
    # Configure TensorFlow to use the legacy Keras 2 for tf.keras imports.
    os.environ["TF_USE_LEGACY_KERAS"] = "1"

    requested = expected_device.upper()

    if requested == "GPU":
        if tf.config.list_physical_devices('GPU'):
            # GPU is usable: hand back the caller's own spelling.
            return expected_device
        print("WARNING: GPU for Tensorflow2 is not available, set the device as 'CPU'.")
        return "CPU"

    if requested != "CPU":
        # Unknown device name: warn, then fall back to CPU.
        print(f"WARNING: the device name {expected_device} is invalid, set the device as 'CPU'.")
    return "CPU"
def collect_device_info(
    rank: int = 0,
    agent=None,
) -> dict:
    """Collect runtime device / system info for reproducibility (TensorFlow 2.x).

    The base entries ("Platform", "Python", "XuanCe", "PID", "Rank") are always
    present. TensorFlow-related entries are gathered on a best-effort basis:
    if any probe raises, the entries collected so far are kept and the error
    is recorded under "device_info_error".

    Args:
        rank (int): Value stored verbatim under the "Rank" key.
        agent: Optional object to introspect. If it has a ``device`` attribute
            its ``str()`` is stored as "agent_device"; if it has a ``strategy``
            attribute the strategy's class name is stored as "tf_strategy".

    Returns:
        dict: The collected information. Build-info values and the agent
        device are converted to strings so the result stays JSON-friendly.
    """
    info = {
        "Platform": platform.platform(),
        "Python": platform.python_version(),
        "XuanCe": xuance.__version__,
        "PID": os.getpid(),
        "Rank": rank,
    }

    try:
        info["TensorFlow"] = getattr(tf, "__version__", "unknown")

        # Physical devices visible to TF
        gpus = tf.config.list_physical_devices("GPU")
        cpus = tf.config.list_physical_devices("CPU")
        info["CUDA_Available"] = bool(gpus)
        info["num_gpus"] = len(gpus)
        info["num_cpus"] = len(cpus)

        # GPU details (best-effort; names are not always available).
        # d.name often looks like '/physical_device:GPU:0'.
        info["gpus"] = [
            {"index": i, "name": getattr(d, "name", str(d)), "device_type": "GPU"}
            for i, d in enumerate(gpus)
        ]

        # Logical devices (useful when virtual GPUs / memory limits are set)
        logical_gpus = tf.config.list_logical_devices("GPU")
        info["num_logical_gpus"] = len(logical_gpus)

        # Build info sometimes contains cuda/cudnn versions (not always present)
        try:
            build_info = tf.sysconfig.get_build_info() or {}
        except Exception:
            build_info = {}

        # These keys vary across TF versions; keep it best-effort & JSON-safe
        if build_info:
            info["tf_build_info"] = {k: str(v) for k, v in build_info.items()}

        # Optional: record current visible devices env var (helps debug)
        info["cuda_visible_devices"] = os.environ.get("CUDA_VISIBLE_DEVICES", None)

        # Optional: if the agent exposes its own device/strategy info, store it
        if agent is not None:
            # common patterns: agent.device / agent.strategy
            if hasattr(agent, "device"):
                info["agent_device"] = str(agent.device)
            if hasattr(agent, "strategy"):
                try:
                    info["tf_strategy"] = type(agent.strategy).__name__
                except Exception:
                    # Deliberately best-effort: the strategy name is optional
                    # metadata and must not invalidate the rest of the report.
                    pass

    except Exception as e:
        # Keep the result valid if TF isn't available or anything fails.
        # Only default CUDA_Available to False when GPU detection never
        # completed; a value already detected above must not be overwritten
        # just because a later, unrelated probe failed.
        info.setdefault("CUDA_Available", False)
        info["device_info_error"] = repr(e)

    return info