code_SAS_VLM2Vec / adhoc /test_ddp.py
MgGladys's picture
Add files using upload-large-folder tool
1ffaeb6 verified
import os
import torch
import torch.distributed as dist
import socket
def main():
print(f"[Rank {os.environ.get('RANK')}] Hostname: {socket.gethostname()} | Master: {os.environ['MASTER_ADDR']}:{os.environ['MASTER_PORT']}")
rank = int(os.environ["RANK"])
local_rank = int(os.environ["LOCAL_RANK"])
world_size = int(os.environ["WORLD_SIZE"])
print(f"[rank {rank}] hostname: {os.uname().nodename}, MASTER_ADDR: {os.environ['MASTER_ADDR']}")
print(f"Starting rank {rank}, local rank {local_rank}, world size {world_size}")
dist.init_process_group("nccl")
torch.cuda.set_device(local_rank)
print(f"Hello from rank {rank} out of {world_size}")
dist.destroy_process_group()
if __name__ == "__main__":
main()