# Run the enable-dns-conflict-hosts playbook, limited (-l) to ncn-w001.
ansible-playbook /opt/cray/crayctl/ansible_framework/main/enable-dns-conflict-hosts.yml -l ncn-w001

# Once you are booted back into the 1.3 install:
systemctl start dhcpd

# Send an IPMI "chassis power on" over lanplus to every ncn-m00[1-3]-mgmt,
# ncn-s00[1-3]-mgmt and ncn-w00[2-3]-mgmt host.
# $username and $password are not set in this snippet; export them first.
for mgmt_host in ncn-{m,s}00{1..3}-mgmt ncn-w00{2..3}-mgmt; do
  echo "------$mgmt_host--------"
  # Quoted so credentials containing spaces or glob characters survive intact.
  ipmitool -I lanplus -U "$username" -P "$password" -H "$mgmt_host" chassis power on
done

# Only run the disable playbook if dhcpd stopped cleanly.
systemctl stop dhcpd \
  && ansible-playbook /opt/cray/crayctl/ansible_framework/main/disable-dns-conflict-hosts.yml -l ncn-w001

# The host pattern is quoted so the shell cannot expand it against files in
# the current directory before ansible sees it.
ansible 'ncn*' -m ping
# Wait until all nodes are up before continuing.
kubectl get nodes

# Print how many lines of `kubectl get pods -A` mention each state.
# grep -c prints the count itself; it exits 1 when the count is 0, which is
# the expected result for Crash/Image on a healthy system.
kubectl get pods -A | grep -c Running
kubectl get pods -A | grep -c Completed
kubectl get pods -A | grep -c Crash
kubectl get pods -A | grep -c Image

# Run `ceph health` on every host matching ncn-m*. The pattern is quoted so
# the shell cannot expand it against files in the current directory.
ansible 'ncn-m*' -m command -a 'ceph health'
# If things are not quite working, try starting these services back up on the affected nodes.
# Restart dead OSDs
# NOTE(review): OSD ids 0 3 6 9 and host ncn-s003 look like the values for one
# particular storage node; confirm and adjust for the node being repaired.

# Show each OSD unit's "Active:" line first. Matching the capitalised field
# name also reports units in the "failed" state, which a lowercase "active"
# pattern would silently skip.
for osd_id in 0 3 6 9; do
  systemctl status "ceph-osd@${osd_id}.service" | grep 'Active:'
done

# `systemctl start` prints nothing on success, so its output is not filtered;
# any error it reports stays visible.
for osd_id in 0 3 6 9; do
  systemctl start "ceph-osd@${osd_id}.service"
done

# Do the same for mgr and mds services
for daemon in mgr mds; do
  systemctl status "ceph-${daemon}@ncn-s003.service" | grep 'Active:'
done
for daemon in mgr mds; do
  systemctl start "ceph-${daemon}@ncn-s003.service"
done

# Final overall cluster check.
ceph status