PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/provider/common/bootstrap.go

https://github.com/frankban/juju
Go | 386 lines | 274 code | 33 blank | 79 comment | 53 complexity | e385b2ab7d9e6db3b421173d0605b968 MD5 | raw file
Possible License(s): AGPL-3.0
  1. // Copyright 2013 Canonical Ltd.
  2. // Licensed under the AGPLv3, see LICENCE file for details.
  3. package common
  4. import (
  5. "fmt"
  6. "io"
  7. "os"
  8. "path"
  9. "strings"
  10. "sync"
  11. "time"
  12. "github.com/juju/errors"
  13. "github.com/juju/loggo"
  14. "github.com/juju/utils"
  15. "github.com/juju/utils/parallel"
  16. "github.com/juju/utils/shell"
  17. "github.com/juju/juju/agent"
  18. coreCloudinit "github.com/juju/juju/cloudinit"
  19. "github.com/juju/juju/cloudinit/sshinit"
  20. "github.com/juju/juju/environs"
  21. "github.com/juju/juju/environs/cloudinit"
  22. "github.com/juju/juju/environs/config"
  23. "github.com/juju/juju/instance"
  24. "github.com/juju/juju/network"
  25. coretools "github.com/juju/juju/tools"
  26. "github.com/juju/juju/utils/ssh"
  27. )
// logger is the package-level logger, obtained from loggo under the
// name "juju.provider.common".
var logger = loggo.GetLogger("juju.provider.common")
  29. // Bootstrap is a common implementation of the Bootstrap method defined on
  30. // environs.Environ; we strongly recommend that this implementation be used
  31. // when writing a new provider.
  32. func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
  33. ) (arch, series string, _ environs.BootstrapFinalizer, err error) {
  34. if result, series, finalizer, err := BootstrapInstance(ctx, env, args); err == nil {
  35. return *result.Hardware.Arch, series, finalizer, nil
  36. } else {
  37. return "", "", nil, err
  38. }
  39. }
  40. // BootstrapInstance creates a new instance with the series and architecture
  41. // of its choice, constrained to those of the available tools, and
  42. // returns the instance result, series, and a function that
  43. // must be called to finalize the bootstrap process by transferring
  44. // the tools and installing the initial Juju state server.
  45. // This method is called by Bootstrap above, which implements environs.Bootstrap, but
  46. // is also exported so that providers can manipulate the started instance.
  47. func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
  48. ) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) {
  49. // TODO make safe in the case of racing Bootstraps
  50. // If two Bootstraps are called concurrently, there's
  51. // no way to make sure that only one succeeds.
  52. // First thing, ensure we have tools otherwise there's no point.
  53. series = config.PreferredSeries(env.Config())
  54. availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series})
  55. if err != nil {
  56. return nil, "", nil, err
  57. }
  58. // Get the bootstrap SSH client. Do this early, so we know
  59. // not to bother with any of the below if we can't finish the job.
  60. client := ssh.DefaultClient
  61. if client == nil {
  62. // This should never happen: if we don't have OpenSSH, then
  63. // go.crypto/ssh should be used with an auto-generated key.
  64. return nil, "", nil, fmt.Errorf("no SSH client available")
  65. }
  66. machineConfig, err := environs.NewBootstrapMachineConfig(args.Constraints, series)
  67. if err != nil {
  68. return nil, "", nil, err
  69. }
  70. machineConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
  71. machineConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
  72. maybeSetBridge := func(mcfg *cloudinit.MachineConfig) {
  73. // If we need to override the default bridge name, do it now. When
  74. // args.ContainerBridgeName is empty, the default names for LXC
  75. // (lxcbr0) and KVM (virbr0) will be used.
  76. if args.ContainerBridgeName != "" {
  77. logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
  78. if mcfg.AgentEnvironment == nil {
  79. mcfg.AgentEnvironment = make(map[string]string)
  80. }
  81. mcfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
  82. }
  83. }
  84. maybeSetBridge(machineConfig)
  85. fmt.Fprintln(ctx.GetStderr(), "Launching instance")
  86. result, err := env.StartInstance(environs.StartInstanceParams{
  87. Constraints: args.Constraints,
  88. Tools: availableTools,
  89. MachineConfig: machineConfig,
  90. Placement: args.Placement,
  91. })
  92. if err != nil {
  93. return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
  94. }
  95. fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())
  96. finalize := func(ctx environs.BootstrapContext, mcfg *cloudinit.MachineConfig) error {
  97. mcfg.InstanceId = result.Instance.Id()
  98. mcfg.HardwareCharacteristics = result.Hardware
  99. if err := environs.FinishMachineConfig(mcfg, env.Config()); err != nil {
  100. return err
  101. }
  102. maybeSetBridge(mcfg)
  103. return FinishBootstrap(ctx, client, result.Instance, mcfg)
  104. }
  105. return result, series, finalize, nil
  106. }
  107. // FinishBootstrap completes the bootstrap process by connecting
  108. // to the instance via SSH and carrying out the cloud-config.
  109. //
  110. // Note: FinishBootstrap is exposed so it can be replaced for testing.
  111. var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error {
  112. interrupted := make(chan os.Signal, 1)
  113. ctx.InterruptNotify(interrupted)
  114. defer ctx.StopInterruptNotify(interrupted)
  115. // Each attempt to connect to an address must verify the machine is the
  116. // bootstrap machine by checking its nonce file exists and contains the
  117. // nonce in the MachineConfig. This also blocks sshinit from proceeding
  118. // until cloud-init has completed, which is necessary to ensure apt
  119. // invocations don't trample each other.
  120. nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile))
  121. checkNonceCommand := fmt.Sprintf(`
  122. noncefile=%s
  123. if [ ! -e "$noncefile" ]; then
  124. echo "$noncefile does not exist" >&2
  125. exit 1
  126. fi
  127. content=$(cat $noncefile)
  128. if [ "$content" != %s ]; then
  129. echo "$noncefile contents do not match machine nonce" >&2
  130. exit 1
  131. fi
  132. `, nonceFile, utils.ShQuote(machineConfig.MachineNonce))
  133. addr, err := waitSSH(
  134. ctx,
  135. interrupted,
  136. client,
  137. checkNonceCommand,
  138. inst,
  139. machineConfig.Config.BootstrapSSHOpts(),
  140. )
  141. if err != nil {
  142. return err
  143. }
  144. return ConfigureMachine(ctx, client, addr, machineConfig)
  145. }
  146. func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, machineConfig *cloudinit.MachineConfig) error {
  147. // Bootstrap is synchronous, and will spawn a subprocess
  148. // to complete the procedure. If the user hits Ctrl-C,
  149. // SIGINT is sent to the foreground process attached to
  150. // the terminal, which will be the ssh subprocess at this
  151. // point. For that reason, we do not call StopInterruptNotify
  152. // until this function completes.
  153. cloudcfg := coreCloudinit.New()
  154. cloudcfg.SetAptUpdate(machineConfig.EnableOSRefreshUpdate)
  155. cloudcfg.SetAptUpgrade(machineConfig.EnableOSUpgrade)
  156. udata, err := cloudinit.NewUserdataConfig(machineConfig, cloudcfg)
  157. if err != nil {
  158. return err
  159. }
  160. if err := udata.ConfigureJuju(); err != nil {
  161. return err
  162. }
  163. configScript, err := sshinit.ConfigureScript(cloudcfg)
  164. if err != nil {
  165. return err
  166. }
  167. script := shell.DumpFileOnErrorScript(machineConfig.CloudInitOutputLog) + configScript
  168. return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
  169. Host: "ubuntu@" + host,
  170. Client: client,
  171. Config: cloudcfg,
  172. ProgressWriter: ctx.GetStderr(),
  173. })
  174. }
// addresser is the subset of instance behaviour that waitSSH needs: the
// ability to refresh and then list an instance's network addresses.
type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}
// hostChecker repeatedly runs checkHostScript over SSH against a single
// address (see loop) until the script succeeds or the checker is told
// to stop.
type hostChecker struct {
	// addr is the address this checker connects to.
	addr network.Address

	// client is the SSH client used for each connection attempt.
	client ssh.Client

	// wg is marked Done when loop returns.
	wg *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
  198. // Close implements io.Closer, as required by parallel.Try.
  199. func (*hostChecker) Close() error {
  200. return nil
  201. }
  202. func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
  203. defer hc.wg.Done()
  204. // The value of connectSSH is taken outside the goroutine that may outlive
  205. // hostChecker.loop, or we evoke the wrath of the race detector.
  206. connectSSH := connectSSH
  207. done := make(chan error, 1)
  208. var lastErr error
  209. for {
  210. go func() {
  211. done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
  212. }()
  213. select {
  214. case <-hc.closed:
  215. return hc, lastErr
  216. case <-dying:
  217. return hc, lastErr
  218. case lastErr = <-done:
  219. if lastErr == nil {
  220. return hc, nil
  221. }
  222. }
  223. select {
  224. case <-hc.closed:
  225. case <-dying:
  226. case <-time.After(hc.checkDelay):
  227. }
  228. }
  229. }
// parallelHostChecker runs one hostChecker per known address on top of an
// embedded parallel.Try, so that all addresses are tested concurrently.
type parallelHostChecker struct {
	*parallel.Try

	// client is the SSH client handed to every hostChecker.
	client ssh.Client

	// stderr receives the "Attempting to connect" progress messages.
	stderr io.Writer

	// wg tracks the hostChecker loops that have been started; waitSSH
	// waits on it before returning.
	wg sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
  246. func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
  247. for _, addr := range addrs {
  248. if _, ok := p.active[addr]; ok {
  249. continue
  250. }
  251. fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
  252. closed := make(chan struct{})
  253. hc := &hostChecker{
  254. addr: addr,
  255. client: p.client,
  256. checkDelay: p.checkDelay,
  257. checkHostScript: p.checkHostScript,
  258. closed: closed,
  259. wg: &p.wg,
  260. }
  261. p.wg.Add(1)
  262. p.active[addr] = closed
  263. p.Start(hc.loop)
  264. }
  265. }
  266. // Close prevents additional functions from being added to
  267. // the Try, and tells each active hostChecker to exit.
  268. func (p *parallelHostChecker) Close() error {
  269. // We signal each checker to stop and wait for them
  270. // each to complete; this allows us to get the error,
  271. // as opposed to when using try.Kill which does not
  272. // wait for the functions to complete.
  273. p.Try.Close()
  274. for _, ch := range p.active {
  275. close(ch)
  276. }
  277. return nil
  278. }
  279. // connectSSH is called to connect to the specified host and
  280. // execute the "checkHostScript" bash script on it.
  281. var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
  282. cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
  283. cmd.Stdin = strings.NewReader(checkHostScript)
  284. output, err := cmd.CombinedOutput()
  285. if err != nil && len(output) > 0 {
  286. err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
  287. }
  288. return err
  289. }
  290. // waitSSH waits for the instance to be assigned a routable
  291. // address, then waits until we can connect to it via SSH.
  292. //
  293. // waitSSH attempts on all addresses returned by the instance
  294. // in parallel; the first succeeding one wins. We ensure that
  295. // private addresses are for the correct machine by checking
  296. // the presence of a file on the machine that contains the
  297. // machine's nonce. The "checkHostScript" is a bash script
  298. // that performs this file check.
  299. func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
  300. globalTimeout := time.After(timeout.Timeout)
  301. pollAddresses := time.NewTimer(0)
  302. // checker checks each address in a loop, in parallel,
  303. // until one succeeds, the global timeout is reached,
  304. // or the tomb is killed.
  305. checker := parallelHostChecker{
  306. Try: parallel.NewTry(0, nil),
  307. client: client,
  308. stderr: ctx.GetStderr(),
  309. active: make(map[network.Address]chan struct{}),
  310. checkDelay: timeout.RetryDelay,
  311. checkHostScript: checkHostScript,
  312. }
  313. defer checker.wg.Wait()
  314. defer checker.Kill()
  315. fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
  316. for {
  317. select {
  318. case <-pollAddresses.C:
  319. pollAddresses.Reset(timeout.AddressesDelay)
  320. if err := inst.Refresh(); err != nil {
  321. return "", fmt.Errorf("refreshing addresses: %v", err)
  322. }
  323. addresses, err := inst.Addresses()
  324. if err != nil {
  325. return "", fmt.Errorf("getting addresses: %v", err)
  326. }
  327. checker.UpdateAddresses(addresses)
  328. case <-globalTimeout:
  329. checker.Close()
  330. lastErr := checker.Wait()
  331. format := "waited for %v "
  332. args := []interface{}{timeout.Timeout}
  333. if len(checker.active) == 0 {
  334. format += "without getting any addresses"
  335. } else {
  336. format += "without being able to connect"
  337. }
  338. if lastErr != nil && lastErr != parallel.ErrStopped {
  339. format += ": %v"
  340. args = append(args, lastErr)
  341. }
  342. return "", fmt.Errorf(format, args...)
  343. case <-interrupted:
  344. return "", fmt.Errorf("interrupted")
  345. case <-checker.Dead():
  346. result, err := checker.Result()
  347. if err != nil {
  348. return "", err
  349. }
  350. return result.(*hostChecker).addr.Value, nil
  351. }
  352. }
  353. }