PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/provider/common/bootstrap.go

https://github.com/didrocks/juju
Go | 408 lines | 292 code | 36 blank | 80 comment | 66 complexity | b5769c359b0427cca293fc41c718375f MD5 | raw file
Possible License(s): AGPL-3.0
  1. // Copyright 2013 Canonical Ltd.
  2. // Licensed under the AGPLv3, see LICENCE file for details.
  3. package common
  4. import (
  5. "fmt"
  6. "io"
  7. "os"
  8. "path"
  9. "strings"
  10. "time"
  11. "github.com/juju/loggo"
  12. "github.com/juju/utils"
  13. "github.com/juju/utils/parallel"
  14. "github.com/juju/utils/shell"
  15. coreCloudinit "github.com/juju/juju/cloudinit"
  16. "github.com/juju/juju/cloudinit/sshinit"
  17. "github.com/juju/juju/environs"
  18. "github.com/juju/juju/environs/bootstrap"
  19. "github.com/juju/juju/environs/cloudinit"
  20. "github.com/juju/juju/environs/config"
  21. "github.com/juju/juju/instance"
  22. "github.com/juju/juju/network"
  23. coretools "github.com/juju/juju/tools"
  24. "github.com/juju/juju/utils/ssh"
  25. )
  26. var logger = loggo.GetLogger("juju.provider.common")
  27. // Bootstrap is a common implementation of the Bootstrap method defined on
  28. // environs.Environ; we strongly recommend that this implementation be used
  29. // when writing a new provider.
  30. func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams) (err error) {
  31. // TODO make safe in the case of racing Bootstraps
  32. // If two Bootstraps are called concurrently, there's
  33. // no way to make sure that only one succeeds.
  34. var inst instance.Instance
  35. defer func() { handleBootstrapError(err, ctx, inst, env) }()
  36. // First thing, ensure we have tools otherwise there's no point.
  37. selectedTools, err := EnsureBootstrapTools(ctx, env, config.PreferredSeries(env.Config()), args.Constraints.Arch)
  38. if err != nil {
  39. return err
  40. }
  41. // Get the bootstrap SSH client. Do this early, so we know
  42. // not to bother with any of the below if we can't finish the job.
  43. client := ssh.DefaultClient
  44. if client == nil {
  45. // This should never happen: if we don't have OpenSSH, then
  46. // go.crypto/ssh should be used with an auto-generated key.
  47. return fmt.Errorf("no SSH client available")
  48. }
  49. privateKey, err := GenerateSystemSSHKey(env)
  50. if err != nil {
  51. return err
  52. }
  53. machineConfig := environs.NewBootstrapMachineConfig(privateKey)
  54. fmt.Fprintln(ctx.GetStderr(), "Launching instance")
  55. inst, hw, _, err := env.StartInstance(environs.StartInstanceParams{
  56. Constraints: args.Constraints,
  57. Tools: selectedTools,
  58. MachineConfig: machineConfig,
  59. Placement: args.Placement,
  60. })
  61. if err != nil {
  62. return fmt.Errorf("cannot start bootstrap instance: %v", err)
  63. }
  64. fmt.Fprintf(ctx.GetStderr(), " - %s\n", inst.Id())
  65. machineConfig.InstanceId = inst.Id()
  66. machineConfig.HardwareCharacteristics = hw
  67. err = bootstrap.SaveState(
  68. env.Storage(),
  69. &bootstrap.BootstrapState{
  70. StateInstances: []instance.Id{inst.Id()},
  71. })
  72. if err != nil {
  73. return fmt.Errorf("cannot save state: %v", err)
  74. }
  75. return FinishBootstrap(ctx, client, inst, machineConfig)
  76. }
  77. // GenerateSystemSSHKey creates a new key for the system identity. The
  78. // authorized_keys in the environment config is updated to include the public
  79. // key for the generated key.
  80. func GenerateSystemSSHKey(env environs.Environ) (privateKey string, err error) {
  81. logger.Debugf("generate a system ssh key")
  82. // Create a new system ssh key and add that to the authorized keys.
  83. privateKey, publicKey, err := ssh.GenerateKey(config.JujuSystemKey)
  84. if err != nil {
  85. return "", fmt.Errorf("failed to create system key: %v", err)
  86. }
  87. authorized_keys := config.ConcatAuthKeys(env.Config().AuthorizedKeys(), publicKey)
  88. newConfig, err := env.Config().Apply(map[string]interface{}{
  89. config.AuthKeysConfig: authorized_keys,
  90. })
  91. if err != nil {
  92. return "", fmt.Errorf("failed to create new config: %v", err)
  93. }
  94. if err = env.SetConfig(newConfig); err != nil {
  95. return "", fmt.Errorf("failed to set new config: %v", err)
  96. }
  97. return privateKey, nil
  98. }
  99. // handleBootstrapError cleans up after a failed bootstrap.
  100. func handleBootstrapError(err error, ctx environs.BootstrapContext, inst instance.Instance, env environs.Environ) {
  101. if err == nil {
  102. return
  103. }
  104. logger.Errorf("bootstrap failed: %v", err)
  105. ch := make(chan os.Signal, 1)
  106. ctx.InterruptNotify(ch)
  107. defer ctx.StopInterruptNotify(ch)
  108. defer close(ch)
  109. go func() {
  110. for _ = range ch {
  111. fmt.Fprintln(ctx.GetStderr(), "Cleaning up failed bootstrap")
  112. }
  113. }()
  114. if inst != nil {
  115. fmt.Fprintln(ctx.GetStderr(), "Stopping instance...")
  116. if stoperr := env.StopInstances(inst.Id()); stoperr != nil {
  117. logger.Errorf("cannot stop failed bootstrap instance %q: %v", inst.Id(), stoperr)
  118. } else {
  119. // set to nil so we know we can safely delete the state file
  120. inst = nil
  121. }
  122. }
  123. // We only delete the bootstrap state file if either we didn't
  124. // start an instance, or we managed to cleanly stop it.
  125. if inst == nil {
  126. if rmerr := bootstrap.DeleteStateFile(env.Storage()); rmerr != nil {
  127. logger.Errorf("cannot delete bootstrap state file: %v", rmerr)
  128. }
  129. }
  130. }
  131. // FinishBootstrap completes the bootstrap process by connecting
  132. // to the instance via SSH and carrying out the cloud-config.
  133. //
  134. // Note: FinishBootstrap is exposed so it can be replaced for testing.
  135. var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error {
  136. interrupted := make(chan os.Signal, 1)
  137. ctx.InterruptNotify(interrupted)
  138. defer ctx.StopInterruptNotify(interrupted)
  139. // Each attempt to connect to an address must verify the machine is the
  140. // bootstrap machine by checking its nonce file exists and contains the
  141. // nonce in the MachineConfig. This also blocks sshinit from proceeding
  142. // until cloud-init has completed, which is necessary to ensure apt
  143. // invocations don't trample each other.
  144. nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile))
  145. checkNonceCommand := fmt.Sprintf(`
  146. noncefile=%s
  147. if [ ! -e "$noncefile" ]; then
  148. echo "$noncefile does not exist" >&2
  149. exit 1
  150. fi
  151. content=$(cat $noncefile)
  152. if [ "$content" != %s ]; then
  153. echo "$noncefile contents do not match machine nonce" >&2
  154. exit 1
  155. fi
  156. `, nonceFile, utils.ShQuote(machineConfig.MachineNonce))
  157. addr, err := waitSSH(
  158. ctx,
  159. interrupted,
  160. client,
  161. checkNonceCommand,
  162. inst,
  163. machineConfig.Config.BootstrapSSHOpts(),
  164. )
  165. if err != nil {
  166. return err
  167. }
  168. // Bootstrap is synchronous, and will spawn a subprocess
  169. // to complete the procedure. If the user hits Ctrl-C,
  170. // SIGINT is sent to the foreground process attached to
  171. // the terminal, which will be the ssh subprocess at this
  172. // point. For that reason, we do not call StopInterruptNotify
  173. // until this function completes.
  174. cloudcfg := coreCloudinit.New()
  175. if err := cloudinit.ConfigureJuju(machineConfig, cloudcfg); err != nil {
  176. return err
  177. }
  178. configScript, err := sshinit.ConfigureScript(cloudcfg)
  179. if err != nil {
  180. return err
  181. }
  182. script := shell.DumpFileOnErrorScript(machineConfig.CloudInitOutputLog) + configScript
  183. return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
  184. Host: "ubuntu@" + addr,
  185. Client: client,
  186. Config: cloudcfg,
  187. ProgressWriter: ctx.GetStderr(),
  188. })
  189. }
// addresser is the subset of instance.Instance that waitSSH needs:
// refreshing and reporting an instance's network addresses.
type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}
// hostChecker repeatedly attempts to run checkHostScript on a single
// address over SSH; see hostChecker.loop.
type hostChecker struct {
	addr   network.Address
	client ssh.Client

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
// Close implements io.Closer, as required by parallel.Try.
// It is a no-op: shutdown is signalled via the closed channel instead.
func (*hostChecker) Close() error {
	return nil
}
// loop keeps running checkHostScript on the host until it succeeds, the
// checker's closed channel is closed, or the Try signals dying. It has
// the signature required by parallel.Try.Start and returns hc itself as
// the result, alongside the last connection error (nil on success).
func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	// The value of connectSSH is taken outside the goroutine that may outlive
	// hostChecker.loop, or we evoke the wrath of the race detector.
	connectSSH := connectSSH
	// done is buffered so an in-flight attempt can deliver its result
	// (and its goroutine exit) even after loop has already returned.
	done := make(chan error, 1)
	var lastErr error
	for {
		go func() {
			done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				// The check script ran cleanly: this host is confirmed.
				return hc, nil
			}
		}
		// The attempt failed; wait checkDelay before retrying, but
		// return immediately if told to stop in the meantime.
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}
// parallelHostChecker runs a hostChecker for each candidate address in
// parallel via an embedded parallel.Try; the first checker to succeed
// provides the Try's result.
type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
  258. func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
  259. for _, addr := range addrs {
  260. if _, ok := p.active[addr]; ok {
  261. continue
  262. }
  263. fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
  264. closed := make(chan struct{})
  265. hc := &hostChecker{
  266. addr: addr,
  267. client: p.client,
  268. checkDelay: p.checkDelay,
  269. checkHostScript: p.checkHostScript,
  270. closed: closed,
  271. }
  272. p.active[addr] = closed
  273. p.Start(hc.loop)
  274. }
  275. }
  276. // Close prevents additional functions from being added to
  277. // the Try, and tells each active hostChecker to exit.
  278. func (p *parallelHostChecker) Close() error {
  279. // We signal each checker to stop and wait for them
  280. // each to complete; this allows us to get the error,
  281. // as opposed to when using try.Kill which does not
  282. // wait for the functions to complete.
  283. p.Try.Close()
  284. for _, ch := range p.active {
  285. close(ch)
  286. }
  287. return nil
  288. }
  289. // connectSSH is called to connect to the specified host and
  290. // execute the "checkHostScript" bash script on it.
  291. var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
  292. cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
  293. cmd.Stdin = strings.NewReader(checkHostScript)
  294. output, err := cmd.CombinedOutput()
  295. if err != nil && len(output) > 0 {
  296. err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
  297. }
  298. return err
  299. }
// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH attempts on all addresses returned by the instance
// in parallel; the first succeeding one wins. We ensure that
// private addresses are for the correct machine by checking
// the presence of a file on the machine that contains the
// machine's nonce. The "checkHostScript" is a bash script
// that performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	// Fires immediately (duration 0) so the first address poll happens
	// at once; subsequently re-armed with timeout.AddressesDelay.
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the tomb is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			// Re-arm the poll timer, then refresh the instance and feed
			// any newly discovered addresses to the parallel checker.
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			// Overall deadline expired: stop all checkers and compose
			// the most informative error message available.
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			// parallel.ErrStopped just means the Try was shut down;
			// it adds no information, so omit it.
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			// Some hostChecker finished; on success the Try's result is
			// the winning checker, which carries the usable address.
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
  363. // EnsureBootstrapTools finds tools, syncing with an external tools source as
  364. // necessary; it then selects the newest tools to bootstrap with, and sets
  365. // agent-version.
  366. func EnsureBootstrapTools(ctx environs.BootstrapContext, env environs.Environ, series string, arch *string) (coretools.List, error) {
  367. possibleTools, err := bootstrap.EnsureToolsAvailability(ctx, env, series, arch)
  368. if err != nil {
  369. return nil, err
  370. }
  371. return bootstrap.SetBootstrapTools(env, possibleTools)
  372. }