# Default search path for every exec resource that does not set its own.
Exec {
  path => '/bin:/usr/bin',
}

#
# Install Hadoop from a tarball.
#
# Fetches the release tarball from the puppet master's package location,
# unpacks it under /usr/local, points /usr/local/hadoop at the unpacked
# release, creates the hadoop user, prepares the local working and HDFS
# storage directories, installs the cluster SSH key and overlays the site
# configuration files.
#
class hadoop {

  # Release coordinates, kept in one place so that a version bump is a
  # one-line change.
  $hadoop_version     = '0.20.1'
  $hadoop_release     = "hadoop-${hadoop_version}"
  $hadoop_tarball     = "/usr/local/${hadoop_release}.tar.gz"
  $hadoop_release_dir = "/usr/local/${hadoop_release}"

  # Fetch the tarball. `creates` stops wget from being run again on every
  # agent run once the tarball is already on disk.
  exec { 'gethadoop':
    command => "wget -nc http://puppetmasterlocation/packages/${hadoop_release}.tar.gz",
    cwd     => '/usr/local',
    path    => '/usr/bin:/usr/sbin:/bin',
    creates => $hadoop_tarball,
  }

  # Untar Hadoop into /usr/local. Guarded by `creates`, so it only runs
  # while the release directory is missing, and always after the fetch.
  exec { 'untar-hadoop':
    command => "tar xzf ${hadoop_tarball}",
    cwd     => '/usr/local',
    creates => $hadoop_release_dir,
    require => Exec['gethadoop'],
  }

  # Create a nice Hadoop symlink after the tarball is untarred.
  file { '/usr/local/hadoop':
    ensure  => link,
    target  => $hadoop_release_dir,
    require => Exec['untar-hadoop'],
  }

  # Create the Hadoop user and group.
  # NOTE(review): the value below is passed to the user type as-is; the
  # Puppet `user` type documents `password` as being in the encrypted format
  # the local system requires -- confirm that a hash is supplied here.
  user { 'hadoop':
    ensure     => 'present',
    shell      => '/bin/bash',
    password   => 'my-haoop-user-password',
    managehome => true,
    home       => '/home/hadoop',
  }

  #
  # Ensure local directories exist for writing tasks.
  # Ownership is applied recursively to everything below each directory.
  #
  file { '/datadrivea/mapred':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  file { '/datadrivea/mapred/local':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  file { '/usr/local/hadoop/logs':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  file { '/tmp/hadoop':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  file { '/tmp/hadoop/mapred':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  file { '/tmp/hadoop/mapred/system/':
    ensure  => 'directory',
    owner   => 'hadoop',
    group   => 'hadoop',
    recurse => true,
  }

  #
  # Ensure writeable hdfs directories exist for storage.
  #
  file { '/datadrivea/hdfs':
    ensure => 'directory',
    owner  => 'hadoop',
    group  => 'hadoop',
  }

  file { '/datadriveb/hdfs':
    ensure => 'directory',
    owner  => 'hadoop',
    group  => 'hadoop',
  }

  # Hand out the authorized key (for hadoop1), which must be installed on
  # all machines in the cluster.
  ssh_authorized_key { 'hadoopsshauthkey':
    ensure => present,
    key    => 'My secret hadoop auth key',
    name   => 'hadoop@myhadoopserver',
    type   => 'ssh-dss',
    user   => 'hadoop',
    target => '/home/hadoop/.ssh/authorized_keys',
  }

  # Copy the Global Relay hadoop config files over the top of the default
  # ones. The command relies on shell features (`&&` and a glob). `rm -f`
  # keeps the exec from failing when wget left no index.html* behind.
  # Ordered after the symlink, the ownership fix and the user because it
  # runs as hadoop inside /usr/local/hadoop/conf.
  exec { 'gethadoopconfig':
    command => 'wget -nd -N -l1 --no-parent -r http://puppetmasterlocation/packages/hadoopconf && rm -f index.html*',
    cwd     => '/usr/local/hadoop/conf',
    user    => 'hadoop',
    group   => 'hadoop',
    path    => '/usr/bin:/usr/sbin:/bin',
    require => [ File['/usr/local/hadoop'], Exec['chown-hadoop'], User['hadoop'] ],
  }

  # Change the ownership of the Hadoop installation. Only runs when the
  # untar step has actually just unpacked a release.
  exec { 'chown-hadoop':
    command     => 'chown -R hadoop:hadoop /usr/local/hadoop*',
    require     => [ User['hadoop'], Exec['gethadoop'] ],
    subscribe   => Exec['untar-hadoop'],
    refreshonly => true,
  }
}