There appears to be a security flaw with using HDFS NFSGateway (I am using HDP 3.1.4) and I'm not sure how to address it (not very familiar with networking or security etc). Here is an example situation:
- Some user with root access on their own machine is given access to the HDFS via NFSGateway (ie. has it mounted to their machine)
- They then create a local user with the same uid that the user they want to impersonate has on the Hadoop cluster nodes
- They
su
to that user and access the user's mounted version of their hdfs:///user/<username>
folder on their machine (users they would not necessarily have access to as root on the client machine)
Eg.
[root@HW006 user]# #Here is a mounted HDFS NFSGateway
[root@HW006 user]# ls -lh /hdpnfs/user/
total 5.0K
drwx------. 3 1008 1004 96 Sep 11 14:53 accumulo
drwxrwx---. 9 1019 1004 288 Sep 11 15:17 ambari-qa
drwxr-xr-x. 3 1018 1006 96 Sep 11 14:54 druid
drwxr-xr-x. 2 1010 1004 64 Sep 11 14:53 hbase
drwxr-xr-x. 2 1000 1004 64 Sep 11 14:52 hive
drwxrwxr-x. 3 1013 1004 96 Sep 11 15:00 oozie
drwxrwxr-x. 2 1009 1004 64 Sep 11 15:07 spark
drwxr-xr-x. 3 testuser001 hdptestusers 96 Sep 12 13:44 testuser001
drwxrwx---. 4 1001 1006 128 Sep 11 14:50 yarn-ats
drwxr-xr-x. 5 1015 1004 160 Sep 11 14:51 zeppelin
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]# # Now I'll make a user to impersonate the zeppelin user (uid 1015)
[root@HW006 user]# useradd --uid 1015 --no-create-home imposteruser
[root@HW006 user]# ls -lh
total 5.0K
drwx------. 3 1008 1004 96 Sep 11 14:53 accumulo
drwxrwx---. 9 1019 1004 288 Sep 11 15:17 ambari-qa
drwxr-xr-x. 3 1018 1006 96 Sep 11 14:54 druid
drwxr-xr-x. 2 1010 1004 64 Sep 11 14:53 hbase
drwxr-xr-x. 2 1000 1004 64 Sep 11 14:52 hive
drwxrwxr-x. 3 1013 1004 96 Sep 11 15:00 oozie
drwxrwxr-x. 2 1009 1004 64 Sep 11 15:07 spark
drwxr-xr-x. 3 testuser001 hdptestusers 96 Sep 12 13:44 testuser001
drwxrwx---. 4 1001 1006 128 Sep 11 14:50 yarn-ats
drwxr-xr-x. 5 imposteruser 1004 160 Sep 11 14:51 zeppelin
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]# # Now let's see if imposteruser is able to write to the hdfs:///user/zeppelin directory
[root@HW006 user]# su imposteruser
bash-4.2$ pwd
/hdpnfs/user
bash-4.2$ cd zeppelin/
bash-4.2$ pwd
/hdpnfs/user/zeppelin
bash-4.2$ ls
conf notebook test
bash-4.2$ touch tmp.txt
bash-4.2$ ls -lh
total 1.5K
drwxr-xr-x. 3 imposteruser 1004 96 Sep 12 11:50 conf
drwxr-xr-x. 4 imposteruser 1004 128 Sep 11 14:51 notebook
drwxr-xr-x. 2 imposteruser 1004 64 Sep 11 14:51 test
-rw-r--r--. 1 imposteruser 1004 0 Sep 13 12:12 tmp.txt
bash-4.2$ exit
exit
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]# # Let's go to a cluster node and see how it looks from there
[root@HW006 user]# ssh hw001
root@hw001's password:
[root@HW001 ~]# hadoop fs -ls /user/zeppelin
Found 4 items
drwxr-xr-x - zeppelin hdfs 0 2019-09-12 11:50 /user/zeppelin/conf
drwxr-xr-x - zeppelin hdfs 0 2019-09-11 14:51 /user/zeppelin/notebook
drwxr-xr-x - zeppelin hdfs 0 2019-09-11 14:51 /user/zeppelin/test
-rw-r--r-- 3 zeppelin hdfs 0 2019-09-13 12:12 /user/zeppelin/tmp.txt
[root@HW006 user]#
[root@HW006 user]#
[root@HW006 user]# # So you can see that it looks as if the user zeppelin created the tmp.txt file, when really it was the imposteruser on the NFSGateway-accessing server.
Is there any recommended / best-practice way to stop this (though still giving users on that client machine NFS access to the HDFS folders they need to access)?