Dropbox Interview Question
SDE1s
Country: United States
import java.io.File;
import java.util.HashSet;
public class ReadDuplicateFileInDir {
public static void main(String[] args) {
File folder=new File("C:/Users/sumitksi/Downloads");
HashSet<File> al=new HashSet<File>();
getAllFilesRecursevely(al,folder);
for(File file:al)
{System.out.println(file);}
}
public static HashSet<File> getAllFilesRecursevely(HashSet<File> al,File folder){
for(File file:folder.listFiles())
{
if(file.isFile())
if(al.add(file));
//Do nothing.
else
System.out.println("Its duplicate :"+file);
else
getAllFilesRecursevely(al,file);
}
return al;
}
}
import java.io.File;
import java.util.HashSet;
public class ReadDuplicateFileInDir {
public static void main(String[] args) {
File folder=new File("C:/Users/sumitksi/Downloads");
HashSet<File> al=new HashSet<File>();
getAllFilesRecursevely(al,folder);
for(File file:al)
{System.out.println(file);}
}
public static HashSet<File> getAllFilesRecursevely(HashSet<File> al,File folder){
for(File file:folder.listFiles())
{
if(file.isFile())
if(al.add(file));
//Do nothing.
else
System.out.println("Its duplicate :"+file);
else
getAllFilesRecursevely(al,file);
}
return al; }}
def findDup(parentFolder):
    """Group the files found under ``parentFolder`` by their ``hashfile`` value.

    Walks the tree with ``os.walk``, printing each directory as it is visited.
    Returns a dict of the form ``{hash: [paths]}``; every hash that was seen gets
    an entry, including hashes that map to a single path.
    """
    hash_to_paths = {}
    for current_dir, _subdirs, names in os.walk(parentFolder):
        print('Scanning %s...' % current_dir)
        for name in names:
            full_path = os.path.join(current_dir, name)
            # Start a new list the first time a hash is seen, then append either way.
            hash_to_paths.setdefault(hashfile(full_path), []).append(full_path)
    return hash_to_paths
def findDup(parentFolder):
    """Build a ``{hash: [paths]}`` mapping for all files below ``parentFolder``.

    Each directory visited by ``os.walk`` is announced on stdout. Every file's
    path is filed under the value ``hashfile`` returns for it, so paths sharing
    a hash end up in the same list.
    """
    by_hash = {}
    for folder, _children, file_names in os.walk(parentFolder):
        print('Scanning %s...' % folder)
        for file_name in file_names:
            file_path = os.path.join(folder, file_name)
            digest = hashfile(file_path)
            bucket = by_hash.get(digest)
            if bucket is None:
                # First file seen with this hash.
                by_hash[digest] = [file_path]
            else:
                bucket.append(file_path)
    return by_hash
def findDup(root):
    """Group the files found under ``root`` by their ``hashfile`` value.

    Walks the directory tree rooted at ``root`` with ``os.walk`` and prints each
    directory as it is scanned.

    Args:
        root: path of the directory whose tree is scanned.

    Returns:
        A dict of the form ``{hash: [paths]}``. Every hash that was seen has an
        entry, so a list with more than one path marks files sharing a hash.
    """
    # Dups in format {hash:[names]}
    dups = {}
    # Walk the parameter itself; the body previously referenced an undefined name.
    for dirName, subdirs, fileList in os.walk(root):
        print('Scanning %s...' % dirName)
        for filename in fileList:
            # Get the path to the file
            path = os.path.join(dirName, filename)
            # Calculate hash (hashfile is defined outside this function)
            file_hash = hashfile(path)
            # Add or append the file path
            if file_hash in dups:
                dups[file_hash].append(path)
            else:
                dups[file_hash] = [path]
    return dups
Use recursion to traverse all the files in the folder. For each file, generate its MD5 checksum and use a hash table to keep track of duplicates, using the MD5 checksum as the key and its frequency as the value.
- Inucoder May 08, 2016