Sapient Corporation Interview Question
Senior Software Development EngineersCountry: India
Interview Type: Written Test
import java.io.File;
//The following class is written to extract all the files from the give directory
public class DirectoryReader {
static long startTime,timecheck,elapsedTime;
static int spc_count = -1;
static String option,pattern;
static Long timeout;
static void Process(File aFile,String option, String pattern, Long timeout) {
spc_count++;
String spcs = "";
boolean fileName=aFile.getName().matches(pattern);
timecheck = System.currentTimeMillis();
elapsedTime = timecheck - startTime;
for (int i = 0; i < spc_count; i++)
spcs += " ";
if (aFile.isFile()) {
System.out.println(spcs + "[FILE] " + aFile.getName()
+ aFile.getPath());
}
else if (aFile.isDirectory()) {
System.out.println(spcs + "[DIR] " + aFile.getName());
File[] listOfFiles = aFile.listFiles();
if (listOfFiles != null) {
for (int i = 0; i < listOfFiles.length; i++)
Process(listOfFiles[i],option,pattern,timeout);
} else {
System.out.println(spcs + " [ACCESS DENIED]");
}}
spc_count--;
}
public static void main(String args[]){
System.out.println("**************Top Folder Name*********************"+args[0]);
System.out.println("**************Search Option*********************"+args[1]);
System.out.println("**************Search Pattern*********************"+args[2]);
System.out.println("**************Timeout in seconds********************"+args[3]);
startTime=System.currentTimeMillis();
if (args.length > 0){
option=args[1];
pattern=args[2];
timeout=Long.parseLong(args[3]);
File aFile = new File(args[0]);
Process(aFile,args[1],args[2],timeout);}
}
}
I think recursive call should be something like this, but i have not completed .
The best solution could multithreading the folder processing as the filesystem is way slower than the processor handling multiple threads and any locking overhead so it will throw all hardrive request at once for listing files so the harddrive will reading will be optimized based on all the open read requests.
It should either keep track of all the threads that are still searching and stop once the time is up.
Again this is high performance because it is using the much resources from memory and CPU as much as it can while it waits for the FileSystem to return the list of files and folders.
I put a max threads in order to limit how many threads are created.
class FileSearcher {
private Timer timer = null;
private bool stopProcessing = false;
private ICollection<SearchResult> results {get;set;}
private lockObject = new object();
// Determines which type of result you want
private ResultType SearchResultType { get; set; }
private RegEx regExFilter = null;
// Making a limit in order to don't run out of memory
// This could be also calculate based on how much
// memory we want to consume but for now is 20
const int MaxNumberOfThreads = 20;
private int numberOfThreads = 0;
// Lock to increment and decrement the number of threads
private object numberOfThreadsLock = new object();
// This is the delegate function template in order to invoke asynchronously
private delegate SearchFolderCoreDelegate (Directory rooFolder);
/// Enumerable to determine if it's a file or folder (not sure if it matter in the problem)
enum ResultType
{
Both = 0
Folder = 1,
File = 2,
}
// Helper class to store the search results
public class SearchResult
{
ResultType ResultType { get; set; }
string Path { get; set; }
}
// Call back when it times out
private void TimeOutCallBack(object sender)
{
stopProcessing = true;
}
IEnumerable<SearchResult> SearchFolders(
string rootFolderName,
string filter,
ResultType searchResultType
TimeStamp timeout)
{
if(DirectoryExists(rootFolderName))
{
throw ArgumentException(
"The root folder does not exits. Check the path and try again.\n" +
"Path: " + rootFolderName);
}
// Making the method thread safe
lock(lockObject)
{
using(this.timer = new Timer(
new TimerCallBack(TimeOutCallBack),
null,
timeout
new TimeSpan(-1)))
{
this.timer
this.SearchResultType = searchResultType;
// Simple convertion to regEx
filter = filter.Replace(".", "\.")
filter = filter.Replace("*", ".*");
regExFilter = new RegEx(filter);
// Using list but I believe there is a thread safe collection somewhere
results = new List<SearchResult>();
SearchFoldersCore(
new DirectoryInfo(rootFolder),
filter,
results);
}
}
return results;
}
// This the function that actually performs the search in its folder
void SearchFoldersCore(string rootFolder)
{
// Check before accessing the disk
if(stopProcessing)
{
return;
}
private List<SearchFolderCoreDelegate> createdThreads =
new List<SearchFolderCoreDelegate>();
try {
// Doing files first as I would like to get results from shallowest folder first
// and deeper folders after.
if (this.SearchResultType == ResultType.Both ||
this.SearchResultType == ResultType.File)
{
foreach(string filename in Directory.GetFiles(rootFolder))
{
// Check during the iteration
if(stopProcessing)
{
return;
}
if(regExFilter.Match(file.Name))
{
this.AddSearchResult(
ResultType.File,
rootFolder + "\" + filename);
}
}
}
if(stopProcessing)
{
return;
}
// Doing try and catch in case there is an IO Exception in between so I want to make
// sure to take care of the threads.
try {
foreach(string folderName in Directory.GetDirectories(rootFolder))
{
// Only add if its required by the search parameter
if(this.SearchResultType == ResultType.Both ||
this.SearchResultType == ResultType.Folder)
{
if(regExFilter.Matches(folderName)
{
this.AddSearchResult(
ResultType.Folder,
rootFolder + "\" + folderName);
}
}
if(stopProcessing)
{
return;
}
// Get the delegate that was invoked
SearchFolderCoreDelegate thread = ExecuteSearchFolderCore(
rootFolder + "\" + folderName);
// If its null it means that it was executed and finished.
if(thread != null)
{
createdThreads.Add(thread);
}
}
}
catch(IOException)
{
// Ignoring probably but a Verbose trace might be good here
}
finally
{
// Once we are done processing we just wait for the threads to finish by themselves
// I'm debating on cheeking for stopProcessing but eventually each thread will
// finish almost immediately as each method call checks every time it needs to stop
foreach(SearchFolderCoreDelegate thread in createdThreads)
{
// Wait until the delegate finish it's execution.
thread.EndInvoke(null);
}
}
}
// This function initiates the execution of the SearchFolderCore function and
// return it's delegate. If it can't allocate a new thread it will execute the function
// Synchronously and return null as it already finished.
SearchFolderCoreDelegate ExecuteSearchFolderCore(string root)
{
if (numberOfThreads < MaxNumberOfThreads )
{
lock (numberOfThreadsLock)
{
// Making sure that the condition is still true after the getting lock
if (numberOfThreads < MaxNumberOfThreads)
{
SearchFoldersCore result =
new SearchFolderCoreDelegate(SearchFoldersCore);
result.BefingInvoke(
root,
new AsyncCallback(SearchFolderCoreDelegateCallBack));
numberOfThreads++;
return result;
}
}
}
// If can't allocate a thread then we just call it synchronously
SearchFoldersCore(root);
return null;
}
// Callback one the thread finishes it decreases the thread count
private void SearchFolderCoreDelegateCallBack(IAsyncResult ar)
{
lock (numberOfThreadsLock)
{
numberOfThreads--;
}
}
// Adds a search results in a thread safe fashion
private void AddSearchResult(ResultType type, string path)
{
SearchResult result = new SearchResult()
{
ResultType = type,
Path = path
}
lock(this.results)
{
this.results.Add(result);
}
}
Another way would be to create only 2 main threads and one thread Safe List with the found folders.
So the thread:
#1 Traverse the folders populating the list (could be async also).
#2 Searches each folder in the list for a match while the traverse is not done and there are folders to traverse otherwise it waits will the traversal finds a new folder. (This thread could also spawn another sub threads to process separate found folder).
package com.san;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class MyTestMain {
private final String pattern=".*my.*.txt";
static ArrayList<String> result = new ArrayList<String>();
void search(File file) {
if (file!=null && file.isDirectory()) {
System.out.println("Searching directory ... "+ file.getAbsoluteFile());
if (file.canRead()) {
for (File fileDetails : file.listFiles()) {
if (fileDetails.isDirectory()) {
search(fileDetails);
} else {
boolean isMatchFound=fileDetails.getName().matches(pattern);
if(isMatchFound){
result.add(fileDetails.getAbsoluteFile().toString());
}
}
}
} else {
System.out
.println(file.getAbsoluteFile() + "Permission Denied");
}
}
}
public static void main(String[] args) {
BufferedReader bufferReaderObj = new BufferedReader(new InputStreamReader(System.in));
System.out.println("Please enter 'Quit' to exit");
System.out.println("Please enter root folder name to search");
String inputString=null;
try {
while((inputString = bufferReaderObj.readLine()) != null) {
inputString = inputString.trim();
if(!inputString.equalsIgnoreCase("Quit")){
String rootFolderStr = inputString;
try {
File rootFolder = new File(rootFolderStr);
if(rootFolder.isDirectory()){
System.out.println("Path is correct");
result.clear();
MyTestMain testObj = new MyTestMain();
testObj.search(rootFolder);
if(!result.isEmpty()){
System.out.println("Following match found :");
for(String match : result){
System.out.println(match);
}
}else{
System.out.println("No match found");
}
}else{
System.out.println("Please reenter proper root folder name");
}
} catch (Exception e) {
System.out.println("Error Occure. Please reenter proper root folder name"+e);
}
}else{
System.out.println("system exit");
Thread.sleep(1000);
System.exit(0);
}
}
} catch (Exception e) {
System.out.println("Error in main"+e.toString());
e.printStackTrace();
}
}
}
Maintaining a Hash . which will take FileName/FolderName as Key and Value as the Path.
This can be extended to LinkedHashMap, since FileNames can be same but path may be different.
Also on each Operation of Files/Folders(viz : Copy,Rename, delete, cut) we need to update the hash accordingly.
and once search starts : Compare the Keys and display the Paths(based on your root folder i.e rootFolder is present in path, if present display it else don't display it)
Producer -consumer idea can be used to implement this program.
1. use an task queue --> producer queue
2. use executor service
3. create 1 function which will do following
traverse the folder and if item is file print the name
if item is folder add it to the producer queue.
submit the task to executor service on the entry from producer queue
so by above many threads will keep on adding the folders in parallel to producer queue
executor tasks will take it from queue and will scan all the folders
package Interview_Specific;
import java.io.File;
import java.util.*;
/**
* Created by GOOGLE on 8/19/2015.
*
* Write a high performance file search utility program .
* You are required to write a program that takes command line arguments and searches the required file in the current
* folder and the sub folders . Search could be for files or folders or both . Below are the four parameters that would
* be passed . You are also required to so proper exception handling and you code must be optimized as much as possible.
1) A top folder name : Folder from where search would begin. (Ex : C:/MyFolder )
2) Search Option (File /Folder /Both )
3) Search Pattern : A regular expression : (*my*.txt )
4) Timeout in seconds : once the operation treaches this timeout search should be stopped saying " Could not complete
operation " and results obtained till then must be returned .
*/
public class HighUtilitySearch {
public List<File> search(List<File> outputList, String topFolderName, String searchOption, String searchPattern, Integer timeout) {
Long currentTime = System.currentTimeMillis();
File file = new File(topFolderName);
if (file.exists()) {
File[] allFiles = file.listFiles();
if (allFiles != null && allFiles.length > 0) {
for (File tempFile : allFiles) {
switch (isTimeOver(currentTime, timeout)) {
case 0:
break;
case 1:
switch (searchOption) {
case "BOTH":
addMatchedFileNameToList(outputList, tempFile, searchPattern);
if (tempFile.isDirectory()) {
search(outputList, tempFile.getPath(), searchOption, searchPattern, timeout);
}
break;
case "FILE":
if (!tempFile.isDirectory()) {
addMatchedFileNameToList(outputList, tempFile, searchPattern);
}
search(outputList, tempFile.getPath(), searchOption, searchPattern, timeout);
break;
case "FOLDER":
if (tempFile.isDirectory()) {
addMatchedFileNameToList(outputList, tempFile, searchPattern);
}
break;
default:
System.out.println("-------------INVALID SEARCH OPTION-------------");
break;
}
}
}
}
}
return outputList;
}
private Integer isTimeOver(Long startTime, Integer timeout) {
Long currentTime = System.currentTimeMillis();
if (currentTime >= startTime + timeout) {
return 0;
}
return 1;
}
public List<File> addMatchedFileNameToList(List<File> fileList, File file, String searchPattern) {
if (file.getName().contains(searchPattern)) {
fileList.add(file);
}
return fileList;
}
public static void main(String[] args) {
List<File> outputSearchResults = new ArrayList<>();
HighUtilitySearch highUtilitySearch = new HighUtilitySearch();
outputSearchResults = highUtilitySearch.search(outputSearchResults, "C:\\Users\\GOOGLE\\IdeaProjects", "FILE", ".class", 1000000000);
System.out.println("------------HERE ARE THE SEARCH RESULTS------------");
for (File outputFile : outputSearchResults) {
System.out.println(outputFile.getPath() + "\n");
}
System.out.println("--------------END OF SEARCH RESULTS--------------\n");
System.out.println("YOUR SEARCH RETURNED " + outputSearchResults.size() + " RESULTS");
}
}
package Interview_Specific;
import java.io.File;
import java.util.*;
/**
* Created by GOOGLE on 8/19/2015.
*
*/
public class HighUtilitySearch {
public List<File> search(List<File> outputList, String topFolderName, String searchOption, String searchPattern, Integer timeout) {
Long currentTime = System.currentTimeMillis();
File file = new File(topFolderName);
if (file.exists()) {
File[] allFiles = file.listFiles();
if (allFiles != null && allFiles.length > 0) {
for (File tempFile : allFiles) {
switch (isTimeOver(currentTime, timeout)) {
case 0:
break;
case 1:
switch (searchOption) {
case "BOTH":
addMatchedFileNameToList(outputList, tempFile, searchPattern);
if (tempFile.isDirectory()) {
search(outputList, tempFile.getPath(), searchOption, searchPattern, timeout);
}
break;
case "FILE":
if (!tempFile.isDirectory()) {
addMatchedFileNameToList(outputList, tempFile, searchPattern);
}
search(outputList, tempFile.getPath(), searchOption, searchPattern, timeout);
break;
case "FOLDER":
if (tempFile.isDirectory()) {
addMatchedFileNameToList(outputList, tempFile, searchPattern);
}
break;
default:
System.out.println("-------------INVALID SEARCH OPTION-------------");
break;
}
}
}
}
}
return outputList;
}
private Integer isTimeOver(Long startTime, Integer timeout) {
Long currentTime = System.currentTimeMillis();
if (currentTime >= startTime + timeout) {
return 0;
}
return 1;
}
public List<File> addMatchedFileNameToList(List<File> fileList, File file, String searchPattern) {
if (file.getName().contains(searchPattern)) {
fileList.add(file);
}
return fileList;
}
public static void main(String[] args) {
List<File> outputSearchResults = new ArrayList<>();
HighUtilitySearch highUtilitySearch = new HighUtilitySearch();
outputSearchResults = highUtilitySearch.search(outputSearchResults, "C:\\Users\\GOOGLE\\IdeaProjects", "FILE", ".class", 1000000000);
System.out.println("------------HERE ARE THE SEARCH RESULTS------------");
for (File outputFile : outputSearchResults) {
System.out.println(outputFile.getPath() + "\n");
}
System.out.println("--------------END OF SEARCH RESULTS--------------\n");
System.out.println("YOUR SEARCH RETURNED " + outputSearchResults.size() + " RESULTS");
}
}
package com;
import java.util.Scanner;
import java.util.Timer;
import java.util.TimerTask;
import search.FileSearchComponent;
public class FileSearch {
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
final FileSearchComponent fsc = new FileSearchComponent();
Scanner input = new Scanner(System.in);
System.out.println("Please enter directoy path to search: ");
final String dirEntered = input.nextLine();
fsc.setRoot(dirEntered);
System.out.println("Please enter search type 0-Files; 1-Folders; 2-Both: ");
final String typeEntered = input.nextLine();
fsc.setType(Integer.parseInt(typeEntered));
System.out.println("Please enter search pattern: ");
final String patternEntered = input.nextLine();
fsc.setSearchPattern(patternEntered);
System.out.println("Please enter search timeout in seconds: ");
final String timeOutEntered = input.nextLine();
fsc.start();
new Timer(true).schedule(new TimerTask() {
public void run() {
System.out.println("Requesting stop");
fsc.requestStop();
}
}, getTimeOut(timeOutEntered));
}
private static long getTimeOut(String timeOutEntered) {
return Long.parseLong(timeOutEntered) * 1000;
}
}
package search;
import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FileSearchComponent extends Thread {
private int fileCounter;
private String root;
private int type;
private String searchPattern;
private volatile boolean stop = false;
boolean isTaskCompleted = false;
boolean stillProcessing = false;
private static Pattern p;
private static Matcher m;
private long initialTime;
public int getFileCounter() {
return fileCounter;
}
public void setFileCounter(int fileCounter) {
this.fileCounter = fileCounter;
}
public String getRoot() {
return root;
}
public void setRoot(String root) {
this.root = root;
}
public int getType() {
return type;
}
public void setType(int type) {
this.type = type;
}
public String getSearchPattern() {
return searchPattern;
}
public void setSearchPattern(String searchPattern) {
this.searchPattern = searchPattern;
}
private void getFiles(String dirEntered, int typeEntered,
String patternEntered) {
try {
File file = new File(dirEntered);
if (file != null && file.isDirectory()) {
if (stop) {
throw new Exception("Search thread interrupted");
}
for (File f : file.listFiles()) {
if (f.isFile()) {
p = Pattern.compile(patternEntered);
m = p.matcher(f.getName());
if (m.find()) {
fileCounter++;
System.out.println("File: " + f.getAbsolutePath());
}
} else {
getFiles(f.getAbsolutePath(), typeEntered,
patternEntered);
}
}
}
} catch (Exception e) {
e.printStackTrace();
System.exit(0);
}
}
private void getFolders(String dirEntered, int typeEntered,
String patternEntered) {
try {
File file = new File(dirEntered);
if (file != null && file.isDirectory()) {
if (stop) {
throw new Exception("Search thread interrupted");
}
for (File f : file.listFiles()) {
if (f.isDirectory()) {
p = Pattern.compile(patternEntered);
m = p.matcher(f.getName());
if (m.find()) {
fileCounter++;
System.out.println("Filder: " + f.getAbsolutePath());
}
getFolders(f.getAbsolutePath(), typeEntered,
patternEntered);
}
}
}
} catch (Exception e) {
e.printStackTrace();
System.exit(0);
}
}
public void run() {
try {
if (stop) {
throw new Exception("run(): Search thread interrupted");
}
initialTime = System.currentTimeMillis();
switch (type) {
case 0:
getFiles(root, type, searchPattern);
break;
case 1:
getFolders(root, type, searchPattern);
break;
case 2:
getFiles(root, type, searchPattern);
getFolders(root, type, searchPattern);
break;
default:
break;
}
System.out.println("Search completed successfully ...");
System.out.println("Search start time: "
+ System.currentTimeMillis());
System.out.println("Time took to perform search: "
+ (System.currentTimeMillis() - initialTime));
} catch (Exception e) {
e.printStackTrace();
System.exit(0);
}
}
public void requestStop() {
stop = true;
}
}
Is this process is recursive, means sub folders are allowed to take participate?
- Debasis Jana July 22, 2013Instead of using recursive method you can maintain list of folders and each thread should be responsible for file(s)/folder(s) into the current directory.