Facebook Interview Question
Software Engineer / DevelopersIMO its a good variation of longest increasing sub sequence.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct
{
int start;
int end;
int score;
}genePred_t;
void readInputFromFile(char* fileName, int* pNumOfGenePred, genePred_t** ppGenePred)
{
FILE* fp = 0;
char* pFilePtr=0, *pGene=0, *token=0;
int len=0, curLen=0,bytes=0,index=0, geneLen=0, numOfGenePred=0;
genePred_t* pGenePred = 0;
/*open the file*/
fp = fopen(fileName, "r");
/*get length of gene*/
len = getline(&pFilePtr, &bytes, fp);
pFilePtr[len-1]=0;
geneLen = atoi(pFilePtr);
free(pFilePtr);
pFilePtr=0;
len=0;
/*get the gene*/
pGene = (char*)malloc(geneLen+1);
memset(pGene,0x00,geneLen+1);
while(len<geneLen)
{
curLen = getline(&pFilePtr, &bytes, fp);
pFilePtr[curLen-1]=0;
strcat(pGene,pFilePtr);
free(pFilePtr);
pFilePtr=0;
len += curLen-1;
}
/*get the number of gene predection*/
len = getline(&pFilePtr, &bytes, fp);
pFilePtr[len-1]=0;
numOfGenePred = atoi(pFilePtr);
free(pFilePtr);
pFilePtr=0;
/*get Gene predection*/
pGenePred = (genePred_t*)malloc(sizeof(genePred_t)*numOfGenePred);
len=numOfGenePred;
while(len)
{
curLen = getline(&pFilePtr, &bytes, fp);
pFilePtr[curLen-1]=0;
token = strtok(pFilePtr," ");
pGenePred[index].start = atoi(token);
token = strtok(0," ");
pGenePred[index].end = atoi(token);
token = strtok(0," ");
pGenePred[index].score = atoi(token);
free(pFilePtr);
pFilePtr=0;
len--;
index++;
}
*pNumOfGenePred = numOfGenePred;
*ppGenePred = pGenePred;
fclose(fp);
}
void addScore(int maxIndex, int* pPrev, genePred_t* pGenePred, int* Score)
{
if(maxIndex==0)
{
return;
}
addScore(pPrev[maxIndex],pPrev,pGenePred,Score);
printf("[%d]", pGenePred[maxIndex-1].score);
*Score = *Score + pGenePred[maxIndex-1].score;
}
int addAllScore(genePred_t* pGenePred, int* pIncSeq, int* pPrev, int numOfGenePred)
{
int maxIndex=0,i=0,score=0,max=0;
for(i=0;i<=numOfGenePred;i++)
{
if(max<pIncSeq[i])
{
max=pIncSeq[i];
maxIndex = i;
}
}
addScore(maxIndex, pPrev, pGenePred, &score);
return score;
}
int compare (void* arg1, void* arg2)
{
genePred_t* pGenePred1 = (genePred_t*)arg1;
genePred_t* pGenePred2 = (genePred_t*)arg2;
return (pGenePred1->start > pGenePred2->start)?(1):(-1);
}
void computeMaxScore(genePred_t* pGenePred,int numOfGenePred)
{
int i=0,j=0,max=0,score=0, maxScore=0, curScore=0;
int* pIncSeq=0,*pPrev=0,*pScore=0;
qsort(pGenePred, numOfGenePred, sizeof(genePred_t),compare);
pIncSeq = (int*)malloc(sizeof(int)*(numOfGenePred+1));
pPrev = (int*)malloc(sizeof(int)*(numOfGenePred+1));
pScore = (int*)malloc(sizeof(int)*(numOfGenePred+1));
memset(pIncSeq,0x00,sizeof(int)*(numOfGenePred+1));
memset(pPrev,0x00,sizeof(int)*(numOfGenePred+1));
memset(pScore,0x00,sizeof(int)*(numOfGenePred+1));
for(i=0;i<numOfGenePred;i++)
{
max=0;
curScore=0;
for(j=0;j<=i;j++)
{
if(pGenePred[i].start > pGenePred[j].end)
{
if(max<=pIncSeq[j+1])
{
curScore = pScore[j]+pGenePred[i].score;
if(maxScore < curScore)
{
pScore[j+1]=curScore;
maxScore=curScore;
max=pIncSeq[j+1];
pPrev[i+1] = j+1;
}
}
}
else
{
if(pScore[j+1]<pGenePred[i].score)
pScore[j+1]=pGenePred[i].score;
}
}
max=max+1;
pIncSeq[i+1]=max;
}
score = addAllScore(pGenePred,pIncSeq,pPrev,numOfGenePred);
printf("\n");
printf("Score: %d\n",score);
free(pIncSeq);
free(pPrev);
free(pScore);
}
int main(int argc,char* argv[])
{
int numOfGenePred=0;
genePred_t* pGenePred=0;
if(argc < 2)
{
printf("Invalid Input !!\n");
return 0;
}
readInputFromFile(argv[1],&numOfGenePred, &pGenePred);
computeMaxScore(pGenePred,numOfGenePred);
return 0;
}
I didn't run the code other than the input given in this post. Please let me know if there is a bug or mistake in logic. Code expects valid input text file with newline in the end.
I'll explain the detailed logic latter.
lengthy question lengthy code !!
I understood something but can u plz explain the logic completely?
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cmath>
#include <stdlib.h>
typedef struct {
long start, finish, weight;
} Interval;
Interval *I;
long *M;
long *P;
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
int compare(const void * left, const void * right) {
return (((Interval *)left)->finish - ((Interval *) right)->finish);
}
int IntervalSearch(int start, int high){
if (high == -1) return -1;
int low = 0;
int best = -1;
int mid;
int finish;
while (low <= high) {
mid = (low + high) /2 ;
finish = I[mid].finish;
if (finish >= start) {
high = mid-1;
} else {
best = mid;
low = mid + 1;
}
}
return best;
}
long Compute_Opt(long j) {
if (j == 0)
return 0;
if (M[j] != -1l) {
return M[j];
}
M[j] = MAX(I[j].weight + Compute_Opt(P[j]), Compute_Opt(j - 1));
return M[j];
}
int main( int argc, char **argv) {
if ( argc <= 1 ) {
return 0;
}
std::ifstream dnaInput(argv[1]);
if ( dnaInput.peek() == EOF ) {
return 0;
}
int lenDnaStr;
dnaInput >> lenDnaStr ;
for(int index = 0; index <= std::ceil(((float)lenDnaStr)/80.0); index++) {
dnaInput.ignore( lenDnaStr + 1, '\n');
}
int numOfIv = 0;
dnaInput >> numOfIv;
if ( numOfIv == 0 ) {
return 0;
}
numOfIv = numOfIv + 1;
I = (Interval *) malloc (sizeof(Interval) * numOfIv);
M = (long *)malloc(sizeof(long) * numOfIv);
P = (long *)malloc(sizeof(long) * numOfIv);
Interval t;
t.start = 0;
t.finish = 0;
t.weight = 0;
I[0] = t;
for ( int index = 1; index < numOfIv; index++) {
int start = 0, finish = 0, weight = 0;
dnaInput >> start >> finish >> weight;
Interval t;
t.start = start;
t.finish = finish;
t.weight = weight;
I[index] = t;
}
dnaInput.close();
qsort(I, numOfIv, sizeof(Interval), compare);
int best;
for (int index = 1; index < numOfIv; index++){
M[index] = -1l;
best = IntervalSearch(I[index].start,index - 1);
if (best != -1) {
P[index] = best;
} else {
P[index] = 0;
}
}
long res = Compute_Opt(numOfIv - 1);
free ( P );
free ( I );
free ( M );
std::cout << res << std::endl;
}
#include <iostream>
#include <algorithm>
#include <vector>
#include <cstring>
using namespace std;
struct Endpoint {
int loc;
int score;
int best_score;
Endpoint * other;
Endpoint (int _loc, int _score) : loc(_loc), score(_score), best_score(0) {}
};
bool comp_endpt_ptr(Endpoint * e1, Endpoint * e2) {
return e1->loc < e2->loc;
}
int N, G;
vector<Endpoint *> endpts;
int main() {
// ignore actual DNA sequence... we only need this if we want to
// print results
string s;
cin >> N;
for (int i = 0; i < (N - 1)/80 + 1; i++)
cin >> s;
cin >> G;
for (int i = 0; i < G; i++) {
int start, end, score;
cin >> start >> end >> score;
Endpoint * left = new Endpoint(start, score);
Endpoint * right = new Endpoint(end + 1, score);
left->other = right;
right->other = left;
endpts.push_back(left);
endpts.push_back(right);
}
sort(endpts.begin(), endpts.end(), comp_endpt_ptr);
for (int i = 1; i < endpts.size(); i++) {
Endpoint * pt = endpts[i];
if (pt->other->loc > pt->loc)
pt->best_score = endpts[i - 1]->best_score;
else
pt->best_score = max(endpts[i - 1]->best_score,
pt->score + pt->other->best_score);
// cout << pt->loc << ": " << pt->best_score << endl;
}
cout << endpts.back()->best_score << endl;
}
<pre lang="" line="1" title="CodeMonkey8253" class="run-this">code in C#
class DNA : IComparable<DNA>
{
public int Start { get; set; }
public int End { get; set; }
public int Score { get; set; }
public DNA( int s, int e, int ds )
{
this.Start = s;
this.End = e;
this.Score = ds;
}
public int CompareTo( DNA d )
{
return this.End.CompareTo( d.End );
}
}
public static void FindMaxScore()
{
List<DNA> dnas = new List<DNA>();
// read input
dnas.Sort();
int N = 100;
int[] scores = new int[N];
int[] seleceted = new int[N];
for (int i = 0; i < dnas.Count; i++)
{
DNA d = dnas[i];
if (d.Score + scores[d.Start] >= scores[d.End - 1])
{
scores[d.End] = d.Score + scores[d.Start];
seleceted[d.End] = i+1;
}
else
{
scores[d.End] = scores[d.End - 1];
seleceted[d.End] = seleceted[d.End - 1];
}
int iEnd = N;
if (i < dnas.Count - 1)
{
iEnd = dnas[i + 1].End;
}
for (int j = d.End + 1; j < iEnd; j++)
{
scores[j] = scores[j - 1];
seleceted[j] = seleceted[j - 1];
}
}
//output max score
Console.WriteLine( scores[N - 1] );
//output selected DNA strings
int idx = seleceted[N - 1];
while (idx > 0)
{
Console.WriteLine( idx - 1 );
idx = seleceted[dnas[idx - 1].Start];
}
}</pre><pre title="CodeMonkey8253" input="yes">
</pre>
I think it is a weighted interval scheduling. It can be solved by Dynamic Programming i O(N) time. Given a set of DNA substrings --> intervals (structured in term of <bgn,end,score>). I[j] is not overlapped with I[k] if end_j < bgn_k. The trick is to sort the interval in earliest finish. We shall have an array of interval I of size N. Given OPT(j) be the optimal path defined on the first jth interval. OPT(j) has two options; one that include I[j] and one doesn't.
- Saimok July 04, 2011