Google Interview Question
Front-end Software EngineersWe can create a self defined message for serializing
Serialized Byte Array = No. Element (4 bytes) + element(1) + element(2) + .. + element(n)
where each element is represented as
element = Length (4 bytes) + Data (Length Bytes)
This helps us on as we can define the reserve function to avoid resizing of the vector while creating it during deserialization.
Tried and tested following code it works fine.
Let me know if you can find some edge cases when it would not work.
#include<vector>
#include<string>
#include<iostream>
using namespace std;
int serialize(const vector<string> & stringVector1)
{
FILE *fptr = fopen("C:\\serializedString.txt","w");
if(!fptr)
return 0;
vector<string>::const_iterator i = stringVector1.begin(), end = stringVector1.end();
for(; i<end;i++ )
{
::fputs((*i).c_str(),fptr);
::fputs("\n",fptr);
}
fclose(fptr);
return 0;
}
int Deserialize(vector<string> & stringVector2)
{
FILE *fptr = fopen("C:\\serializedString.txt","r");
if(!fptr)
return 0;
char arr[1024],*ptr=NULL;
while(!feof(fptr))
{
ptr = fgets(arr,1024,fptr);
if(ptr == arr)
stringVector2.push_back(string(arr));
}
fclose(fptr);
return 0;
}
int main()
{
vector<string> stringVector1,stringVector2;
stringVector1.push_back("Cracking");
stringVector1.push_back("Programming");
stringVector1.push_back("Interview");
stringVector1.push_back("with");
stringVector1.push_back("Arif");
stringVector1.push_back("and");
stringVector1.push_back("Krishna");
serialize(stringVector1);
Deserialize(stringVector2);
vector<string>::iterator i = stringVector2.begin(), end = stringVector2.end();
for(; i<end;i++ )
cout<<*i;
getchar();
return 0;
}
While serializing we need to change all the spaces of the strings in the vector with a character set like "%20". After that we can we can get our new formed string where all the independent strings which are differentiated from each other by a space.
While deserializing if we get a space character we form the string and change all the %20 character sets with spaces. The string vector is formed by adding each string to it.
<pre lang="c++" line="1" title="CodeMonkey10386" class="run-this">#include <string>
#include <string.h>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <iostream>
using namespace std;
string serialize (vector<string> v) {
string result;
vector<int> lengths;
lengths.resize(v.size());
int sum = 0;
for (int i= 0; i < v.size(); i++) {
lengths[i] = v[i].size(); sum += lengths[i];
}
result += boost::lexical_cast<string>(lengths.size());
result += string(";");
for (int i = 0; i < v.size(); i++) {
result += boost::lexical_cast<string>(v[i].size());
result += string(";");
}
for (int i = 0; i < v.size(); i++) {
result += v[i];
}
cout << result.size() << endl;
return result;
}
vector<string> deserialize (string v) {
vector <int> lengths;
vector <string> output;
int pos = 0;
int seppos = v.find_first_of(';', pos);
int total = boost::lexical_cast<int>(v.substr(pos, seppos-pos));
pos = seppos + 1;
cout << "Total is:" << total << endl;
lengths.resize(total);
output.resize(total);
for (int i = 0; i < total; i++) {
seppos = v.find_first_of(';', pos);
lengths[i] = boost::lexical_cast<int>(v.substr(pos, seppos-pos));
pos = seppos + 1;
cout << "lengths [" << i << "]=" << lengths[i] << endl;
output[i].resize(lengths[i]);
}
for (int i = 0; i < total; i++) {
for (int j = 0; j < lengths[i]; j++) {
output[i][j] = v[pos++];
}
}
return output;
}
int main(int argc, char** argv) {
vector<string> input;
input.resize(argc);
for (int i = 0; i < argc; i++) {
input[i] = string(argv[i]);
}
string output = serialize(input);
cout << output << endl;
vector<string> deserialized = deserialize(output);
for (int i = 0; i < deserialized.size(); i++) {
cout << deserialized[i] << endl;
}
}
</pre><pre title="CodeMonkey10386" input="yes">test testtest testtesttest t </pre>
1.
- length of string in specific format (same length, etc) as prefix
- '\0' of each string can be removed
"001122ff" + string_0[0x001122ff] + "00000010" + string_1[0x00000010] + ......
2.
optimized prefix
"112#" + string_0[0x112] + ....
- first '#' as delimiter to extract string length
3.
further optimized
"Aa#" + string_0[??] + ...
- we have at least 52 alphabets + 10 digits + any other char except '#' and '\0' for each digit.
How about writing them on file using a delimiter but who knows if this delimiter occurs as a character in string. If it may occur we can replace all occurrences of such a delimiter in string with double delimiter just to mark it a escape character so that we ignore occurrence of double delimiter. But this approach would fail if vector contains empty strings as well. We can repeat it more to mark empty strings.
- ashish.agr82 August 16, 2010