This example shows how to use the FromContentStream function to parse and display a PDF content stream.
[C#]
// Builds a human-readable listing of the current page's content stream:
// one operator (preceded by its operands) per line, indented inside
// q ... Q and BT ... ET pairs. The result is accumulated in sb.
StringBuilder sb = new StringBuilder();
using (Doc doc = new Doc()) {
    doc.Read("spaceshuttle.pdf");
    Page page = doc.ObjectSoup[doc.Page] as Page;
    if (page == null)
        throw new InvalidOperationException("Unable to locate the current page.");

    // Collect the decompressed data of every layer into a single buffer.
    byte[] content;
    using (MemoryStream st = new MemoryStream()) {
        foreach (StreamObject layer in page.GetLayers()) {
            if (!layer.Decompress())
                throw new InvalidOperationException("Unable to decompress stream.");
            byte[] data = layer.GetData();
            st.Write(data, 0, data.Length);
            // Separate consecutive streams with white space so that the last
            // token of one stream cannot merge with the first token of the next.
            st.WriteByte((byte)'\n');
        }
        content = st.ToArray();
    }

    ArrayAtom array = ArrayAtom.FromContentStream(content);
    int indent = 0;
    HashSet<string> indentPlus = new HashSet<string>(new string[] { "q", "BT" });
    HashSet<string> indentMinus = new HashSet<string>(new string[] { "Q", "ET" });
    // Item2 of each pair is used below as the index of an operator atom in array.
    IList<Tuple<string, int>> items = OpAtom.Find(array);
    int index = 0; // first atom that has not been written out yet
    foreach (var pair in items) {
        string op = ((OpAtom)array[pair.Item2]).Text;
        // Closing operators are outdented before they are written. The indent
        // is never allowed to go negative, so an unbalanced Q or ET does not
        // throw off the indentation of everything that follows.
        if (indentMinus.Contains(op) && indent > 0)
            indent--;
        sb.Append(' ', indent);
        // write out the operands followed by the operator itself
        for (int i = index; i <= pair.Item2; i++) {
            if (i != index)
                sb.Append(" ");
            Atom item = array[i];
            // we write arrays out individually so that
            // we can override default cr lf behavior
            ArrayAtom itemArray = item as ArrayAtom;
            if (itemArray != null) {
                int n = itemArray.Count;
                for (int j = 0; j < n; j++) {
                    sb.Append(itemArray[j].ToString());
                    if (j != n - 1)
                        sb.Append(" ");
                }
            }
            else {
                sb.Append(item.ToString());
            }
        }
        sb.AppendLine();
        // Opening operators indent the lines that follow them.
        if (indentPlus.Contains(op))
            indent++;
        index = pair.Item2 + 1;
    }
    // write out any atoms that are left over after the last operator
    for (int i = index; i < array.Count; i++) {
        sb.Append(" ");
        sb.Append(array[i].ToString());
    }
}
// Write the listing collected in sb to a new document, set in Courier,
// and save it as PageContents.pdf.
using (Doc output = new Doc()) {
    var courier = output.AddFont("Courier");
    output.Font = courier;
    // Inset the output rectangle by (20, 20) before adding the text.
    output.Rect.Inset(20, 20);
    string listing = sb.ToString();
    output.AddText(listing);
    output.Save("PageContents.pdf");
}
[Visual Basic]
' Builds a human-readable listing of the current page's content stream:
' one operator (preceded by its operands) per line, indented inside
' q ... Q and BT ... ET pairs. The result is accumulated in sb.
Dim sb As New StringBuilder()
Using doc As New Doc()
    doc.Read("spaceshuttle.pdf")
    Dim page As Page = TryCast(doc.ObjectSoup(doc.Page), Page)
    If page Is Nothing Then
        Throw New InvalidOperationException("Unable to locate the current page.")
    End If

    ' Collect the decompressed data of every layer into a single buffer.
    Dim content As Byte()
    Using st As New MemoryStream()
        For Each layer As StreamObject In page.GetLayers()
            If Not layer.Decompress() Then
                Throw New InvalidOperationException("Unable to decompress stream.")
            End If
            Dim data As Byte() = layer.GetData()
            st.Write(data, 0, data.Length)
            ' Separate consecutive streams with a line feed so that the last
            ' token of one stream cannot merge with the first token of the next.
            st.WriteByte(10)
        Next
        content = st.ToArray()
    End Using

    Dim array As ArrayAtom = ArrayAtom.FromContentStream(content)
    Dim indent As Integer = 0
    Dim indentPlus As New HashSet(Of String)(New String() {"q", "BT"})
    Dim indentMinus As New HashSet(Of String)(New String() {"Q", "ET"})
    ' Item2 of each pair is used below as the index of an operator atom in array.
    Dim items As IList(Of Tuple(Of String, Integer)) = OpAtom.Find(array)
    Dim index As Integer = 0 ' first atom that has not been written out yet
    For Each pair As Tuple(Of String, Integer) In items
        Dim op As String = DirectCast(array(pair.Item2), OpAtom).Text
        ' Closing operators are outdented before they are written. The indent
        ' is never allowed to go negative, so an unbalanced Q or ET does not
        ' throw off the indentation of everything that follows.
        If indentMinus.Contains(op) AndAlso indent > 0 Then
            indent -= 1
        End If
        sb.Append(" "c, indent)
        ' write out the operands followed by the operator itself
        For i As Integer = index To pair.Item2
            If i <> index Then
                sb.Append(" ")
            End If
            Dim item As Atom = array(i)
            ' we write arrays out individually so that
            ' we can override default cr lf behavior
            Dim itemArray As ArrayAtom = TryCast(item, ArrayAtom)
            If itemArray IsNot Nothing Then
                Dim n As Integer = itemArray.Count
                For j As Integer = 0 To n - 1
                    sb.Append(itemArray(j).ToString())
                    If j <> n - 1 Then
                        sb.Append(" ")
                    End If
                Next
            Else
                sb.Append(item.ToString())
            End If
        Next
        sb.AppendLine()
        ' Opening operators indent the lines that follow them.
        If indentPlus.Contains(op) Then
            indent += 1
        End If
        index = pair.Item2 + 1
    Next
    ' write out any atoms that are left over after the last operator
    For i As Integer = index To array.Count - 1
        sb.Append(" ")
        sb.Append(array(i).ToString())
    Next
End Using
' Write the listing collected in sb to a new document, set in Courier,
' and save it as PageContents.pdf.
Using doc As New Doc()
    doc.Font = doc.AddFont("Courier")
    ' Inset the output rectangle by (20, 20) before adding the text.
    doc.Rect.Inset(20, 20)
    doc.AddText(sb.ToString())
    doc.Save("PageContents.pdf")
End Using
PageContents.pdf
|
|
|