Erlang實戰:文本排版
文本排版大家都并不陌生,并且隨處可見,無論是當前的字處理軟件,如Word、PDF等,還是傳統的出版行業,如:書籍、報刊、雜志等,都有良好的排版風格和樣式。好的排版風格讓人耳目一新,有一種繼續讀下去的沖動。本文主要是通過Erlang實現簡單的文本排版,借此學習Erlang、熟悉Erlang。
實戰:文本排版。
要求如下:
在代碼中我附了注釋,代碼如下:
- -module(text_process).
- -export([process/2]).
- -import(lists,[reverse/1]).
%% Re-flows the text stored in FileIn and writes the justified result to
%% FileOut (and to the screen).
%%   FileIn  - path of the text file to read
%%   FileOut - path of the file that receives the laid-out text
%% Returns ok in both the success and the "cannot open" case; a failure to
%% open FileIn is reported on standard output with the real reason.
process(FileIn, FileOut) ->
    case file:open(FileIn, [read]) of
        {ok, Device} ->
            %% Split the input text into a flat list of words.
            Tokens = readFile(Device),
            io:format("token :~p~n", [Tokens]),
            %% Lay the words out again and emit them to screen and FileOut.
            writeToFile(FileOut, Tokens),
            io:format("~nsuccess!~n");
        {error, Reason} ->
            %% Not every failure is a missing file (permissions, directory,
            %% ...), so print what the file module actually reported.
            io:format("Open file error: ~s~n", [file:format_error(Reason)])
    end.
%% Reads words from the open I/O device Value until end of file or the first
%% blank (whitespace-only) line, closes the device and returns the words in
%% reading order.
readFile(Value) -> readFile(Value, []).

%% Same as readFile/1, but the words that are read are appended to the
%% already collected word list Dict.
readFile(S, Dict) ->
    Dict ++ read_words(S, []).

%% Collects the words line by line. RevWords holds the words seen so far in
%% reverse order so that every line is added in time proportional to its own
%% length instead of the length of everything read before it.
read_words(S, RevWords) ->
    case io:get_line(S, '') of
        eof ->
            io:format("~nCome across the file end, stop!~n"),
            file:close(S),
            lists:reverse(RevWords);
        {error, Reason} ->
            %% Do not leave the device open when the read itself fails.
            file:close(S),
            error({read_failed, Reason});
        OneLine ->
            %% Space, carriage return, line feed and tab separate words.
            case string:tokens(OneLine, "\r\t\n ") of
                [] ->
                    %% A line without any word ends the paragraph.
                    io:format("~nCome across blank line, stop!~n"),
                    file:close(S),
                    lists:reverse(RevWords);
                Words ->
                    read_words(S, lists:reverse(Words, RevWords))
            end
    end.
%% Lays out the word list Tokens and writes the resulting paragraph both to
%% the screen and to the file FileOut (created or truncated).
%% Crashes with a badmatch if FileOut cannot be opened for writing.
writeToFile(FileOut, Tokens) ->
    {ok, S} = file:open(FileOut, [write]),
    try
        Data = parse(Tokens, []),
        String = lineToPara(Data),
        io:format("~n~s~n", [String]),   %% to the screen
        io:format(S, "~s~n", [String])   %% to the file
    after
        %% Always release the file, even when the layout code crashes.
        file:close(S)
    end.
%% Turns a list of one-element tuples {LineString} into a single string in
%% which every line is terminated by a newline.
lineToPara(Data) -> lineToPara(Data, "").

%% Same as lineToPara/1, with the text Ret placed in front of the lines.
lineToPara([], Ret) -> Ret;
lineToPara(Data, Ret) ->
    %% Build all lines first and concatenate once, instead of re-copying the
    %% growing result for every line.
    Ret ++ lists:append([element(1, Entry) ++ "\n" || Entry <- Data]).
%% Lays out the word list Tokens as lines of at most 50 characters and
%% appends them, each wrapped as {LineString}, to Contents.
%% Every line except the last is fully justified; the last line is emitted
%% with single spaces between its words.
parse(Tokens, Contents) ->
    parse(Tokens, Contents, 50).

%% Same as parse/2 with an explicit maximum line width M.
parse(Tokens, Contents, M) ->
    Contents ++ layout_lines(Tokens, M).

%% Produces the {LineString} entries for Tokens, one line per step.
layout_lines(Tokens, M) ->
    case parse_line(Tokens, M) of
        {ALine, []} ->
            %% No words are left: ALine is still a list of words here.
            Plain = plain(ALine),
            io:format("last line----:~s~n", [Plain]),
            [{Plain}];
        {ALine, RemainToken} ->
            %% ALine is an already justified string.
            [{ALine} | layout_lines(RemainToken, M)]
    end.
%% Joins the words of the list Last with single spaces.
plain(Last) -> plain(Last, "").

%% Same as plain/1, with the text String placed in front of the joined words.
plain([], String) -> String;
plain(L, String) ->
    String ++ string:join(L, " ").
%% Takes as many leading words from Tokens as fit into one line of width M.
%% Returns {JustifiedString, RemainingTokens} when the line is full, or
%% {Words, []} (an unjustified word list) when Tokens runs out first.
parse_line(Tokens, M) -> parse_line(Tokens, M, []).

%%   Tokens - words that are not placed on a line yet
%%   M      - maximum number of characters per line
%%   Line   - words collected for the current line so far, in order
parse_line([], _M, Line) ->
    {Line, []};
parse_line([Element | Rest], M, []) ->
    %% The first word of a line is always accepted.
    parse_line(Rest, M, [Element]);
parse_line([Element | Rest] = Tokens, M, Line) ->
    %% Characters of the collected words, plus the new word, plus one space
    %% for each gap (there are length(Line) gaps once the word is added).
    NewL = len(Line) + length(Element) + length(Line),
    case NewL > M of
        true ->
            {toString(Line, M), Tokens};
        false ->
            parse_line(Rest, M, Line ++ [Element])
    end.
%% Justifies the words of Line to the width M, prints the result together
%% with its length as a trace message, and returns the justified string.
toString(Line, M) ->
    WordCount = length(Line),
    %% Spaces needed to stretch the bare words to M characters.
    Padding = M - len(Line),
    Justified = justify(Line, Padding, WordCount),
    io:format("Mess------Ret:~s length:~p~n", [Justified, length(Justified)]),
    Justified.
%% Fully justifies one line: the words of Line are joined so that exactly
%% Blank spaces are spread over the gaps between them, the leftmost gaps
%% receiving the surplus.
%%   Line  - words of the line, in order
%%   Blank - total number of spaces to distribute
%%   Len   - number of words in Line (kept for compatibility; the word count
%%           is taken from Line itself)
%% Every gap gets at least one space, so words are never glued together even
%% when Blank is smaller than the number of gaps. A single word is returned
%% unchanged and an empty Line yields "".
justify(Line, Blank, Len) -> justify(Line, Blank, Len, "").

%%   String - text already produced for this line ("" at the start)
justify([], _Blank, _Len, String) ->
    String;
justify([Word | Rest], Blank, _Len, "") ->
    %% Nothing precedes the first word, so no gap is opened before it.
    justify(Rest, Blank, length(Rest), Word);
justify([Word | Rest] = Line, Blank, _Len, String) ->
    %% Each remaining word is preceded by exactly one gap.
    Gaps = length(Line),
    %% Ceiling of Blank/Gaps, but never less than one space.
    Pad = max(1, (Blank + Gaps - 1) div Gaps),
    justify(Rest, Blank - Pad, Gaps - 1, string:concat(while(Pad, String), Word)).

%% Appends Count spaces to String; a Count of zero or less appends nothing.
while(Count, String) when is_integer(Count), Count > 0 ->
    String ++ lists:duplicate(Count, $\s);
while(Count, String) when is_integer(Count) ->
    String.
%% Counts the characters of all words in Line; the spaces that will later
%% separate the words are not included.
len(Line) -> len(Line, 0).

%% Same as len/1, starting the count at Len.
len(Line, Len) ->
    lists:foldl(fun(Word, Total) -> Total + length(Word) end, Len, Line).
看起來比較復雜,其實思路比較簡單:
1.將文本切分單詞列表;
2.設定一個閾值M,將每行的字符數設置為M,同時必須保證單詞不能分割;
我覺得此程序的難度主要是設定單詞與單詞之間的空格字符的問題,我的思路是:根據每行中所有單詞所占的字符個數不同,求出單詞之間空格的平均Avg個數,一般情況,單詞與單詞之間的字符個數為1,因此justify/4很好的處理了這個問題。
程序運行結(jié)果示意圖如下:
test.txt文件內容:
結果如下:
好了,文檔排版Erlang實戰就到此為止了。說個小問題,我在Erlang實戰:楊輝三角、選擇排序、集合交與并中的實戰2:選擇排序中不是用了兩次列表逆置來將一個元素加入到列表尾嗎,其實Erlang中模塊lists:append可以解決這個問題,我在本文代碼中多次用到了這個函數,如:NewLine = lists:append(Line,[Element]),意思就是將Element元素加入到列表尾,但是append函數的兩個參數必須都為列表,因此Element元素需以列表形式加入:[Element],可見世上存在這樣一條真理:只有你不知道的,沒有不可能發生的。
原文:http://www.cnblogs.com/itfreer/archive/2012/05/08/Erlang_in_practise_text-process.html
【編輯推薦】